Ifpack2 Templated Preconditioning Package Version 1.0
Loading...
Searching...
No Matches
Ifpack2_TriDiContainer_def.hpp
1/*@HEADER
2// ***********************************************************************
3//
4// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ***********************************************************************
40//@HEADER
41*/
42
43#ifndef IFPACK2_TRIDICONTAINER_DEF_HPP
44#define IFPACK2_TRIDICONTAINER_DEF_HPP
45
46#include "Teuchos_LAPACK.hpp"
47
48#ifdef HAVE_MPI
49# include <mpi.h>
50# include "Teuchos_DefaultMpiComm.hpp"
51#else
52# include "Teuchos_DefaultSerialComm.hpp"
53#endif // HAVE_MPI
54
55
56namespace Ifpack2 {
57
58template<class MatrixType, class LocalScalarType>
60TriDiContainer (const Teuchos::RCP<const row_matrix_type>& matrix,
61 const Teuchos::Array<Teuchos::Array<LO> >& partitions,
62 const Teuchos::RCP<const import_type>&,
63 bool pointIndexed) :
64 ContainerImpl<MatrixType, LocalScalarType>(matrix, partitions, pointIndexed),
65 ipiv_(this->blockRows_.size() * this->scalarsPerRow_),
66 scalarOffsets_ (this->numBlocks_)
67{
68 TEUCHOS_TEST_FOR_EXCEPTION(
69 ! matrix->hasColMap (), std::invalid_argument, "Ifpack2::TriDiContainer: "
70 "The constructor's input matrix must have a column Map.");
71 // FIXME (mfh 25 Aug 2013) What if the matrix's row Map has a
72 // different index base than zero?
73 //compute scalar array offsets (probably different from blockOffsets_)
74 LO scalarTotal = 0;
75 for(LO i = 0; i < this->numBlocks_; i++)
76 {
77 scalarOffsets_[i] = scalarTotal;
78 //actualBlockRows is how many scalars it takes to store the diagonal for block i
79 LO actualBlockRows = this->scalarsPerRow_ * this->blockSizes_[i];
80 if(actualBlockRows == 1)
81 {
82 scalarTotal += 1;
83 }
84 else
85 {
86 //this formula is exact for any block size of 1 or more
87 //includes 1 subdiagonal and 2 superdiagonals.
88 scalarTotal += 4 * (actualBlockRows - 1);
89 }
90 }
91 //Allocate scalar arrays
92 scalars_.resize(scalarTotal);
93 diagBlocks_.reserve(this->numBlocks_);
94 for(int i = 0; i < this->numBlocks_; i++)
95 {
96 diagBlocks_.emplace_back(Teuchos::View, scalars_.data() + scalarOffsets_[i], this->blockSizes_[i] * this->scalarsPerRow_);
97 }
98}
99
100template<class MatrixType, class LocalScalarType>
102~TriDiContainer () {}
103
104template<class MatrixType, class LocalScalarType>
106{
107 for(int i = 0; i < this->numBlocks_; i++)
108 diagBlocks_[i].putScalar(Teuchos::ScalarTraits<LSC>::zero());
109 this->IsInitialized_ = true;
110 // We assume that if you called this method, you intend to recompute
111 // everything.
112 this->IsComputed_ = false;
113}
114
115template<class MatrixType, class LocalScalarType>
117{
118 #ifdef HAVE_IFPACK2_DEBUG
119 TEUCHOS_TEST_FOR_EXCEPTION(
120 ipiv_.size () != this->blockRows_.size() * this->scalarsPerRow_, std::logic_error,
121 "Ifpack2::TriDiContainer::compute: ipiv_ array has the wrong size. "
122 "Please report this bug to the Ifpack2 developers.");
123 #endif
124
125 this->IsComputed_ = false;
126 if (! this->isInitialized ()) {
127 this->initialize ();
128 }
129 extract (); // extract the submatrices
130 factor (); // factor them
131 this->IsComputed_ = true;
132}
133
134template<class MatrixType, class LocalScalarType>
136{
137 using Teuchos::Array;
138 using Teuchos::ArrayView;
139 SC zero = Teuchos::ScalarTraits<SC>::zero();
140 const LO INVALID = Teuchos::OrdinalTraits<LO>::invalid();
141 //To extract diagonal blocks, need to translate local rows to local columns.
142 //Strategy: make a lookup table that translates local cols in the matrix to offsets in blockRows_:
143 //blockOffsets_[b] <= offset < blockOffsets_[b+1]: tests whether the column is in block b.
144 //offset - blockOffsets_[b]: gives the column within block b.
145 //
146 //This provides the block and col within a block in O(1).
147 if(this->scalarsPerRow_ > 1)
148 {
149 Array<LO> colToBlockOffset(this->inputBlockMatrix_->getLocalNumCols(), INVALID);
150 for(int i = 0; i < this->numBlocks_; i++)
151 {
152 //Get the interval where block i is defined in blockRows_
153 LO blockStart = this->blockOffsets_[i];
154 LO blockEnd = blockStart + this->blockSizes_[i];
155 ArrayView<const LO> blockRows = this->getBlockRows(i);
156 //Set the lookup table entries for the columns appearing in block i.
157 //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
158 //this is OK. The values updated here are only needed to process block i's entries.
159 for(size_t j = 0; j < size_t(blockRows.size()); j++)
160 {
161 LO localCol = this->translateRowToCol(blockRows[j]);
162 colToBlockOffset[localCol] = blockStart + j;
163 }
164 using h_inds_type = typename block_crs_matrix_type::local_inds_host_view_type;
165 using h_vals_type = typename block_crs_matrix_type::values_host_view_type;
166 for(LO blockRow = 0; blockRow < LO(blockRows.size()); blockRow++)
167 {
168 //get a raw view of the whole block row
169 h_inds_type indices;
170 h_vals_type values;
171 LO inputRow = this->blockRows_[blockStart + blockRow];
172 this->inputBlockMatrix_->getLocalRowView(inputRow, indices, values);
173 LO numEntries = (LO) indices.size();
174 for(LO k = 0; k < numEntries; k++)
175 {
176 LO colOffset = colToBlockOffset[indices[k]];
177 if(blockStart <= colOffset && colOffset < blockEnd)
178 {
179 //This entry does appear in the diagonal block.
180 //(br, bc) identifies the scalar's position in the BlockCrs block.
181 //Convert this to (r, c) which is its position in the container block.
182 LO blockCol = colOffset - blockStart;
183 for(LO bc = 0; bc < this->bcrsBlockSize_; bc++)
184 {
185 for(LO br = 0; br < this->bcrsBlockSize_; br++)
186 {
187 LO r = this->bcrsBlockSize_ * blockRow + br;
188 LO c = this->bcrsBlockSize_ * blockCol + bc;
189 auto val = values[k * (this->bcrsBlockSize_ * this->bcrsBlockSize_) + (br + this->bcrsBlockSize_ * bc)];
190 if(val != zero)
191 diagBlocks_[i](r, c) = val;
192 }
193 }
194 }
195 }
196 }
197 }
198 }
199 else
200 {
201 //get the mapping from point-indexed matrix columns to offsets in blockRows_
202 //(this includes regular CrsMatrix columns, in which case bcrsBlockSize_ == 1)
203 Array<LO> colToBlockOffset(this->inputMatrix_->getLocalNumCols() * this->bcrsBlockSize_, INVALID);
204 for(int i = 0; i < this->numBlocks_; i++)
205 {
206 //Get the interval where block i is defined in blockRows_
207 LO blockStart = this->blockOffsets_[i];
208 LO blockEnd = blockStart + this->blockSizes_[i];
209 ArrayView<const LO> blockRows = this->getBlockRows(i);
210 //Set the lookup table entries for the columns appearing in block i.
211 //If OverlapLevel_ > 0, then this may overwrite values for previous blocks, but
212 //this is OK. The values updated here are only needed to process block i's entries.
213 for(size_t j = 0; j < size_t(blockRows.size()); j++)
214 {
215 //translateRowToCol will return the corresponding split column
216 LO localCol = this->translateRowToCol(blockRows[j]);
217 colToBlockOffset[localCol] = blockStart + j;
218 }
219 for(size_t blockRow = 0; blockRow < size_t(blockRows.size()); blockRow++)
220 {
221 //get a view of the split row
222 LO inputPointRow = this->blockRows_[blockStart + blockRow];
223 auto rowView = this->getInputRowView(inputPointRow);
224 for(size_t k = 0; k < rowView.size(); k++)
225 {
226 LO colOffset = colToBlockOffset[rowView.ind(k)];
227 if(blockStart <= colOffset && colOffset < blockEnd)
228 {
229 LO blockCol = colOffset - blockStart;
230 auto val = rowView.val(k);
231 if(val != zero)
232 diagBlocks_[i](blockRow, blockCol) = rowView.val(k);
233 }
234 }
235 }
236 }
237 }
238}
239
240template<class MatrixType, class LocalScalarType>
241void TriDiContainer<MatrixType, LocalScalarType>::clearBlocks ()
242{
243 diagBlocks_.clear();
244 scalars_.clear();
245 ContainerImpl<MatrixType, LocalScalarType>::clearBlocks();
246}
247
248template<class MatrixType, class LocalScalarType>
249void TriDiContainer<MatrixType, LocalScalarType>::factor ()
250{
251 for(int i = 0; i < this->numBlocks_; i++)
252 {
253 Teuchos::LAPACK<int, LSC> lapack;
254 int INFO = 0;
255 int* blockIpiv = &ipiv_[this->blockOffsets_[i] * this->scalarsPerRow_];
256 lapack.GTTRF (diagBlocks_[i].numRowsCols (),
257 diagBlocks_[i].DL(),
258 diagBlocks_[i].D(),
259 diagBlocks_[i].DU(),
260 diagBlocks_[i].DU2(),
261 blockIpiv, &INFO);
262 // INFO < 0 is a bug.
263 TEUCHOS_TEST_FOR_EXCEPTION(
264 INFO < 0, std::logic_error, "Ifpack2::TriDiContainer::factor: "
265 "LAPACK's _GTTRF (LU factorization with partial pivoting) was called "
266 "incorrectly. INFO = " << INFO << " < 0. "
267 "Please report this bug to the Ifpack2 developers.");
268 // INFO > 0 means the matrix is singular. This is probably an issue
269 // either with the choice of rows the rows we extracted, or with the
270 // input matrix itself.
271 TEUCHOS_TEST_FOR_EXCEPTION(
272 INFO > 0, std::runtime_error, "Ifpack2::TriDiContainer::factor: "
273 "LAPACK's _GTTRF (LU factorization with partial pivoting) reports that the "
274 "computed U factor is exactly singular. U(" << INFO << "," << INFO << ") "
275 "(one-based index i) is exactly zero. This probably means that the input "
276 "matrix has a singular diagonal block.");
277 }
278}
279
280template<class MatrixType, class LocalScalarType>
282solveBlock(ConstHostSubviewLocal X,
283 HostSubviewLocal Y,
284 int blockIndex,
285 Teuchos::ETransp mode,
286 LSC alpha,
287 LSC beta) const
288{
289 LSC zero = Teuchos::ScalarTraits<LSC>::zero();
290 size_t numVecs = X.extent(1);
291 size_t numRows = X.extent(0);
292
293 #ifdef HAVE_IFPACK2_DEBUG
294 TEUCHOS_TEST_FOR_EXCEPTION(
295 X.extent (0) != Y.extent (0),
296 std::logic_error, "Ifpack2::TriDiContainer::solveBlock: X and Y have "
297 "incompatible dimensions (" << X.extent (0) << " resp. "
298 << Y.extent (0) << "). Please report this bug to "
299 "the Ifpack2 developers.");
300 TEUCHOS_TEST_FOR_EXCEPTION(
301 X.extent (0) != static_cast<size_t> (diagBlocks_[blockIndex].numRowsCols()),
302 std::logic_error, "Ifpack2::TriDiContainer::solveBlock: The input "
303 "multivector X has incompatible dimensions from those of the "
304 "inverse operator (" << X.extent (0) << " vs. "
305 << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols () : diagBlocks_[blockIndex].numRowsCols())
306 << "). Please report this bug to the Ifpack2 developers.");
307 TEUCHOS_TEST_FOR_EXCEPTION(
308 Y.extent (0) != static_cast<size_t> (diagBlocks_[blockIndex].numRowsCols()),
309 std::logic_error, "Ifpack2::TriDiContainer::solveBlock: The output "
310 "multivector Y has incompatible dimensions from those of the "
311 "inverse operator (" << Y.extent (0) << " vs. "
312 << (mode == Teuchos::NO_TRANS ? diagBlocks_[blockIndex].numRowsCols() : diagBlocks_[blockIndex].numRowsCols ())
313 << "). Please report this bug to the Ifpack2 developers.");
314 #endif
315
316 if(alpha == zero) { // don't need to solve the linear system
317 if(beta == zero) {
318 // Use BLAS AXPY semantics for beta == 0: overwrite, clobbering
319 // any Inf or NaN values in Y (rather than multiplying them by
320 // zero, resulting in NaN values).
321 for(size_t j = 0; j < numVecs; j++)
322 for(size_t i = 0; i < numRows; i++)
323 Y(i, j) = zero;
324 }
325 else { // beta != 0
326 for(size_t j = 0; j < numVecs; j++)
327 for(size_t i = 0; i < numRows; i++)
328 Y(i, j) *= ISC(beta);
329 }
330 }
331 else { // alpha != 0; must solve the linear system
332 Teuchos::LAPACK<int, LSC> lapack;
333 // If beta is nonzero or Y is not constant stride, we have to use
334 // a temporary output multivector. It gets a copy of X, since
335 // GETRS overwrites its (multi)vector input with its output.
336
337 std::vector<LSC> yTemp(numVecs * numRows);
338 for(size_t j = 0; j < numVecs; j++)
339 {
340 for(size_t i = 0; i < numRows; i++)
341 yTemp[j * numRows + i] = X(i, j);
342 }
343
344 int INFO = 0;
345 const char trans =
346 (mode == Teuchos::CONJ_TRANS ? 'C' : (mode == Teuchos::TRANS ? 'T' : 'N'));
347 int* blockIpiv = &ipiv_[this->blockOffsets_[blockIndex] * this->scalarsPerRow_];
348 lapack.GTTRS (trans,
349 diagBlocks_[blockIndex].numRowsCols(),
350 numVecs,
351 diagBlocks_[blockIndex].DL(),
352 diagBlocks_[blockIndex].D(),
353 diagBlocks_[blockIndex].DU(),
354 diagBlocks_[blockIndex].DU2(),
355 blockIpiv,
356 yTemp.data(),
357 numRows,
358 &INFO);
359
360 if (beta != zero) {
361 for(size_t j = 0; j < numVecs; j++)
362 {
363 for(size_t i = 0; i < numRows; i++)
364 {
365 Y(i, j) *= ISC(beta);
366 Y(i, j) += ISC(alpha * yTemp[j * numRows + i]);
367 }
368 }
369 }
370 else {
371 for(size_t j = 0; j < numVecs; j++)
372 {
373 for(size_t i = 0; i < numRows; i++)
374 Y(i, j) = ISC(yTemp[j * numRows + i]);
375 }
376 }
377
378 TEUCHOS_TEST_FOR_EXCEPTION(
379 INFO != 0, std::runtime_error, "Ifpack2::TriDiContainer::solveBlock: "
380 "LAPACK's _GETRS (solve using LU factorization with partial pivoting) "
381 "failed with INFO = " << INFO << " != 0.");
382 }
383}
384
385template<class MatrixType, class LocalScalarType>
386std::ostream& TriDiContainer<MatrixType, LocalScalarType>::print(std::ostream& os) const
387{
388 Teuchos::FancyOStream fos(Teuchos::rcp(&os,false));
389 fos.setOutputToRootOnly(0);
390 describe(fos);
391 return(os);
392}
393
394template<class MatrixType, class LocalScalarType>
396{
397 std::ostringstream oss;
398 oss << Teuchos::Describable::description();
399 if (this->isInitialized()) {
400 if (this->isComputed()) {
401 oss << "{status = initialized, computed";
402 }
403 else {
404 oss << "{status = initialized, not computed";
405 }
406 }
407 else {
408 oss << "{status = not initialized, not computed";
409 }
410
411 oss << "}";
412 return oss.str();
413}
414
415template<class MatrixType, class LocalScalarType>
416void
418describe (Teuchos::FancyOStream& os,
419 const Teuchos::EVerbosityLevel verbLevel) const
420{
421 using std::endl;
422 if(verbLevel==Teuchos::VERB_NONE) return;
423 os << "================================================================================" << endl;
424 os << "Ifpack2::TriDiContainer" << endl;
425 os << "Number of blocks = " << this->numBlocks_ << endl;
426 os << "isInitialized() = " << this->IsInitialized_ << endl;
427 os << "isComputed() = " << this->IsComputed_ << endl;
428 os << "================================================================================" << endl;
429 os << endl;
430}
431
432template<class MatrixType, class LocalScalarType>
434{
435 return "TriDi";
436}
437
438#define IFPACK2_TRIDICONTAINER_INSTANT(S,LO,GO,N) \
439 template class Ifpack2::TriDiContainer< Tpetra::RowMatrix<S, LO, GO, N>, S >;
440
441} // namespace Ifpack2
442
443#endif
The implementation of the numerical features of Container (Jacobi, Gauss-Seidel, SGS)....
Definition: Ifpack2_Container_decl.hpp:344
int numBlocks_
The number of blocks (partitions) in the container.
Definition: Ifpack2_Container_decl.hpp:292
typename Kokkos::Details::ArithTraits< SC >::val_type ISC
Internal representation of Scalar in Kokkos::View.
Definition: Ifpack2_Container_decl.hpp:135
LO scalarsPerRow_
Definition: Ifpack2_Container_decl.hpp:317
Teuchos::Array< LO > blockSizes_
Number of rows in each block.
Definition: Ifpack2_Container_decl.hpp:296
Store and solve a local TriDi linear problem.
Definition: Ifpack2_TriDiContainer_decl.hpp:107
virtual std::string description() const
A one-line description of this object.
Definition: Ifpack2_TriDiContainer_def.hpp:395
virtual void initialize()
Do all set-up operations that only require matrix structure.
Definition: Ifpack2_TriDiContainer_def.hpp:105
virtual void compute()
Extract the local diagonal block and prepare the solver.
Definition: Ifpack2_TriDiContainer_def.hpp:116
virtual void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object with some verbosity level to the given FancyOStream.
Definition: Ifpack2_TriDiContainer_def.hpp:418
virtual ~TriDiContainer()
Destructor (declared virtual for memory safety of derived classes).
Definition: Ifpack2_TriDiContainer_def.hpp:102
virtual std::ostream & print(std::ostream &os) const
Print information about this object to the given output stream.
Definition: Ifpack2_TriDiContainer_def.hpp:386
static std::string getName()
Get the name of this container type for Details::constructContainer()
Definition: Ifpack2_TriDiContainer_def.hpp:433
TriDiContainer(const Teuchos::RCP< const row_matrix_type > &matrix, const Teuchos::Array< Teuchos::Array< LO > > &partitions, const Teuchos::RCP< const import_type > &importer, bool pointIndexed)
Constructor.
Definition: Ifpack2_TriDiContainer_def.hpp:60
void solveBlock(ConstHostSubviewLocal X, HostSubviewLocal Y, int blockIndex, Teuchos::ETransp mode, LSC alpha, LSC beta) const
Definition: Ifpack2_TriDiContainer_def.hpp:282
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:74