Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_Relaxation_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Tempated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_RELAXATION_DEF_HPP
44 #define IFPACK2_RELAXATION_DEF_HPP
45 
46 #include "Teuchos_StandardParameterEntryValidators.hpp"
47 #include "Teuchos_TimeMonitor.hpp"
48 #include "Tpetra_CrsMatrix.hpp"
49 #include "Tpetra_Experimental_BlockCrsMatrix.hpp"
50 #include "Ifpack2_Utilities.hpp"
51 #include "Ifpack2_Relaxation_decl.hpp"
52 
53 #ifdef HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES
54 # include "KokkosKernels_GaussSeidel.hpp"
55 #endif // HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES
56 
57 #ifdef HAVE_IFPACK2_DUMP_MTX_MATRIX
58 # include "MatrixMarket_Tpetra.hpp"
59 #endif
60 
61 // mfh 28 Mar 2013: Uncomment out these three lines to compute
62 // statistics on diagonal entries in compute().
63 // #ifndef IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS
64 // # define IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS 1
65 // #endif // IFPACK2_RELAXATION_COMPUTE_DIAGONAL_STATS
66 
67 namespace {
68  // Validate that a given int is nonnegative.
69  class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
70  public:
71  // Constructor (does nothing).
72  NonnegativeIntValidator () {}
73 
74  // ParameterEntryValidator wants this method.
75  Teuchos::ParameterEntryValidator::ValidStringsList validStringValues () const {
76  return Teuchos::null;
77  }
78 
79  // Actually validate the parameter's value.
80  void
81  validate (const Teuchos::ParameterEntry& entry,
82  const std::string& paramName,
83  const std::string& sublistName) const
84  {
85  using std::endl;
86  Teuchos::any anyVal = entry.getAny (true);
87  const std::string entryName = entry.getAny (false).typeName ();
88 
89  TEUCHOS_TEST_FOR_EXCEPTION(
90  anyVal.type () != typeid (int),
91  Teuchos::Exceptions::InvalidParameterType,
92  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
93  << "\" has the wrong type." << endl << "Parameter: " << paramName
94  << endl << "Type specified: " << entryName << endl
95  << "Type required: int" << endl);
96 
97  const int val = Teuchos::any_cast<int> (anyVal);
98  TEUCHOS_TEST_FOR_EXCEPTION(
99  val < 0, Teuchos::Exceptions::InvalidParameterValue,
100  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
101  << "\" is negative." << endl << "Parameter: " << paramName
102  << endl << "Value specified: " << val << endl
103  << "Required range: [0, INT_MAX]" << endl);
104  }
105 
106  // ParameterEntryValidator wants this method.
107  const std::string getXMLTypeName () const {
108  return "NonnegativeIntValidator";
109  }
110 
111  // ParameterEntryValidator wants this method.
112  void
113  printDoc (const std::string& docString,
114  std::ostream &out) const
115  {
116  Teuchos::StrUtils::printLines (out, "# ", docString);
117  out << "#\tValidator Used: " << std::endl;
118  out << "#\t\tNonnegativeIntValidator" << std::endl;
119  }
120  };
121 
122  // A way to get a small positive number (eps() for floating-point
123  // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
124  // define it (for example, for integer values).
125  template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
126  class SmallTraits {
127  public:
128  // Return eps if Scalar is a floating-point type, else return 1.
129  static const Scalar eps ();
130  };
131 
132  // Partial specialization for when Scalar is not a floating-point type.
133  template<class Scalar>
134  class SmallTraits<Scalar, true> {
135  public:
136  static const Scalar eps () {
137  return Teuchos::ScalarTraits<Scalar>::one ();
138  }
139  };
140 
141  // Partial specialization for when Scalar is a floating-point type.
142  template<class Scalar>
143  class SmallTraits<Scalar, false> {
144  public:
145  static const Scalar eps () {
146  return Teuchos::ScalarTraits<Scalar>::eps ();
147  }
148  };
149 } // namespace (anonymous)
150 
151 namespace Ifpack2 {
152 
153 template<class MatrixType>
155 setMatrix (const Teuchos::RCP<const row_matrix_type>& A)
156 {
157  if (A.getRawPtr () != A_.getRawPtr ()) { // it's a different matrix
158  Importer_ = Teuchos::null;
159  Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
160  isInitialized_ = false;
161  IsComputed_ = false;
162  diagOffsets_ = Kokkos::View<size_t*, typename node_type::device_type> ();
163  savedDiagOffsets_ = false;
164  hasBlockCrsMatrix_ = false;
165  if (! A.is_null ()) {
166  IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
167  }
168  A_ = A;
169  }
170 }
171 
172 
173 template<class MatrixType>
175 Relaxation (const Teuchos::RCP<const row_matrix_type>& A)
176 : A_ (A),
177  Time_ (Teuchos::rcp (new Teuchos::Time ("Ifpack2::Relaxation"))),
178  NumSweeps_ (1),
179  PrecType_ (Ifpack2::Details::JACOBI),
180  DampingFactor_ (STS::one ()),
181  IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ?
182  false : // a reasonable default if there's no communicator
183  A->getRowMap ()->getComm ()->getSize () > 1),
184  ZeroStartingSolution_ (true),
185  DoBackwardGS_ (false),
186  DoL1Method_ (false),
187  L1Eta_ (Teuchos::as<magnitude_type> (1.5)),
188  MinDiagonalValue_ (STS::zero ()),
189  fixTinyDiagEntries_ (false),
190  checkDiagEntries_ (false),
191  isInitialized_ (false),
192  IsComputed_ (false),
193  NumInitialize_ (0),
194  NumCompute_ (0),
195  NumApply_ (0),
196  InitializeTime_ (0.0), // Times are double anyway, so no need for ScalarTraits.
197  ComputeTime_ (0.0),
198  ApplyTime_ (0.0),
199  ComputeFlops_ (0.0),
200  ApplyFlops_ (0.0),
201  globalMinMagDiagEntryMag_ (STM::zero ()),
202  globalMaxMagDiagEntryMag_ (STM::zero ()),
203  globalNumSmallDiagEntries_ (0),
204  globalNumZeroDiagEntries_ (0),
205  globalNumNegDiagEntries_ (0),
206  globalDiagNormDiff_(Teuchos::ScalarTraits<magnitude_type>::zero()),
207  savedDiagOffsets_ (false),
208  hasBlockCrsMatrix_ (false)
209 {
210  this->setObjectLabel ("Ifpack2::Relaxation");
211 }
212 
213 //==========================================================================
214 template<class MatrixType>
216 }
217 
218 template<class MatrixType>
219 Teuchos::RCP<const Teuchos::ParameterList>
221 {
222  using Teuchos::Array;
223  using Teuchos::ParameterList;
224  using Teuchos::parameterList;
225  using Teuchos::RCP;
226  using Teuchos::rcp;
227  using Teuchos::rcp_const_cast;
228  using Teuchos::rcp_implicit_cast;
229  using Teuchos::setStringToIntegralParameter;
230  typedef Teuchos::ParameterEntryValidator PEV;
231 
232  if (validParams_.is_null ()) {
233  RCP<ParameterList> pl = parameterList ("Ifpack2::Relaxation");
234 
235  // Set a validator that automatically converts from the valid
236  // string options to their enum values.
237  Array<std::string> precTypes (5);
238  precTypes[0] = "Jacobi";
239  precTypes[1] = "Gauss-Seidel";
240  precTypes[2] = "Symmetric Gauss-Seidel";
241  precTypes[3] = "MT Gauss-Seidel";
242  precTypes[4] = "MT Symmetric Gauss-Seidel";
243  Array<Details::RelaxationType> precTypeEnums (5);
244  precTypeEnums[0] = Details::JACOBI;
245  precTypeEnums[1] = Details::GS;
246  precTypeEnums[2] = Details::SGS;
247  precTypeEnums[3] = Details::MTGS;
248  precTypeEnums[4] = Details::MTSGS;
249  const std::string defaultPrecType ("Jacobi");
250  setStringToIntegralParameter<Details::RelaxationType> ("relaxation: type",
251  defaultPrecType, "Relaxation method", precTypes (), precTypeEnums (),
252  pl.getRawPtr ());
253 
254  const int numSweeps = 1;
255  RCP<PEV> numSweepsValidator =
256  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
257  pl->set ("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
258  rcp_const_cast<const PEV> (numSweepsValidator));
259 
260  const scalar_type dampingFactor = STS::one ();
261  pl->set ("relaxation: damping factor", dampingFactor);
262 
263  const bool zeroStartingSolution = true;
264  pl->set ("relaxation: zero starting solution", zeroStartingSolution);
265 
266  const bool doBackwardGS = false;
267  pl->set ("relaxation: backward mode", doBackwardGS);
268 
269  const bool doL1Method = false;
270  pl->set ("relaxation: use l1", doL1Method);
271 
272  const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
273  (STM::one() + STM::one()); // 1.5
274  pl->set ("relaxation: l1 eta", l1eta);
275 
276  const scalar_type minDiagonalValue = STS::zero ();
277  pl->set ("relaxation: min diagonal value", minDiagonalValue);
278 
279  const bool fixTinyDiagEntries = false;
280  pl->set ("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
281 
282  const bool checkDiagEntries = false;
283  pl->set ("relaxation: check diagonal entries", checkDiagEntries);
284 
285  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
286  pl->set("relaxation: local smoothing indices", localSmoothingIndices);
287 
288  validParams_ = rcp_const_cast<const ParameterList> (pl);
289  }
290  return validParams_;
291 }
292 
293 
294 template<class MatrixType>
295 void Relaxation<MatrixType>::setParametersImpl (Teuchos::ParameterList& pl)
296 {
297  using Teuchos::getIntegralValue;
298  using Teuchos::ParameterList;
299  using Teuchos::RCP;
300  typedef scalar_type ST; // just to make code below shorter
301 
302  pl.validateParametersAndSetDefaults (* getValidParameters ());
303 
304  const Details::RelaxationType precType =
305  getIntegralValue<Details::RelaxationType> (pl, "relaxation: type");
306  const int numSweeps = pl.get<int> ("relaxation: sweeps");
307  const ST dampingFactor = pl.get<ST> ("relaxation: damping factor");
308  const bool zeroStartSol = pl.get<bool> ("relaxation: zero starting solution");
309  const bool doBackwardGS = pl.get<bool> ("relaxation: backward mode");
310  const bool doL1Method = pl.get<bool> ("relaxation: use l1");
311  const magnitude_type l1Eta = pl.get<magnitude_type> ("relaxation: l1 eta");
312  const ST minDiagonalValue = pl.get<ST> ("relaxation: min diagonal value");
313  const bool fixTinyDiagEntries = pl.get<bool> ("relaxation: fix tiny diagonal entries");
314  const bool checkDiagEntries = pl.get<bool> ("relaxation: check diagonal entries");
315  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >("relaxation: local smoothing indices");
316 
317 
318  // "Commit" the changes, now that we've validated everything.
319  PrecType_ = precType;
320  NumSweeps_ = numSweeps;
321  DampingFactor_ = dampingFactor;
322  ZeroStartingSolution_ = zeroStartSol;
323  DoBackwardGS_ = doBackwardGS;
324  DoL1Method_ = doL1Method;
325  L1Eta_ = l1Eta;
326  MinDiagonalValue_ = minDiagonalValue;
327  fixTinyDiagEntries_ = fixTinyDiagEntries;
328  checkDiagEntries_ = checkDiagEntries;
329  localSmoothingIndices_ = localSmoothingIndices;
330 }
331 
332 
333 template<class MatrixType>
334 void Relaxation<MatrixType>::setParameters (const Teuchos::ParameterList& pl)
335 {
336  // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
337  // but otherwise, we will get [unused] in pl
338  this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
339 }
340 
341 
342 template<class MatrixType>
343 Teuchos::RCP<const Teuchos::Comm<int> >
345  TEUCHOS_TEST_FOR_EXCEPTION(
346  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getComm: "
347  "The input matrix A is null. Please call setMatrix() with a nonnull "
348  "input matrix before calling this method.");
349  return A_->getRowMap ()->getComm ();
350 }
351 
352 
353 template<class MatrixType>
354 Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
356  return A_;
357 }
358 
359 
360 template<class MatrixType>
361 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
362  typename MatrixType::global_ordinal_type,
363  typename MatrixType::node_type> >
365  TEUCHOS_TEST_FOR_EXCEPTION(
366  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getDomainMap: "
367  "The input matrix A is null. Please call setMatrix() with a nonnull "
368  "input matrix before calling this method.");
369  return A_->getDomainMap ();
370 }
371 
372 
373 template<class MatrixType>
374 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
375  typename MatrixType::global_ordinal_type,
376  typename MatrixType::node_type> >
378  TEUCHOS_TEST_FOR_EXCEPTION(
379  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getRangeMap: "
380  "The input matrix A is null. Please call setMatrix() with a nonnull "
381  "input matrix before calling this method.");
382  return A_->getRangeMap ();
383 }
384 
385 
386 template<class MatrixType>
388  return true;
389 }
390 
391 
392 template<class MatrixType>
394  return(NumInitialize_);
395 }
396 
397 
398 template<class MatrixType>
400  return(NumCompute_);
401 }
402 
403 
404 template<class MatrixType>
406  return(NumApply_);
407 }
408 
409 
410 template<class MatrixType>
412  return(InitializeTime_);
413 }
414 
415 
416 template<class MatrixType>
418  return(ComputeTime_);
419 }
420 
421 
422 template<class MatrixType>
424  return(ApplyTime_);
425 }
426 
427 
428 template<class MatrixType>
430  return(ComputeFlops_);
431 }
432 
433 
434 template<class MatrixType>
436  return(ApplyFlops_);
437 }
438 
439 
440 template<class MatrixType>
441 void
443 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
444  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
445  Teuchos::ETransp mode,
446  scalar_type alpha,
447  scalar_type beta) const
448 {
449  using Teuchos::as;
450  using Teuchos::RCP;
451  using Teuchos::rcp;
452  using Teuchos::rcpFromRef;
453  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
455  TEUCHOS_TEST_FOR_EXCEPTION(
456  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::apply: "
457  "The input matrix A is null. Please call setMatrix() with a nonnull "
458  "input matrix, then call compute(), before calling this method.");
459  TEUCHOS_TEST_FOR_EXCEPTION(
460  ! isComputed (),
461  std::runtime_error,
462  "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
463  "preconditioner instance before you may call apply(). You may call "
464  "isComputed() to find out if compute() has been called already.");
465  TEUCHOS_TEST_FOR_EXCEPTION(
466  X.getNumVectors() != Y.getNumVectors(),
467  std::runtime_error,
468  "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
469  "X has " << X.getNumVectors() << " columns, but Y has "
470  << Y.getNumVectors() << " columns.");
471  TEUCHOS_TEST_FOR_EXCEPTION(
472  beta != STS::zero (), std::logic_error,
473  "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
474  "implemented.");
475  {
476  // Reset the timer each time, since Relaxation uses the same Time
477  // object to track times for different methods.
478  Teuchos::TimeMonitor timeMon (*Time_, true);
479 
480  // Special case: alpha == 0.
481  if (alpha == STS::zero ()) {
482  // No floating-point operations, so no need to update a count.
483  Y.putScalar (STS::zero ());
484  }
485  else {
486  // If X and Y alias one another, then we need to create an
487  // auxiliary vector, Xcopy (a deep copy of X).
488  RCP<const MV> Xcopy;
489  // FIXME (mfh 12 Sep 2014) This test for aliasing is incomplete.
490  {
491  auto X_lcl_host = X.template getLocalView<Kokkos::HostSpace> ();
492  auto Y_lcl_host = Y.template getLocalView<Kokkos::HostSpace> ();
493  if (X_lcl_host.ptr_on_device () == Y_lcl_host.ptr_on_device ()) {
494  Xcopy = rcp (new MV (X, Teuchos::Copy));
495  } else {
496  Xcopy = rcpFromRef (X);
497  }
498  }
499 
500  // Each of the following methods updates the flop count itself.
501  // All implementations handle zeroing out the starting solution
502  // (if necessary) themselves.
503  switch (PrecType_) {
504  case Ifpack2::Details::JACOBI:
505  ApplyInverseJacobi(*Xcopy,Y);
506  break;
507  case Ifpack2::Details::GS:
508  ApplyInverseGS(*Xcopy,Y);
509  break;
510  case Ifpack2::Details::SGS:
511  ApplyInverseSGS(*Xcopy,Y);
512  break;
513  case Ifpack2::Details::MTSGS:
514  ApplyInverseMTSGS_CrsMatrix(*Xcopy,Y);
515  break;
516  case Ifpack2::Details::MTGS:
517  ApplyInverseMTGS_CrsMatrix(*Xcopy,Y);
518  break;
519 
520  default:
521  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
522  "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
523  << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
524  }
525  if (alpha != STS::one ()) {
526  Y.scale (alpha);
527  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
528  const double numVectors = as<double> (Y.getNumVectors ());
529  ApplyFlops_ += numGlobalRows * numVectors;
530  }
531  }
532  }
533  ApplyTime_ += Time_->totalElapsedTime ();
534  ++NumApply_;
535 }
536 
537 
538 template<class MatrixType>
539 void
541 applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
542  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
543  Teuchos::ETransp mode) const
544 {
545  TEUCHOS_TEST_FOR_EXCEPTION(
546  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
547  "The input matrix A is null. Please call setMatrix() with a nonnull "
548  "input matrix, then call compute(), before calling this method.");
549  TEUCHOS_TEST_FOR_EXCEPTION(
550  ! isComputed (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
551  "isComputed() must be true before you may call applyMat(). "
552  "Please call compute() before calling this method.");
553  TEUCHOS_TEST_FOR_EXCEPTION(
554  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
555  "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
556  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
557  A_->apply (X, Y, mode);
558 }
559 
560 
561 template<class MatrixType>
563 {
564 
565  TEUCHOS_TEST_FOR_EXCEPTION(
566  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::initialize: "
567  "The input matrix A is null. Please call setMatrix() with a nonnull "
568  "input matrix before calling this method.");
569  Teuchos::TimeMonitor timeMon (*Time_, true);
570 
571 
572  if (A_.is_null ()) {
573  hasBlockCrsMatrix_ = false;
574  }
575  else { // A_ is not null
576  Teuchos::RCP<const block_crs_matrix_type> A_bcrs =
577  Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
578  if (A_bcrs.is_null ()) {
579  hasBlockCrsMatrix_ = false;
580  }
581  else { // A_ is a block_crs_matrix_type
582  hasBlockCrsMatrix_ = true;
583  }
584  }
585 
586 
587 #ifdef HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES
588  //KokkosKernels GaussSiedel Initialization.
589  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
590  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
591  TEUCHOS_TEST_FOR_EXCEPTION(
592  crsMat == NULL, std::runtime_error, "Ifpack2::Relaxation::compute: "
593  "MT methods works for CRSMatrix Only.");
594 
595 #ifdef HAVE_IFPACK2_DUMP_MTX_MATRIX
596  Tpetra::MatrixMarket::Writer<crs_matrix_type> crs_writer;
597  std::string file_name = "Ifpack2_MT_GS.mtx";
598  Teuchos::RCP<const crs_matrix_type> rcp_crs_mat = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
599  crs_writer.writeSparseFile(file_name, rcp_crs_mat);
600 #endif
601 
602  this->kh = Teuchos::rcp(new KernelHandle());
603  if (kh->get_gs_handle() == NULL){
604  kh->create_gs_handle();
605  }
606  kokkos_csr_matrix kcsr = crsMat->getLocalMatrix ();
607 
608  bool is_symmetric = false;
609  if (PrecType_ == Ifpack2::Details::MTSGS){
610  is_symmetric = true;
611  }
612  KokkosKernels::Experimental::Graph::gauss_seidel_symbolic
613  <KernelHandle, lno_row_view_t, lno_nonzero_view_t>
614  (kh.getRawPtr(), A_->getNodeNumRows(),
615  A_->getNodeNumCols(),
616  kcsr.graph.row_map,
617  kcsr.graph.entries,
618  is_symmetric);
619 
620  }
621 
622 #endif
623  // Initialization for Relaxation is trivial, so we say it takes zero time.
624  InitializeTime_ += Time_->totalElapsedTime ();
625  ++NumInitialize_;
626  isInitialized_ = true;
627 
628 }
629 
630 namespace Impl {
631 template <typename BlockDiagView>
632 struct InvertDiagBlocks {
633  typedef int value_type;
634  typedef typename BlockDiagView::size_type Size;
635 
636 private:
637  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
638  template <typename View>
639  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
640  typename View::device_type, Unmanaged>;
641 
642  typedef typename BlockDiagView::non_const_value_type Scalar;
643  typedef typename BlockDiagView::device_type Device;
644  typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
645  typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
646 
647  UnmanagedView<BlockDiagView> block_diag_;
648  // TODO Use thread team and scratch memory space. In this first
649  // pass, provide workspace for each block.
650  RWrk rwrk_buf_;
651  UnmanagedView<RWrk> rwrk_;
652  IWrk iwrk_buf_;
653  UnmanagedView<IWrk> iwrk_;
654 
655 public:
656  InvertDiagBlocks (BlockDiagView& block_diag)
657  : block_diag_(block_diag)
658  {
659  const auto blksz = block_diag.dimension_1();
660  Kokkos::resize(rwrk_buf_, block_diag_.dimension_0(), blksz);
661  rwrk_ = rwrk_buf_;
662  Kokkos::resize(iwrk_buf_, block_diag_.dimension_0(), blksz);
663  iwrk_ = iwrk_buf_;
664  }
665 
666  KOKKOS_INLINE_FUNCTION
667  void operator() (const Size i, int& jinfo) const {
668  auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
669  auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
670  auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
671  int info = 0;
672  Tpetra::Experimental::GETF2(D_cur, ipiv, info);
673  if (info) {
674  ++jinfo;
675  return;
676  }
677  Tpetra::Experimental::GETRI(D_cur, ipiv, work, info);
678  if (info) ++jinfo;
679  }
680 
681  // Report the number of blocks with errors.
682  KOKKOS_INLINE_FUNCTION
683  void join (volatile value_type& dst, volatile value_type const& src) const { dst += src; }
684 };
685 }
686 
687 template<class MatrixType>
688 void Relaxation<MatrixType>::computeBlockCrs ()
689 {
690  using Kokkos::ALL;
691  using Teuchos::Array;
692  using Teuchos::ArrayRCP;
693  using Teuchos::ArrayView;
694  using Teuchos::as;
695  using Teuchos::Comm;
696  using Teuchos::RCP;
697  using Teuchos::rcp;
698  using Teuchos::REDUCE_MAX;
699  using Teuchos::REDUCE_MIN;
700  using Teuchos::REDUCE_SUM;
701  using Teuchos::rcp_dynamic_cast;
702  using Teuchos::reduceAll;
703  typedef local_ordinal_type LO;
704  typedef typename node_type::device_type device_type;
705 
706  {
707  // Reset the timer each time, since Relaxation uses the same Time
708  // object to track times for different methods.
709  Teuchos::TimeMonitor timeMon (*Time_, true);
710 
711  TEUCHOS_TEST_FOR_EXCEPTION(
712  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::"
713  "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
714  "with a nonnull input matrix, then call initialize(), before calling "
715  "this method.");
716  const block_crs_matrix_type* blockCrsA =
717  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
718  TEUCHOS_TEST_FOR_EXCEPTION(
719  blockCrsA == NULL, std::logic_error, "Ifpack2::Relaxation::"
720  "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
721  "got this far. Please report this bug to the Ifpack2 developers.");
722 
723  const scalar_type one = STS::one ();
724 
725  // Reset state.
726  IsComputed_ = false;
727 
728  const LO lclNumMeshRows =
729  blockCrsA->getCrsGraph ().getNodeNumRows ();
730  const LO blockSize = blockCrsA->getBlockSize ();
731 
732  if (! savedDiagOffsets_) {
733  blockDiag_ = block_diag_type (); // clear it before reallocating
734  blockDiag_ = block_diag_type ("Ifpack2::Relaxation::blockDiag_",
735  lclNumMeshRows, blockSize, blockSize);
736  if (Teuchos::as<LO>(diagOffsets_.dimension_0 () ) < lclNumMeshRows) {
737  // Clear diagOffsets_ first (by assigning an empty View to it)
738  // to save memory, before reallocating.
739  diagOffsets_ = Kokkos::View<size_t*, device_type> ();
740  diagOffsets_ = Kokkos::View<size_t*, device_type> ("offsets", lclNumMeshRows);
741  }
742  blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
743  TEUCHOS_TEST_FOR_EXCEPTION
744  (static_cast<size_t> (diagOffsets_.dimension_0 ()) !=
745  static_cast<size_t> (blockDiag_.dimension_0 ()),
746  std::logic_error, "diagOffsets_.dimension_0() = " <<
747  diagOffsets_.dimension_0 () << " != blockDiag_.dimension_0() = "
748  << blockDiag_.dimension_0 () <<
749  ". Please report this bug to the Ifpack2 developers.");
750  savedDiagOffsets_ = true;
751  }
752  blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
753 
754  // Use an unmanaged View in this method, so that when we take
755  // subviews of it (to get each diagonal block), we don't have to
756  // touch the reference count. Reference count updates are a
757  // thread scalability bottleneck and have a performance cost even
758  // without using threads.
759  unmanaged_block_diag_type blockDiag = blockDiag_;
760 
761  if (DoL1Method_ && IsParallel_) {
762  const scalar_type two = one + one;
763  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
764  Array<LO> indices (maxLength);
765  Array<scalar_type> values (maxLength * blockSize * blockSize);
766  size_t numEntries = 0;
767 
768  for (LO i = 0; i < lclNumMeshRows; ++i) {
769  // FIXME (mfh 16 Dec 2015) Get views instead of copies.
770  blockCrsA->getLocalRowCopy (i, indices (), values (), numEntries);
771 
772  auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
773  for (LO subRow = 0; subRow < blockSize; ++subRow) {
774  magnitude_type diagonal_boost = STM::zero ();
775  for (size_t k = 0 ; k < numEntries ; ++k) {
776  if (indices[k] > lclNumMeshRows) {
777  const size_t offset = blockSize*blockSize*k + subRow*blockSize;
778  for (LO subCol = 0; subCol < blockSize; ++subCol) {
779  diagonal_boost += STS::magnitude (values[offset+subCol] / two);
780  }
781  }
782  }
783  if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
784  diagBlock(subRow, subRow) += diagonal_boost;
785  }
786  }
787  }
788  }
789 
790  int info = 0;
791  {
792  Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
793  Kokkos::parallel_reduce(lclNumMeshRows, idb, info);
794  TEUCHOS_TEST_FOR_EXCEPTION
795  (info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info
796  << " diagonal blocks.");
797  }
798 
799  // In a debug build, do an extra test to make sure that all the
800  // factorizations were computed correctly.
801 #ifdef HAVE_IFPACK2_DEBUG
802  const int numResults = 2;
803  // Use "max = -min" trick to get min and max in a single all-reduce.
804  int lclResults[2], gblResults[2];
805  lclResults[0] = info;
806  lclResults[1] = -info;
807  gblResults[0] = 0;
808  gblResults[1] = 0;
809  reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
810  numResults, lclResults, gblResults);
811  TEUCHOS_TEST_FOR_EXCEPTION
812  (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
813  "Ifpack2::Relaxation::compute: When processing the input "
814  "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
815  "failed on one or more (MPI) processes.");
816 #endif // HAVE_IFPACK2_DEBUG
817 
818  Importer_ = A_->getGraph ()->getImporter ();
819  } // end TimeMonitor scope
820 
821  ComputeTime_ += Time_->totalElapsedTime ();
822  ++NumCompute_;
823  IsComputed_ = true;
824 }
825 
826 template<class MatrixType>
828 {
829  using Teuchos::Array;
830  using Teuchos::ArrayRCP;
831  using Teuchos::ArrayView;
832  using Teuchos::as;
833  using Teuchos::Comm;
834  using Teuchos::RCP;
835  using Teuchos::rcp;
836  using Teuchos::REDUCE_MAX;
837  using Teuchos::REDUCE_MIN;
838  using Teuchos::REDUCE_SUM;
839  using Teuchos::rcp_dynamic_cast;
840  using Teuchos::reduceAll;
841  typedef Tpetra::Vector<scalar_type, local_ordinal_type,
842  global_ordinal_type, node_type> vector_type;
843  typedef typename vector_type::device_type device_type;
844  const scalar_type zero = STS::zero ();
845  const scalar_type one = STS::one ();
846 
847  // We don't count initialization in compute() time.
848  if (! isInitialized ()) {
849  initialize ();
850  }
851 
852  if (hasBlockCrsMatrix_) {
853  computeBlockCrs ();
854  return;
855  }
856 
857 
858 
859  {
860  // Reset the timer each time, since Relaxation uses the same Time
861  // object to track times for different methods.
862  Teuchos::TimeMonitor timeMon (*Time_, true);
863 
864  TEUCHOS_TEST_FOR_EXCEPTION(
865  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::compute: "
866  "The input matrix A is null. Please call setMatrix() with a nonnull "
867  "input matrix, then call initialize(), before calling this method.");
868 
869  // Reset state.
870  IsComputed_ = false;
871 
872  TEUCHOS_TEST_FOR_EXCEPTION(
873  NumSweeps_ < 0, std::logic_error,
874  "Ifpack2::Relaxation::compute: NumSweeps_ = " << NumSweeps_ << " < 0. "
875  "Please report this bug to the Ifpack2 developers.");
876 
877  Diagonal_ = rcp (new vector_type (A_->getRowMap ()));
878 
879  // Extract the diagonal entries. The CrsMatrix static graph
880  // version is faster for subsequent calls to compute(), since it
881  // caches the diagonal offsets.
882  //
883  // isStaticGraph() == true means that the matrix was created with
884  // a const graph. The only requirement is that the structure of
885  // the matrix never changes, so isStaticGraph() == true is a bit
886  // more conservative than we need. However, Tpetra doesn't (as of
887  // 05 Apr 2013) have a way to tell if the graph hasn't changed
888  // since the last time we used it.
889  {
890  // NOTE (mfh 07 Jul 2013): We must cast here to CrsMatrix
891  // instead of MatrixType, because isStaticGraph is a CrsMatrix
892  // method (not inherited from RowMatrix's interface). It's
893  // perfectly valid to do relaxation on a RowMatrix which is not
894  // a CrsMatrix.
895  const crs_matrix_type* crsMat =
896  dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
897  if (crsMat == NULL || ! crsMat->isStaticGraph ()) {
898  A_->getLocalDiagCopy (*Diagonal_); // slow path
899  } else {
900  if (! savedDiagOffsets_) { // we haven't precomputed offsets
901  const size_t lclNumRows = A_->getRowMap ()->getNodeNumElements ();
902  if (diagOffsets_.dimension_0 () < lclNumRows) {
903  typedef typename node_type::device_type DT;
904  diagOffsets_ = Kokkos::View<size_t*, DT> (); // clear 1st to save mem
905  diagOffsets_ = Kokkos::View<size_t*, DT> ("offsets", lclNumRows);
906  }
907  crsMat->getCrsGraph ()->getLocalDiagOffsets (diagOffsets_);
908  savedDiagOffsets_ = true;
909  }
910  crsMat->getLocalDiagCopy (*Diagonal_, diagOffsets_);
911 #ifdef HAVE_IFPACK2_DEBUG
912  // Validate the fast-path diagonal against the slow-path diagonal.
913  vector_type D_copy (A_->getRowMap ());
914  A_->getLocalDiagCopy (D_copy);
915  D_copy.update (STS::one (), *Diagonal_, -STS::one ());
916  const magnitude_type err = D_copy.normInf ();
917  // The two diagonals should be exactly the same, so their
918  // difference should be exactly zero.
919  TEUCHOS_TEST_FOR_EXCEPTION(
920  err != STM::zero(), std::logic_error, "Ifpack2::Relaxation::compute: "
921  "\"fast-path\" diagonal computation failed. \\|D1 - D2\\|_inf = "
922  << err << ".");
923 #endif // HAVE_IFPACK2_DEBUG
924  }
925  }
926 
927  // If we're checking the computed inverse diagonal, then keep a
928  // copy of the original diagonal entries for later comparison.
929  RCP<vector_type> origDiag;
930  if (checkDiagEntries_) {
931  origDiag = rcp (new vector_type (A_->getRowMap ()));
932  Tpetra::deep_copy (*origDiag, *Diagonal_);
933  }
934 
935  const size_t numMyRows = A_->getNodeNumRows ();
936 
937  // We're about to read and write diagonal entries on the host.
938  Diagonal_->template sync<Kokkos::HostSpace> ();
939  Diagonal_->template modify<Kokkos::HostSpace> ();
940  auto diag_2d = Diagonal_->template getLocalView<Kokkos::HostSpace> ();
941  auto diag_1d = Kokkos::subview (diag_2d, Kokkos::ALL (), 0);
942  // FIXME (mfh 12 Jan 2016) temp fix for Kokkos::complex vs. std::complex.
943  scalar_type* const diag = reinterpret_cast<scalar_type*> (diag_1d.ptr_on_device ());
944 
945  // Setup for L1 Methods.
946  // Here we add half the value of the off-processor entries in the row,
947  // but only if diagonal isn't sufficiently large.
948  //
949  // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
950  // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
951  // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
952  if (DoL1Method_ && IsParallel_) {
953  const scalar_type two = one + one;
954  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
955  Array<local_ordinal_type> indices (maxLength);
956  Array<scalar_type> values (maxLength);
957  size_t numEntries;
958 
959  for (size_t i = 0; i < numMyRows; ++i) {
960  A_->getLocalRowCopy (i, indices (), values (), numEntries);
961  magnitude_type diagonal_boost = STM::zero ();
962  for (size_t k = 0 ; k < numEntries ; ++k) {
963  if (static_cast<size_t> (indices[k]) > numMyRows) {
964  diagonal_boost += STS::magnitude (values[k] / two);
965  }
966  }
967  if (STS::magnitude (diag[i]) < L1Eta_ * diagonal_boost) {
968  diag[i] += diagonal_boost;
969  }
970  }
971  }
972 
973  //
974  // Invert the diagonal entries of the matrix (not in place).
975  //
976 
977  // Precompute some quantities for "fixing" zero or tiny diagonal
978  // entries. We'll only use them if this "fixing" is enabled.
979  //
980  // SmallTraits covers for the lack of eps() in
981  // Teuchos::ScalarTraits when its template parameter is not a
982  // floating-point type. (Ifpack2 sometimes gets instantiated for
983  // integer Scalar types.)
984  const scalar_type oneOverMinDiagVal = (MinDiagonalValue_ == zero) ?
985  one / SmallTraits<scalar_type>::eps () :
986  one / MinDiagonalValue_;
987  // It's helpful not to have to recompute this magnitude each time.
988  const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
989 
990  if (checkDiagEntries_) {
991  //
992  // Check diagonal elements, replace zero elements with the minimum
993  // diagonal value, and store their inverses. Count the number of
994  // "small" and zero diagonal entries while we're at it.
995  //
996  size_t numSmallDiagEntries = 0; // "small" includes zero
997  size_t numZeroDiagEntries = 0; // # zero diagonal entries
998  size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
999 
1000  // As we go, keep track of the diagonal entries with the least and
1001  // greatest magnitude. We could use the trick of starting the min
1002  // with +Inf and the max with -Inf, but that doesn't work if
1003  // scalar_type is a built-in integer type. Thus, we have to start
1004  // by reading the first diagonal entry redundantly.
1005  // scalar_type minMagDiagEntry = zero;
1006  // scalar_type maxMagDiagEntry = zero;
1007  magnitude_type minMagDiagEntryMag = STM::zero ();
1008  magnitude_type maxMagDiagEntryMag = STM::zero ();
1009  if (numMyRows > 0) {
1010  const scalar_type d_0 = diag[0];
1011  const magnitude_type d_0_mag = STS::magnitude (d_0);
1012  // minMagDiagEntry = d_0;
1013  // maxMagDiagEntry = d_0;
1014  minMagDiagEntryMag = d_0_mag;
1015  maxMagDiagEntryMag = d_0_mag;
1016  }
1017 
1018  // Go through all the diagonal entries. Compute counts of
1019  // small-magnitude, zero, and negative-real-part entries. Invert
1020  // the diagonal entries that aren't too small. For those that are
1021  // too small in magnitude, replace them with 1/MinDiagonalValue_
1022  // (or 1/eps if MinDiagonalValue_ happens to be zero).
1023  for (size_t i = 0 ; i < numMyRows; ++i) {
1024  const scalar_type d_i = diag[i];
1025  const magnitude_type d_i_mag = STS::magnitude (d_i);
1026  const magnitude_type d_i_real = STS::real (d_i);
1027 
1028  // We can't compare complex numbers, but we can compare their
1029  // real parts.
1030  if (d_i_real < STM::zero ()) {
1031  ++numNegDiagEntries;
1032  }
1033  if (d_i_mag < minMagDiagEntryMag) {
1034  // minMagDiagEntry = d_i;
1035  minMagDiagEntryMag = d_i_mag;
1036  }
1037  if (d_i_mag > maxMagDiagEntryMag) {
1038  // maxMagDiagEntry = d_i;
1039  maxMagDiagEntryMag = d_i_mag;
1040  }
1041 
1042  if (fixTinyDiagEntries_) {
1043  // <= not <, in case minDiagValMag is zero.
1044  if (d_i_mag <= minDiagValMag) {
1045  ++numSmallDiagEntries;
1046  if (d_i_mag == STM::zero ()) {
1047  ++numZeroDiagEntries;
1048  }
1049  diag[i] = oneOverMinDiagVal;
1050  } else {
1051  diag[i] = one / d_i;
1052  }
1053  }
1054  else { // Don't fix zero or tiny diagonal entries.
1055  // <= not <, in case minDiagValMag is zero.
1056  if (d_i_mag <= minDiagValMag) {
1057  ++numSmallDiagEntries;
1058  if (d_i_mag == STM::zero ()) {
1059  ++numZeroDiagEntries;
1060  }
1061  }
1062  diag[i] = one / d_i;
1063  }
1064  }
1065 
1066  // Count floating-point operations of computing the inverse diagonal.
1067  //
1068  // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1069  if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1070  ComputeFlops_ += 4.0 * numMyRows;
1071  } else {
1072  ComputeFlops_ += numMyRows;
1073  }
1074 
1075  // Collect global data about the diagonal entries. Only do this
1076  // (which involves O(1) all-reduces) if the user asked us to do
1077  // the extra work.
1078  //
1079  // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1080  // zero rows. Fixing this requires one of two options:
1081  //
1082  // 1. Use a custom reduction operation that ignores processes
1083  // with zero diagonal entries.
1084  // 2. Split the communicator, compute all-reduces using the
1085  // subcommunicator over processes that have a nonzero number
1086  // of diagonal entries, and then broadcast from one of those
1087  // processes (if there is one) to the processes in the other
1088  // subcommunicator.
1089  const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1090 
1091  // Compute global min and max magnitude of entries.
1092  Array<magnitude_type> localVals (2);
1093  localVals[0] = minMagDiagEntryMag;
1094  // (- (min (- x))) is the same as (max x).
1095  localVals[1] = -maxMagDiagEntryMag;
1096  Array<magnitude_type> globalVals (2);
1097  reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1098  localVals.getRawPtr (),
1099  globalVals.getRawPtr ());
1100  globalMinMagDiagEntryMag_ = globalVals[0];
1101  globalMaxMagDiagEntryMag_ = -globalVals[1];
1102 
1103  Array<size_t> localCounts (3);
1104  localCounts[0] = numSmallDiagEntries;
1105  localCounts[1] = numZeroDiagEntries;
1106  localCounts[2] = numNegDiagEntries;
1107  Array<size_t> globalCounts (3);
1108  reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1109  localCounts.getRawPtr (),
1110  globalCounts.getRawPtr ());
1111  globalNumSmallDiagEntries_ = globalCounts[0];
1112  globalNumZeroDiagEntries_ = globalCounts[1];
1113  globalNumNegDiagEntries_ = globalCounts[2];
1114 
1115  // Forestall "set but not used" compiler warnings.
1116  // (void) minMagDiagEntry;
1117  // (void) maxMagDiagEntry;
1118 
1119  // Compute and save the difference between the computed inverse
1120  // diagonal, and the original diagonal's inverse.
1121  //
1122  // NOTE (mfh 11 Jan 2016) We need to sync Diagonal_ back from
1123  // host to device for the update kernel below, and we don't need
1124  // to modify it or sync it back again here.
1125  vector_type diff (A_->getRowMap ());
1126  diff.reciprocal (*origDiag);
1127  Diagonal_->template sync<device_type> ();
1128  diff.update (-one, *Diagonal_, one);
1129  globalDiagNormDiff_ = diff.norm2 ();
1130  }
1131  else { // don't check diagonal elements
1132  if (fixTinyDiagEntries_) {
1133  // Go through all the diagonal entries. Invert those that
1134  // aren't too small in magnitude. For those that are too
1135  // small in magnitude, replace them with oneOverMinDiagVal.
1136  for (size_t i = 0 ; i < numMyRows; ++i) {
1137  const scalar_type d_i = diag[i];
1138  const magnitude_type d_i_mag = STS::magnitude (d_i);
1139 
1140  // <= not <, in case minDiagValMag is zero.
1141  if (d_i_mag <= minDiagValMag) {
1142  diag[i] = oneOverMinDiagVal;
1143  } else {
1144  diag[i] = one / d_i;
1145  }
1146  }
1147  }
1148  else { // don't fix tiny or zero diagonal entries
1149  for (size_t i = 0 ; i < numMyRows; ++i) {
1150  diag[i] = one / diag[i];
1151  }
1152  }
1153  if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1154  ComputeFlops_ += 4.0 * numMyRows;
1155  } else {
1156  ComputeFlops_ += numMyRows;
1157  }
1158  }
1159 
1160  if (IsParallel_ && (PrecType_ == Ifpack2::Details::GS ||
1161  PrecType_ == Ifpack2::Details::SGS)) {
1162  // mfh 21 Mar 2013: The Import object may be null, but in that
1163  // case, the domain and column Maps are the same and we don't
1164  // need to Import anyway.
1165  Importer_ = A_->getGraph ()->getImporter ();
1166  Diagonal_->template sync<device_type> ();
1167  }
1168 #ifdef HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES
1169  //KokkosKernels GaussSiedel Initialization.
1170  if (PrecType_ == Ifpack2::Details::MTGS || PrecType_ == Ifpack2::Details::MTSGS) {
1171  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
1172  TEUCHOS_TEST_FOR_EXCEPTION(
1173  crsMat == NULL, std::runtime_error, "Ifpack2::Relaxation::compute: "
1174  "MT methods works for CRSMatrix Only.");
1175 
1176  kokkos_csr_matrix kcsr = crsMat->getLocalMatrix ();
1177 
1178  bool is_symmetric = false;
1179  if (PrecType_ == Ifpack2::Details::MTSGS){
1180  is_symmetric = true;
1181  }
1182  KokkosKernels::Experimental::Graph::gauss_seidel_numeric
1183  <KernelHandle, lno_row_view_t, lno_nonzero_view_t, scalar_nonzero_view_t>
1184  (kh.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(), kcsr.graph.row_map,
1185  kcsr.graph.entries, kcsr.values, is_symmetric);
1186  }
1187 #endif
1188  } // end TimeMonitor scope
1189 
1190  ComputeTime_ += Time_->totalElapsedTime ();
1191  ++NumCompute_;
1192  IsComputed_ = true;
1193 }
1194 
1195 
1196 template<class MatrixType>
1197 void
1199 ApplyInverseJacobi (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1200  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1201 {
1202  using Teuchos::as;
1203  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
1204  global_ordinal_type, node_type> MV;
1205 
1206  if (hasBlockCrsMatrix_) {
1207  ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1208  return;
1209  }
1210 
1211  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1212  const double numVectors = as<double> (X.getNumVectors ());
1213  if (ZeroStartingSolution_) {
1214  // For the first Jacobi sweep, if we are allowed to assume that
1215  // the initial guess is zero, then Jacobi is just diagonal
1216  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1217  // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1218  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1219 
1220  // Count (global) floating-point operations. Ifpack2 represents
1221  // this as a floating-point number rather than an integer, so that
1222  // overflow (for a very large number of calls, or a very large
1223  // problem) is approximate instead of catastrophic.
1224  double flopUpdate = 0.0;
1225  if (DampingFactor_ == STS::one ()) {
1226  // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1227  flopUpdate = numGlobalRows * numVectors;
1228  } else {
1229  // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1230  // Two multiplies per entry of Y.
1231  flopUpdate = 2.0 * numGlobalRows * numVectors;
1232  }
1233  ApplyFlops_ += flopUpdate;
1234  if (NumSweeps_ == 1) {
1235  return;
1236  }
1237  }
1238  // If we were allowed to assume that the starting guess was zero,
1239  // then we have already done the first sweep above.
1240  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1241  // We don't need to initialize the result MV, since the sparse
1242  // mat-vec will clobber its contents anyway.
1243  MV A_times_Y (Y.getMap (), as<size_t>(numVectors), false);
1244  for (int j = startSweep; j < NumSweeps_; ++j) {
1245  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1246  applyMat (Y, A_times_Y);
1247  A_times_Y.update (STS::one (), X, -STS::one ());
1248  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, A_times_Y, STS::one ());
1249  }
1250 
1251  // For each column of output, for each pass over the matrix:
1252  //
1253  // - One + and one * for each matrix entry
1254  // - One / and one + for each row of the matrix
1255  // - If the damping factor is not one: one * for each row of the
1256  // matrix. (It's not fair to count this if the damping factor is
1257  // one, since the implementation could skip it. Whether it does
1258  // or not is the implementation's choice.)
1259 
1260  // Floating-point operations due to the damping factor, per matrix
1261  // row, per direction, per columm of output.
1262  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1263  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1264  ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1265  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1266 }
1267 
1268 template<class MatrixType>
1269 void
1270 Relaxation<MatrixType>::
1271 ApplyInverseJacobi_BlockCrsMatrix (const Tpetra::MultiVector<scalar_type,
1272  local_ordinal_type,
1273  global_ordinal_type,
1274  node_type>& X,
1275  Tpetra::MultiVector<scalar_type,
1276  local_ordinal_type,
1277  global_ordinal_type,
1278  node_type>& Y) const
1279 {
1280  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
1281  local_ordinal_type, global_ordinal_type, node_type> BMV;
1282 
1283  const block_crs_matrix_type* blockMatConst =
1284  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1285  TEUCHOS_TEST_FOR_EXCEPTION
1286  (blockMatConst == NULL, std::logic_error, "This method should never be "
1287  "called if the matrix A_ is not a BlockCrsMatrix. Please report this "
1288  "bug to the Ifpack2 developers.");
1289  // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1290  // This is because applyBlock() is nonconst (more accurate), while
1291  // apply() is const (required by Tpetra::Operator interface, but a
1292  // lie, because it possibly allocates temporary buffers).
1293  block_crs_matrix_type* blockMat =
1294  const_cast<block_crs_matrix_type*> (blockMatConst);
1295 
1296  auto meshRowMap = blockMat->getRowMap ();
1297  auto meshColMap = blockMat->getColMap ();
1298  const local_ordinal_type blockSize = blockMat->getBlockSize ();
1299 
1300  BMV X_blk (X, *meshColMap, blockSize);
1301  BMV Y_blk (Y, *meshRowMap, blockSize);
1302 
1303  if (ZeroStartingSolution_) {
1304  // For the first sweep, if we are allowed to assume that the
1305  // initial guess is zero, then block Jacobi is just block diagonal
1306  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1307  Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1308  if (NumSweeps_ == 1) {
1309  return;
1310  }
1311  }
1312 
1313  auto pointRowMap = Y.getMap ();
1314  const size_t numVecs = X.getNumVectors ();
1315 
1316  // We don't need to initialize the result MV, since the sparse
1317  // mat-vec will clobber its contents anyway.
1318  BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1319 
1320  // If we were allowed to assume that the starting guess was zero,
1321  // then we have already done the first sweep above.
1322  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1323 
1324  for (int j = startSweep; j < NumSweeps_; ++j) {
1325  blockMat->applyBlock (Y_blk, A_times_Y);
1326  // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1327  Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1328  X_blk, A_times_Y, STS::one ());
1329  }
1330 }
1331 
1332 template<class MatrixType>
1333 void
1334 Relaxation<MatrixType>::
1335 ApplyInverseGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1336  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1337 {
1338  typedef Relaxation<MatrixType> this_type;
1339  // The CrsMatrix version is faster, because it can access the sparse
1340  // matrix data directly, rather than by copying out each row's data
1341  // in turn. Thus, we check whether the RowMatrix is really a
1342  // CrsMatrix.
1343  //
1344  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1345  // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1346  // will still be correct if the cast fails, but it will use an
1347  // unoptimized kernel.
1348  const block_crs_matrix_type* blockCrsMat =
1349  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1350  const crs_matrix_type* crsMat =
1351  dynamic_cast<const crs_matrix_type*> (A_.getRawPtr ());
1352  if (blockCrsMat != NULL) {
1353  const_cast<this_type*> (this)->ApplyInverseGS_BlockCrsMatrix (*blockCrsMat, X, Y);
1354  } else if (crsMat != NULL) {
1355  ApplyInverseGS_CrsMatrix (*crsMat, X, Y);
1356  } else {
1357  ApplyInverseGS_RowMatrix (X, Y);
1358  }
1359 }
1360 
1361 
1362 template<class MatrixType>
1363 void
1364 Relaxation<MatrixType>::
1365 ApplyInverseGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1366  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1367 {
1368  using Teuchos::Array;
1369  using Teuchos::ArrayRCP;
1370  using Teuchos::ArrayView;
1371  using Teuchos::as;
1372  using Teuchos::RCP;
1373  using Teuchos::rcp;
1374  using Teuchos::rcpFromRef;
1375  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1376 
1377  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1378  // starting multivector itself. The generic RowMatrix version here
1379  // does not, so we have to zero out Y here.
1380  if (ZeroStartingSolution_) {
1381  Y.putScalar (STS::zero ());
1382  }
1383 
1384  const size_t NumVectors = X.getNumVectors ();
1385  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
1386  Array<local_ordinal_type> Indices (maxLength);
1387  Array<scalar_type> Values (maxLength);
1388 
1389  // Local smoothing stuff
1390  const size_t numMyRows = A_->getNodeNumRows();
1391  const local_ordinal_type* rowInd = 0;
1392  size_t numActive = numMyRows;
1393  bool do_local = ! localSmoothingIndices_.is_null ();
1394  if (do_local) {
1395  rowInd = localSmoothingIndices_.getRawPtr ();
1396  numActive = localSmoothingIndices_.size ();
1397  }
1398 
1399  RCP<MV> Y2;
1400  if (IsParallel_) {
1401  if (Importer_.is_null ()) { // domain and column Maps are the same.
1402  // We will copy Y into Y2 below, so no need to fill with zeros here.
1403  Y2 = rcp (new MV (Y.getMap (), NumVectors, false));
1404  } else {
1405  // FIXME (mfh 21 Mar 2013) We probably don't need to fill with
1406  // zeros here, since we are doing an Import into Y2 below
1407  // anyway. However, it doesn't hurt correctness.
1408  Y2 = rcp (new MV (Importer_->getTargetMap (), NumVectors));
1409  }
1410  }
1411  else {
1412  Y2 = rcpFromRef (Y);
1413  }
1414 
1415  // Diagonal
1416  ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView ();
1417  ArrayView<const scalar_type> d_ptr = d_rcp();
1418 
1419  // Constant stride check
1420  bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
1421 
1422  if (constant_stride) {
1423  // extract 1D RCPs
1424  size_t x_stride = X.getStride();
1425  size_t y2_stride = Y2->getStride();
1426  ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
1427  ArrayRCP<const scalar_type> x_rcp = X.get1dView();
1428  ArrayView<scalar_type> y2_ptr = y2_rcp();
1429  ArrayView<const scalar_type> x_ptr = x_rcp();
1430  Array<scalar_type> dtemp(NumVectors,STS::zero());
1431 
1432  for (int j = 0; j < NumSweeps_; ++j) {
1433  // data exchange is here, once per sweep
1434  if (IsParallel_) {
1435  if (Importer_.is_null ()) {
1436  *Y2 = Y; // just copy, since domain and column Maps are the same
1437  } else {
1438  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1439  }
1440  }
1441 
1442  if (! DoBackwardGS_) { // Forward sweep
1443  for (size_t ii = 0; ii < numActive; ++ii) {
1444  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1445  size_t NumEntries;
1446  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1447  dtemp.assign(NumVectors,STS::zero());
1448 
1449  for (size_t k = 0; k < NumEntries; ++k) {
1450  const local_ordinal_type col = Indices[k];
1451  for (size_t m = 0; m < NumVectors; ++m) {
1452  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1453  }
1454  }
1455 
1456  for (size_t m = 0; m < NumVectors; ++m) {
1457  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1458  }
1459  }
1460  }
1461  else { // Backward sweep
1462  // ptrdiff_t is the same size as size_t, but is signed. Being
1463  // signed is important so that i >= 0 is not trivially true.
1464  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1465  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1466  size_t NumEntries;
1467  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1468  dtemp.assign (NumVectors, STS::zero ());
1469 
1470  for (size_t k = 0; k < NumEntries; ++k) {
1471  const local_ordinal_type col = Indices[k];
1472  for (size_t m = 0; m < NumVectors; ++m) {
1473  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
1474  }
1475  }
1476 
1477  for (size_t m = 0; m < NumVectors; ++m) {
1478  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
1479  }
1480  }
1481  }
1482  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1483  if (IsParallel_) {
1484  Tpetra::deep_copy (Y, *Y2);
1485  }
1486  }
1487  }
1488  else {
1489  // extract 2D RCPS
1490  ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
1491  ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
1492 
1493  for (int j = 0; j < NumSweeps_; ++j) {
1494  // data exchange is here, once per sweep
1495  if (IsParallel_) {
1496  if (Importer_.is_null ()) {
1497  *Y2 = Y; // just copy, since domain and column Maps are the same
1498  } else {
1499  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1500  }
1501  }
1502 
1503  if (! DoBackwardGS_) { // Forward sweep
1504  for (size_t ii = 0; ii < numActive; ++ii) {
1505  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1506  size_t NumEntries;
1507  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1508 
1509  for (size_t m = 0; m < NumVectors; ++m) {
1510  scalar_type dtemp = STS::zero ();
1511  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1512  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1513 
1514  for (size_t k = 0; k < NumEntries; ++k) {
1515  const local_ordinal_type col = Indices[k];
1516  dtemp += Values[k] * y2_local[col];
1517  }
1518  y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1519  }
1520  }
1521  }
1522  else { // Backward sweep
1523  // ptrdiff_t is the same size as size_t, but is signed. Being
1524  // signed is important so that i >= 0 is not trivially true.
1525  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
1526  local_ordinal_type i = as<local_ordinal_type> (do_local ? rowInd[ii] : ii);
1527 
1528  size_t NumEntries;
1529  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
1530 
1531  for (size_t m = 0; m < NumVectors; ++m) {
1532  scalar_type dtemp = STS::zero ();
1533  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
1534  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
1535 
1536  for (size_t k = 0; k < NumEntries; ++k) {
1537  const local_ordinal_type col = Indices[k];
1538  dtemp += Values[k] * y2_local[col];
1539  }
1540  y2_local[i] += DampingFactor_ * d_ptr[i] * (x_local[i] - dtemp);
1541  }
1542  }
1543  }
1544 
1545  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1546  if (IsParallel_) {
1547  Tpetra::deep_copy (Y, *Y2);
1548  }
1549  }
1550  }
1551 
1552 
1553  // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1554  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1555  const double numVectors = as<double> (X.getNumVectors ());
1556  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1557  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1558  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1559  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1560 }
1561 
1562 
1563 template<class MatrixType>
1564 void
1565 Relaxation<MatrixType>::
1566 ApplyInverseGS_CrsMatrix (const crs_matrix_type& A,
1567  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1568  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1569 {
1570  using Teuchos::as;
1571  const Tpetra::ESweepDirection direction =
1572  DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1573  if (localSmoothingIndices_.is_null ()) {
1574  A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
1575  NumSweeps_, ZeroStartingSolution_);
1576  }
1577  else {
1578  A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
1579  DampingFactor_, direction,
1580  NumSweeps_, ZeroStartingSolution_);
1581  }
1582 
1583  // For each column of output, for each sweep over the matrix:
1584  //
1585  // - One + and one * for each matrix entry
1586  // - One / and one + for each row of the matrix
1587  // - If the damping factor is not one: one * for each row of the
1588  // matrix. (It's not fair to count this if the damping factor is
1589  // one, since the implementation could skip it. Whether it does
1590  // or not is the implementation's choice.)
1591 
1592  // Floating-point operations due to the damping factor, per matrix
1593  // row, per direction, per columm of output.
1594  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1595  const double numVectors = as<double> (X.getNumVectors ());
1596  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1597  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1598  ApplyFlops_ += NumSweeps_ * numVectors *
1599  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1600 }
1601 
1602 template<class MatrixType>
1603 void
1604 Relaxation<MatrixType>::
1605 ApplyInverseGS_BlockCrsMatrix (const block_crs_matrix_type& A,
1606  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1607  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
1608 {
1609  using Teuchos::RCP;
1610  using Teuchos::rcp;
1611  using Teuchos::rcpFromRef;
1612  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
1613  local_ordinal_type, global_ordinal_type, node_type> BMV;
1614  typedef Tpetra::MultiVector<scalar_type,
1615  local_ordinal_type, global_ordinal_type, node_type> MV;
1616 
1617  //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1618  //
1619  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1620  // multiple right-hand sides, unless the input or output MultiVector
1621  // does not have constant stride. We should check for that case
1622  // here, in case it doesn't work in localGaussSeidel (which is
1623  // entirely possible).
1624  BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
1625  const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
1626 
1627  bool performImport = false;
1628  RCP<BMV> yBlockCol;
1629  if (Importer_.is_null ()) {
1630  yBlockCol = rcpFromRef (yBlock);
1631  }
1632  else {
1633  if (yBlockColumnPointMap_.is_null () ||
1634  yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
1635  yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
1636  yBlockColumnPointMap_ =
1637  rcp (new BMV (* (A.getColMap ()), A.getBlockSize (),
1638  static_cast<local_ordinal_type> (yBlock.getNumVectors ())));
1639  }
1640  yBlockCol = yBlockColumnPointMap_;
1641  performImport = true;
1642  }
1643 
1644  if (ZeroStartingSolution_) {
1645  yBlockCol->putScalar (STS::zero ());
1646  }
1647  else if (performImport) {
1648  yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
1649  }
1650 
1651  const Tpetra::ESweepDirection direction =
1652  DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
1653 
1654  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1655  if (performImport && sweep > 0) {
1656  yBlockCol->doImport(yBlock, *Importer_, Tpetra::INSERT);
1657  }
1658  A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
1659  DampingFactor_, direction);
1660  if (performImport) {
1661  RCP<const MV> yBlockColPointDomain =
1662  yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
1663  Tpetra::deep_copy (Y, *yBlockColPointDomain);
1664  }
1665  }
1666 }
1667 
1668 
1669 
1670 
1671 template<class MatrixType>
1672 void Relaxation<MatrixType>::MTGaussSeidel (
1673  const crs_matrix_type* crsMat,
1674  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1675  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1676  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& D,
1677  const scalar_type& dampingFactor,
1678  const Tpetra::ESweepDirection direction,
1679  const int numSweeps,
1680  const bool zeroInitialGuess) const
1681 {
1682 #ifdef HAVE_IFPACK2_EXPERIMENTAL_KOKKOSKERNELS_FEATURES
1683  using Teuchos::null;
1684  using Teuchos::RCP;
1685  using Teuchos::rcp;
1686  using Teuchos::rcpFromRef;
1687  using Teuchos::rcp_const_cast;
1688 
1689  typedef scalar_type Scalar;
1690  typedef local_ordinal_type LocalOrdinal;
1691  typedef global_ordinal_type GlobalOrdinal;
1692  typedef node_type Node;
1693 
1694  //typedef Scalar ST;
1695  const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1696  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1697 
1698  TEUCHOS_TEST_FOR_EXCEPTION(
1699  ! crsMat->isFillComplete (), std::runtime_error,
1700  prefix << "The matrix is not fill complete.");
1701  TEUCHOS_TEST_FOR_EXCEPTION(
1702  numSweeps < 0, std::invalid_argument,
1703  prefix << "The number of sweeps must be nonnegative, "
1704  "but you provided numSweeps = " << numSweeps << " < 0.");
1705 
1706 
1707 
1708  if (numSweeps == 0) {
1709  return;
1710  }
1711 
1712 
1713  typedef typename Tpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> MV;
1714  typedef typename crs_matrix_type::import_type import_type;
1715  typedef typename crs_matrix_type::export_type export_type;
1716  typedef typename crs_matrix_type::map_type map_type;
1717 
1718  RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1719  RCP<const export_type> exporter = crsMat->getGraph ()->getExporter ();
1720  TEUCHOS_TEST_FOR_EXCEPTION(
1721  ! exporter.is_null (), std::runtime_error,
1722  "This method's implementation currently requires that the matrix's row, "
1723  "domain, and range Maps be the same. This cannot be the case, because "
1724  "the matrix has a nontrivial Export object.");
1725 
1726  RCP<const map_type> domainMap = crsMat->getDomainMap ();
1727  RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1728  RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1729  RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1730 
1731 
1732 #ifdef HAVE_IFPACK2_DEBUG
1733  {
1734  // The relation 'isSameAs' is transitive. It's also a
1735  // collective, so we don't have to do a "shared" test for
1736  // exception (i.e., a global reduction on the test value).
1737  TEUCHOS_TEST_FOR_EXCEPTION(
1738  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1739  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1740  "multivector X be in the domain Map of the matrix.");
1741  TEUCHOS_TEST_FOR_EXCEPTION(
1742  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1743  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1744  "B be in the range Map of the matrix.");
1745  TEUCHOS_TEST_FOR_EXCEPTION(
1746  ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
1747  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1748  "D be in the row Map of the matrix.");
1749  TEUCHOS_TEST_FOR_EXCEPTION(
1750  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1751  "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1752  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1753  TEUCHOS_TEST_FOR_EXCEPTION(
1754  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1755  "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1756  "the range Map of the matrix be the same.");
1757  }
1758 #else
1759  // Forestall any compiler warnings for unused variables.
1760  (void) rangeMap;
1761  (void) rowMap;
1762 #endif // HAVE_IFPACK2_DEBUG
1763 
1764  // Fetch a (possibly cached) temporary column Map multivector
1765  // X_colMap, and a domain Map view X_domainMap of it. Both have
1766  // constant stride by construction. We know that the domain Map
1767  // must include the column Map, because our Gauss-Seidel kernel
1768  // requires that the row Map, domain Map, and range Map are all
1769  // the same, and that each process owns all of its own diagonal
1770  // entries of the matrix.
1771 
1772  RCP<MV> X_colMap;
1773  RCP<MV> X_domainMap;
1774  bool copyBackOutput = false;
1775  if (importer.is_null ()) {
1776  if (X.isConstantStride ()) {
1777  X_colMap = rcpFromRef (X);
1778  X_domainMap = rcpFromRef (X);
1779 
1780  // Column Map and domain Map are the same, so there are no
1781  // remote entries. Thus, if we are not setting the initial
1782  // guess to zero, we don't have to worry about setting remote
1783  // entries to zero, even though we are not doing an Import in
1784  // this case.
1785  if (zeroInitialGuess) {
1786  X_colMap->putScalar (ZERO);
1787  }
1788  // No need to copy back to X at end.
1789  }
1790  else {
1791  // We must copy X into a constant stride multivector.
1792  // Just use the cached column Map multivector for that.
1793  // force=true means fill with zeros, so no need to fill
1794  // remote entries (not in domain Map) with zeros.
1795  //X_colMap = crsMat->getColumnMapMultiVector (X, true);
1796  X_colMap = rcp (new MV (colMap, X.getNumVectors ()));
1797  // X_domainMap is always a domain Map view of the column Map
1798  // multivector. In this case, the domain and column Maps are
1799  // the same, so X_domainMap _is_ X_colMap.
1800  X_domainMap = X_colMap;
1801  if (! zeroInitialGuess) { // Don't copy if zero initial guess
1802  try {
1803  deep_copy (*X_domainMap , X); // Copy X into constant stride MV
1804  } catch (std::exception& e) {
1805  std::ostringstream os;
1806  os << "Ifpack2::Relaxation::MTGaussSeidel: "
1807  "deep_copy(*X_domainMap, X) threw an exception: "
1808  << e.what () << ".";
1809  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
1810  }
1811  }
1812  copyBackOutput = true; // Don't forget to copy back at end.
1813  /*
1814  TPETRA_EFFICIENCY_WARNING(
1815  ! X.isConstantStride (),
1816  std::runtime_error,
1817  "MTGaussSeidel: The current implementation of the Gauss-Seidel "
1818  "kernel requires that X and B both have constant stride. Since X "
1819  "does not have constant stride, we had to make a copy. This is a "
1820  "limitation of the current implementation and not your fault, but we "
1821  "still report it as an efficiency warning for your information.");
1822  */
1823  }
1824  }
1825  else { // Column Map and domain Map are _not_ the same.
1826  //X_colMap = crsMat->getColumnMapMultiVector (X);
1827  X_colMap = rcp (new MV (colMap, X.getNumVectors ()));
1828 
1829  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
1830 
1831 #ifdef HAVE_IFPACK2_DEBUG
1832  auto X_colMap_host_view = X_colMap->template getLocalView<Kokkos::HostSpace> ();
1833  auto X_domainMap_host_view = X_domainMap->template getLocalView<Kokkos::HostSpace> ();
1834 
1835  if (X_colMap->getLocalLength () != 0 && X_domainMap->getLocalLength ()) {
1836  TEUCHOS_TEST_FOR_EXCEPTION(
1837  X_colMap_host_view.ptr_on_device () != X_domainMap_host_view.ptr_on_device (),
1838  std::logic_error, "Ifpack2::Relaxation::MTGaussSeidel: "
1839  "Pointer to start of column Map view of X is not equal to pointer to "
1840  "start of (domain Map view of) X. This may mean that "
1841  "Tpetra::MultiVector::offsetViewNonConst is broken. "
1842  "Please report this bug to the Tpetra developers.");
1843  }
1844 
1845  TEUCHOS_TEST_FOR_EXCEPTION(
1846  X_colMap_host_view.dimension_0 () < X_domainMap_host_view.dimension_0 () ||
1847  X_colMap->getLocalLength () < X_domainMap->getLocalLength (),
1848  std::logic_error, "Ifpack2::Relaxation::MTGaussSeidel: "
1849  "X_colMap has fewer local rows than X_domainMap. "
1850  "X_colMap_host_view.dimension_0() = " << X_colMap_host_view.dimension_0 ()
1851  << ", X_domainMap_host_view.dimension_0() = "
1852  << X_domainMap_host_view.dimension_0 ()
1853  << ", X_colMap->getLocalLength() = " << X_colMap->getLocalLength ()
1854  << ", and X_domainMap->getLocalLength() = "
1855  << X_domainMap->getLocalLength ()
1856  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
1857  "is broken. Please report this bug to the Tpetra developers.");
1858 
1859  TEUCHOS_TEST_FOR_EXCEPTION(
1860  X_colMap->getNumVectors () != X_domainMap->getNumVectors (),
1861  std::logic_error, "Ifpack2::Relaxation::MTGaussSeidel: "
1862  "X_colMap has a different number of columns than X_domainMap. "
1863  "X_colMap->getNumVectors() = " << X_colMap->getNumVectors ()
1864  << " != X_domainMap->getNumVectors() = "
1865  << X_domainMap->getNumVectors ()
1866  << ". This means that Tpetra::MultiVector::offsetViewNonConst "
1867  "is broken. Please report this bug to the Tpetra developers.");
1868 #endif // HAVE_IFPACK2_DEBUG
1869 
1870  if (zeroInitialGuess) {
1871  // No need for an Import, since we're filling with zeros.
1872  X_colMap->putScalar (ZERO);
1873  } else {
1874  // We could just copy X into X_domainMap. However, that
1875  // wastes a copy, because the Import also does a copy (plus
1876  // communication). Since the typical use case for
1877  // Gauss-Seidel is a small number of sweeps (2 is typical), we
1878  // don't want to waste that copy. Thus, we do the Import
1879  // here, and skip the first Import in the first sweep.
1880  // Importing directly from X effects the copy into X_domainMap
1881  // (which is a view of X_colMap).
1882  X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
1883  }
1884  copyBackOutput = true; // Don't forget to copy back at end.
1885  } // if column and domain Maps are (not) the same
1886 
1887  // The Gauss-Seidel / SOR kernel expects multivectors of constant
1888  // stride. X_colMap is by construction, but B might not be. If
1889  // it's not, we have to make a copy.
1890  RCP<const MV> B_in;
1891  if (B.isConstantStride ()) {
1892  B_in = rcpFromRef (B);
1893  }
1894  else {
1895  // Range Map and row Map are the same in this case, so we can
1896  // use the cached row Map multivector to store a constant stride
1897  // copy of B.
1898  //RCP<MV> B_in_nonconst = crsMat->getRowMapMultiVector (B, true);
1899  RCP<MV> B_in_nonconst = rcp (new MV (rowMap, B.getNumVectors()));
1900  try {
1901  deep_copy (*B_in_nonconst, B);
1902  } catch (std::exception& e) {
1903  std::ostringstream os;
1904  os << "Ifpack2::Relaxation::MTGaussSeidel: "
1905  "deep_copy(*B_in_nonconst, B) threw an exception: "
1906  << e.what () << ".";
1907  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
1908  }
1909  B_in = rcp_const_cast<const MV> (B_in_nonconst);
1910 
1911  /*
1912  TPETRA_EFFICIENCY_WARNING(
1913  ! B.isConstantStride (),
1914  std::runtime_error,
1915  "MTGaussSeidel: The current implementation requires that B have "
1916  "constant stride. Since B does not have constant stride, we had to "
1917  "copy it into a separate constant-stride multivector. This is a "
1918  "limitation of the current implementation and not your fault, but we "
1919  "still report it as an efficiency warning for your information.");
1920  */
1921  }
1922 
1923  kokkos_csr_matrix kcsr = crsMat->getLocalMatrix ();
1924  const size_t NumVectors = X.getNumVectors ();
1925 
1926  bool update_y_vector = true;
1927  //false as it was done up already, and we dont want to zero it in each sweep.
1928  bool zero_x_vector = false;
1929  for (int sweep = 0; sweep < numSweeps; ++sweep) {
1930  if (! importer.is_null () && sweep > 0) {
1931  // We already did the first Import for the zeroth sweep above,
1932  // if it was necessary.
1933  X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
1934  }
1935 
1936 
1937  //if (rowIndices.is_null ()) {
1938 
1939 
1940  /*
1941  std::cout << "X_colMap->getNumVectors:" << X_colMap->getNumVectors()
1942  <<" X_colMap->template getLocalView<Kokkos::CudaUVMSpace::memory_space> ()->dimension_0():"
1943  << X_colMap->template getLocalView<Kokkos::CudaUVMSpace::memory_space> ().dimension_0()
1944  <<" X_colMap->template getLocalView<Kokkos::CudaUVMSpace::memory_space> ()->dimension_1():"
1945  << X_colMap->template getLocalView<Kokkos::CudaUVMSpace::memory_space> ().dimension_1()
1946  << " B_in->getNumVectors:" << B_in->getNumVectors()
1947 
1948  <<" B_in->template getLocalView<Kokkos::CudaUVMSpace::memory_space> ()->dimension_0():"
1949  << B_in->template getLocalView<Kokkos::CudaUVMSpace::memory_space> ().dimension_0()
1950  <<" B_in->template getLocalView<Kokkos::CudaUVMSpace::memory_space> ()->dimension_1():"
1951  << B_in->template getLocalView<Kokkos::CudaUVMSpace::memory_space> ().dimension_1()
1952  << std::endl;
1953 
1954 
1955  KokkosKernels::Experimental::Util::print_1Dview(entries);
1956  std::cout << std::endl;
1957  KokkosKernels::Experimental::Util::print_1Dview(vals);
1958  std::cout << std::endl;
1959  KokkosKernels::Experimental::Util::print_1Dview(Kokkos::subview(X_colMap->template getLocalView<Kokkos::CudaUVMSpace::memory_space> (), Kokkos::ALL (), 0));
1960  std::cout << std::endl;
1961  KokkosKernels::Experimental::Util::print_1Dview(Kokkos::subview(B_in->template getLocalView<Kokkos::CudaUVMSpace::memory_space> (), Kokkos::ALL (), 0));
1962  std::cout << std::endl;
1963  */
1964  //typedef typename MV::dual_view_type dual_view_type;
1965  //typedef typename dual_view_type::t_dev device_view_type;
1966  //device_view_type KernelB = Kokkos::subview(B_in->template getLocalView<Kokkos::CudaUVMSpace::memory_space> (), 0, Kokkos::ALL ());
1967  //std::cout << " 5" << std::endl;
1968  //scalar_nonzero_view_t KernelXcolMap = Kokkos::subview(X_colMap->template getLocalView<Kokkos::CudaUVMSpace::memory_space> (), 0, Kokkos::ALL ());
1969  //std::cout << " 6" << std::endl;
1970 
1971  /*
1972 
1973  crsMat->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
1974  dampingFactor,
1975  KokkosClassic::Forward);
1976  // mfh 18 Mar 2013: Aztec's implementation of "symmetric
1977  // Gauss-Seidel" does _not_ do an Import between the forward
1978  // and backward sweeps. This makes symmetric Gauss-Seidel a
1979  // symmetric preconditioner if the matrix A is symmetric. We
1980  // imitate Aztec's behavior here.
1981  crsMat->template localGaussSeidel<ST, ST> (*B_in, *X_colMap, D,
1982  dampingFactor,
1983  KokkosClassic::Backward);
1984  */
1985 
1986 
1987  for (size_t indVec = 0; indVec < NumVectors; ++indVec){
1988  if (direction == Tpetra::Symmetric) {
1989  KokkosKernels::Experimental::Graph::symmetric_gauss_seidel_apply
1990  (kh.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
1991  kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
1992  Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
1993  Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
1994  zero_x_vector, update_y_vector);
1995  }
1996  else if (direction == Tpetra::Forward) {
1997  KokkosKernels::Experimental::Graph::forward_sweep_gauss_seidel_apply
1998  (kh.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
1999  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2000  Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
2001  Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2002  zero_x_vector, update_y_vector);
2003  }
2004  else if (direction == Tpetra::Backward) {
2005  KokkosKernels::Experimental::Graph::backward_sweep_gauss_seidel_apply
2006  (kh.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2007  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2008  Kokkos::subview(X_colMap->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec ),
2009  Kokkos::subview(B_in->template getLocalView<MyExecSpace> (), Kokkos::ALL (), indVec),
2010  zero_x_vector, update_y_vector);
2011  }
2012  else {
2013  TEUCHOS_TEST_FOR_EXCEPTION(
2014  true, std::invalid_argument,
2015  prefix << "The 'direction' enum does not have any of its valid "
2016  "values: Forward, Backward, or Symmetric.");
2017  }
2018  }
2019 
2020  if (NumVectors > 1){
2021  update_y_vector = true;
2022  }
2023  else {
2024  update_y_vector = false;
2025  }
2026 
2027  /*
2028  std::cout << "after" << std::endl;
2029  KokkosKernels::Experimental::Util::print_1Dview(Kokkos::subview(X_colMap->template getLocalView<Kokkos::CudaUVMSpace::memory_space> (), Kokkos::ALL (), 0));
2030  std::cout << std::endl;
2031  KokkosKernels::Experimental::Util::print_1Dview(Kokkos::subview(B_in->template getLocalView<Kokkos::CudaUVMSpace::memory_space> (), Kokkos::ALL (), 0));
2032  std::cout << std::endl;
2033  */
2034 
2035  /*
2036  else {
2037  crsMat->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
2038  D, rowIndices,
2039  dampingFactor,
2040  KokkosClassic::Forward);
2041  crsMat->template reorderedLocalGaussSeidel<ST, ST> (*B_in, *X_colMap,
2042  D, rowIndices,
2043  dampingFactor,
2044  KokkosClassic::Backward);
2045 
2046  }
2047  */
2048  //}
2049  }
2050 
2051  if (copyBackOutput) {
2052  try {
2053  deep_copy (X , *X_domainMap); // Copy result back into X.
2054  } catch (std::exception& e) {
2055  TEUCHOS_TEST_FOR_EXCEPTION(
2056  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2057  "threw an exception: " << e.what ());
2058  }
2059  }
2060 #endif
2061 }
2062 
2063 template<class MatrixType>
2064 void Relaxation<MatrixType>::ApplyInverseMTSGS_CrsMatrix (
2065  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2066  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X) const {
2067 
2068  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
2069  TEUCHOS_TEST_FOR_EXCEPTION(
2070  crsMat == NULL, std::runtime_error, "Ifpack2::Relaxation::compute: "
2071  "MT methods works for CRSMatrix Only.");
2072 
2073  using Teuchos::as;
2074  const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2075 
2076  Teuchos::ArrayView<local_ordinal_type> rowIndices;
2077  if (!localSmoothingIndices_.is_null ()) {
2078  std::cerr << "MT GaussSeidel ignores the given order" << std::endl;
2079  }
2080  this->MTGaussSeidel (
2081  crsMat,
2082  X, B,
2083  *Diagonal_,
2084  DampingFactor_,
2085  direction, NumSweeps_,
2086  ZeroStartingSolution_);
2087 
2088  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2089  const double numVectors = as<double> (X.getNumVectors ());
2090  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2091  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2092  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2093  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2094 }
2095 
2096 
2097 template<class MatrixType>
2098 void Relaxation<MatrixType>::ApplyInverseMTGS_CrsMatrix (
2099  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
2100  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X) const {
2101 
2102  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
2103  TEUCHOS_TEST_FOR_EXCEPTION(
2104  crsMat == NULL, std::runtime_error, "Ifpack2::Relaxation::compute: "
2105  "MT methods works for CRSMatrix Only.");
2106 
2107  using Teuchos::as;
2108  const Tpetra::ESweepDirection direction =
2109  DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward;
2110 
2111  Teuchos::ArrayView<local_ordinal_type> rowIndices;
2112  if (!localSmoothingIndices_.is_null ()) {
2113  std::cerr << "MT GaussSeidel ignores the given order" << std::endl;
2114  }
2115  this->MTGaussSeidel (
2116  crsMat,
2117  X, B,
2118  *Diagonal_,
2119  DampingFactor_,
2120  direction, NumSweeps_,
2121  ZeroStartingSolution_);
2122 
2123  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2124  const double numVectors = as<double> (X.getNumVectors ());
2125  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2126  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2127  ApplyFlops_ += NumSweeps_ * numVectors *
2128  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2129 }
2130 
2131 template<class MatrixType>
2132 void
2133 Relaxation<MatrixType>::
2134 ApplyInverseSGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2135  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
2136 {
2137  typedef Relaxation<MatrixType> this_type;
2138  // The CrsMatrix version is faster, because it can access the sparse
2139  // matrix data directly, rather than by copying out each row's data
2140  // in turn. Thus, we check whether the RowMatrix is really a
2141  // CrsMatrix.
2142  //
2143  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
2144  // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
2145  // will still be correct if the cast fails, but it will use an
2146  // unoptimized kernel.
2147  const block_crs_matrix_type* blockCrsMat = dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr());
2148  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (&(*A_));
2149  if (blockCrsMat != NULL) {
2150  const_cast<this_type*> (this)->ApplyInverseSGS_BlockCrsMatrix(*blockCrsMat, X, Y);
2151  }
2152  else if (crsMat != NULL) {
2153  ApplyInverseSGS_CrsMatrix (*crsMat, X, Y);
2154  } else {
2155  ApplyInverseSGS_RowMatrix (X, Y);
2156  }
2157 }
2158 
2159 
2160 template<class MatrixType>
2161 void
2162 Relaxation<MatrixType>::
2163 ApplyInverseSGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2164  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
2165 {
2166  using Teuchos::Array;
2167  using Teuchos::ArrayRCP;
2168  using Teuchos::ArrayView;
2169  using Teuchos::as;
2170  using Teuchos::RCP;
2171  using Teuchos::rcp;
2172  using Teuchos::rcpFromRef;
2173  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
2174 
2175  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
2176  // starting multivector itself. The generic RowMatrix version here
2177  // does not, so we have to zero out Y here.
2178  if (ZeroStartingSolution_) {
2179  Y.putScalar (STS::zero ());
2180  }
2181 
2182  const size_t NumVectors = X.getNumVectors ();
2183  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
2184  Array<local_ordinal_type> Indices (maxLength);
2185  Array<scalar_type> Values (maxLength);
2186 
2187  // Local smoothing stuff
2188  const size_t numMyRows = A_->getNodeNumRows();
2189  const local_ordinal_type * rowInd = 0;
2190  size_t numActive = numMyRows;
2191  bool do_local = localSmoothingIndices_.is_null();
2192  if(do_local) {
2193  rowInd = localSmoothingIndices_.getRawPtr();
2194  numActive = localSmoothingIndices_.size();
2195  }
2196 
2197 
2198  RCP<MV> Y2;
2199  if (IsParallel_) {
2200  if (Importer_.is_null ()) { // domain and column Maps are the same.
2201  // We will copy Y into Y2 below, so no need to fill with zeros here.
2202  Y2 = rcp (new MV (Y.getMap (), NumVectors, false));
2203  } else {
2204  // FIXME (mfh 21 Mar 2013) We probably don't need to fill with
2205  // zeros here, since we are doing an Import into Y2 below
2206  // anyway. However, it doesn't hurt correctness.
2207  Y2 = rcp (new MV (Importer_->getTargetMap (), NumVectors));
2208  }
2209  }
2210  else {
2211  Y2 = rcpFromRef (Y);
2212  }
2213 
2214  // Diagonal
2215  ArrayRCP<const scalar_type> d_rcp = Diagonal_->get1dView();
2216  ArrayView<const scalar_type> d_ptr = d_rcp();
2217 
2218  // Constant stride check
2219  bool constant_stride = X.isConstantStride() && Y2->isConstantStride();
2220 
2221  if(constant_stride) {
2222  // extract 1D RCPs
2223  size_t x_stride = X.getStride();
2224  size_t y2_stride = Y2->getStride();
2225  ArrayRCP<scalar_type> y2_rcp = Y2->get1dViewNonConst();
2226  ArrayRCP<const scalar_type> x_rcp = X.get1dView();
2227  ArrayView<scalar_type> y2_ptr = y2_rcp();
2228  ArrayView<const scalar_type> x_ptr = x_rcp();
2229  Array<scalar_type> dtemp(NumVectors,STS::zero());
2230  for (int iter = 0; iter < NumSweeps_; ++iter) {
2231  // only one data exchange per sweep
2232  if (IsParallel_) {
2233  if (Importer_.is_null ()) {
2234  // just copy, since domain and column Maps are the same
2235  Tpetra::deep_copy (*Y2, Y);
2236  } else {
2237  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2238  }
2239  }
2240 
2241  for (int j = 0; j < NumSweeps_; j++) {
2242  // data exchange is here, once per sweep
2243  if (IsParallel_) {
2244  if (Importer_.is_null ()) {
2245  // just copy, since domain and column Maps are the same
2246  Tpetra::deep_copy (*Y2, Y);
2247  } else {
2248  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2249  }
2250  }
2251 
2252  for (size_t ii = 0; ii < numActive; ++ii) {
2253  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2254  size_t NumEntries;
2255  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2256  dtemp.assign(NumVectors,STS::zero());
2257 
2258  for (size_t k = 0; k < NumEntries; ++k) {
2259  const local_ordinal_type col = Indices[k];
2260  for (size_t m = 0; m < NumVectors; ++m) {
2261  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2262  }
2263  }
2264 
2265  for (size_t m = 0; m < NumVectors; ++m) {
2266  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2267  }
2268  }
2269 
2270  // ptrdiff_t is the same size as size_t, but is signed. Being
2271  // signed is important so that i >= 0 is not trivially true.
2272  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2273  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2274  size_t NumEntries;
2275  A_->getLocalRowCopy (i, Indices (), Values (), NumEntries);
2276  dtemp.assign(NumVectors,STS::zero());
2277 
2278  for (size_t k = 0; k < NumEntries; ++k) {
2279  const local_ordinal_type col = Indices[k];
2280  for (size_t m = 0; m < NumVectors; ++m) {
2281  dtemp[m] += Values[k] * y2_ptr[col + y2_stride*m];
2282  }
2283  }
2284 
2285  for (size_t m = 0; m < NumVectors; ++m) {
2286  y2_ptr[i + y2_stride*m] += DampingFactor_ * d_ptr[i] * (x_ptr[i + x_stride*m] - dtemp[m]);
2287  }
2288  }
2289  }
2290 
2291  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
2292  if (IsParallel_) {
2293  Tpetra::deep_copy (Y, *Y2);
2294  }
2295  }
2296  }
2297  else {
2298  // extract 2D RCPs
2299  ArrayRCP<ArrayRCP<scalar_type> > y2_ptr = Y2->get2dViewNonConst ();
2300  ArrayRCP<ArrayRCP<const scalar_type> > x_ptr = X.get2dView ();
2301 
2302  for (int iter = 0; iter < NumSweeps_; ++iter) {
2303  // only one data exchange per sweep
2304  if (IsParallel_) {
2305  if (Importer_.is_null ()) {
2306  // just copy, since domain and column Maps are the same
2307  Tpetra::deep_copy (*Y2, Y);
2308  } else {
2309  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
2310  }
2311  }
2312 
2313  for (size_t ii = 0; ii < numActive; ++ii) {
2314  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2315  const scalar_type diag = d_ptr[i];
2316  size_t NumEntries;
2317  A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2318 
2319  for (size_t m = 0; m < NumVectors; ++m) {
2320  scalar_type dtemp = STS::zero ();
2321  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2322  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2323 
2324  for (size_t k = 0; k < NumEntries; ++k) {
2325  const local_ordinal_type col = Indices[k];
2326  dtemp += Values[k] * y2_local[col];
2327  }
2328  y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2329  }
2330  }
2331 
2332  // ptrdiff_t is the same size as size_t, but is signed. Being
2333  // signed is important so that i >= 0 is not trivially true.
2334  for (ptrdiff_t ii = as<ptrdiff_t> (numActive) - 1; ii >= 0; --ii) {
2335  local_ordinal_type i = as<local_ordinal_type>(do_local ? rowInd[ii] : ii);
2336  const scalar_type diag = d_ptr[i];
2337  size_t NumEntries;
2338  A_->getLocalRowCopy (as<local_ordinal_type> (i), Indices (), Values (), NumEntries);
2339 
2340  for (size_t m = 0; m < NumVectors; ++m) {
2341  scalar_type dtemp = STS::zero ();
2342  ArrayView<const scalar_type> x_local = (x_ptr())[m]();
2343  ArrayView<scalar_type> y2_local = (y2_ptr())[m]();
2344 
2345  for (size_t k = 0; k < NumEntries; ++k) {
2346  const local_ordinal_type col = Indices[k];
2347  dtemp += Values[k] * y2_local[col];
2348  }
2349  y2_local[i] += DampingFactor_ * (x_local[i] - dtemp) * diag;
2350  }
2351  }
2352 
2353  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
2354  if (IsParallel_) {
2355  Tpetra::deep_copy (Y, *Y2);
2356  }
2357  }
2358  }
2359 
2360  // See flop count discussion in implementation of ApplyInverseSGS_CrsMatrix().
2361  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2362  const double numVectors = as<double> (X.getNumVectors ());
2363  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2364  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2365  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2366  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2367 }
2368 
2369 
2370 template<class MatrixType>
2371 void
2372 Relaxation<MatrixType>::
2373 ApplyInverseSGS_CrsMatrix (const crs_matrix_type& A,
2374  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2375  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
2376 {
2377  using Teuchos::as;
2378  const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2379  if (localSmoothingIndices_.is_null ()) {
2380  A.gaussSeidelCopy (Y, X, *Diagonal_, DampingFactor_, direction,
2381  NumSweeps_, ZeroStartingSolution_);
2382  }
2383  else {
2384  A.reorderedGaussSeidelCopy (Y, X, *Diagonal_, localSmoothingIndices_ (),
2385  DampingFactor_, direction,
2386  NumSweeps_, ZeroStartingSolution_);
2387  }
2388 
2389  // For each column of output, for each sweep over the matrix:
2390  //
2391  // - One + and one * for each matrix entry
2392  // - One / and one + for each row of the matrix
2393  // - If the damping factor is not one: one * for each row of the
2394  // matrix. (It's not fair to count this if the damping factor is
2395  // one, since the implementation could skip it. Whether it does
2396  // or not is the implementation's choice.)
2397  //
2398  // Each sweep of symmetric Gauss-Seidel / SOR counts as two sweeps,
2399  // one forward and one backward.
2400 
2401  // Floating-point operations due to the damping factor, per matrix
2402  // row, per direction, per columm of output.
2403  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
2404  const double numVectors = as<double> (X.getNumVectors ());
2405  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2406  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2407  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
2408  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2409 }
2410 
2411 template<class MatrixType>
2412 void
2413 Relaxation<MatrixType>::
2414 ApplyInverseSGS_BlockCrsMatrix (const block_crs_matrix_type& A,
2415  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
2416  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y)
2417 {
2418  using Teuchos::RCP;
2419  using Teuchos::rcp;
2420  using Teuchos::rcpFromRef;
2421  typedef Tpetra::Experimental::BlockMultiVector<scalar_type,
2422  local_ordinal_type, global_ordinal_type, node_type> BMV;
2423  typedef Tpetra::MultiVector<scalar_type,
2424  local_ordinal_type, global_ordinal_type, node_type> MV;
2425 
2426  //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
2427  //
2428  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
2429  // multiple right-hand sides, unless the input or output MultiVector
2430  // does not have constant stride. We should check for that case
2431  // here, in case it doesn't work in localGaussSeidel (which is
2432  // entirely possible).
2433  BMV yBlock (Y, * (A.getGraph ()->getDomainMap ()), A.getBlockSize ());
2434  const BMV xBlock (X, * (A.getColMap ()), A.getBlockSize ());
2435 
2436  bool performImport = false;
2437  RCP<BMV> yBlockCol;
2438  if (Importer_.is_null ()) {
2439  yBlockCol = Teuchos::rcpFromRef (yBlock);
2440  }
2441  else {
2442  if (yBlockColumnPointMap_.is_null () ||
2443  yBlockColumnPointMap_->getNumVectors () != yBlock.getNumVectors () ||
2444  yBlockColumnPointMap_->getBlockSize () != yBlock.getBlockSize ()) {
2445  yBlockColumnPointMap_ =
2446  rcp (new BMV (* (A.getColMap ()), A.getBlockSize (),
2447  static_cast<local_ordinal_type> (yBlock.getNumVectors ())));
2448  }
2449  yBlockCol = yBlockColumnPointMap_;
2450  performImport = true;
2451  }
2452 
2453  if (ZeroStartingSolution_) {
2454  yBlockCol->putScalar (STS::zero ());
2455  }
2456  else if (performImport) {
2457  yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2458  }
2459 
2460  // FIXME (mfh 12 Sep 2014) Shouldn't this come from the user's parameter?
2461  const Tpetra::ESweepDirection direction = Tpetra::Symmetric;
2462 
2463  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2464  if (performImport && sweep > 0) {
2465  yBlockCol->doImport (yBlock, *Importer_, Tpetra::INSERT);
2466  }
2467  A.localGaussSeidel (xBlock, *yBlockCol, blockDiag_,
2468  DampingFactor_, direction);
2469  if (performImport) {
2470  RCP<const MV> yBlockColPointDomain =
2471  yBlockCol->getMultiVectorView ().offsetView (A.getDomainMap (), 0);
2472  MV yBlockView = yBlock.getMultiVectorView ();
2473  Tpetra::deep_copy (yBlockView, *yBlockColPointDomain);
2474  }
2475  }
2476 }
2477 
2478 
2479 template<class MatrixType>
2481 {
2482  std::ostringstream os;
2483 
2484  // Output is a valid YAML dictionary in flow style. If you don't
2485  // like everything on a single line, you should call describe()
2486  // instead.
2487  os << "\"Ifpack2::Relaxation\": {";
2488 
2489  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
2490  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
2491 
2492  // It's useful to print this instance's relaxation method (Jacobi,
2493  // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2494  // than that, call describe() instead.
2495  os << "Type: ";
2496  if (PrecType_ == Ifpack2::Details::JACOBI) {
2497  os << "Jacobi";
2498  } else if (PrecType_ == Ifpack2::Details::GS) {
2499  os << "Gauss-Seidel";
2500  } else if (PrecType_ == Ifpack2::Details::SGS) {
2501  os << "Symmetric Gauss-Seidel";
2502  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2503  os << "MT Gauss-Seidel";
2504  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2505  os << "MT Symmetric Gauss-Seidel";
2506  }
2507  else {
2508  os << "INVALID";
2509  }
2510 
2511  os << ", " << "sweeps: " << NumSweeps_ << ", "
2512  << "damping factor: " << DampingFactor_ << ", ";
2513  if (DoL1Method_) {
2514  os << "use l1: " << DoL1Method_ << ", "
2515  << "l1 eta: " << L1Eta_ << ", ";
2516  }
2517 
2518  if (A_.is_null ()) {
2519  os << "Matrix: null";
2520  }
2521  else {
2522  os << "Global matrix dimensions: ["
2523  << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
2524  << ", Global nnz: " << A_->getGlobalNumEntries();
2525  }
2526 
2527  os << "}";
2528  return os.str ();
2529 }
2530 
2531 
2532 template<class MatrixType>
2533 void
2535 describe (Teuchos::FancyOStream &out,
2536  const Teuchos::EVerbosityLevel verbLevel) const
2537 {
2538  using Teuchos::OSTab;
2539  using Teuchos::TypeNameTraits;
2540  using Teuchos::VERB_DEFAULT;
2541  using Teuchos::VERB_NONE;
2542  using Teuchos::VERB_LOW;
2543  using Teuchos::VERB_MEDIUM;
2544  using Teuchos::VERB_HIGH;
2545  using Teuchos::VERB_EXTREME;
2546  using std::endl;
2547 
2548  const Teuchos::EVerbosityLevel vl =
2549  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2550 
2551  const int myRank = this->getComm ()->getRank ();
2552 
2553  // none: print nothing
2554  // low: print O(1) info from Proc 0
2555  // medium:
2556  // high:
2557  // extreme:
2558 
2559  if (vl != VERB_NONE && myRank == 0) {
2560  // Describable interface asks each implementation to start with a tab.
2561  OSTab tab1 (out);
2562  // Output is valid YAML; hence the quotes, to protect the colons.
2563  out << "\"Ifpack2::Relaxation\":" << endl;
2564  OSTab tab2 (out);
2565  out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name () << "\""
2566  << endl;
2567  if (this->getObjectLabel () != "") {
2568  out << "Label: " << this->getObjectLabel () << endl;
2569  }
2570  out << "Initialized: " << (isInitialized () ? "true" : "false") << endl
2571  << "Computed: " << (isComputed () ? "true" : "false") << endl
2572  << "Parameters: " << endl;
2573  {
2574  OSTab tab3 (out);
2575  out << "\"relaxation: type\": ";
2576  if (PrecType_ == Ifpack2::Details::JACOBI) {
2577  out << "Jacobi";
2578  } else if (PrecType_ == Ifpack2::Details::GS) {
2579  out << "Gauss-Seidel";
2580  } else if (PrecType_ == Ifpack2::Details::SGS) {
2581  out << "Symmetric Gauss-Seidel";
2582  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2583  out << "MT Gauss-Seidel";
2584  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2585  out << "MT Symmetric Gauss-Seidel";
2586  } else {
2587  out << "INVALID";
2588  }
2589  // We quote these parameter names because they contain colons.
2590  // YAML uses the colon to distinguish key from value.
2591  out << endl
2592  << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2593  << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2594  << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2595  << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2596  << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2597  << "\"relaxation: use l1\": " << DoL1Method_ << endl
2598  << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2599  }
2600  out << "Computed quantities:" << endl;
2601  {
2602  OSTab tab3 (out);
2603  out << "Global number of rows: " << A_->getGlobalNumRows () << endl
2604  << "Global number of columns: " << A_->getGlobalNumCols () << endl;
2605  }
2606  if (checkDiagEntries_ && isComputed ()) {
2607  out << "Properties of input diagonal entries:" << endl;
2608  {
2609  OSTab tab3 (out);
2610  out << "Magnitude of minimum-magnitude diagonal entry: "
2611  << globalMinMagDiagEntryMag_ << endl
2612  << "Magnitude of maximum-magnitude diagonal entry: "
2613  << globalMaxMagDiagEntryMag_ << endl
2614  << "Number of diagonal entries with small magnitude: "
2615  << globalNumSmallDiagEntries_ << endl
2616  << "Number of zero diagonal entries: "
2617  << globalNumZeroDiagEntries_ << endl
2618  << "Number of diagonal entries with negative real part: "
2619  << globalNumNegDiagEntries_ << endl
2620  << "Abs 2-norm diff between computed and actual inverse "
2621  << "diagonal: " << globalDiagNormDiff_ << endl;
2622  }
2623  }
2624  if (isComputed ()) {
2625  out << "Saved diagonal offsets: "
2626  << (savedDiagOffsets_ ? "true" : "false") << endl;
2627  }
2628  out << "Call counts and total times (in seconds): " << endl;
2629  {
2630  OSTab tab3 (out);
2631  out << "initialize: " << endl;
2632  {
2633  OSTab tab4 (out);
2634  out << "Call count: " << NumInitialize_ << endl;
2635  out << "Total time: " << InitializeTime_ << endl;
2636  }
2637  out << "compute: " << endl;
2638  {
2639  OSTab tab4 (out);
2640  out << "Call count: " << NumCompute_ << endl;
2641  out << "Total time: " << ComputeTime_ << endl;
2642  }
2643  out << "apply: " << endl;
2644  {
2645  OSTab tab4 (out);
2646  out << "Call count: " << NumApply_ << endl;
2647  out << "Total time: " << ApplyTime_ << endl;
2648  }
2649  }
2650  }
2651 }
2652 
2653 } // namespace Ifpack2
2654 
2655 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \
2656  template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
2657 
2658 #endif // IFPACK2_RELAXATION_DEF_HPP
std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2480
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:377
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:393
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:423
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_Relaxation_def.hpp:175
void compute()
Compute the preconditioner.
Definition: Ifpack2_Relaxation_def.hpp:827
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:443
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:387
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:405
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:411
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:155
Ifpack2 implementation details.
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:364
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:355
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:250
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:334
LinearOp zero(const VectorSpace &vs)
File for utility functions.
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:435
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:220
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:247
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:429
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:417
Definition: Ifpack2_Container.hpp:761
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:399
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:244
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:344
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:241
virtual ~Relaxation()
Destructor.
Definition: Ifpack2_Relaxation_def.hpp:215
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:541
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:226
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object&#39;s attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2535
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:72
void getValidParameters(Teuchos::ParameterList &params)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition: Ifpack2_Parameters.cpp:50
void initialize()
Initialize the preconditioner.
Definition: Ifpack2_Relaxation_def.hpp:562
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:253