Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_RILUK_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Tempated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 //@HEADER
41 */
42 
43 #ifndef IFPACK2_CRSRILUK_DEF_HPP
44 #define IFPACK2_CRSRILUK_DEF_HPP
45 
46 #include "Ifpack2_LocalFilter.hpp"
47 #include "Tpetra_CrsMatrix.hpp"
48 #include "Ifpack2_LocalSparseTriangularSolver.hpp"
49 
50 #ifdef IFPACK2_ILUK_EXPERIMENTAL
51 #include <shylubasker_def.hpp>
52 #include <Kokkos_CrsMatrix.hpp>
53 # ifdef IFPACK2_HTS_EXPERIMENTAL
54 # include <shylu_hts.hpp>
55 # endif
56 #endif
57 
58 namespace Ifpack2 {
59 
60 template<class MatrixType>
61 RILUK<MatrixType>::RILUK (const Teuchos::RCP<const row_matrix_type>& Matrix_in)
62  : A_ (Matrix_in),
63  LevelOfFill_ (0),
64  isAllocated_ (false),
65  isInitialized_ (false),
66  isComputed_ (false),
67  isExperimental_ (false),
68  numInitialize_ (0),
69  numCompute_ (0),
70  numApply_ (0),
71  initializeTime_ (0.0),
72  computeTime_ (0.0),
73  applyTime_ (0.0),
74  RelaxValue_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
75  Athresh_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
76  Rthresh_ (Teuchos::ScalarTraits<magnitude_type>::one ())
77 {}
78 
79 
80 template<class MatrixType>
81 RILUK<MatrixType>::RILUK (const Teuchos::RCP<const crs_matrix_type>& Matrix_in)
82  : A_ (Matrix_in),
83  LevelOfFill_ (0),
84  isAllocated_ (false),
85  isInitialized_ (false),
86  isComputed_ (false),
87  isExperimental_ (false),
88  numInitialize_ (0),
89  numCompute_ (0),
90  numApply_ (0),
91  initializeTime_ (0.0),
92  computeTime_ (0.0),
93  applyTime_ (0.0),
94  RelaxValue_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
95  Athresh_ (Teuchos::ScalarTraits<magnitude_type>::zero ()),
96  Rthresh_ (Teuchos::ScalarTraits<magnitude_type>::one ())
97 {}
98 
99 
100 template<class MatrixType>
102 
103 
104 template<class MatrixType>
105 void
106 RILUK<MatrixType>::setMatrix (const Teuchos::RCP<const row_matrix_type>& A)
107 {
108  // It's legal for A to be null; in that case, you may not call
109  // initialize() until calling setMatrix() with a nonnull input.
110  // Regardless, setting the matrix invalidates any previous
111  // factorization.
112  if (A.getRawPtr () != A_.getRawPtr ()) {
113  isAllocated_ = false;
114  isInitialized_ = false;
115  isComputed_ = false;
116  A_local_ = Teuchos::null;
117  Graph_ = Teuchos::null;
118 
119  // The sparse triangular solvers get a triangular factor as their
120  // input matrix. The triangular factors L_ and U_ are getting
121  // reset, so we reset the solvers' matrices to null. Do that
122  // before setting L_ and U_ to null, so that latter step actually
123  // frees the factors.
124  if (! L_solver_.is_null ()) {
125  L_solver_->setMatrix (Teuchos::null);
126  }
127  if (! U_solver_.is_null ()) {
128  U_solver_->setMatrix (Teuchos::null);
129  }
130 
131  L_ = Teuchos::null;
132  U_ = Teuchos::null;
133  D_ = Teuchos::null;
134  A_ = A;
135  }
136 }
137 
138 
139 
140 template<class MatrixType>
143 {
144  TEUCHOS_TEST_FOR_EXCEPTION(
145  L_.is_null (), std::runtime_error, "Ifpack2::RILUK::getL: The L factor "
146  "is null. Please call initialize() (and preferably also compute()) "
147  "before calling this method. If the input matrix has not yet been set, "
148  "you must first call setMatrix() with a nonnull input matrix before you "
149  "may call initialize() or compute().");
150  return *L_;
151 }
152 
153 
154 template<class MatrixType>
155 const Tpetra::Vector<typename RILUK<MatrixType>::scalar_type,
160 {
161  TEUCHOS_TEST_FOR_EXCEPTION(
162  D_.is_null (), std::runtime_error, "Ifpack2::RILUK::getD: The D factor "
163  "(of diagonal entries) is null. Please call initialize() (and "
164  "preferably also compute()) before calling this method. If the input "
165  "matrix has not yet been set, you must first call setMatrix() with a "
166  "nonnull input matrix before you may call initialize() or compute().");
167  return *D_;
168 }
169 
170 
171 template<class MatrixType>
174 {
175  TEUCHOS_TEST_FOR_EXCEPTION(
176  U_.is_null (), std::runtime_error, "Ifpack2::RILUK::getU: The U factor "
177  "is null. Please call initialize() (and preferably also compute()) "
178  "before calling this method. If the input matrix has not yet been set, "
179  "you must first call setMatrix() with a nonnull input matrix before you "
180  "may call initialize() or compute().");
181  return *U_;
182 }
183 
184 
185 template<class MatrixType>
186 Teuchos::RCP<const Tpetra::Map<typename RILUK<MatrixType>::local_ordinal_type,
188  typename RILUK<MatrixType>::node_type> >
190  TEUCHOS_TEST_FOR_EXCEPTION(
191  A_.is_null (), std::runtime_error, "Ifpack2::RILUK::getDomainMap: "
192  "The matrix is null. Please call setMatrix() with a nonnull input "
193  "before calling this method.");
194 
195  // FIXME (mfh 25 Jan 2014) Shouldn't this just come from A_?
196  TEUCHOS_TEST_FOR_EXCEPTION(
197  Graph_.is_null (), std::runtime_error, "Ifpack2::RILUK::getDomainMap: "
198  "The computed graph is null. Please call initialize() before calling "
199  "this method.");
200  return Graph_->getL_Graph ()->getDomainMap ();
201 }
202 
203 
204 template<class MatrixType>
205 Teuchos::RCP<const Tpetra::Map<typename RILUK<MatrixType>::local_ordinal_type,
207  typename RILUK<MatrixType>::node_type> >
209  TEUCHOS_TEST_FOR_EXCEPTION(
210  A_.is_null (), std::runtime_error, "Ifpack2::RILUK::getRangeMap: "
211  "The matrix is null. Please call setMatrix() with a nonnull input "
212  "before calling this method.");
213 
214  // FIXME (mfh 25 Jan 2014) Shouldn't this just come from A_?
215  TEUCHOS_TEST_FOR_EXCEPTION(
216  Graph_.is_null (), std::runtime_error, "Ifpack2::RILUK::getRangeMap: "
217  "The computed graph is null. Please call initialize() before calling "
218  "this method.");
219  return Graph_->getL_Graph ()->getRangeMap ();
220 }
221 
222 
223 template<class MatrixType>
225 {
226  using Teuchos::null;
227  using Teuchos::rcp;
228 
229  if (! isAllocated_) {
230  // Deallocate any existing storage. This avoids storing 2x
231  // memory, since RCP op= does not deallocate until after the
232  // assignment.
233  L_ = null;
234  U_ = null;
235  D_ = null;
236 
237  // Allocate Matrix using ILUK graphs
238  L_ = rcp (new crs_matrix_type (Graph_->getL_Graph ()));
239  U_ = rcp (new crs_matrix_type (Graph_->getU_Graph ()));
240  L_->setAllToScalar (STS::zero ()); // Zero out L and U matrices
241  U_->setAllToScalar (STS::zero ());
242 
243  // FIXME (mfh 24 Jan 2014) This assumes domain == range Map for L and U.
244  L_->fillComplete ();
245  U_->fillComplete ();
246  D_ = rcp (new vec_type (Graph_->getL_Graph ()->getRowMap ()));
247  }
248  isAllocated_ = true;
249 }
250 
251 
252 template<class MatrixType>
253 void
254 RILUK<MatrixType>::
255 setParameters (const Teuchos::ParameterList& params)
256 {
257  using Teuchos::as;
258  using Teuchos::Exceptions::InvalidParameterName;
259  using Teuchos::Exceptions::InvalidParameterType;
260 
261  // Default values of the various parameters.
262  int fillLevel = 0;
263  magnitude_type absThresh = STM::zero ();
264  magnitude_type relThresh = STM::one ();
265  magnitude_type relaxValue = STM::zero ();
266 
267  //
268  // "fact: iluk level-of-fill" parsing is more complicated, because
269  // we want to allow as many types as make sense. int is the native
270  // type, but we also want to accept magnitude_type (for
271  // compatibility with ILUT) and double (for backwards compatibilty
272  // with ILUT).
273  //
274 
275  bool gotFillLevel = false;
276  try {
277  fillLevel = params.get<int> ("fact: iluk level-of-fill");
278  gotFillLevel = true;
279  }
280  catch (InvalidParameterType&) {
281  // Throwing again in the catch block would just unwind the stack.
282  // Instead, we do nothing here, and check the Boolean outside to
283  // see if we got the value.
284  }
285  catch (InvalidParameterName&) {
286  gotFillLevel = true; // Accept the default value.
287  }
288 
289  if (! gotFillLevel) {
290  try {
291  // Try global_ordinal_type. The cast to int must succeed.
292  fillLevel = as<int> (params.get<global_ordinal_type> ("fact: iluk level-of-fill"));
293  gotFillLevel = true;
294  }
295  catch (InvalidParameterType&) {
296  // Try the next type.
297  }
298  // Don't catch InvalidParameterName here; we've already done that above.
299  }
300 
301  if (! gotFillLevel) {
302  try {
303  // Try magnitude_type, for compatibility with ILUT.
304  // The cast from magnitude_type to int must succeed.
305  fillLevel = as<int> (params.get<magnitude_type> ("fact: iluk level-of-fill"));
306  gotFillLevel = true;
307  }
308  catch (InvalidParameterType&) {
309  // Try the next type.
310  }
311  // Don't catch InvalidParameterName here; we've already done that above.
312  }
313 
314  if (! gotFillLevel) {
315  try {
316  // Try double, for compatibility with ILUT.
317  // The cast from double to int must succeed.
318  fillLevel = as<int> (params.get<double> ("fact: iluk level-of-fill"));
319  gotFillLevel = true;
320  }
321  catch (InvalidParameterType& e) {
322  // We're out of options. The user gave us the parameter, but it
323  // doesn't have the right type. The best thing for us to do in
324  // that case is to throw, telling the user to use the right
325  // type.
326  throw e;
327  }
328  // Don't catch InvalidParameterName here; we've already done that above.
329  }
330 
331  TEUCHOS_TEST_FOR_EXCEPTION(
332  ! gotFillLevel,
333  std::logic_error,
334  "Ifpack2::RILUK::setParameters: We should never get here! "
335  "The method should either have read the \"fact: iluk level-of-fill\" "
336  "parameter by this point, or have thrown an exception. "
337  "Please let the Ifpack2 developers know about this bug.");
338 
339  //
340  // For the other parameters, we prefer magnitude_type, but allow
341  // double for backwards compatibility.
342  //
343 
344  try {
345  absThresh = params.get<magnitude_type> ("fact: absolute threshold");
346  }
347  catch (InvalidParameterType&) {
348  // Try double, for backwards compatibility.
349  // The cast from double to magnitude_type must succeed.
350  absThresh = as<magnitude_type> (params.get<double> ("fact: absolute threshold"));
351  }
352  catch (InvalidParameterName&) {
353  // Accept the default value.
354  }
355 
356  try {
357  relThresh = params.get<magnitude_type> ("fact: relative threshold");
358  }
359  catch (InvalidParameterType&) {
360  // Try double, for backwards compatibility.
361  // The cast from double to magnitude_type must succeed.
362  relThresh = as<magnitude_type> (params.get<double> ("fact: relative threshold"));
363  }
364  catch (InvalidParameterName&) {
365  // Accept the default value.
366  }
367 
368  try {
369  relaxValue = params.get<magnitude_type> ("fact: relax value");
370  }
371  catch (InvalidParameterType&) {
372  // Try double, for backwards compatibility.
373  // The cast from double to magnitude_type must succeed.
374  relaxValue = as<magnitude_type> (params.get<double> ("fact: relax value"));
375  }
376  catch (InvalidParameterName&) {
377  // Accept the default value.
378  }
379 
380  //Experimental
381  //Note: just following RILUK original style.
382  //Do not think catch is the method for this. JDB
383 #ifdef IFPACK2_ILUK_EXPERIMENTAL
384  try {
385  isExperimental_ = params.get<bool> ("fact: iluk experimental basker");
386  }
387  catch (InvalidParameterType&) {
388  //Use default
389  }
390  catch (InvalidParameterName&) {
391  //Use default
392  }
393 
394  try {
395  basker_threads = params.get<int> ("fact: iluk experimental basker threads");
396  }
397  catch (InvalidParameterType&) {
398  basker_threads = 1;
399  }
400  catch (InvalidParameterName&) {
401  basker_threads = 1;
402  }
403 
404  try {
405  basker_user_fill = (scalar_type) params.get<double>("fact: iluk experimental basker user_fill");
406  }
407  catch (InvalidParameterType&) {
408  basker_user_fill = (scalar_type) 0;
409  }
410  catch (InvalidParameterName&) {
411  basker_user_fill = (scalar_type) 0;
412  }
413 
414 # ifdef IFPACK2_HTS_EXPERIMENTAL
415  use_hts_ = false;
416  hts_nthreads_ = basker_threads;
417  {
418  const std::string name("fact: iluk experimental HTS");
419  if (params.isType<bool> (name))
420  use_hts_ = params.get<bool> (name);
421  }
422  {
423  const std::string name("fact: iluk experimental HTS threads");
424  if (params.isType<int> (name))
425  hts_nthreads_ = params.get<int> (name);
426  }
427 # endif
428 #endif
429 
430  // "Commit" the values only after validating all of them. This
431  // ensures that there are no side effects if this routine throws an
432  // exception.
433 
434  LevelOfFill_ = fillLevel;
435 
436  // mfh 28 Nov 2012: The previous code would not assign Athresh_,
437  // Rthresh_, or RelaxValue_, if the read-in value was -1. I don't
438  // know if keeping this behavior is correct, but I'll keep it just
439  // so as not to change previous behavior.
440 
441  if (absThresh != -STM::one ()) {
442  Athresh_ = absThresh;
443  }
444  if (relThresh != -STM::one ()) {
445  Rthresh_ = relThresh;
446  }
447  if (relaxValue != -STM::one ()) {
448  RelaxValue_ = relaxValue;
449  }
450 }
451 
452 
453 template<class MatrixType>
454 Teuchos::RCP<const typename RILUK<MatrixType>::row_matrix_type>
456  return Teuchos::rcp_implicit_cast<const row_matrix_type> (A_);
457 }
458 
459 
460 template<class MatrixType>
461 Teuchos::RCP<const typename RILUK<MatrixType>::crs_matrix_type>
463  return Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_, true);
464 }
465 
466 
467 template<class MatrixType>
468 Teuchos::RCP<const typename RILUK<MatrixType>::row_matrix_type>
469 RILUK<MatrixType>::makeLocalFilter (const Teuchos::RCP<const row_matrix_type>& A)
470 {
471  using Teuchos::RCP;
472  using Teuchos::rcp;
473  using Teuchos::rcp_dynamic_cast;
474  using Teuchos::rcp_implicit_cast;
475 
476  // If A_'s communicator only has one process, or if its column and
477  // row Maps are the same, then it is already local, so use it
478  // directly.
479  if (A->getRowMap ()->getComm ()->getSize () == 1 ||
480  A->getRowMap ()->isSameAs (* (A->getColMap ()))) {
481  return A;
482  }
483 
484  // If A_ is already a LocalFilter, then use it directly. This
485  // should be the case if RILUK is being used through
486  // AdditiveSchwarz, for example.
487  RCP<const LocalFilter<row_matrix_type> > A_lf_r =
488  rcp_dynamic_cast<const LocalFilter<row_matrix_type> > (A);
489  if (! A_lf_r.is_null ()) {
490  return rcp_implicit_cast<const row_matrix_type> (A_lf_r);
491  }
492  else {
493  // A_'s communicator has more than one process, its row Map and
494  // its column Map differ, and A_ is not a LocalFilter. Thus, we
495  // have to wrap it in a LocalFilter.
496  return rcp (new LocalFilter<row_matrix_type> (A));
497  }
498 }
499 
500 
501 template<class MatrixType>
503 {
504  using Teuchos::RCP;
505  using Teuchos::rcp;
506  using Teuchos::rcp_const_cast;
507  using Teuchos::rcp_dynamic_cast;
508  using Teuchos::rcp_implicit_cast;
509  typedef Tpetra::CrsGraph<local_ordinal_type,
511  node_type> crs_graph_type;
512  const char prefix[] = "Ifpack2::RILUK::initialize: ";
513 
514  TEUCHOS_TEST_FOR_EXCEPTION
515  (A_.is_null (), std::runtime_error, prefix << "The matrix is null. Please "
516  "call setMatrix() with a nonnull input before calling this method.");
517  TEUCHOS_TEST_FOR_EXCEPTION
518  (! A_->isFillComplete (), std::runtime_error, prefix << "The matrix is not "
519  "fill complete. You may not invoke initialize() or compute() with this "
520  "matrix until the matrix is fill complete. If your matrix is a "
521  "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
522  "range Maps, if appropriate) before calling this method.");
523 
524  Teuchos::Time timer ("RILUK::initialize");
525  { // Start timing
526  Teuchos::TimeMonitor timeMon (timer);
527 
528  // Calling initialize() means that the user asserts that the graph
529  // of the sparse matrix may have changed. We must not just reuse
530  // the previous graph in that case.
531  //
532  // Regarding setting isAllocated_ to false: Eventually, we may want
533  // some kind of clever memory reuse strategy, but it's always
534  // correct just to blow everything away and start over.
535  isInitialized_ = false;
536  isAllocated_ = false;
537  isComputed_ = false;
538  Graph_ = Teuchos::null;
539 
540  A_local_ = makeLocalFilter (A_);
541  TEUCHOS_TEST_FOR_EXCEPTION(
542  A_local_.is_null (), std::logic_error, "Ifpack2::RILUK::initialize: "
543  "makeLocalFilter returned null; it failed to compute A_local. "
544  "Please report this bug to the Ifpack2 developers.");
545 
546  // FIXME (mfh 24 Jan 2014, 26 Mar 2014) It would be more efficient
547  // to rewrite RILUK so that it works with any RowMatrix input, not
548  // just CrsMatrix. (That would require rewriting IlukGraph to
549  // handle a Tpetra::RowGraph.) However, to make it work for now,
550  // we just copy the input matrix if it's not a CrsMatrix.
551  {
552  RCP<const crs_matrix_type> A_local_crs =
553  rcp_dynamic_cast<const crs_matrix_type> (A_local_);
554  if (A_local_crs.is_null ()) {
555  // FIXME (mfh 24 Jan 2014) It would be smarter to count up the
556  // number of elements in each row of A_local, so that we can
557  // create A_local_crs_nc using static profile. The code below is
558  // correct but potentially slow.
559  RCP<crs_matrix_type> A_local_crs_nc =
560  rcp (new crs_matrix_type (A_local_->getRowMap (),
561  A_local_->getColMap (), 0));
562  // FIXME (mfh 24 Jan 2014) This Import approach will only work
563  // if A_ has a one-to-one row Map. This is generally the case
564  // with matrices given to Ifpack2.
565  //
566  // Source and destination Maps are the same in this case.
567  // That way, the Import just implements a copy.
568  typedef Tpetra::Import<local_ordinal_type, global_ordinal_type,
569  node_type> import_type;
570  import_type import (A_local_->getRowMap (), A_local_->getRowMap ());
571  A_local_crs_nc->doImport (*A_local_, import, Tpetra::REPLACE);
572  A_local_crs_nc->fillComplete (A_local_->getDomainMap (), A_local_->getRangeMap ());
573  A_local_crs = rcp_const_cast<const crs_matrix_type> (A_local_crs_nc);
574  }
575  Graph_ = rcp (new Ifpack2::IlukGraph<crs_graph_type> (A_local_crs->getCrsGraph (),
576  LevelOfFill_, 0));
577 #ifdef IFPACK2_ILUK_EXPERIMENTAL
578  if (isExperimental_) A_local_crs_ = A_local_crs;
579 #endif
580  }
581 
582  if(!isExperimental_)
583  {
584  Graph_->initialize ();
585  allocate_L_and_U ();
586  checkOrderingConsistency (*A_local_);
587  // Do not call initAllValues. compute() always calls initAllValues to
588  // fill L and U with possibly new numbers. initialize() is concerned
589  // only with the nonzero pattern.
590  }
591  else
592  {
593 #ifdef IFPACK2_ILUK_EXPERIMENTAL
594  typedef typename node_type::device_type kokkos_device;
595  typedef typename kokkos_device::execution_space kokkos_exe;
596  typedef typename crs_matrix_type::local_matrix_type kokkos_csr_matrix;
597 
598  static_assert( std::is_same< kokkos_exe,
599  Kokkos::OpenMP>::value,
600  "Kokkos node type not supported by experimental thread basker RILUK");
601 
602 
603  myBasker = rcp( new BaskerNS::Basker<local_ordinal_type, scalar_type, Kokkos::OpenMP>);
604  myBasker->Options.no_pivot = true;
605  myBasker->Options.transpose = true; //CRS not CCS
606  myBasker->Options.symmetric = false;
607  myBasker->Options.realloc = true;
608  myBasker->Options.btf = false;
609  myBasker->Options.incomplete = true;
610  myBasker->Options.inc_lvl = LevelOfFill_;
611  myBasker->Options.user_fill = basker_user_fill;
612  myBasker->Options.same_pattern = false;
613  myBasker->SetThreads(basker_threads);
614  basker_reuse = false;
615 
616  kokkos_csr_matrix kcsr = A_local_crs_->getLocalMatrix();
617 
618  local_ordinal_type* r_ptr;
619  r_ptr = new local_ordinal_type[(local_ordinal_type)A_local_crs_->getNodeNumRows()+1];
620 
621  //Still need to convert
622  //Lost on why Trilinos uses such odd types for row_ptrs
623  for(local_ordinal_type bi = 0;
624  bi < A_local_crs_->getNodeNumRows()+1; bi++)
625  {
626  r_ptr[bi] = (local_ordinal_type)kcsr.graph.row_map(bi);
627  }
628 
629  int basker_error =
630  myBasker->Symbolic(
631  ((local_ordinal_type)A_local_crs_->getNodeNumRows()),
632  ((local_ordinal_type)A_local_crs_->getNodeNumCols()),
633  ((local_ordinal_type)A_local_crs_->getNodeNumEntries()),
634  r_ptr,
635  &(kcsr.graph.entries(0)),
636  &(kcsr.values(0)));
637 
638  TEUCHOS_TEST_FOR_EXCEPTION(
639  basker_error != 0, std::logic_error, "Ifpack2::RILUK::initialize:"
640  "Error in basker Symbolic");
641 
642 
643 #else
644  TEUCHOS_TEST_FOR_EXCEPTION(
645  0==0, std::logic_error, "Ifpack2::RILUK::initialize: "
646  "Using experimental ILUK without compiling experimental "
647  "Try again with -DIFPACK2_ILUK_EXPERIMENAL.");
648 #endif
649  }
650 
651  } // Stop timing
652 
653  isInitialized_ = true;
654  ++numInitialize_;
655  initializeTime_ += timer.totalElapsedTime ();
656 }
657 
658 #ifdef IFPACK2_ILUK_EXPERIMENTAL
659 // Basker needs to refresh numbers using A_local_crs_, not just A_local_.
660 template<class MatrixType>
661 void
663 initLocalCrs ()
664 {
665  using Teuchos::RCP;
666  using Teuchos::rcp;
667  using Teuchos::rcp_dynamic_cast;
668  using Teuchos::rcp_implicit_cast;
669  using Teuchos::rcp_const_cast;
670 
671  RCP<crs_matrix_type> A_local_crs_nc = rcp_const_cast<crs_matrix_type> (A_local_crs_);
672  A_local_crs_ = rcp_dynamic_cast<const crs_matrix_type> (A_local_);
673  if (A_local_crs_.is_null ()) {
674  Teuchos::Array<local_ordinal_type> lclColInds;
675  Teuchos::Array<scalar_type> vals;
676  A_local_crs_nc->resumeFill();
677  for (size_t i = 0; i < A_local_crs_nc->getNodeNumRows(); ++i) {
678  size_t numEnt;
679  const auto nc = A_local_->getNumEntriesInLocalRow(i);
680  if (static_cast<size_t>(lclColInds.size()) < nc) {
681  lclColInds.resize(nc);
682  vals.resize(nc);
683  }
684  A_local_->getLocalRowCopy(i, lclColInds(), vals(), numEnt);
685  A_local_crs_nc->replaceLocalValues(i, lclColInds.view(0, numEnt), vals.view(0, numEnt));
686  }
687  A_local_crs_nc->fillComplete();
688  }
689  A_local_crs_ = rcp_const_cast<const crs_matrix_type> (A_local_crs_nc);
690 }
691 #endif
692 
693 template<class MatrixType>
694 void
695 RILUK<MatrixType>::
696 checkOrderingConsistency (const row_matrix_type& A)
697 {
698  // First check that the local row map ordering is the same as the local portion of the column map.
699  // The extraction of the strictly lower/upper parts of A, as well as the factorization,
700  // implicitly assume that this is the case.
701  Teuchos::ArrayView<const global_ordinal_type> rowGIDs = A.getRowMap()->getNodeElementList();
702  Teuchos::ArrayView<const global_ordinal_type> colGIDs = A.getColMap()->getNodeElementList();
703  bool gidsAreConsistentlyOrdered=true;
704  global_ordinal_type indexOfInconsistentGID=0;
705  for (global_ordinal_type i=0; i<rowGIDs.size(); ++i) {
706  if (rowGIDs[i] != colGIDs[i]) {
707  gidsAreConsistentlyOrdered=false;
708  indexOfInconsistentGID=i;
709  break;
710  }
711  }
712  TEUCHOS_TEST_FOR_EXCEPTION(gidsAreConsistentlyOrdered==false, std::runtime_error,
713  "The ordering of the local GIDs in the row and column maps is not the same"
714  << std::endl << "at index " << indexOfInconsistentGID
715  << ". Consistency is required, as all calculations are done with"
716  << std::endl << "local indexing.");
717 }
718 
719 template<class MatrixType>
720 void
721 RILUK<MatrixType>::
722 initAllValues (const row_matrix_type& A)
723 {
724  using Teuchos::ArrayRCP;
725  using Teuchos::Comm;
726  using Teuchos::ptr;
727  using Teuchos::RCP;
728  using Teuchos::REDUCE_SUM;
729  using Teuchos::reduceAll;
730  typedef Tpetra::Map<local_ordinal_type,global_ordinal_type,node_type> map_type;
731 
732  size_t NumIn = 0, NumL = 0, NumU = 0;
733  bool DiagFound = false;
734  size_t NumNonzeroDiags = 0;
735  size_t MaxNumEntries = A.getGlobalMaxNumRowEntries();
736 
737  // Allocate temporary space for extracting the strictly
738  // lower and upper parts of the matrix A.
739  Teuchos::Array<local_ordinal_type> InI(MaxNumEntries);
740  Teuchos::Array<local_ordinal_type> LI(MaxNumEntries);
741  Teuchos::Array<local_ordinal_type> UI(MaxNumEntries);
742  Teuchos::Array<scalar_type> InV(MaxNumEntries);
743  Teuchos::Array<scalar_type> LV(MaxNumEntries);
744  Teuchos::Array<scalar_type> UV(MaxNumEntries);
745 
746  // Check if values should be inserted or replaced
747  const bool ReplaceValues = L_->isStaticGraph () || L_->isLocallyIndexed ();
748 
749  L_->resumeFill ();
750  U_->resumeFill ();
751  if (ReplaceValues) {
752  L_->setAllToScalar (STS::zero ()); // Zero out L and U matrices
753  U_->setAllToScalar (STS::zero ());
754  }
755 
756  D_->putScalar (STS::zero ()); // Set diagonal values to zero
757  ArrayRCP<scalar_type> DV = D_->get1dViewNonConst (); // Get view of diagonal
758 
759  RCP<const map_type> rowMap = L_->getRowMap ();
760 
761  // First we copy the user's matrix into L and U, regardless of fill level.
762  // It is important to note that L and U are populated using local indices.
763  // This means that if the row map GIDs are not monotonically increasing
764  // (i.e., permuted or gappy), then the strictly lower (upper) part of the
765  // matrix is not the one that you would get if you based L (U) on GIDs.
766  // This is ok, as the *order* of the GIDs in the rowmap is a better
767  // expression of the user's intent than the GIDs themselves.
768 
769  Teuchos::ArrayView<const global_ordinal_type> nodeGIDs = rowMap->getNodeElementList();
770  for (size_t myRow=0; myRow<A.getNodeNumRows(); ++myRow) {
771  local_ordinal_type local_row = myRow;
772 
773  //TODO JJH 4April2014 An optimization is to use getLocalRowView. Not all matrices support this,
774  // we'd need to check via the Tpetra::RowMatrix method supportsRowViews().
775  A.getLocalRowCopy (local_row, InI(), InV(), NumIn); // Get Values and Indices
776 
777  // Split into L and U (we don't assume that indices are ordered).
778 
779  NumL = 0;
780  NumU = 0;
781  DiagFound = false;
782 
783  for (size_t j = 0; j < NumIn; ++j) {
784  const local_ordinal_type k = InI[j];
785 
786  if (k == local_row) {
787  DiagFound = true;
788  // Store perturbed diagonal in Tpetra::Vector D_
789  DV[local_row] += Rthresh_ * InV[j] + IFPACK2_SGN(InV[j]) * Athresh_;
790  }
791  else if (k < 0) { // Out of range
792  TEUCHOS_TEST_FOR_EXCEPTION(
793  true, std::runtime_error, "Ifpack2::RILUK::initAllValues: current "
794  "GID k = " << k << " < 0. I'm not sure why this is an error; it is "
795  "probably an artifact of the undocumented assumptions of the "
796  "original implementation (likely copied and pasted from Ifpack). "
797  "Nevertheless, the code I found here insisted on this being an error "
798  "state, so I will throw an exception here.");
799  }
800  else if (k < local_row) {
801  LI[NumL] = k;
802  LV[NumL] = InV[j];
803  NumL++;
804  }
805  else if (Teuchos::as<size_t>(k) <= rowMap->getNodeNumElements()) {
806  UI[NumU] = k;
807  UV[NumU] = InV[j];
808  NumU++;
809  }
810  }
811 
812  // Check in things for this row of L and U
813 
814  if (DiagFound) {
815  ++NumNonzeroDiags;
816  } else {
817  DV[local_row] = Athresh_;
818  }
819 
820  if (NumL) {
821  if (ReplaceValues) {
822  L_->replaceLocalValues(local_row, LI(0, NumL), LV(0,NumL));
823  } else {
824  //FIXME JJH 24April2014 Is this correct? I believe this case is when there aren't already values
825  //FIXME in this row in the column locations corresponding to UI.
826  L_->insertLocalValues(local_row, LI(0,NumL), LV(0,NumL));
827  }
828  }
829 
830  if (NumU) {
831  if (ReplaceValues) {
832  U_->replaceLocalValues(local_row, UI(0,NumU), UV(0,NumU));
833  } else {
834  //FIXME JJH 24April2014 Is this correct? I believe this case is when there aren't already values
835  //FIXME in this row in the column locations corresponding to UI.
836  U_->insertLocalValues(local_row, UI(0,NumU), UV(0,NumU));
837  }
838  }
839  }
840 
841  // At this point L and U have the values of A in the structure of L
842  // and U, and diagonal vector D
843 
844  isInitialized_ = true;
845 }
846 
847 
848 template<class MatrixType>
850 {
851  using Teuchos::rcp;
852  const char prefix[] = "Ifpack2::RILUK::compute: ";
853 
854  // initialize() checks this too, but it's easier for users if the
855  // error shows them the name of the method that they actually
856  // called, rather than the name of some internally called method.
857  TEUCHOS_TEST_FOR_EXCEPTION
858  (A_.is_null (), std::runtime_error, prefix << "The matrix is null. Please "
859  "call setMatrix() with a nonnull input before calling this method.");
860  TEUCHOS_TEST_FOR_EXCEPTION
861  (! A_->isFillComplete (), std::runtime_error, prefix << "The matrix is not "
862  "fill complete. You may not invoke initialize() or compute() with this "
863  "matrix until the matrix is fill complete. If your matrix is a "
864  "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and "
865  "range Maps, if appropriate) before calling this method.");
866 
867  if (! isInitialized ()) {
868  initialize (); // Don't count this in the compute() time
869  }
870 
871  Teuchos::Time timer ("RILUK::compute");
872  if ( ! isExperimental_) {
873  // Start timing
874  Teuchos::TimeMonitor timeMon (timer);
875 
876  isComputed_ = false;
877 
878  // Fill L and U with numbers. This supports nonzero pattern reuse by calling
879  // initialize() once and then compute() multiple times.
880  initAllValues (*A_local_);
881 
882  // MinMachNum should be officially defined, for now pick something a little
883  // bigger than IEEE underflow value
884 
885  const scalar_type MinDiagonalValue = STS::rmin ();
886  const scalar_type MaxDiagonalValue = STS::one () / MinDiagonalValue;
887 
888  size_t NumIn, NumL, NumU;
889 
890  // Get Maximum Row length
891  const size_t MaxNumEntries =
892  L_->getNodeMaxNumRowEntries () + U_->getNodeMaxNumRowEntries () + 1;
893 
894  Teuchos::Array<local_ordinal_type> InI(MaxNumEntries); // Allocate temp space
895  Teuchos::Array<scalar_type> InV(MaxNumEntries);
896  size_t num_cols = U_->getColMap()->getNodeNumElements();
897  Teuchos::Array<int> colflag(num_cols);
898 
899  Teuchos::ArrayRCP<scalar_type> DV = D_->get1dViewNonConst(); // Get view of diagonal
900 
901  // Now start the factorization.
902 
903  // Need some integer workspace and pointers
904  size_t NumUU;
905  Teuchos::ArrayView<const local_ordinal_type> UUI;
906  Teuchos::ArrayView<const scalar_type> UUV;
907  for (size_t j = 0; j < num_cols; ++j) {
908  colflag[j] = -1;
909  }
910 
911  for (size_t i = 0; i < L_->getNodeNumRows (); ++i) {
912  local_ordinal_type local_row = i;
913 
914  // Fill InV, InI with current row of L, D and U combined
915 
916  NumIn = MaxNumEntries;
917  L_->getLocalRowCopy (local_row, InI (), InV (), NumL);
918 
919  InV[NumL] = DV[i]; // Put in diagonal
920  InI[NumL] = local_row;
921 
922  U_->getLocalRowCopy (local_row, InI (NumL+1, MaxNumEntries-NumL-1),
923  InV (NumL+1, MaxNumEntries-NumL-1), NumU);
924  NumIn = NumL+NumU+1;
925 
926  // Set column flags
927  for (size_t j = 0; j < NumIn; ++j) {
928  colflag[InI[j]] = j;
929  }
930 
931  scalar_type diagmod = STS::zero (); // Off-diagonal accumulator
932 
933  for (size_t jj = 0; jj < NumL; ++jj) {
934  local_ordinal_type j = InI[jj];
935  scalar_type multiplier = InV[jj]; // current_mults++;
936 
937  InV[jj] *= DV[j];
938 
939  U_->getLocalRowView(j, UUI, UUV); // View of row above
940  NumUU = UUI.size();
941 
942  if (RelaxValue_ == STM::zero ()) {
943  for (size_t k = 0; k < NumUU; ++k) {
944  const int kk = colflag[UUI[k]];
945  // FIXME (mfh 23 Dec 2013) Wait a second, we just set
946  // colflag above using size_t (which is generally unsigned),
947  // but now we're querying it using int (which is signed).
948  if (kk > -1) {
949  InV[kk] -= multiplier * UUV[k];
950  }
951  }
952  }
953  else {
954  for (size_t k = 0; k < NumUU; ++k) {
955  // FIXME (mfh 23 Dec 2013) Wait a second, we just set
956  // colflag above using size_t (which is generally unsigned),
957  // but now we're querying it using int (which is signed).
958  const int kk = colflag[UUI[k]];
959  if (kk > -1) {
960  InV[kk] -= multiplier*UUV[k];
961  }
962  else {
963  diagmod -= multiplier*UUV[k];
964  }
965  }
966  }
967  }
968  if (NumL) {
969  // Replace current row of L
970  L_->replaceLocalValues (local_row, InI (0, NumL), InV (0, NumL));
971  }
972 
973  DV[i] = InV[NumL]; // Extract Diagonal value
974 
975  if (RelaxValue_ != STM::zero ()) {
976  DV[i] += RelaxValue_*diagmod; // Add off diagonal modifications
977  }
978 
979  if (STS::magnitude (DV[i]) > STS::magnitude (MaxDiagonalValue)) {
980  if (STS::real (DV[i]) < STM::zero ()) {
981  DV[i] = -MinDiagonalValue;
982  }
983  else {
984  DV[i] = MinDiagonalValue;
985  }
986  }
987  else {
988  DV[i] = STS::one () / DV[i]; // Invert diagonal value
989  }
990 
991  for (size_t j = 0; j < NumU; ++j) {
992  InV[NumL+1+j] *= DV[i]; // Scale U by inverse of diagonal
993  }
994 
995  if (NumU) {
996  // Replace current row of L and U
997  U_->replaceLocalValues (local_row, InI (NumL+1, NumU), InV (NumL+1, NumU));
998  }
999 
1000  // Reset column flags
1001  for (size_t j = 0; j < NumIn; ++j) {
1002  colflag[InI[j]] = -1;
1003  }
1004  }
1005 
1006  // The domain of L and the range of U are exactly their own row maps
1007  // (there is no communication). The domain of U and the range of L
1008  // must be the same as those of the original matrix, However if the
1009  // original matrix is a VbrMatrix, these two latter maps are
1010  // translation from a block map to a point map.
1011  // FIXME (mfh 23 Dec 2013) Do we know that the column Map of L_ is
1012  // always one-to-one?
1013  L_->fillComplete (L_->getColMap (), A_local_->getRangeMap ());
1014  U_->fillComplete (A_local_->getDomainMap (), U_->getRowMap ());
1015 
1016  // Validate that the L and U factors are actually lower and upper triangular
1017 
1018  //18-Aug-2016 The following two Teuchos tests-for-exceptions were changed by Massimiliano Lupo Pasini
1019  TEUCHOS_TEST_FOR_EXCEPTION(
1020  0 < L_->getNodeNumRows() &&
1021  ! L_->isLowerTriangular (), std::runtime_error,
1022  "Ifpack2::RILUK::compute: L isn't lower triangular.");
1023  TEUCHOS_TEST_FOR_EXCEPTION(
1024  0 < U_->getNodeNumRows() &&
1025  ! U_->isUpperTriangular (), std::runtime_error,
1026  "Ifpack2::RILUK::compute: U isn't lower triangular.");
1027 
1028  L_solver_ = Teuchos::rcp (new LocalSparseTriangularSolver<row_matrix_type> (L_));
1029  L_solver_->initialize ();
1030  L_solver_->compute ();
1031 
1032  U_solver_ = Teuchos::rcp (new LocalSparseTriangularSolver<row_matrix_type> (U_));
1033  U_solver_->initialize ();
1034  U_solver_->compute ();
1035  }
1036  else {
1037 #ifdef IFPACK2_ILUK_EXPERIMENTAL
1038  Teuchos::Time timerbasker ("RILUK::basercompute");
1039  {
1040  // Start timing
1041  Teuchos::TimeMonitor timeMon (timer);
1042  //
1043  if(basker_reuse == false)
1044  {
1045  //We don't have to reimport Matrix because same
1046  {
1047  //Teuchos::TimeMonitor timeMon(timerbasker);
1048  myBasker->Factor_Inc(0);
1049  basker_reuse = true;
1050  }
1051  }
1052  else
1053  {
1054  //Do we need to import Matrix with file again?
1055  myBasker->Options.same_pattern = true;
1056 
1057  typedef typename crs_matrix_type::local_matrix_type kokkos_csr_matrix;
1058  kokkos_csr_matrix kcsr = A_local_crs_->getLocalMatrix();
1059 
1060 
1061  local_ordinal_type* r_ptr;
1062  r_ptr = new local_ordinal_type[(local_ordinal_type)A_local_crs_->getNodeNumRows()+1];
1063 
1064  //Still need to convert
1065  for(local_ordinal_type bi = 0; bi < A_local_crs_->getNodeNumRows()+1; bi++)
1066  {
1067  r_ptr[bi] = (local_ordinal_type)kcsr.graph.row_map(bi);
1068  }
1069 
1070  int basker_error =
1071  myBasker->Factor(
1072  ((local_ordinal_type)A_local_crs_->getNodeNumRows()),
1073  ((local_ordinal_type)A_local_crs_->getNodeNumCols()),
1074  ((local_ordinal_type)A_local_crs_->getNodeNumEntries()),
1075  r_ptr,
1076  &(kcsr.graph.entries(0)),
1077  &(kcsr.values(0)));
1078 
1079  //myBasker->DEBUG_PRINT();
1080 
1081  TEUCHOS_TEST_FOR_EXCEPTION(
1082  basker_error != 0, std::logic_error, "Ifpack2::RILUK::initialize:"
1083  "Error in basker compute");
1084 
1085 
1086  }
1087  }
1088 # ifdef IFPACK2_HTS_EXPERIMENTAL
1089  //TODO Reuse symbolic information.
1090  if (use_hts_) {
1091  Teuchos::Time basker_getL("basker_getL");
1092  Teuchos::Time hts_buildL ("hts_buildL");
1093  Teuchos::Time basker_getU("basker_getU");
1094  Teuchos::Time hts_buildU ("hts_buildU");
1095 
1096  hts_mgr_ = Teuchos::rcp(new HTSManager());
1097  local_ordinal_type* p, * q;
1098  local_ordinal_type nnz;
1099 
1100  myBasker->GetPerm(&p, &q);
1101  {
1102  HTSData d;
1103  myBasker->GetL(d.n, nnz, &d.ir, &d.jc, &d.v);
1104  d.sort();
1105  typename HTST::CrsMatrix* T = HTST::make_CrsMatrix(d.n, d.ir, d.jc, d.v, true);
1106  hts_mgr_->Limpl = HTST::preprocess(T, 1, hts_nthreads_, true, p, 0);
1107  HTST::delete_CrsMatrix(T);
1108  delete[] p;
1109  }
1110  {
1111  HTSData d;
1112  myBasker->GetU(d.n, nnz, &d.ir, &d.jc, &d.v);
1113  d.sort();
1114  typename HTST::CrsMatrix* T = HTST::make_CrsMatrix(d.n, d.ir, d.jc, d.v, true);
1115  hts_mgr_->Uimpl = HTST::preprocess(T, 1, hts_nthreads_, true, 0, q);
1116  HTST::delete_CrsMatrix(T);
1117  delete[] q;
1118  }
1119  }
1120 # endif
1121 
1122 #else
1123  TEUCHOS_TEST_FOR_EXCEPTION(
1124  false, std::runtime_error,
1125  "Ifpack2::RILUK::compute: experimental not enabled");
1126 #endif
1127  }//end -- if experimental
1128 
1129  isComputed_ = true;
1130  ++numCompute_;
1131  computeTime_ += timer.totalElapsedTime ();
1132 }
1133 
1134 
1135 template<class MatrixType>
1136 void
1138 apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1139  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1140  Teuchos::ETransp mode,
1141  scalar_type alpha,
1142  scalar_type beta) const
1143 {
1144  using Teuchos::RCP;
1145  using Teuchos::rcpFromRef;
1146 
1147  TEUCHOS_TEST_FOR_EXCEPTION(
1148  A_.is_null (), std::runtime_error, "Ifpack2::RILUK::apply: The matrix is "
1149  "null. Please call setMatrix() with a nonnull input, then initialize() "
1150  "and compute(), before calling this method.");
1151  TEUCHOS_TEST_FOR_EXCEPTION(
1152  ! isComputed (), std::runtime_error,
1153  "Ifpack2::RILUK::apply: If you have not yet called compute(), "
1154  "you must call compute() before calling this method.");
1155  TEUCHOS_TEST_FOR_EXCEPTION(
1156  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
1157  "Ifpack2::RILUK::apply: X and Y do not have the same number of columns. "
1158  "X.getNumVectors() = " << X.getNumVectors ()
1159  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
1160  TEUCHOS_TEST_FOR_EXCEPTION(
1161  STS::isComplex && mode == Teuchos::CONJ_TRANS, std::logic_error,
1162  "Ifpack2::RILUK::apply: mode = Teuchos::CONJ_TRANS is not implemented for "
1163  "complex Scalar type. Please talk to the Ifpack2 developers to get this "
1164  "fixed. There is a FIXME in this file about this very issue.");
1165 #ifdef HAVE_IFPACK2_DEBUG
1166  {
1167  const magnitude_type D_nrm1 = D_->norm1 ();
1168  TEUCHOS_TEST_FOR_EXCEPTION( STM::isnaninf (D_nrm1), std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the stored diagonal is NaN or Inf.");
1169  Teuchos::Array<magnitude_type> norms (X.getNumVectors ());
1170  X.norm1 (norms ());
1171  bool good = true;
1172  for (size_t j = 0; j < X.getNumVectors (); ++j) {
1173  if (STM::isnaninf (norms[j])) {
1174  good = false;
1175  break;
1176  }
1177  }
1178  TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the input X is NaN or Inf.");
1179  }
1180 #endif // HAVE_IFPACK2_DEBUG
1181 
1182  const scalar_type one = STS::one ();
1183  const scalar_type zero = STS::zero ();
1184 
1185  Teuchos::Time timer ("RILUK::apply");
1186  { // Start timing
1187  if(!isExperimental_){
1188  Teuchos::TimeMonitor timeMon (timer);
1189  if (alpha == one && beta == zero) {
1190  if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y.
1191  // Start by solving L C = X for C. C must have the same Map
1192  // as D. We have to use a temp multivector, since our
1193  // implementation of triangular solves does not allow its
1194  // input and output to alias one another.
1195  //
1196  // FIXME (mfh 24 Jan 2014) Cache this temp multivector.
1197  MV C (D_->getMap (), X.getNumVectors ());
1198  L_solver_->apply (X, C, mode);
1199 
1200  // Solve D Y_tmp = C. Y_tmp must have the same Map as C, and
1201  // the operation lets us do this in place in C, so we can
1202  // write "solve D C = C for C."
1203  C.elementWiseMultiply (one, *D_, C, zero);
1204 
1205  U_solver_->apply (C, Y, mode); // Solve U Y = C.
1206  }
1207  else { // Solve U^P (D^P (U^P Y)) = X for Y (where P is * or T).
1208 
1209  // Start by solving U^P C = X for C. C must have the same Map
1210  // as D. We have to use a temp multivector, since our
1211  // implementation of triangular solves does not allow its
1212  // input and output to alias one another.
1213  //
1214  // FIXME (mfh 24 Jan 2014) Cache this temp multivector.
1215  MV C (D_->getMap (), X.getNumVectors ());
1216  U_solver_->apply (X, C, mode);
1217 
1218  // Solve D^P Y_tmp = C. Y_tmp must have the same Map as C,
1219  // and the operation lets us do this in place in C, so we can
1220  // write "solve D^P C = C for C."
1221  //
1222  // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we
1223  // need to do an elementwise multiply with the conjugate of
1224  // D_, not just with D_ itself.
1225  C.elementWiseMultiply (one, *D_, C, zero);
1226 
1227  L_solver_->apply (C, Y, mode); // Solve L^P Y = C.
1228  }
1229  }
1230  else { // alpha != 1 or beta != 0
1231  if (alpha == zero) {
1232  if (beta == zero) {
1233  Y.putScalar (zero);
1234  } else {
1235  Y.scale (beta);
1236  }
1237  } else { // alpha != zero
1238  MV Y_tmp (Y.getMap (), Y.getNumVectors ());
1239  apply (X, Y_tmp, mode);
1240  Y.update (alpha, Y_tmp, beta);
1241  }
1242  }
1243  }
1244  else
1245  {
1246 #ifdef IFPACK2_ILUK_EXPERIMENTAL
1247  Teuchos::ArrayRCP<const scalar_type> XX;
1248  Teuchos::ArrayRCP<const scalar_type> YY;
1249  XX = X.get1dView();
1250  YY = Y.get1dView();
1251 
1252 # ifdef IFPACK2_HTS_EXPERIMENTAL
1253  if (use_hts_) {
1254  HTST::solve_omp(hts_mgr_->Limpl, const_cast<scalar_type*>(XX.getRawPtr()),
1255  X.getNumVectors(), const_cast<scalar_type*>(YY.getRawPtr()));
1256  HTST::solve_omp(hts_mgr_->Uimpl, const_cast<scalar_type*>(YY.getRawPtr()),
1257  X.getNumVectors());
1258  } else
1259 # endif
1260  {
1261  myBasker->Solve(((local_ordinal_type)X.getNumVectors()),
1262  (const_cast<scalar_type *>(XX.getRawPtr())),
1263  (const_cast<scalar_type *>(YY.getRawPtr())));
1264  }
1265 #else
1266  TEUCHOS_TEST_FOR_EXCEPTION(
1267  0==1, std::runtime_error,
1268  "Ifpack2::RILUK::apply: Experimental no enabled");
1269 #endif
1270  }//end isExperimental
1271  }//end timing
1272 
1273 #ifdef HAVE_IFPACK2_DEBUG
1274  {
1275  Teuchos::Array<magnitude_type> norms (Y.getNumVectors ());
1276  Y.norm1 (norms ());
1277  bool good = true;
1278  for (size_t j = 0; j < Y.getNumVectors (); ++j) {
1279  if (STM::isnaninf (norms[j])) {
1280  good = false;
1281  break;
1282  }
1283  }
1284  TEUCHOS_TEST_FOR_EXCEPTION( ! good, std::runtime_error, "Ifpack2::RILUK::apply: The 1-norm of the output Y is NaN or Inf.");
1285  }
1286 #endif // HAVE_IFPACK2_DEBUG
1287 
1288  ++numApply_;
1289  applyTime_ = timer.totalElapsedTime ();
1290 }
1291 
1292 
1293 template<class MatrixType>
1295 multiply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1296  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1297  const Teuchos::ETransp mode) const
1298 {
1299  const scalar_type zero = STS::zero ();
1300  const scalar_type one = STS::one ();
1301 
1302  if (mode != Teuchos::NO_TRANS) {
1303  U_->apply (X, Y, mode); //
1304  Y.update (one, X, one); // Y = Y + X (account for implicit unit diagonal)
1305 
1306  // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we need
1307  // to do an elementwise multiply with the conjugate of D_, not
1308  // just with D_ itself.
1309  Y.elementWiseMultiply (one, *D_, Y, zero); // y = D*y (D_ has inverse of diagonal)
1310 
1311  MV Y_tmp (Y, Teuchos::Copy); // Need a temp copy of Y
1312  L_->apply (Y_tmp, Y, mode);
1313  Y.update (one, Y_tmp, one); // (account for implicit unit diagonal)
1314  }
1315  else {
1316  L_->apply (X, Y, mode);
1317  Y.update (one, X, one); // Y = Y + X (account for implicit unit diagonal)
1318  Y.elementWiseMultiply (one, *D_, Y, zero); // y = D*y (D_ has inverse of diagonal)
1319  MV Y_tmp (Y, Teuchos::Copy); // Need a temp copy of Y1
1320  U_->apply (Y_tmp, Y, mode);
1321  Y.update (one, Y_tmp, one); // (account for implicit unit diagonal)
1322  }
1323 }
1324 
1325 
1326 template<class MatrixType>
1328 {
1329  std::ostringstream os;
1330 
1331  // Output is a valid YAML dictionary in flow style. If you don't
1332  // like everything on a single line, you should call describe()
1333  // instead.
1334  os << "\"Ifpack2::RILUK\": {";
1335  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
1336  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
1337 
1338  os << "Level-of-fill: " << getLevelOfFill() << ", ";
1339 
1340  if (A_.is_null ()) {
1341  os << "Matrix: null";
1342  }
1343  else {
1344  os << "Global matrix dimensions: ["
1345  << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
1346  << ", Global nnz: " << A_->getGlobalNumEntries();
1347  }
1348 
1349  os << "}";
1350  return os.str ();
1351 }
1352 
1353 #if defined IFPACK2_ILUK_EXPERIMENTAL && defined IFPACK2_HTS_EXPERIMENTAL
1354 template<class MatrixType>
1356  : Limpl(0), Uimpl(0)
1357 {}
1358 
1359 template<class MatrixType>
1360 RILUK<MatrixType>::HTSManager::~HTSManager ()
1361 {
1362  HTST::delete_Impl(Limpl);
1363  HTST::delete_Impl(Uimpl);
1364 }
1365 
1366 template<class MatrixType>
1367 RILUK<MatrixType>::HTSData::HTSData ()
1368  : jc(0), ir(0), v(0)
1369 {}
1370 
1371 template<class MatrixType>
1372 RILUK<MatrixType>::HTSData::~HTSData ()
1373 {
1374  free_CrsMatrix_data();
1375 }
1376 
1377 template<class MatrixType>
1378 void RILUK<MatrixType>::HTSData::free_CrsMatrix_data ()
1379 {
1380  if (jc) delete[] jc;
1381  if (ir) delete[] ir;
1382  if (v) delete[] v;
1383  jc = ir = 0;
1384  v = 0;
1385 }
1386 
1387 template<class MatrixType>
1388 void RILUK<MatrixType>::HTSData::sort ()
1389 {
1390  if ( ! ir || ! jc) return;
1391  std::vector<Entry> es;
1392  for (local_ordinal_type i = 0; i < n; ++i) {
1393  es.resize(ir[i+1] - ir[i]);
1394  const local_ordinal_type os = ir[i];
1395  for (local_ordinal_type j = 0; j < ir[i+1] - os; ++j) {
1396  es[j].j = jc[os+j];
1397  es[j].v = v[os+j];
1398  }
1399  std::sort(es.begin(), es.end());
1400  for (local_ordinal_type j = 0; j < ir[i+1] - os; ++j) {
1401  jc[os+j] = es[j].j;
1402  v[os+j] = es[j].v;
1403  }
1404  }
1405 }
1406 #endif
1407 
1408 } // namespace Ifpack2
1409 
1410 #define IFPACK2_RILUK_INSTANT(S,LO,GO,N) \
1411  template class Ifpack2::RILUK< Tpetra::RowMatrix<S, LO, GO, N> >;
1412 
1413 #endif
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:272
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_RILUK_decl.hpp:284
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_RILUK_def.hpp:106
ILU(k) factorization of a given Tpetra::RowMatrix.
Definition: Ifpack2_RILUK_decl.hpp:254
Tpetra::CrsMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > crs_matrix_type
Tpetra::CrsMatrix specialization used by this class for representing L and U.
Definition: Ifpack2_RILUK_decl.hpp:287
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:275
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:269
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_RILUK_def.hpp:189
LinearOp zero(const VectorSpace &vs)
"Preconditioner" that solves local sparse triangular systems.
Definition: Ifpack2_LocalSparseTriangularSolver_decl.hpp:83
Construct a level filled graph for use in computing an ILU(k) incomplete factorization.
Definition: Ifpack2_IlukGraph.hpp:97
Tpetra::global_size_t getGlobalNumEntries() const
Returns the number of nonzero entries in the global graph.
Definition: Ifpack2_RILUK_decl.hpp:524
Definition: Ifpack2_Container.hpp:761
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:72
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_RILUK_def.hpp:208
Tpetra::RowMatrix< MatrixType::scalar_type, MatrixType::local_ordinal_type, MatrixType::global_ordinal_type, MatrixType::node_type > ::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_RILUK_decl.hpp:266