43#ifndef IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
44#define IFPACK2_ADDITIVE_SCHWARZ_FILTER_DEF_HPP
46#include "Ifpack2_Details_AdditiveSchwarzFilter_decl.hpp"
47#include "KokkosKernels_Sorting.hpp"
48#include "KokkosSparse_SortCrs.hpp"
49#include "Kokkos_Bitset.hpp"
56template<
class MatrixType>
59 const Teuchos::ArrayRCP<local_ordinal_type>& perm,
60 const Teuchos::ArrayRCP<local_ordinal_type>& reverseperm,
61 bool filterSingletons)
63 setup(A_unfiltered, perm, reverseperm, filterSingletons);
66template<
class MatrixType>
68setup(
const Teuchos::RCP<const row_matrix_type>& A_unfiltered,
69 const Teuchos::ArrayRCP<local_ordinal_type>& perm,
70 const Teuchos::ArrayRCP<local_ordinal_type>& reverseperm,
71 bool filterSingletons)
75 using Teuchos::rcp_dynamic_cast;
79 TEUCHOS_TEST_FOR_EXCEPTION(
80 !rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered) &&
81 !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered),
82 std::invalid_argument,
83 "Ifpack2::Details::AdditiveSchwarzFilter: The input matrix must be a Tpetra::CrsMatrix or an Ifpack2::OverlappingRowMatrix");
84 A_unfiltered_ = A_unfiltered;
85 local_matrix_type A_local, A_halo;
86 bool haveHalo = !rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_).is_null();
89 auto A_overlapping = rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_);
90 A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
91 A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
95 auto A_crs = rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered_);
96 A_local = A_crs->getLocalMatrixDevice();
100 TEUCHOS_TEST_FOR_EXCEPTION(
101 perm.size() != reverseperm.size(),
102 std::invalid_argument,
103 "Ifpack2::Details::AdditiveSchwarzFilter: perm and reverseperm should be the same length");
104 TEUCHOS_TEST_FOR_EXCEPTION(
105 (
size_t) perm.size() != (
size_t) A_unfiltered_->getLocalNumRows(),
106 std::invalid_argument,
107 "Ifpack2::Details::AdditiveSchwarzFilter: length of perm and reverseperm should be the same as the number of local rows in unfiltered A");
109 FilterSingletons_ = filterSingletons;
110 local_ordinal_type numLocalRows;
111 local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
112 if(FilterSingletons_)
115 singletons_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"singletons_"), totalLocalRows);
116 singletonDiagonals_ = Kokkos::DualView<impl_scalar_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"singletonDiagonals_"), totalLocalRows);
117 auto singletonsDevice = singletons_.view_device();
118 singletons_.modify_device();
119 auto singletonDiagonalsDevice = singletonDiagonals_.view_device();
120 singletonDiagonals_.modify_device();
121 local_ordinal_type numSingletons;
122 Kokkos::Bitset<device_type> isSingletonBitset(totalLocalRows);
123 Kokkos::parallel_scan(policy_type(0, totalLocalRows),
124 KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lnumSingletons,
bool finalPass)
126 bool isSingleton =
true;
127 impl_scalar_type singletonValue = Kokkos::ArithTraits<impl_scalar_type>::zero();
128 if(i < A_local.numRows())
131 size_type rowBegin = A_local.graph.row_map(i);
132 size_type rowEnd = A_local.graph.row_map(i + 1);
133 for(size_type j = rowBegin; j < rowEnd; j++)
135 local_ordinal_type entry = A_local.graph.entries(j);
136 if(entry < totalLocalRows && entry != i)
141 if(finalPass && entry == i)
144 singletonValue += A_local.values(j);
151 local_ordinal_type row = i - A_local.numRows();
152 size_type rowBegin = A_halo.graph.row_map(row);
153 size_type rowEnd = A_halo.graph.row_map(row + 1);
154 for(size_type j = rowBegin; j < rowEnd; j++)
156 local_ordinal_type entry = A_halo.graph.entries(j);
157 if(entry < totalLocalRows && entry != i)
162 if(finalPass && entry == i)
164 singletonValue += A_halo.values(j);
172 isSingletonBitset.set(i);
173 singletonsDevice(lnumSingletons) = i;
174 singletonDiagonalsDevice(lnumSingletons) = singletonValue;
179 numSingletons_ = numSingletons;
181 numLocalRows = totalLocalRows - numSingletons_;
183 perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_"), totalLocalRows);
184 perm_.modify_device();
185 reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_"), numLocalRows);
186 reverseperm_.modify_device();
187 auto permView = perm_.view_device();
188 auto reversepermView = reverseperm_.view_device();
190 Kokkos::View<local_ordinal_type*, device_type> origpermView(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"input reverse perm"), totalLocalRows);
191 Kokkos::View<local_ordinal_type*, Kokkos::HostSpace> origpermHost(reverseperm.get(), totalLocalRows);
192 Kokkos::deep_copy(execution_space(), origpermView, origpermHost);
194 Kokkos::parallel_scan(policy_type(0, totalLocalRows),
195 KOKKOS_LAMBDA(local_ordinal_type i, local_ordinal_type& lindex,
bool finalPass)
197 local_ordinal_type origRow = origpermView(i);
198 if(!isSingletonBitset.test(origRow))
203 reversepermView(lindex) = origRow;
204 permView(origRow) = lindex;
212 permView(origRow) = local_ordinal_type(-1);
216 reverseperm_.sync_host();
221 perm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"perm_"), perm.size());
223 auto permHost = perm_.view_host();
224 for(
size_t i = 0; i < (size_t) perm.size(); i++)
225 permHost(i) = perm[i];
227 reverseperm_ = Kokkos::DualView<local_ordinal_type*, device_type>(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"reverseperm_"), reverseperm.size());
228 reverseperm_.modify_host();
229 auto reversepermHost = reverseperm_.view_host();
230 for(
size_t i = 0; i < (size_t) reverseperm.size(); i++)
231 reversepermHost(i) = reverseperm[i];
232 reverseperm_.sync_device();
234 numLocalRows = totalLocalRows;
236 auto permView = perm_.view_device();
237 auto reversepermView = reverseperm_.view_device();
240 row_map_type localRowptrs(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Filtered rowptrs"), numLocalRows + 1);
241 Kokkos::parallel_for(policy_type(0, numLocalRows + 1),
242 KOKKOS_LAMBDA(local_ordinal_type i)
244 if(i == numLocalRows)
251 local_ordinal_type numEntries = 0;
252 local_ordinal_type origRow = reversepermView(i);
253 if(origRow < A_local.numRows())
256 size_type rowBegin = A_local.graph.row_map(origRow);
257 size_type rowEnd = A_local.graph.row_map(origRow + 1);
258 for(size_type j = rowBegin; j < rowEnd; j++)
260 local_ordinal_type entry = A_local.graph.entries(j);
261 if(entry < totalLocalRows && permView(entry) != -1)
268 local_ordinal_type row = origRow - A_local.numRows();
269 size_type rowBegin = A_halo.graph.row_map(row);
270 size_type rowEnd = A_halo.graph.row_map(row + 1);
271 for(size_type j = rowBegin; j < rowEnd; j++)
273 local_ordinal_type entry = A_halo.graph.entries(j);
274 if(entry < totalLocalRows && permView(entry) != -1)
278 localRowptrs(i) = numEntries;
281 size_type numLocalEntries;
282 Kokkos::parallel_scan(policy_type(0, numLocalRows + 1),
283 KOKKOS_LAMBDA(local_ordinal_type i, size_type& lnumEntries,
bool finalPass)
285 size_type numEnt = localRowptrs(i);
287 localRowptrs(i) = lnumEntries;
288 lnumEntries += numEnt;
291 entries_type localEntries(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Filtered entries"), numLocalEntries);
292 values_type localValues(Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Filtered values"), numLocalEntries);
294 local_matrix_type localMatrix(
"AdditiveSchwarzFilter", numLocalRows, numLocalRows, numLocalEntries, localValues, localRowptrs, localEntries);
295 fillLocalMatrix(localMatrix);
297#ifdef HAVE_IFPACK2_DEBUG
298 TEUCHOS_TEST_FOR_EXCEPTION(
299 ! mapPairsAreFitted (*A_unfiltered_), std::invalid_argument,
"Ifpack2::LocalFilter: "
300 "A's Map pairs are not fitted to each other on Process "
301 << A_->getRowMap ()->getComm ()->getRank () <<
" of the input matrix's "
303 "This means that LocalFilter does not currently know how to work with A. "
304 "This will change soon. Please see discussion of Bug 5992.");
308 RCP<const Teuchos::Comm<int> > localComm;
310 localComm = rcp (
new Teuchos::MpiComm<int> (MPI_COMM_SELF));
312 localComm = rcp (
new Teuchos::SerialComm<int> ());
315 localMap_ = rcp (
new map_type (numLocalRows, 0, localComm, Tpetra::GloballyDistributed));
317 auto crsParams = rcp(
new Teuchos::ParameterList);
318 crsParams->template set<bool>(
"sorted",
true);
325 A_ = rcp(
new crs_matrix_type(localMap_, localMap_, localMatrix, crsParams));
328template<
class MatrixType>
331template<
class MatrixType>
335 auto localMatrix = A_->getLocalMatrixDevice();
337 fillLocalMatrix(localMatrix);
338 A_->setAllValues(localMatrix.graph.row_map, localMatrix.graph.entries, localMatrix.values);
341template<
class MatrixType>
342Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::crs_matrix_type>
348template<
class MatrixType>
351 auto localRowptrs = localMatrix.graph.row_map;
352 auto localEntries = localMatrix.graph.entries;
353 auto localValues = localMatrix.values;
354 auto permView = perm_.view_device();
355 auto reversepermView = reverseperm_.view_device();
356 local_matrix_type A_local, A_halo;
357 bool haveHalo = !Teuchos::rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_).is_null();
360 auto A_overlapping = Teuchos::rcp_dynamic_cast<const overlapping_matrix_type>(A_unfiltered_);
361 A_local = A_overlapping->getUnderlyingMatrix()->getLocalMatrixDevice();
362 A_halo = A_overlapping->getExtMatrix()->getLocalMatrixDevice();
366 auto A_crs = Teuchos::rcp_dynamic_cast<const crs_matrix_type>(A_unfiltered_);
367 A_local = A_crs->getLocalMatrixDevice();
370 local_ordinal_type totalLocalRows = A_local.numRows() + A_halo.numRows();
372 Kokkos::parallel_for(policy_type(0, localMatrix.numRows()),
373 KOKKOS_LAMBDA(local_ordinal_type i)
377 size_type outRowStart = localRowptrs(i);
378 local_ordinal_type insertPos = 0;
379 local_ordinal_type origRow = reversepermView(i);
380 if(origRow < A_local.numRows())
383 size_type rowBegin = A_local.graph.row_map(origRow);
384 size_type rowEnd = A_local.graph.row_map(origRow + 1);
385 for(size_type j = rowBegin; j < rowEnd; j++)
387 local_ordinal_type entry = A_local.graph.entries(j);
388 if(entry < totalLocalRows)
390 auto newCol = permView(entry);
393 localEntries(outRowStart + insertPos) = newCol;
394 localValues(outRowStart + insertPos) = A_local.values(j);
403 local_ordinal_type row = origRow - A_local.numRows();
404 size_type rowBegin = A_halo.graph.row_map(row);
405 size_type rowEnd = A_halo.graph.row_map(row + 1);
406 for(size_type j = rowBegin; j < rowEnd; j++)
408 local_ordinal_type entry = A_halo.graph.entries(j);
409 if(entry < totalLocalRows)
411 auto newCol = permView(entry);
414 localEntries(outRowStart + insertPos) = newCol;
415 localValues(outRowStart + insertPos) = A_halo.values(j);
423 KokkosSparse::sort_crs_matrix(localMatrix);
426template<
class MatrixType>
429 return localMap_->getComm();
432template<
class MatrixType>
433Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
439template<
class MatrixType>
440Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
446template<
class MatrixType>
447Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
453template<
class MatrixType>
454Teuchos::RCP<const typename AdditiveSchwarzFilter<MatrixType>::map_type>
460template<
class MatrixType>
461Teuchos::RCP<
const Tpetra::RowGraph<
typename MatrixType::local_ordinal_type,
462 typename MatrixType::global_ordinal_type,
463 typename MatrixType::node_type> >
470 return A_unfiltered_->getGraph();
473template<
class MatrixType>
476 return A_->getGlobalNumRows();
479template<
class MatrixType>
482 return A_->getGlobalNumCols();
485template<
class MatrixType>
488 return A_->getLocalNumRows();
491template<
class MatrixType>
494 return A_->getLocalNumCols();
497template<
class MatrixType>
500 return A_->getIndexBase();
503template<
class MatrixType>
506 return getLocalNumEntries();
509template<
class MatrixType>
512 return A_->getLocalNumEntries();
515template<
class MatrixType>
519 return A_->getNumEntriesInGlobalRow(globalRow);
522template<
class MatrixType>
526 return A_->getNumEntriesInLocalRow(localRow);
529template<
class MatrixType>
534 return A_unfiltered_->getGlobalMaxNumRowEntries();
537template<
class MatrixType>
541 return A_unfiltered_->getLocalMaxNumRowEntries();
545template<
class MatrixType>
552template<
class MatrixType>
559template<
class MatrixType>
566template<
class MatrixType>
573template<
class MatrixType>
576 nonconst_global_inds_host_view_type &globalInd,
577 nonconst_values_host_view_type &val,
578 size_t& numEntries)
const
580 throw std::runtime_error(
"Ifpack2::Details::AdditiveSchwarzFilter does not implement getGlobalRowCopy.");
583template<
class MatrixType>
586 nonconst_local_inds_host_view_type &Indices,
587 nonconst_values_host_view_type &Values,
588 size_t& NumEntries)
const
591 A_->getLocalRowCopy(LocalRow, Indices, Values, NumEntries);
594template<
class MatrixType>
596 global_inds_host_view_type &,
597 values_host_view_type &)
const
599 throw std::runtime_error(
"Ifpack2::AdditiveSchwarzFilter: does not support getGlobalRowView.");
602template<
class MatrixType>
604 local_inds_host_view_type & indices,
605 values_host_view_type & values)
const
607 A_->getLocalRowView(LocalRow, indices, values);
610template<
class MatrixType>
612getLocalDiagCopy (Tpetra::Vector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &diag)
const
615 A_->getLocalDiagCopy(diag);
618template<
class MatrixType>
621 throw std::runtime_error(
"Ifpack2::AdditiveSchwarzFilter does not support leftScale.");
624template<
class MatrixType>
627 throw std::runtime_error(
"Ifpack2::AdditiveSchwarzFilter does not support rightScale.");
630template<
class MatrixType>
632apply (
const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &X,
633 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> &Y,
634 Teuchos::ETransp mode,
636 scalar_type beta)
const
638 A_->apply(X, Y, mode, alpha, beta);
641template<
class MatrixType>
644 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingB,
645 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingY,
646 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ReducedReorderedB)
const
650 TEUCHOS_TEST_FOR_EXCEPTION(!OverlappingB.isConstantStride() || !OverlappingY.isConstantStride() || !ReducedReorderedB.isConstantStride(),
651 std::logic_error,
"Ifpack2::AdditiveSchwarzFilter::CreateReducedProblem ERROR: One of the input MultiVectors is not constant stride.");
652 local_ordinal_type numVecs = OverlappingB.getNumVectors();
653 auto b = OverlappingB.getLocalViewDevice(Tpetra::Access::ReadOnly);
654 auto reducedB = ReducedReorderedB.getLocalViewDevice(Tpetra::Access::OverwriteAll);
655 auto singletons = singletons_.view_device();
656 auto singletonDiags = singletonDiagonals_.view_device();
657 auto revperm = reverseperm_.view_device();
660 auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
661 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) numSingletons_, numVecs}),
662 KOKKOS_LAMBDA(local_ordinal_type singletonIndex, local_ordinal_type col)
664 local_ordinal_type row = singletons(singletonIndex);
665 y(row, col) = b(row, col) / singletonDiags(singletonIndex);
669 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}),
670 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
672 reducedB(row, col) = b(revperm(row), col);
679 mv_type singletonUpdates(A_unfiltered_->getRowMap(), numVecs,
false);
680 A_unfiltered_->apply(OverlappingY, singletonUpdates);
681 auto updatesView = singletonUpdates.getLocalViewDevice(Tpetra::Access::ReadOnly);
683 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedB.extent(0), numVecs}),
684 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
686 local_ordinal_type origRow = revperm(row);
687 reducedB(row, col) -= updatesView(origRow, col);
692template<
class MatrixType>
694 const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& ReducedReorderedY,
695 Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& OverlappingY)
const
699 TEUCHOS_TEST_FOR_EXCEPTION(!ReducedReorderedY.isConstantStride() || !OverlappingY.isConstantStride(),
700 std::logic_error,
"Ifpack2::AdditiveSchwarzFilter::UpdateLHS ERROR: One of the input MultiVectors is not constant stride.");
701 auto reducedY = ReducedReorderedY.getLocalViewDevice(Tpetra::Access::ReadOnly);
702 auto y = OverlappingY.getLocalViewDevice(Tpetra::Access::ReadWrite);
703 auto revperm = reverseperm_.view_device();
704 Kokkos::parallel_for(policy_2d_type({0, 0}, {(local_ordinal_type) reducedY.extent(0), (local_ordinal_type) reducedY.extent(1)}),
705 KOKKOS_LAMBDA(local_ordinal_type row, local_ordinal_type col)
707 y(revperm(row), col) = reducedY(row, col);
711template<
class MatrixType>
718template<
class MatrixType>
724template<
class MatrixType>
728 return A_->getFrobeniusNorm ();
731template<
class MatrixType>
736 const map_type& rangeMap = * (A.getRangeMap ());
737 const map_type& rowMap = * (A.getRowMap ());
738 const bool rangeAndRowFitted = mapPairIsFitted (rowMap, rangeMap);
740 const map_type& domainMap = * (A.getDomainMap ());
741 const map_type& columnMap = * (A.getColMap ());
742 const bool domainAndColumnFitted = mapPairIsFitted (columnMap, domainMap);
749 int localSuccess = rangeAndRowFitted && domainAndColumnFitted;
752 Teuchos::reduceAll<int, int> (*(A.getComm()), Teuchos::REDUCE_MIN, localSuccess, Teuchos::outArg (globalSuccess));
754 return globalSuccess == 1;
758template<
class MatrixType>
763 return map1.isLocallyFitted (map2);
769#define IFPACK2_DETAILS_ADDITIVESCHWARZFILTER_INSTANT(S,LO,GO,N) \
770 template class Ifpack2::Details::AdditiveSchwarzFilter< Tpetra::RowMatrix<S, LO, GO, N> >;
Wraps a Tpetra::CrsMatrix or Ifpack2::OverlappingRowMatrix in a filter that removes off-process edges...
Ifpack2 implementation details.
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:74