46#ifndef MUELU_UTILITIES_KOKKOS_DEF_HPP
47#define MUELU_UTILITIES_KOKKOS_DEF_HPP
50#include <Teuchos_DefaultComm.hpp>
51#include <Teuchos_ParameterList.hpp>
55#ifdef HAVE_MUELU_EPETRA
57# include "Epetra_MpiComm.h"
61#include <Kokkos_Core.hpp>
62#include <KokkosSparse_CrsMatrix.hpp>
63#include <KokkosSparse_getDiagCopy.hpp>
65#if defined(HAVE_MUELU_EPETRA) && defined(HAVE_MUELU_EPETRAEXT)
72#include <Xpetra_EpetraUtils.hpp>
73#include <Xpetra_EpetraMultiVector.hpp>
77#ifdef HAVE_MUELU_TPETRA
78#include <MatrixMarket_Tpetra.hpp>
79#include <Tpetra_RowMatrixTransposer.hpp>
80#include <TpetraExt_MatrixMatrix.hpp>
81#include <Xpetra_TpetraMultiVector.hpp>
82#include <Xpetra_TpetraCrsMatrix.hpp>
83#include <Xpetra_TpetraBlockCrsMatrix.hpp>
86#ifdef HAVE_MUELU_EPETRA
87#include <Xpetra_EpetraMap.hpp>
90#include <Xpetra_BlockedCrsMatrix.hpp>
91#include <Xpetra_DefaultPlatform.hpp>
92#include <Xpetra_Import.hpp>
93#include <Xpetra_ImportFactory.hpp>
94#include <Xpetra_Map.hpp>
95#include <Xpetra_MapFactory.hpp>
96#include <Xpetra_Matrix.hpp>
97#include <Xpetra_MatrixMatrix.hpp>
98#include <Xpetra_MatrixFactory.hpp>
99#include <Xpetra_MultiVector.hpp>
100#include <Xpetra_MultiVectorFactory.hpp>
101#include <Xpetra_Operator.hpp>
102#include <Xpetra_Vector.hpp>
103#include <Xpetra_VectorFactory.hpp>
107#include <KokkosKernels_Handle.hpp>
108#include <KokkosGraph_RCM.hpp>
114 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
115 Teuchos::RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
118 const auto rowMap = A.getRowMap();
119 auto diag = Xpetra::VectorFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Build(rowMap,
true);
121 A.getLocalDiagCopy(*diag);
126 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
127 Teuchos::RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
129 typename Teuchos::ScalarTraits<Scalar>::magnitudeType tol,
const bool doLumped) {
130 Teuchos::TimeMonitor MM = *Teuchos::TimeMonitor::getNewTimer(
"Utilities_kokkos::GetMatrixDiagonalInverse");
132 using Matrix = Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>;
133 using Map = Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node>;
134 using Vector = Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node>;
135 using VectorFactory = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>;
136 using local_matrix_type =
typename Matrix::local_matrix_type;
138 using value_type =
typename local_matrix_type::value_type;
139 using ordinal_type =
typename local_matrix_type::ordinal_type;
140 using execution_space =
typename local_matrix_type::execution_space;
147 using KAT = Kokkos::ArithTraits<value_type>;
150 RCP<const Map> rowMap = A.getRowMap();
151 RCP<Vector> diag = VectorFactory::Build(rowMap,
false);
154 local_matrix_type localMatrix = A.getLocalMatrixDevice();
155 auto diagVals = diag->getDeviceLocalView(Xpetra::Access::ReadWrite);
157 ordinal_type numRows = localMatrix.graph.numRows();
164 Kokkos::parallel_for(
"Utilities_kokkos::GetMatrixDiagonalInverse",
165 Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
166 KOKKOS_LAMBDA(
const ordinal_type rowIdx) {
167 bool foundDiagEntry =
false;
168 auto myRow = localMatrix.rowConst(rowIdx);
169 for(ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
170 if(myRow.colidx(entryIdx) == rowIdx) {
171 foundDiagEntry = true;
172 if(KAT::magnitude(myRow.value(entryIdx)) > KAT::magnitude(tol)) {
173 diagVals(rowIdx, 0) = KAT::one() / myRow.value(entryIdx);
175 diagVals(rowIdx, 0) = KAT::zero();
181 if(!foundDiagEntry) {diagVals(rowIdx, 0) = KAT::zero();}
184 Kokkos::parallel_for(
"Utilities_kokkos::GetMatrixDiagonalInverse",
185 Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
186 KOKKOS_LAMBDA(
const ordinal_type rowIdx) {
187 auto myRow = localMatrix.rowConst(rowIdx);
188 for(ordinal_type entryIdx = 0; entryIdx < myRow.length; ++entryIdx) {
189 diagVals(rowIdx, 0) += KAT::magnitude(myRow.value(entryIdx));
191 if(KAT::magnitude(diagVals(rowIdx, 0)) > KAT::magnitude(tol))
192 diagVals(rowIdx, 0) = KAT::one() / diagVals(rowIdx, 0);
194 diagVals(rowIdx, 0) = KAT::zero();
201 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
202 Teuchos::RCP<Xpetra::Vector<Scalar, LocalOrdinal, GlobalOrdinal,Node> >
206 return MueLu::GetMatrixDiagonalInverse<Scalar, LocalOrdinal, GlobalOrdinal, Node>(A,tol,doLumped);
209 template <
class Node>
210 Teuchos::RCP<Xpetra::Vector<double,int,int,Node> >
214 return MueLu::GetMatrixDiagonalInverse<double, int, int, Node>(A,tol,doLumped);
217 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
218 RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
222 RCP<const Map> rowMap = A.getRowMap(), colMap = A.getColMap();
223 RCP<Vector> localDiag = VectorFactory::Build(rowMap);
227 Teuchos::ArrayRCP<size_t> offsets;
228 crsOp->getLocalDiagOffsets(offsets);
229 crsOp->getLocalDiagCopy(*localDiag,offsets());
232 auto localDiagVals = localDiag->getDeviceLocalView(Xpetra::Access::ReadWrite);
233 const auto diagVals = GetMatrixDiagonal(A)->getDeviceLocalView(Xpetra::Access::ReadOnly);
234 Kokkos::deep_copy(localDiagVals, diagVals);
237 RCP<Vector> diagonal = VectorFactory::Build(colMap);
238 RCP< const Import> importer;
239 importer = A.getCrsGraph()->getImporter();
240 if (importer == Teuchos::null) {
241 importer = ImportFactory::Build(rowMap, colMap);
243 diagonal->doImport(*localDiag, *(importer), Xpetra::INSERT);
248 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
251 bool doInverse,
bool doFillComplete,
bool doOptimizeStorage)
253 SC one = Teuchos::ScalarTraits<SC>::one();
254 Teuchos::ArrayRCP<SC> sv(scalingVector.size());
256 for (
int i = 0; i < scalingVector.size(); ++i)
257 sv[i] = one / scalingVector[i];
259 for (
int i = 0; i < scalingVector.size(); ++i)
260 sv[i] = scalingVector[i];
263 switch (Op.getRowMap()->lib()) {
264 case Xpetra::UseTpetra:
265 MyOldScaleMatrix_Tpetra(Op, sv, doFillComplete, doOptimizeStorage);
268 case Xpetra::UseEpetra:
271 throw std::runtime_error(
"FIXME");
284 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
289 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
292 bool doOptimizeStorage)
294#ifdef HAVE_MUELU_TPETRA
296 Tpetra::CrsMatrix<SC,LO,GO,NO>& tpOp = Op2NonConstTpetraCrs(Op);
298 const RCP<const Tpetra::Map<LO,GO,NO> > rowMap = tpOp.getRowMap();
299 const RCP<const Tpetra::Map<LO,GO,NO> > domainMap = tpOp.getDomainMap();
300 const RCP<const Tpetra::Map<LO,GO,NO> > rangeMap = tpOp.getRangeMap();
302 size_t maxRowSize = tpOp.getLocalMaxNumRowEntries();
303 if (maxRowSize == Teuchos::as<size_t>(-1))
306 if (tpOp.isFillComplete())
309 if (Op.isLocallyIndexed() ==
true) {
310 typename Tpetra::CrsMatrix<SC,LO,GO,NO>::local_inds_host_view_type cols;
311 typename Tpetra::CrsMatrix<SC,LO,GO,NO>::values_host_view_type vals;
313 for (
size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
314 tpOp.getLocalRowView(i, cols, vals);
315 size_t nnz = tpOp.getNumEntriesInLocalRow(i);
316 typename Tpetra::CrsMatrix<SC,LO,GO,NO>::nonconst_values_host_view_type scaledVals(
"scaledVals", nnz);
317 for (
size_t j = 0; j < nnz; ++j)
318 scaledVals[j] = scalingVector[i]*vals[j];
321 tpOp.replaceLocalValues(i, cols, scaledVals);
326 typename Tpetra::CrsMatrix<SC,LO,GO,NO>::global_inds_host_view_type cols;
327 typename Tpetra::CrsMatrix<SC,LO,GO,NO>::values_host_view_type vals;
329 for (
size_t i = 0; i < rowMap->getLocalNumElements(); ++i) {
330 GO gid = rowMap->getGlobalElement(i);
331 tpOp.getGlobalRowView(gid, cols, vals);
332 size_t nnz = tpOp.getNumEntriesInGlobalRow(gid);
333 typename Tpetra::CrsMatrix<SC,LO,GO,NO>::nonconst_values_host_view_type scaledVals(
"scaledVals", nnz);
336 for (
size_t j = 0; j < nnz; ++j)
337 scaledVals[j] = scalingVector[i]*vals[j];
340 tpOp.replaceGlobalValues(gid, cols, scaledVals);
345 if (doFillComplete) {
346 if (domainMap == Teuchos::null || rangeMap == Teuchos::null)
347 throw Exceptions::RuntimeError(
"In Utils_kokkos::Scaling: cannot fillComplete because the domain and/or range map hasn't been defined");
349 RCP<Teuchos::ParameterList> params = rcp(
new Teuchos::ParameterList());
350 params->set(
"Optimize Storage", doOptimizeStorage);
351 params->set(
"No Nonlocal Changes",
true);
352 Op.fillComplete(Op.getDomainMap(), Op.getRangeMap(), params);
363 template <
class SC,
class LO,
class GO,
class NO>
364 Kokkos::View<bool*, typename NO::device_type>
366 const typename Teuchos::ScalarTraits<SC>::magnitudeType& tol,
367 const bool count_twos_as_dirichlet) {
368 using impl_scalar_type =
typename Kokkos::ArithTraits<SC>::val_type;
369 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
370 using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
371 using helpers = Xpetra::Helpers<SC,LO,GO,NO>;
374 if(helpers::isTpetraBlockCrs(A)) {
375#ifdef HAVE_MUELU_TPETRA
376 const Tpetra::BlockCrsMatrix<SC,LO,GO,NO> & Am = helpers::Op2TpetraBlockCrs(A);
377 auto b_graph = Am.getCrsGraph().getLocalGraphDevice();
378 auto b_rowptr = Am.getCrsGraph().getLocalRowPtrsDevice();
379 auto values = Am.getValuesDevice();
380 LO numBlockRows = Am.getLocalNumRows();
381 const LO stride = Am.getBlockSize() * Am.getBlockSize();
383 Kokkos::View<bool*, typename NO::device_type> boundaryNodes(Kokkos::ViewAllocateWithoutInitializing(
"boundaryNodes"), numBlockRows);
385 if (count_twos_as_dirichlet)
388 Kokkos::parallel_for(
"MueLu:Utils::DetectDirichletRowsBlockCrs", range_type(0,numBlockRows),
389 KOKKOS_LAMBDA(
const LO row) {
390 auto rowView = b_graph.rowConst(row);
391 auto length = rowView.length;
392 LO valstart = b_rowptr[row] * stride;
394 boundaryNodes(row) =
true;
395 decltype(length) colID =0;
396 for (; colID < length; colID++) {
397 if (rowView.colidx(colID) != row) {
398 LO current = valstart + colID*stride;
399 for(LO k=0; k<stride; k++) {
400 if (ATS::magnitude(values[current+ k]) > tol) {
401 boundaryNodes(row) =
false;
406 if(boundaryNodes(row) ==
false)
411 return boundaryNodes;
417 auto localMatrix = A.getLocalMatrixDevice();
418 LO numRows = A.getLocalNumRows();
419 Kokkos::View<bool*, typename NO::device_type> boundaryNodes(Kokkos::ViewAllocateWithoutInitializing(
"boundaryNodes"), numRows);
421 if (count_twos_as_dirichlet)
422 Kokkos::parallel_for(
"MueLu:Utils::DetectDirichletRows_Twos_As_Dirichlet", range_type(0,numRows),
423 KOKKOS_LAMBDA(
const LO row) {
424 auto rowView = localMatrix.row(row);
425 auto length = rowView.length;
427 boundaryNodes(row) =
true;
429 decltype(length) colID =0;
430 for ( ; colID < length; colID++)
431 if ((rowView.colidx(colID) != row) &&
432 (ATS::magnitude(rowView.value(colID)) > tol)) {
433 if (!boundaryNodes(row))
435 boundaryNodes(row) =
false;
438 boundaryNodes(row) =
true;
442 Kokkos::parallel_for(
"MueLu:Utils::DetectDirichletRows", range_type(0,numRows),
443 KOKKOS_LAMBDA(
const LO row) {
444 auto rowView = localMatrix.row(row);
445 auto length = rowView.length;
447 boundaryNodes(row) =
true;
448 for (
decltype(length) colID = 0; colID < length; colID++)
449 if ((rowView.colidx(colID) != row) &&
450 (ATS::magnitude(rowView.value(colID)) > tol)) {
451 boundaryNodes(row) =
false;
455 return boundaryNodes;
460 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
461 Kokkos::View<bool*, typename Node::device_type>
463 DetectDirichletRows(
const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>& A,
const typename Teuchos::ScalarTraits<Scalar>::magnitudeType& tol,
const bool count_twos_as_dirichlet) {
464 return MueLu::DetectDirichletRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, tol, count_twos_as_dirichlet);
467 template <
class Node>
468 Kokkos::View<bool*, typename Node::device_type>
470 DetectDirichletRows(
const Xpetra::Matrix<double,int,int,Node>& A,
const typename Teuchos::ScalarTraits<double>::magnitudeType& tol,
const bool count_twos_as_dirichlet) {
471 return MueLu::DetectDirichletRows<double,int,int,Node>(A, tol,count_twos_as_dirichlet);
475 template <
class SC,
class LO,
class GO,
class NO>
476 Kokkos::View<bool*, typename NO::device_type>
478 const Kokkos::View<const bool*, typename NO::device_type>& dirichletRows) {
479 using ATS = Kokkos::ArithTraits<SC>;
480 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
481 using range_type = Kokkos::RangePolicy<LO, typename NO::execution_space>;
483 SC zero = ATS::zero();
485 auto localMatrix = A.getLocalMatrixDevice();
486 LO numRows = A.getLocalNumRows();
488 Teuchos::RCP<const Xpetra::Map<LO,GO,NO> > domMap = A.getDomainMap();
489 Teuchos::RCP<const Xpetra::Map<LO,GO,NO> > colMap = A.getColMap();
490 Teuchos::RCP<Xpetra::MultiVector<SC,LO,GO,NO> > myColsToZero = Xpetra::MultiVectorFactory<SC,LO,GO,NO>::Build(colMap,1);
491 myColsToZero->putScalar(zero);
492 auto myColsToZeroView = myColsToZero->getDeviceLocalView(Xpetra::Access::ReadWrite);
494 Kokkos::parallel_for(
"MueLu:Utils::DetectDirichletCols1", range_type(0,numRows),
495 KOKKOS_LAMBDA(
const LO row) {
496 if (dirichletRows(row)) {
497 auto rowView = localMatrix.row(row);
498 auto length = rowView.length;
500 for (
decltype(length) colID = 0; colID < length; colID++)
501 myColsToZeroView(rowView.colidx(colID),0) = impl_ATS::one();
505 Teuchos::RCP<Xpetra::MultiVector<SC,LO,GO,NO> > globalColsToZero = Xpetra::MultiVectorFactory<SC,LO,GO,NO>::Build(domMap,1);
506 globalColsToZero->putScalar(zero);
507 Teuchos::RCP<Xpetra::Export<LO,GO,NO> > exporter = Xpetra::ExportFactory<LO,GO,NO>::Build(colMap,domMap);
509 globalColsToZero->doExport(*myColsToZero,*exporter,Xpetra::ADD);
511 myColsToZero->doImport(*globalColsToZero,*exporter,Xpetra::INSERT);
513 auto myCols = myColsToZero->getDeviceLocalView(Xpetra::Access::ReadOnly);
514 size_t numColEntries = colMap->getLocalNumElements();
515 Kokkos::View<bool*, typename NO::device_type> dirichletCols(Kokkos::ViewAllocateWithoutInitializing(
"dirichletCols"), numColEntries);
516 const typename ATS::magnitudeType eps = 2.0*ATS::eps();
518 Kokkos::parallel_for(
"MueLu:Utils::DetectDirichletCols2", range_type(0,numColEntries),
519 KOKKOS_LAMBDA (
const size_t i) {
520 dirichletCols(i) = impl_ATS::magnitude(myCols(i,0))>eps;
522 return dirichletCols;
526 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
527 Kokkos::View<bool*, typename Node::device_type>
530 const Kokkos::View<const bool*, typename NO::device_type>& dirichletRows) {
531 return MueLu::DetectDirichletCols<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletRows);
534 template <
class Node>
535 Kokkos::View<bool*, typename Node::device_type>
538 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows) {
539 return MueLu::DetectDirichletCols<double,int,int,Node>(A, dirichletRows);
544 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
545 void FindNonZeros(
const typename Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev_const_um vals,
546 Kokkos::View<bool*, typename Node::device_type> nonzeros) {
547 using ATS = Kokkos::ArithTraits<Scalar>;
548 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
549 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
550 TEUCHOS_ASSERT(vals.extent(0) == nonzeros.extent(0));
551 const typename ATS::magnitudeType eps = 2.0*impl_ATS::eps();
553 Kokkos::parallel_for(
"MueLu:Maxwell1::FindNonZeros", range_type(0,vals.extent(0)),
554 KOKKOS_LAMBDA (
const size_t i) {
555 nonzeros(i) = (impl_ATS::magnitude(vals(i,0)) > eps);
559 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
562 FindNonZeros(
const typename Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node>::dual_view_type::t_dev_const_um vals,
563 Kokkos::View<bool*, typename Node::device_type> nonzeros) {
564 MueLu::FindNonZeros<Scalar,LocalOrdinal,GlobalOrdinal,Node>(vals,nonzeros);
567 template <
class Node>
570 FindNonZeros(
const typename Xpetra::MultiVector<double,int,int,Node>::dual_view_type::t_dev_const_um vals,
571 Kokkos::View<bool*, typename Node::device_type> nonzeros) {
572 MueLu::FindNonZeros<double,int,int,Node>(vals,nonzeros);
576 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
578 const Kokkos::View<bool*, typename Node::device_type> & dirichletRows,
579 Kokkos::View<bool*, typename Node::device_type> dirichletCols,
580 Kokkos::View<bool*, typename Node::device_type> dirichletDomain) {
581 using ATS = Kokkos::ArithTraits<Scalar>;
582 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
583 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
584 RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > domMap = A.getDomainMap();
585 RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > rowMap = A.getRowMap();
586 RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > colMap = A.getColMap();
587 TEUCHOS_ASSERT(dirichletRows.extent(0) == rowMap->getLocalNumElements());
588 TEUCHOS_ASSERT(dirichletCols.extent(0) == colMap->getLocalNumElements());
589 TEUCHOS_ASSERT(dirichletDomain.extent(0) == domMap->getLocalNumElements());
590 RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > myColsToZero = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(colMap,
true);
592 auto myColsToZeroView = myColsToZero->getDeviceLocalView(Xpetra::Access::ReadWrite);
593 auto localMatrix = A.getLocalMatrixDevice();
594 Kokkos::parallel_for(
"MueLu:Maxwell1::DetectDirichletCols", range_type(0,rowMap->getLocalNumElements()),
596 if (dirichletRows(row)) {
597 auto rowView = localMatrix.row(row);
598 auto length = rowView.length;
600 for (
decltype(length) colID = 0; colID < length; colID++)
601 myColsToZeroView(rowView.colidx(colID),0) = impl_ATS::one();
605 RCP<Xpetra::Vector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > globalColsToZero;
606 RCP<const Xpetra::Import<LocalOrdinal,GlobalOrdinal,Node> > importer = A.getCrsGraph()->getImporter();
607 if (!importer.is_null()) {
608 globalColsToZero = Xpetra::VectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(domMap,
true);
610 globalColsToZero->doExport(*myColsToZero,*importer,Xpetra::ADD);
612 myColsToZero->doImport(*globalColsToZero,*importer,Xpetra::INSERT);
615 globalColsToZero = myColsToZero;
616 FindNonZeros<Scalar,LocalOrdinal,GlobalOrdinal,Node>(globalColsToZero->getDeviceLocalView(Xpetra::Access::ReadOnly),dirichletDomain);
617 FindNonZeros<Scalar,LocalOrdinal,GlobalOrdinal,Node>(myColsToZero->getDeviceLocalView(Xpetra::Access::ReadOnly),dirichletCols);
621 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
625 const Kokkos::View<bool*, typename Node::device_type> & dirichletRows,
626 Kokkos::View<bool*, typename Node::device_type> dirichletCols,
627 Kokkos::View<bool*, typename Node::device_type> dirichletDomain) {
628 MueLu::DetectDirichletColsAndDomains<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A,dirichletRows,dirichletCols,dirichletDomain);
631 template <
class Node>
635 const Kokkos::View<bool*, typename Node::device_type> & dirichletRows,
636 Kokkos::View<bool*, typename Node::device_type> dirichletCols,
637 Kokkos::View<bool*, typename Node::device_type> dirichletDomain) {
638 MueLu::DetectDirichletColsAndDomains<double,int,int,Node>(A,dirichletRows,dirichletCols,dirichletDomain);
643 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
646 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
648 using ATS = Kokkos::ArithTraits<Scalar>;
649 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
651 typename ATS::val_type impl_replaceWith = replaceWith;
653 auto localMatrix = A->getLocalMatrixDevice();
656 Kokkos::parallel_for(
"MueLu:Utils::ZeroDirichletRows", range_type(0,numRows),
658 if (dirichletRows(row)) {
659 auto rowView = localMatrix.row(row);
660 auto length = rowView.length;
661 for (
decltype(length) colID = 0; colID < length; colID++)
662 rowView.value(colID) = impl_replaceWith;
667 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
669 Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
670 ZeroDirichletRows(RCP<Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> >& A,
671 const Kokkos::View<const bool*, typename NO::device_type>& dirichletRows,
673 MueLu::ZeroDirichletRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletRows, replaceWith);
676 template <
class Node>
678 Utilities_kokkos<double,int,int,Node>::
679 ZeroDirichletRows(RCP<Xpetra::Matrix<double, int, int, Node> >& A,
680 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
681 double replaceWith) {
682 return MueLu::ZeroDirichletRows<double,int,int,Node>(A, dirichletRows, replaceWith);
687 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
690 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
692 using ATS = Kokkos::ArithTraits<Scalar>;
693 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
695 typename ATS::val_type impl_replaceWith = replaceWith;
697 auto myCols = X->getDeviceLocalView(Xpetra::Access::ReadWrite);
698 size_t numVecs = X->getNumVectors();
699 Kokkos::parallel_for(
"MueLu:Utils::ZeroDirichletRows_MV", range_type(0,dirichletRows.size()),
700 KOKKOS_LAMBDA(
const size_t i) {
701 if (dirichletRows(i)) {
702 for(
size_t j=0; j<numVecs; j++)
703 myCols(i,j) = impl_replaceWith;
708 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
710 Utilities_kokkos<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
711 ZeroDirichletRows(RCP<Xpetra::MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >& X,
712 const Kokkos::View<const bool*, typename NO::device_type>& dirichletRows,
714 MueLu::ZeroDirichletRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(X, dirichletRows, replaceWith);
717 template <
class Node>
719 Utilities_kokkos<double,int,int,Node>::
720 ZeroDirichletRows(RCP<Xpetra::MultiVector<
typename Teuchos::ScalarTraits<Scalar>::magnitudeType,
int,
int,
Node> >& X,
721 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows,
722 double replaceWith) {
723 return MueLu::ZeroDirichletRows<double,int,int,Node>(X, dirichletRows, replaceWith);
728 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
731 const Kokkos::View<const bool*, typename Node::device_type>& dirichletCols,
733 using ATS = Kokkos::ArithTraits<Scalar>;
734 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
736 typename ATS::val_type impl_replaceWith = replaceWith;
738 auto localMatrix = A->getLocalMatrixDevice();
741 Kokkos::parallel_for(
"MueLu:Utils::ZeroDirichletCols", range_type(0,numRows),
743 auto rowView = localMatrix.row(row);
744 auto length = rowView.length;
745 for (
decltype(length) colID = 0; colID < length; colID++)
746 if (dirichletCols(rowView.colidx(colID))) {
747 rowView.value(colID) = impl_replaceWith;
752 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
755 ZeroDirichletCols(RCP<Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> >& A,
756 const Kokkos::View<const bool*, typename NO::device_type>& dirichletCols,
758 MueLu::ZeroDirichletCols<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletCols, replaceWith);
761 template <
class Node>
765 const Kokkos::View<const bool*, typename Node::device_type>& dirichletCols,
766 double replaceWith) {
767 return MueLu::ZeroDirichletCols<double,int,int,Node>(A, dirichletCols, replaceWith);
771 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
773 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
774 Kokkos::View<bool*, typename Node::device_type> & dirichletRows)
776 typedef Teuchos::ScalarTraits<Scalar> STS;
777 RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node>> rowmap = A.getRowMap();
779 auto dirichletRowsHost = Kokkos::create_mirror_view(dirichletRows);
780 Kokkos::deep_copy(dirichletRowsHost, dirichletRows);
782 for (
LocalOrdinal row = 0; row < Teuchos::as<LocalOrdinal>(rowmap->getLocalNumElements()); ++row) {
783 size_t nnz = A.getNumEntriesInLocalRow(row);
784 ArrayView<const LocalOrdinal> indices;
785 ArrayView<const Scalar> vals;
786 A.getLocalRowView(row, indices, vals);
788 Scalar rowsum = STS::zero();
789 Scalar diagval = STS::zero();
790 for (
LocalOrdinal colID = 0; colID < Teuchos::as<LocalOrdinal>(nnz); colID++) {
793 diagval = vals[colID];
794 rowsum += vals[colID];
796 if (STS::real(rowsum) > STS::magnitude(diagval) * rowSumTol)
797 dirichletRowsHost(row) =
true;
800 Kokkos::deep_copy(dirichletRows, dirichletRowsHost);
803 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
807 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType rowSumTol,
808 Kokkos::View<bool*, typename Node::device_type> & dirichletRows)
810 MueLu::ApplyRowSumCriterion<Scalar, LocalOrdinal, GlobalOrdinal, Node>(A,rowSumTol,dirichletRows);
814 template <
class Node>
818 const typename Teuchos::ScalarTraits<double>::magnitudeType rowSumTol,
819 Kokkos::View<bool*, typename Node::device_type> & dirichletRows)
821 MueLu::ApplyRowSumCriterion<double, int, int, Node>(A,rowSumTol,dirichletRows);
825 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
826 RCP<Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >
829 RCP<Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> > Xscalar;
830#if defined(HAVE_XPETRA_TPETRA) && (defined(HAVE_TPETRA_INST_COMPLEX_DOUBLE) || defined(HAVE_TPETRA_INST_COMPLEX_FLOAT))
831 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
834 if ((
typeid(
Scalar).name() ==
typeid(std::complex<double>).name()) ||
835 (
typeid(
Scalar).name() ==
typeid(std::complex<float>).name())) {
836 size_t numVecs = X->getNumVectors();
837 Xscalar = Xpetra::MultiVectorFactory<Scalar,LocalOrdinal,GlobalOrdinal,Node>::Build(X->getMap(),numVecs);
838 auto XVec = X->getDeviceLocalView(Xpetra::Access::ReadOnly);
839 auto XVecScalar = Xscalar->getDeviceLocalView(Xpetra::Access::ReadWrite);
841 Kokkos::parallel_for(
"MueLu:Utils::RealValuedToScalarMultiVector", range_type(0,X->getLocalLength()),
842 KOKKOS_LAMBDA(
const size_t i) {
843 for (
size_t j=0; j<numVecs; j++)
844 XVecScalar(i,j) = XVec(i,j);
848 Xscalar = rcp_dynamic_cast<Xpetra::MultiVector<Scalar,LocalOrdinal,GlobalOrdinal,Node> >(X);
852 template <
class Node>
853 RCP<Xpetra::MultiVector<double,int,int,Node> >
860 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
861 Teuchos::RCP<Xpetra::Vector<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node> >
ReverseCuthillMcKee(
const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Op) {
862 using local_matrix_type =
typename Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_type;
863 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
864 using lno_nnz_view_t =
typename local_graph_type::entries_type::non_const_type;
865 using device =
typename local_graph_type::device_type;
866 using execution_space =
typename local_matrix_type::execution_space;
867 using ordinal_type =
typename local_matrix_type::ordinal_type;
869 local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
871 lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm
872 <device,
typename local_graph_type::row_map_type,
typename local_graph_type::entries_type, lno_nnz_view_t>
873 (localGraph.row_map, localGraph.entries);
875 RCP<Xpetra::Vector<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node> > retval =
876 Xpetra::VectorFactory<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Build(Op.getRowMap());
879 auto view1D = Kokkos::subview(retval->getDeviceLocalView(Xpetra::Access::ReadWrite),Kokkos::ALL (), 0);
880 Kokkos::parallel_for(
"Utilities_kokkos::ReverseCuthillMcKee",
881 Kokkos::RangePolicy<ordinal_type, execution_space>(0, localGraph.numRows()),
882 KOKKOS_LAMBDA(
const ordinal_type rowIdx) {
883 view1D(rcmOrder(rowIdx)) = rowIdx;
888 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
889 Teuchos::RCP<Xpetra::Vector<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node> >
CuthillMcKee(
const Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node> &Op) {
890 using local_matrix_type =
typename Xpetra::Matrix<Scalar,LocalOrdinal,GlobalOrdinal,Node>::local_matrix_type;
891 using local_graph_type =
typename local_matrix_type::staticcrsgraph_type;
892 using lno_nnz_view_t =
typename local_graph_type::entries_type::non_const_type;
893 using device =
typename local_graph_type::device_type;
894 using execution_space =
typename local_matrix_type::execution_space;
895 using ordinal_type =
typename local_matrix_type::ordinal_type;
897 local_graph_type localGraph = Op.getLocalMatrixDevice().graph;
900 lno_nnz_view_t rcmOrder = KokkosGraph::Experimental::graph_rcm
901 <device,
typename local_graph_type::row_map_type,
typename local_graph_type::entries_type, lno_nnz_view_t>
902 (localGraph.row_map, localGraph.entries);
904 RCP<Xpetra::Vector<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node> > retval =
905 Xpetra::VectorFactory<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node>::Build(Op.getRowMap());
908 auto view1D = Kokkos::subview(retval->getDeviceLocalView(Xpetra::Access::ReadWrite),Kokkos::ALL (), 0);
910 Kokkos::parallel_for(
"Utilities_kokkos::ReverseCuthillMcKee",
911 Kokkos::RangePolicy<ordinal_type, execution_space>(0, numRows),
912 KOKKOS_LAMBDA(
const ordinal_type rowIdx) {
913 view1D(rcmOrder(numRows - 1 - rowIdx)) = rowIdx;
918 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
919 Teuchos::RCP<Xpetra::Vector<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node> >
921 return MueLu::ReverseCuthillMcKee<Scalar,LocalOrdinal,GlobalOrdinal,Node>(Op);
924 template <
class Node>
925 Teuchos::RCP<Xpetra::Vector<int,int,int,Node> >
927 return MueLu::ReverseCuthillMcKee<double,int,int,Node>(Op);
930 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
931 Teuchos::RCP<Xpetra::Vector<LocalOrdinal,LocalOrdinal,GlobalOrdinal,Node> >
933 return MueLu::CuthillMcKee<Scalar,LocalOrdinal,GlobalOrdinal,Node>(Op);
936 template <
class Node>
937 Teuchos::RCP<Xpetra::Vector<int,int,int,Node> >
939 return MueLu::CuthillMcKee<double,int,int,Node>(Op);
944 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
947 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows) {
948 TEUCHOS_ASSERT(A->isFillComplete());
949 using ATS = Kokkos::ArithTraits<Scalar>;
950 using impl_ATS = Kokkos::ArithTraits<typename ATS::val_type>;
951 using range_type = Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space>;
953 RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > domMap = A->getDomainMap();
954 RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > ranMap = A->getRangeMap();
955 RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > Rmap = A->getRowMap();
956 RCP<const Xpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > Cmap = A->getColMap();
958 TEUCHOS_ASSERT(
static_cast<size_t>(dirichletRows.size()) == Rmap->getLocalNumElements());
960 auto localMatrix = A->getLocalMatrixDevice();
961 auto localRmap = Rmap->getLocalMap();
962 auto localCmap = Cmap->getLocalMap();
964 Kokkos::parallel_for(
"MueLu::Utils::ApplyOAZ",range_type(0,dirichletRows.extent(0)),
966 if (dirichletRows(row)){
967 auto rowView = localMatrix.row(row);
968 auto length = rowView.length;
969 auto row_gid = localRmap.getGlobalElement(row);
970 auto row_lid = localCmap.getLocalElement(row_gid);
972 for (
decltype(length) colID = 0; colID < length; colID++)
973 if (rowView.colidx(colID) == row_lid)
974 rowView.value(colID) = impl_ATS::one();
976 rowView.value(colID) = impl_ATS::zero();
981 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
985 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows) {
986 MueLu::ApplyOAZToMatrixRows<Scalar,LocalOrdinal,GlobalOrdinal,Node>(A, dirichletRows);
989 template <
class Node>
993 const Kokkos::View<const bool*, typename Node::device_type>& dirichletRows) {
994 MueLu::ApplyOAZToMatrixRows<double,int,int,Node>(A, dirichletRows);
999#define MUELU_UTILITIES_KOKKOS_SHORT
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
Exception throws to report errors in the internal logical of the program.
Xpetra::CrsMatrixWrap< Scalar, LocalOrdinal, GlobalOrdinal, Node > CrsMatrixWrap
Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > Matrix
Teuchos::ScalarTraits< Scalar >::magnitudeType Magnitude
static RCP< Vector > GetMatrixDiagonal(const Matrix &A)
Extract Matrix Diagonal.
typename TST::magnitudeType Magnitude
Namespace for MueLu classes and methods.
void ApplyOAZToMatrixRows(Teuchos::RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows)
void FindNonZeros(const typename Xpetra::MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node >::dual_view_type::t_dev_const_um vals, Kokkos::View< bool *, typename Node::device_type > nonzeros)
void ApplyRowSumCriterion(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const typename Teuchos::ScalarTraits< Scalar >::magnitudeType rowSumTol, Kokkos::View< bool *, typename Node::device_type > &dirichletRows)
void ZeroDirichletCols(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletCols, Scalar replaceWith)
Kokkos::View< bool *, typename NO::device_type > DetectDirichletCols(const Xpetra::Matrix< SC, LO, GO, NO > &A, const Kokkos::View< const bool *, typename NO::device_type > &dirichletRows)
Teuchos::RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op)
Kokkos::View< bool *, typename NO::device_type > DetectDirichletRows(const Xpetra::Matrix< SC, LO, GO, NO > &A, const typename Teuchos::ScalarTraits< SC >::magnitudeType &tol, const bool count_twos_as_dirichlet)
void DetectDirichletColsAndDomains(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Kokkos::View< bool *, typename Node::device_type > &dirichletRows, Kokkos::View< bool *, typename Node::device_type > dirichletCols, Kokkos::View< bool *, typename Node::device_type > dirichletDomain)
Teuchos::RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixDiagonalInverse(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, typename Teuchos::ScalarTraits< Scalar >::magnitudeType tol, const bool doLumped)
void ZeroDirichletRows(RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > &A, const Kokkos::View< const bool *, typename Node::device_type > &dirichletRows, Scalar replaceWith)
Teuchos::RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > ReverseCuthillMcKee(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op)