46#ifndef MUELU_SAPFACTORY_KOKKOS_DEF_HPP
47#define MUELU_SAPFACTORY_KOKKOS_DEF_HPP
50#include "KokkosKernels_Handle.hpp"
51#include "KokkosSparse_spgemm.hpp"
52#include "KokkosSparse_spmv.hpp"
56#include <Xpetra_Matrix.hpp>
57#include <Xpetra_IteratorOps.hpp>
63#include "MueLu_PerfUtils.hpp"
65#include "MueLu_TentativePFactory.hpp"
66#include "MueLu_Utilities_kokkos.hpp"
72 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
74 RCP<ParameterList> validParamList = rcp(
new ParameterList());
76#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
86 validParamList->set< RCP<const FactoryBase> >(
"A", Teuchos::null,
"Generating factory of the matrix A used during the prolongator smoothing process");
87 validParamList->set< RCP<const FactoryBase> >(
"P", Teuchos::null,
"Tentative prolongator factory");
90 ParameterList norecurse;
91 norecurse.disableRecursiveValidation();
92 validParamList->set<ParameterList> (
"matrixmatrix: kernel params", norecurse,
"MatrixMatrix kernel parameters");
95 return validParamList;
98 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
100 Input(fineLevel,
"A");
104 RCP<const FactoryBase> initialPFact = GetFactory(
"P");
105 if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.
GetFactoryManager()->GetFactory(
"Ptent"); }
106 coarseLevel.
DeclareInput(
"P", initialPFact.get(),
this);
109 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
111 return BuildP(fineLevel, coarseLevel);
114 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
118 typedef typename Teuchos::ScalarTraits<SC>::magnitudeType Magnitude;
123 RCP<const FactoryBase> initialPFact = GetFactory(
"P");
124 if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.
GetFactoryManager()->GetFactory(
"Ptent"); }
125 const ParameterList& pL = GetParameterList();
128 RCP<Matrix> A = Get< RCP<Matrix> >(fineLevel,
"A");
129 RCP<Matrix> Ptent = coarseLevel.
Get< RCP<Matrix> >(
"P", initialPFact.get());
133 if (Ptent == Teuchos::null) {
134 finalP = Teuchos::null;
135 Set(coarseLevel,
"P", finalP);
139 if(restrictionMode_) {
148 RCP<ParameterList> APparams;
149 if(pL.isSublist(
"matrixmatrix: kernel params"))
150 APparams=rcp(
new ParameterList(pL.sublist(
"matrixmatrix: kernel params")));
152 APparams= rcp(
new ParameterList);
153 if (coarseLevel.
IsAvailable(
"AP reuse data",
this)) {
154 GetOStream(
static_cast<MsgType>(
Runtime0 |
Test)) <<
"Reusing previous AP data" << std::endl;
156 APparams = coarseLevel.
Get< RCP<ParameterList> >(
"AP reuse data",
this);
158 if (APparams->isParameter(
"graph"))
159 finalP = APparams->get< RCP<Matrix> >(
"graph");
162 APparams->set(
"compute global constants: temporaries",APparams->get(
"compute global constants: temporaries",
false));
163 APparams->set(
"compute global constants",APparams->get(
"compute global constants",
false));
165 const SC
dampingFactor = as<SC>(pL.get<
double>(
"sa: damping factor"));
166 const LO maxEigenIterations = as<LO>(pL.get<
int>(
"sa: eigenvalue estimate num iterations"));
167 const bool estimateMaxEigen = pL.get<
bool>(
"sa: calculate eigenvalue estimate");
168 const bool useAbsValueRowSum = pL.get<
bool> (
"sa: use rowsumabs diagonal scaling");
169 const bool doQRStep = pL.get<
bool>(
"tentative: calculate qr");
170 const bool enforceConstraints= pL.get<
bool>(
"sa: enforce constraints");
171 const SC userDefinedMaxEigen = as<SC>(pL.get<
double>(
"sa: max eigenvalue"));
175 "MueLu::TentativePFactory::MakeTentative: cannot use 'enforce constraints' and 'calculate qr' at the same time");
181 if (Teuchos::ScalarTraits<SC>::real(userDefinedMaxEigen) == Teuchos::ScalarTraits<SC>::real(-1.0))
184 lambdaMax = A->GetMaxEigenvalueEstimate();
185 if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
186 GetOStream(
Statistics1) <<
"Calculating max eigenvalue estimate now (max iters = "<< maxEigenIterations <<
187 ( (useAbsValueRowSum) ?
", use rowSumAbs diagonal)" :
", use point diagonal)") << std::endl;
188 Magnitude stopTol = 1e-4;
190 if (useAbsValueRowSum)
194 A->SetMaxEigenvalueEstimate(lambdaMax);
196 GetOStream(
Statistics1) <<
"Using cached max eigenvalue estimate" << std::endl;
200 lambdaMax = userDefinedMaxEigen;
201 A->SetMaxEigenvalueEstimate(lambdaMax);
209 if (useAbsValueRowSum)
210 GetOStream(
Runtime0) <<
"Using rowSumAbs diagonal" << std::endl;
211 if (invDiag == Teuchos::null)
215 TEUCHOS_TEST_FOR_EXCEPTION(!std::isfinite(Teuchos::ScalarTraits<SC>::magnitude(omega)),
Exceptions::RuntimeError,
"Prolongator damping factor needs to be finite.");
220 finalP = Xpetra::IteratorOps<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(
Statistics2), std::string(
"MueLu::SaP-") +
toString(coarseLevel.
GetLevelID()), APparams);
221 if (enforceConstraints) SatisfyPConstraints( A, finalP);
230 if (!restrictionMode_) {
232 if(!finalP.is_null()) {std::ostringstream oss; oss <<
"P_" << coarseLevel.
GetLevelID(); finalP->setObjectLabel(oss.str());}
233 Set(coarseLevel,
"P", finalP);
236 if (Ptent->IsView(
"stridedMaps"))
237 finalP->CreateView(
"stridedMaps", Ptent);
242 Set(coarseLevel,
"R", R);
243 if(!R.is_null()) {std::ostringstream oss; oss <<
"R_" << coarseLevel.
GetLevelID(); R->setObjectLabel(oss.str());}
246 if (Ptent->IsView(
"stridedMaps"))
247 R->CreateView(
"stridedMaps", Ptent,
true);
251 RCP<ParameterList> params = rcp(
new ParameterList());
252 params->set(
"printLoadBalancingInfo",
true);
253 params->set(
"printCommInfo",
true);
275template<
typename local_matrix_type>
278 using Scalar=
typename local_matrix_type::non_const_value_type;
280 using LO=
typename local_matrix_type::non_const_ordinal_type;
281 using Device=
typename local_matrix_type::device_type;
287 Kokkos::View<SC**, Device>
Rsum;
293 Rsum = Kokkos::View<SC**, Device>(
"Rsum", localP_.numRows(),
nPDEs);
294 nPositive = Kokkos::View<size_t**, Device>(
"nPositive", localP_.numRows(),
nPDEs);
297 KOKKOS_INLINE_FUNCTION
300 auto rowPtr =
localP.graph.row_map;
301 auto values =
localP.values;
303 bool checkRow =
true;
305 if (rowPtr(rowIdx + 1) == rowPtr(rowIdx)) checkRow =
false;
312 for (
auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
313 Rsum(rowIdx, entryIdx%
nPDEs) += values(entryIdx);
314 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) < Kokkos::ArithTraits<SC>::real(
zero)) {
317 values(entryIdx) =
zero;
320 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) != Kokkos::ArithTraits<SC>::real(
zero))
323 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) > Kokkos::ArithTraits<SC>::real(1.00001 )) {
325 values(entryIdx) =
one;
334 for (
size_t k=0; k < (size_t)
nPDEs; k++) {
336 if (Kokkos::ArithTraits<SC>::real(
Rsum(rowIdx, k)) < Kokkos::ArithTraits<SC>::magnitude(
zero)) {
339 else if (Kokkos::ArithTraits<SC>::real(
Rsum(rowIdx, k)) > Kokkos::ArithTraits<SC>::magnitude(1.00001)) {
345 for (
size_t k=0; k < (size_t)
nPDEs; k++) {
351 for (
auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) {
352 if (Kokkos::ArithTraits<SC>::real(values(entryIdx)) > Kokkos::ArithTraits<SC>::real(
zero)) {
353 values(entryIdx) = values(entryIdx) +
359 for (
size_t k=0; k < (size_t)
nPDEs; k++)
Rsum(rowIdx, k) =
zero;
360 for (
size_t k=0; k < (size_t)
nPDEs; k++)
nPositive(rowIdx, k) = 0;
366template<
typename local_matrix_type>
369 using Scalar=
typename local_matrix_type::non_const_value_type;
371 using LO=
typename local_matrix_type::non_const_ordinal_type;
372 using Device=
typename local_matrix_type::device_type;
373 using KAT = Kokkos::ArithTraits<SC>;
380 Kokkos::View<LO**, Device>
inds;
384 origSorted = Kokkos::View<SC**, Device>(
"origSorted" , localP_.numRows(), maxRowEntries_);
385 fixedSorted = Kokkos::View<SC**, Device>(
"fixedSorted", localP_.numRows(), maxRowEntries_);
386 inds = Kokkos::View<LO**, Device>(
"inds" , localP_.numRows(), maxRowEntries_);
389 KOKKOS_INLINE_FUNCTION
392 auto rowPtr =
localP.graph.row_map;
393 auto values =
localP.values;
395 auto nnz = rowPtr(rowIdx+1) - rowPtr(rowIdx);
400 for (
auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) rsumTarget += values(entryIdx);
404 SC rowSumDeviation, temp, delta;
405 SC closestToLeftBoundDist, closestToRghtBoundDist;
406 LO closestToLeftBound, closestToRghtBound;
411 if ((KAT::real(rsumTarget) >= KAT::real(leftBound*(
static_cast<SC>(nnz)))) &&
412 (KAT::real(rsumTarget) <= KAT::real(rghtBound*(
static_cast<SC>(nnz))))){
417 aBigNumber = KAT::zero();
418 for (
auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++){
419 if ( KAT::magnitude( values(entryIdx) ) > KAT::magnitude(aBigNumber))
420 aBigNumber = KAT::magnitude( values(entryIdx) );
422 aBigNumber = aBigNumber+ (KAT::magnitude(leftBound) + KAT::magnitude(rghtBound))*(100*
one);
425 for (
auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++){
427 inds(rowIdx, ind) = ind;
431 auto sortVals = Kokkos::subview(
origSorted, rowIdx, Kokkos::ALL() );
432 auto sortInds = Kokkos::subview(
inds, rowIdx, Kokkos::make_pair(0,ind));
436 for (
LO i = 1; i < static_cast<LO>(nnz); ++i){
440 if ( KAT::real(sortVals(sortInds(i))) < KAT::real(sortVals(sortInds(0))) ){
441 for ( ; j > 0; --j) sortInds(j) = sortInds(j - 1);
445 for ( ; KAT::real(sortVals(ind)) < KAT::real(sortVals(sortInds(j-1))); --j) sortInds(j) = sortInds(j-1);
452 for (
LO i = 0; i < static_cast<LO>(nnz); i++)
origSorted(rowIdx, i) = values(rowPtr(rowIdx) +
inds(rowIdx, i));
454 closestToLeftBound = 0;
455 while ((closestToLeftBound <
static_cast<LO>(nnz)) && (KAT::real(
origSorted(rowIdx, closestToLeftBound)) <= KAT::real(leftBound))) closestToLeftBound++;
458 closestToRghtBound = closestToLeftBound;
459 while ((closestToRghtBound <
static_cast<LO>(nnz)) && (KAT::real(
origSorted(rowIdx, closestToRghtBound)) <= KAT::real(rghtBound))) closestToRghtBound++;
464 closestToLeftBoundDist =
origSorted(rowIdx, closestToLeftBound) - leftBound;
465 if (closestToRghtBound ==
static_cast<LO>(nnz)) closestToRghtBoundDist= aBigNumber;
466 else closestToRghtBoundDist=
origSorted(rowIdx, closestToRghtBound) - rghtBound;
471 rowSumDeviation = leftBound*(
static_cast<SC>(closestToLeftBound)) + (
static_cast<SC>(nnz-closestToRghtBound))*rghtBound - rsumTarget;
472 for (
LO i=closestToLeftBound; i < closestToRghtBound; i++) rowSumDeviation +=
origSorted(rowIdx, i);
477 if (KAT::real(rowSumDeviation) < KAT::real(KAT::zero())) {
479 temp = leftBound; leftBound = -rghtBound; rghtBound = temp;
485 for (
LO i=0; i < static_cast<LO>(nnz/2); i++) {
493 LO itemp = closestToLeftBound;
494 closestToLeftBound = nnz-closestToRghtBound;
495 closestToRghtBound = nnz-itemp;
496 closestToLeftBoundDist =
origSorted(rowIdx, closestToLeftBound) - leftBound;
497 if (closestToRghtBound ==
static_cast<LO>(nnz)) closestToRghtBoundDist= aBigNumber;
498 else closestToRghtBoundDist=
origSorted(rowIdx, closestToRghtBound) - rghtBound;
500 rowSumDeviation = -rowSumDeviation;
505 for (
LO i = 0; i < closestToLeftBound; i++)
fixedSorted(rowIdx, i) = leftBound;
507 for (
LO i = closestToRghtBound; i < static_cast<LO>(nnz); i++)
fixedSorted(rowIdx, i) = rghtBound;
509 while ((KAT::magnitude(rowSumDeviation) > KAT::magnitude((
one*1.e-10)*rsumTarget))){
510 if (closestToRghtBound != closestToLeftBound)
511 delta = rowSumDeviation/
static_cast<SC>(closestToRghtBound - closestToLeftBound);
512 else delta = aBigNumber;
514 if (KAT::magnitude(closestToLeftBoundDist) <= KAT::magnitude(closestToRghtBoundDist)) {
515 if (KAT::magnitude(delta) <= KAT::magnitude(closestToLeftBoundDist)) {
516 rowSumDeviation =
zero;
517 for (
LO i = closestToLeftBound; i < closestToRghtBound ; i++)
fixedSorted(rowIdx, i) =
origSorted(rowIdx, i) - delta;
520 rowSumDeviation = rowSumDeviation - closestToLeftBoundDist;
521 fixedSorted(rowIdx, closestToLeftBound) = leftBound;
522 closestToLeftBound++;
523 if (closestToLeftBound <
static_cast<LO>(nnz)) closestToLeftBoundDist =
origSorted(rowIdx, closestToLeftBound) - leftBound;
524 else closestToLeftBoundDist = aBigNumber;
528 if (KAT::magnitude(delta) <= KAT::magnitude(closestToRghtBoundDist)) {
530 for (
LO i = closestToLeftBound; i < closestToRghtBound ; i++)
fixedSorted(rowIdx, i) =
origSorted(rowIdx, i) - delta;
533 rowSumDeviation = rowSumDeviation + closestToRghtBoundDist;
536 closestToRghtBound++;
538 if (closestToRghtBound >=
static_cast<LO>(nnz)) closestToRghtBoundDist = aBigNumber;
539 else closestToRghtBoundDist =
origSorted(rowIdx, closestToRghtBound) - rghtBound;
544 auto rowStart = rowPtr(rowIdx);
549 for (
LO i=0; i < static_cast<LO>(nnz/2); i++) {
556 for (
LO i = 0; i < static_cast<LO>(nnz); i++) values(rowStart +
inds(rowIdx, i)) =
fixedSorted(rowIdx, i);
560 for (
auto entryIdx = rowPtr(rowIdx); entryIdx < rowPtr(rowIdx + 1); entryIdx++) values(entryIdx) =
one/(
static_cast<SC>(nnz) );
570 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
573 using Device =
typename Matrix::local_matrix_type::device_type;
574 LO nPDEs = A->GetFixedBlockSize();
576 using local_mat_type =
typename Matrix::local_matrix_type;
578 Kokkos::parallel_for(
"enforce constraint",Kokkos::RangePolicy<typename Device::execution_space>(0, P->getRowMap()->getLocalNumElements() ),
583 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class DeviceType>
586 using Device =
typename Matrix::local_matrix_type::device_type;
589 using local_mat_type =
typename Matrix::local_matrix_type;
591 Kokkos::parallel_for(
"enforce constraint",Kokkos::RangePolicy<typename Device::execution_space>(0, P->getLocalNumRows() ),
#define SET_VALID_ENTRY(name)
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
static std::string PrintMatrixInfo(const Matrix &A, const std::string &msgTag, RCP< const Teuchos::ParameterList > params=Teuchos::null)
Factory for building Smoothed Aggregation prolongators.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
static SC PowerMethod(const Matrix &A, bool scaleByDiag=true, LO niters=10, Magnitude tolerance=1e-2, bool verbose=false, unsigned int seed=123)
Simple transpose for Tpetra::CrsMatrix types.
static RCP< Vector > GetMatrixDiagonalInverse(const Matrix &A, Magnitude tol=TST::eps() *100, const bool doLumped=false)
Extract Matrix Diagonal.
static RCP< Matrix > Transpose(Matrix &Op, bool optimizeTranspose=false, const std::string &label=std::string())
Transpose a Xpetra::Matrix.
Namespace for MueLu classes and methods.
@ Statistics2
Print even more statistics.
@ Statistics1
Print more statistics.
@ Runtime0
One-liner description of what is happening.
@ Statistics0
Print statistics that do not involve significant additional computation.
std::string toString(const T &what)
Little helper function to convert non-string types to strings.
scalar_type dampingFactor
typename local_matrix_type::device_type Device
typename local_matrix_type::non_const_value_type Scalar
Kokkos::View< SC **, Device > ConstraintViolationSum
Kokkos::View< size_t **, Device > nPositive
constraintKernel(LO nPDEs_, local_matrix_type localP_)
typename local_matrix_type::non_const_ordinal_type LO
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
Kokkos::View< SC **, Device > Rsum
typename local_matrix_type::non_const_value_type Scalar
KOKKOS_INLINE_FUNCTION void operator()(const size_t rowIdx) const
optimalSatisfyConstraintsForScalarPDEsKernel(LO nPDEs_, LO maxRowEntries_, local_matrix_type localP_)
Kokkos::View< LO **, Device > inds
typename local_matrix_type::non_const_ordinal_type LO
Kokkos::View< SC **, Device > origSorted
Kokkos::ArithTraits< SC > KAT
Kokkos::View< SC **, Device > fixedSorted
typename local_matrix_type::device_type Device