46#ifndef MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
47#define MUELU_SEMICOARSENPFACTORY_KOKKOS_DEF_HPP
51#include <Kokkos_Core.hpp>
53#include <KokkosBatched_LU_Decl.hpp>
54#include <KokkosBatched_LU_Serial_Impl.hpp>
55#include <KokkosBatched_Trsm_Decl.hpp>
56#include <KokkosBatched_Trsm_Serial_Impl.hpp>
57#include <KokkosBatched_Util.hpp>
58#include <KokkosSparse_CrsMatrix.hpp>
60#include <Xpetra_CrsMatrixWrap.hpp>
61#include <Xpetra_ImportFactory.hpp>
62#include <Xpetra_Matrix.hpp>
63#include <Xpetra_MultiVectorFactory.hpp>
64#include <Xpetra_VectorFactory.hpp>
75RCP<const ParameterList>
77 Kokkos::Compat::KokkosDeviceWrapperNode<
78 DeviceType>>::GetValidParameterList()
const {
79 RCP<ParameterList> validParamList = rcp(
new ParameterList());
81 std::string name =
"semicoarsen: coarsen rate";
83 validParamList->set<RCP<const FactoryBase>>(
84 "A", Teuchos::null,
"Generating factory of the matrix A");
85 validParamList->set<RCP<const FactoryBase>>(
86 "Nullspace", Teuchos::null,
"Generating factory of the nullspace");
87 validParamList->set<RCP<const FactoryBase>>(
88 "Coordinates", Teuchos::null,
"Generating factory for coordinates");
90 validParamList->set<RCP<const FactoryBase>>(
91 "LineDetection_VertLineIds", Teuchos::null,
92 "Generating factory for LineDetection vertical line ids");
93 validParamList->set<RCP<const FactoryBase>>(
94 "LineDetection_Layers", Teuchos::null,
95 "Generating factory for LineDetection layer ids");
96 validParamList->set<RCP<const FactoryBase>>(
97 "CoarseNumZLayers", Teuchos::null,
98 "Generating factory for number of coarse z-layers");
100 return validParamList;
107 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::
108 DeclareInput(
Level &fineLevel,
Level & )
const {
109 Input(fineLevel,
"A");
110 Input(fineLevel,
"Nullspace");
112 Input(fineLevel,
"LineDetection_VertLineIds");
113 Input(fineLevel,
"LineDetection_Layers");
114 Input(fineLevel,
"CoarseNumZLayers");
122 bTransferCoordinates_ =
true;
124 }
else if (bTransferCoordinates_ ==
true) {
128 RCP<const FactoryBase> myCoordsFact = GetFactory(
"Coordinates");
129 if (myCoordsFact == Teuchos::null) {
132 if (fineLevel.
IsAvailable(
"Coordinates", myCoordsFact.get())) {
133 fineLevel.
DeclareInput(
"Coordinates", myCoordsFact.get(),
this);
141 Kokkos::Compat::KokkosDeviceWrapperNode<
142 DeviceType>>::Build(
Level &fineLevel,
145 return BuildP(fineLevel, coarseLevel);
151 Kokkos::Compat::KokkosDeviceWrapperNode<
152 DeviceType>>::BuildP(
Level &fineLevel,
158 RCP<Matrix> A = Get<RCP<Matrix>>(fineLevel,
"A");
159 RCP<MultiVector> fineNullspace =
160 Get<RCP<MultiVector>>(fineLevel,
"Nullspace");
163 const ParameterList &pL = GetParameterList();
164 LO CoarsenRate = as<LO>(pL.get<
int>(
"semicoarsen: coarsen rate"));
165 TEUCHOS_TEST_FOR_EXCEPTION(
167 "semicoarsen: coarsen rate must be greater than 1");
170 LO BlkSize = A->GetFixedBlockSize();
171 RCP<const Map> rowMap = A->getRowMap();
172 LO Ndofs = rowMap->getLocalNumElements();
173 LO Nnodes = Ndofs / BlkSize;
177 LO FineNumZLayers = Get<LO>(fineLevel,
"CoarseNumZLayers");
178 Teuchos::ArrayRCP<LO> TVertLineId =
179 Get<Teuchos::ArrayRCP<LO>>(fineLevel,
"LineDetection_VertLineIds");
180 Teuchos::ArrayRCP<LO> TLayerId =
181 Get<Teuchos::ArrayRCP<LO>>(fineLevel,
"LineDetection_Layers");
185 "Cannot coarsen further");
186 using coordT =
typename Teuchos::ScalarTraits<Scalar>::coordinateType;
187 LO CoarseNumZLayers =
188 (LO)floor(((coordT)(FineNumZLayers + 1)) / ((coordT)CoarsenRate) - 1.0);
189 if (CoarseNumZLayers < 1)
190 CoarseNumZLayers = 1;
194 RCP<MultiVector> coarseNullspace;
195 BuildSemiCoarsenP(coarseLevel, Ndofs, Nnodes, BlkSize, FineNumZLayers,
196 CoarseNumZLayers, TLayerId, TVertLineId, A, fineNullspace, P,
202 coarseLevel.
Set(
"NumZLayers", Teuchos::as<LO>(CoarseNumZLayers),
206 Set(coarseLevel,
"P", P);
207 Set(coarseLevel,
"Nullspace", coarseNullspace);
210 if (bTransferCoordinates_) {
212 typedef Xpetra::MultiVector<
213 typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO, NO>
215 RCP<xdMV> fineCoords = Teuchos::null;
220 RCP<const FactoryBase> myCoordsFact = GetFactory(
"Coordinates");
221 if (myCoordsFact == Teuchos::null) {
224 if (fineLevel.
IsAvailable(
"Coordinates", myCoordsFact.get())) {
226 fineLevel.
Get<RCP<xdMV>>(
"Coordinates", myCoordsFact.get());
230 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords == Teuchos::null,
232 "No Coordinates found provided by the user.");
234 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3,
236 "Three coordinates arrays must be supplied if "
237 "line detection orientation not given.");
238 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> x =
239 fineCoords->getDataNonConst(0);
240 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> y =
241 fineCoords->getDataNonConst(1);
242 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> z =
243 fineCoords->getDataNonConst(2);
247 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_max =
248 -Teuchos::ScalarTraits<
249 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
250 Teuchos::ScalarTraits<
251 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
252 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_min =
253 Teuchos::ScalarTraits<
254 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() /
255 Teuchos::ScalarTraits<
256 typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
257 for (
auto it = z.begin(); it != z.end(); ++it) {
264 LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
266 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType>
267 myZLayerCoords = Teuchos::arcp<
268 typename Teuchos::ScalarTraits<Scalar>::coordinateType>(
270 if (myCoarseZLayers == 1) {
271 myZLayerCoords[0] = zval_min;
273 typename Teuchos::ScalarTraits<Scalar>::coordinateType dz =
274 (zval_max - zval_min) / (myCoarseZLayers - 1);
275 for (LO k = 0; k < myCoarseZLayers; ++k) {
276 myZLayerCoords[k] = k * dz;
284 LO numVertLines = Nnodes / FineNumZLayers;
285 LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
287 RCP<const Map> coarseCoordMap = MapFactory::Build(
288 fineCoords->getMap()->lib(),
289 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
290 Teuchos::as<size_t>(numLocalCoarseNodes),
291 fineCoords->getMap()->getIndexBase(), fineCoords->getMap()->getComm());
292 RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<
293 typename Teuchos::ScalarTraits<Scalar>::coordinateType, LO, GO,
294 NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
295 coarseCoords->putScalar(-1.0);
296 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cx =
297 coarseCoords->getDataNonConst(0);
298 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cy =
299 coarseCoords->getDataNonConst(1);
300 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cz =
301 coarseCoords->getDataNonConst(2);
304 LO cntCoarseNodes = 0;
305 for (LO vt = 0; vt < TVertLineId.size(); ++vt) {
307 LO curVertLineId = TVertLineId[vt];
309 if (cx[curVertLineId * myCoarseZLayers] == -1.0 &&
310 cy[curVertLineId * myCoarseZLayers] == -1.0) {
312 for (LO n = 0; n < myCoarseZLayers; ++n) {
313 cx[curVertLineId * myCoarseZLayers + n] = x[vt];
314 cy[curVertLineId * myCoarseZLayers + n] = y[vt];
315 cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
317 cntCoarseNodes += myCoarseZLayers;
320 TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes,
322 "number of coarse nodes is inconsistent.");
323 if (cntCoarseNodes == numLocalCoarseNodes)
328 Set(coarseLevel,
"Coordinates", coarseCoords);
336 Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::
337 BuildSemiCoarsenP(
Level &coarseLevel,
const LO NFRows,
const LO NFNodes,
338 const LO DofsPerNode,
const LO NFLayers,
339 const LO NCLayers,
const ArrayRCP<LO> LayerId,
340 const ArrayRCP<LO> VertLineId,
const RCP<Matrix> &Amat,
341 const RCP<MultiVector> fineNullspace, RCP<Matrix> &P,
342 RCP<MultiVector> &coarseNullspace)
const {
344 using impl_SC =
typename Kokkos::ArithTraits<SC>::val_type;
345 using impl_ATS = Kokkos::ArithTraits<impl_SC>;
346 using LOView1D = Kokkos::View<LO *, DeviceType>;
347 using LOView2D = Kokkos::View<LO **, DeviceType>;
351 const auto FCol2LayerVector =
352 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getColMap());
353 const auto localTemp =
354 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getDomainMap());
355 RCP<const Import> importer = Amat->getCrsGraph()->getImporter();
356 if (importer == Teuchos::null)
357 importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
360 const auto localTempHost = localTemp->getHostLocalView(Xpetra::Access::ReadWrite);
361 for (
int row = 0; row < NFRows; row++)
362 localTempHost(row, 0) = LayerId[row / DofsPerNode];
363 const auto localTempView = localTemp->getDeviceLocalView(Xpetra::Access::ReadWrite);
364 Kokkos::deep_copy(localTempView, localTempHost);
365 FCol2LayerVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
367 const auto FCol2LayerView = FCol2LayerVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
368 const auto FCol2Layer = Kokkos::subview(FCol2LayerView, Kokkos::ALL(), 0);
372 const auto FCol2DofVector =
373 Xpetra::VectorFactory<LO, LO, GO, NO>::Build(Amat->getColMap());
376 const auto localTempHost = localTemp->getHostLocalView(Xpetra::Access::ReadWrite);
377 for (
int row = 0; row < NFRows; row++)
378 localTempHost(row, 0) = row % DofsPerNode;
379 const auto localTempView = localTemp->getDeviceLocalView(Xpetra::Access::ReadWrite);
380 Kokkos::deep_copy(localTempView, localTempHost);
381 FCol2DofVector->doImport(*localTemp, *(importer), Xpetra::INSERT);
383 const auto FCol2DofView = FCol2DofVector->getDeviceLocalView(Xpetra::Access::ReadOnly);
384 const auto FCol2Dof = Kokkos::subview(FCol2DofView, Kokkos::ALL(), 0);
390 NVertLines = VertLineId[0];
391 for (
int node = 1; node < NFNodes; ++node)
392 if (VertLineId[node] > NVertLines)
393 NVertLines = VertLineId[node];
397 LOView2D LineLayer2Node(
"LineLayer2Node", NVertLines, NFLayers);
398 typename LOView2D::HostMirror LineLayer2NodeHost =
399 Kokkos::create_mirror_view(LineLayer2Node);
400 for (
int node = 0; node < NFNodes; ++node)
401 LineLayer2NodeHost(VertLineId[node], LayerId[node]) = node;
402 Kokkos::deep_copy(LineLayer2Node, LineLayer2NodeHost);
405 LOView1D CLayer2FLayer(
"CLayer2FLayer", NCLayers);
406 typename LOView1D::HostMirror CLayer2FLayerHost =
407 Kokkos::create_mirror_view(CLayer2FLayer);
408 using coordT =
typename Teuchos::ScalarTraits<Scalar>::coordinateType;
409 const LO FirstStride =
410 (LO)ceil(((coordT)(NFLayers + 1)) / ((coordT)(NCLayers + 1)));
411 const coordT RestStride =
412 ((coordT)(NFLayers - FirstStride + 1)) / ((coordT)NCLayers);
414 (LO)floor((((coordT)(NFLayers - FirstStride + 1)) / RestStride) + .00001);
416 "sizes do not match.");
417 coordT stride = (coordT)FirstStride;
418 for (
int clayer = 0; clayer < NCLayers; ++clayer) {
419 CLayer2FLayerHost(clayer) = (LO)floor(stride) - 1;
420 stride += RestStride;
422 Kokkos::deep_copy(CLayer2FLayer, CLayer2FLayerHost);
426 int MaxStencilSize = 1;
427 LOView1D CLayer2StartLayer(
"CLayer2StartLayer", NCLayers);
428 LOView1D CLayer2StencilSize(
"CLayer2StencilSize", NCLayers);
429 typename LOView1D::HostMirror CLayer2StartLayerHost =
430 Kokkos::create_mirror_view(CLayer2StartLayer);
431 typename LOView1D::HostMirror CLayer2StencilSizeHost =
432 Kokkos::create_mirror_view(CLayer2StencilSize);
433 for (
int clayer = 0; clayer < NCLayers; ++clayer) {
434 const int startLayer = (clayer > 0) ? CLayer2FLayerHost(clayer - 1) + 1 : 0;
435 const int stencilSize = (clayer < NCLayers - 1)
436 ? CLayer2FLayerHost(clayer + 1) - startLayer
437 : NFLayers - startLayer;
439 if (MaxStencilSize < stencilSize)
440 MaxStencilSize = stencilSize;
441 CLayer2StartLayerHost(clayer) = startLayer;
442 CLayer2StencilSizeHost(clayer) = stencilSize;
444 Kokkos::deep_copy(CLayer2StartLayer, CLayer2StartLayerHost);
445 Kokkos::deep_copy(CLayer2StencilSize, CLayer2StencilSizeHost);
452 int Nmax = MaxStencilSize * DofsPerNode;
453 Kokkos::View<impl_SC ***, DeviceType> BandMat(
454 "BandMat", NVertLines, Nmax, Nmax);
455 Kokkos::View<impl_SC ***, DeviceType> BandSol(
456 "BandSol", NVertLines, Nmax, DofsPerNode);
462 for (
int clayer = 0; clayer < NCLayers; ++clayer)
463 NnzP += CLayer2StencilSizeHost(clayer);
464 NnzP *= NVertLines * DofsPerNode * DofsPerNode;
465 Kokkos::View<impl_SC *, DeviceType> Pvals(
"Pvals", NnzP);
466 Kokkos::View<LO *, DeviceType> Pcols(
"Pcols", NnzP);
471 Kokkos::View<size_t *, DeviceType> Pptr(
"Pptr", NFRows + 1);
472 typename Kokkos::View<size_t *, DeviceType>::HostMirror PptrHost =
473 Kokkos::create_mirror_view(Pptr);
474 Kokkos::deep_copy(PptrHost, 0);
475 for (
int line = 0; line < NVertLines; ++line) {
476 for (
int clayer = 0; clayer < NCLayers; ++clayer) {
477 const int stencilSize = CLayer2StencilSizeHost(clayer);
478 const int startLayer = CLayer2StartLayerHost(clayer);
479 for (
int snode = 0; snode < stencilSize; ++snode) {
480 for (
int dofi = 0; dofi < DofsPerNode; ++dofi) {
481 const int layer = startLayer + snode;
482 const int AmatBlkRow = LineLayer2NodeHost(line, layer);
483 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
484 PptrHost(AmatRow + 1) += DofsPerNode;
489 for (
int i = 2; i < NFRows + 1; ++i)
490 PptrHost(i) += PptrHost(i - 1);
491 TEUCHOS_TEST_FOR_EXCEPTION(NnzP != (
int)PptrHost(NFRows),
493 "Number of nonzeros in P does not match");
494 Kokkos::deep_copy(Pptr, PptrHost);
498 Kokkos::View<LO *, Kokkos::DefaultHostExecutionSpace> layerBuckets(
499 "layerBuckets", NFLayers);
500 Kokkos::deep_copy(layerBuckets, 0);
501 LOView2D CLayerSNode2PptrOffset(
"CLayerSNode2PptrOffset", NCLayers,
503 typename LOView2D::HostMirror CLayerSNode2PptrOffsetHost =
504 Kokkos::create_mirror_view(CLayerSNode2PptrOffset);
505 for (
int clayer = 0; clayer < NCLayers; ++clayer) {
506 const int stencilSize = CLayer2StencilSizeHost(clayer);
507 const int startLayer = CLayer2StartLayerHost(clayer);
508 for (
int snode = 0; snode < stencilSize; ++snode) {
509 const int layer = startLayer + snode;
510 CLayerSNode2PptrOffsetHost(clayer, snode) = layerBuckets(layer);
511 layerBuckets(layer)++;
514 Kokkos::deep_copy(CLayerSNode2PptrOffset, CLayerSNode2PptrOffsetHost);
519 const auto localAmat = Amat->getLocalMatrixDevice();
520 const auto zero = impl_ATS::zero();
521 const auto one = impl_ATS::one();
523 using range_policy = Kokkos::RangePolicy<execution_space>;
524 Kokkos::parallel_for(
525 "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Fill P",
526 range_policy(0, NVertLines), KOKKOS_LAMBDA(
const int line) {
527 for (
int clayer = 0; clayer < NCLayers; ++clayer) {
531 Kokkos::subview(BandSol, line, Kokkos::ALL(), Kokkos::ALL());
532 for (
int row = 0; row < Nmax; ++row)
533 for (
int dof = 0; dof < DofsPerNode; ++dof)
534 bandSol(row, dof) = zero;
537 const int stencilSize = CLayer2StencilSize(clayer);
538 const int N = stencilSize * DofsPerNode;
540 Kokkos::subview(BandMat, line, Kokkos::ALL(), Kokkos::ALL());
541 for (
int row = 0; row < Nmax; ++row)
542 for (
int col = 0; col < Nmax; ++col)
544 (row == col && row >= N) ? one : zero;
547 const int flayer = CLayer2FLayer(clayer);
548 const int startLayer = CLayer2StartLayer(clayer);
549 for (
int snode = 0; snode < stencilSize; ++snode) {
551 const int layer = startLayer + snode;
552 if (layer == flayer) {
553 for (
int dof = 0; dof < DofsPerNode; ++dof) {
554 const int row = snode * DofsPerNode + dof;
555 bandMat(row, row) = one;
556 bandSol(row, dof) = one;
559 const int AmatBlkRow = LineLayer2Node(line, layer);
560 for (
int dofi = 0; dofi < DofsPerNode; ++dofi) {
563 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
564 const auto localAmatRow = localAmat.rowConst(AmatRow);
565 const int AmatRowLeng = localAmatRow.length;
567 const int row = snode * DofsPerNode + dofi;
568 for (
int dofj = 0; dofj < DofsPerNode; ++dofj) {
569 const int col = snode * DofsPerNode + dofj;
574 for (
int i = 0; i < AmatRowLeng; ++i) {
575 const int colidx = localAmatRow.colidx(i);
576 if (FCol2Layer(colidx) == layer &&
577 FCol2Dof(colidx) == dofj)
578 val += localAmatRow.value(i);
580 bandMat(row, col) = val;
586 for (
int i = 0; i < AmatRowLeng; ++i) {
587 const int colidx = localAmatRow.colidx(i);
588 if (FCol2Layer(colidx) == layer - 1 &&
589 FCol2Dof(colidx) == dofj)
590 val += localAmatRow.value(i);
592 bandMat(row, col - DofsPerNode) = val;
595 if (snode < stencilSize - 1) {
599 for (
int i = 0; i < AmatRowLeng; ++i) {
600 const int colidx = localAmatRow.colidx(i);
601 if (FCol2Layer(colidx) == layer + 1 &&
602 FCol2Dof(colidx) == dofj)
603 val += localAmatRow.value(i);
605 bandMat(row, col + DofsPerNode) = val;
613 namespace KB = KokkosBatched;
614 using lu_type =
typename KB::SerialLU<KB::Algo::LU::Unblocked>;
615 lu_type::invoke(bandMat);
617 typename KB::SerialTrsm<KB::Side::Left, KB::Uplo::Lower,
618 KB::Trans::NoTranspose, KB::Diag::Unit,
619 KB::Algo::Trsm::Unblocked>;
620 trsv_l_type::invoke(one, bandMat, bandSol);
621 using trsv_u_type =
typename KB::SerialTrsm<
622 KB::Side::Left, KB::Uplo::Upper, KB::Trans::NoTranspose,
623 KB::Diag::NonUnit, KB::Algo::Trsm::Unblocked>;
624 trsv_u_type::invoke(one, bandMat, bandSol);
627 for (
int snode = 0; snode < stencilSize; ++snode) {
628 for (
int dofj = 0; dofj < DofsPerNode; ++dofj) {
629 for (
int dofi = 0; dofi < DofsPerNode; ++dofi) {
630 const int layer = startLayer + snode;
631 const int AmatBlkRow = LineLayer2Node(line, layer);
632 const int AmatRow = AmatBlkRow * DofsPerNode + dofi;
634 const int pptrOffset = CLayerSNode2PptrOffset(clayer, snode);
636 Pptr(AmatRow) + pptrOffset * DofsPerNode + dofj;
639 line * NCLayers + clayer;
640 Pcols(pptr) = col * DofsPerNode + dofj;
641 Pvals(pptr) = bandSol(snode * DofsPerNode + dofi, dofj);
650 RCP<const Map> rowMap = Amat->getRowMap();
651 Xpetra::global_size_t GNdofs = rowMap->getGlobalNumElements();
652 Xpetra::global_size_t itemp = GNdofs / NFLayers;
653 std::vector<size_t> stridingInfo_{(size_t)DofsPerNode};
654 RCP<const Map> coarseMap = StridedMapFactory::Build(
655 rowMap->lib(), NCLayers * itemp, NCLayers * NVertLines * DofsPerNode, 0,
656 stridingInfo_, rowMap->getComm(), -1, 0);
657 P = rcp(
new CrsMatrixWrap(rowMap, coarseMap, 0));
658 RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
659 PCrs->setAllValues(Pptr, Pcols, Pvals);
660 PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
663 if (Amat->IsView(
"stridedMaps") ==
true)
664 P->CreateView(
"stridedMaps", Amat->getRowMap(
"stridedMaps"), coarseMap);
666 P->CreateView(
"stridedMaps", P->getRangeMap(), coarseMap);
670 MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
671 const int numVectors = fineNullspace->getNumVectors();
672 const auto fineNullspaceView = fineNullspace->getDeviceLocalView(Xpetra::Access::ReadOnly);
673 const auto coarseNullspaceView = coarseNullspace->getDeviceLocalView(Xpetra::Access::ReadWrite);
674 using range_policy = Kokkos::RangePolicy<execution_space>;
675 Kokkos::parallel_for(
676 "MueLu::SemiCoarsenPFactory_kokkos::BuildSemiCoarsenP Inject Nullspace",
677 range_policy(0, NVertLines), KOKKOS_LAMBDA(
const int line) {
678 for (
int clayer = 0; clayer < NCLayers; ++clayer) {
679 const int layer = CLayer2FLayer(clayer);
680 const int AmatBlkRow =
681 LineLayer2Node(line, layer);
683 line * NCLayers + clayer;
684 for (
int k = 0; k < numVectors; ++k) {
685 for (
int dofi = 0; dofi < DofsPerNode; ++dofi) {
686 const int fRow = AmatBlkRow * DofsPerNode + dofi;
687 const int cRow = col * DofsPerNode + dofi;
688 coarseNullspaceView(cRow, k) = fineNullspaceView(fRow, k);
697#define MUELU_SEMICOARSENPFACTORY_KOKKOS_SHORT
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
MueLu::DefaultGlobalOrdinal GlobalOrdinal
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
int GetLevelID() const
Return level number.
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static const Teuchos::ParameterEntry & getEntry(const std::string &name)
Returns default entry from the "master" list corresponding to the specified name.
static const NoFactory * get()
Prolongator factory performing semi-coarsening.
Timer to be used in factories. Similar to SubMonitor but adds a timer level by level.
Namespace for MueLu classes and methods.