MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_NotayAggregationFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
47#define MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_
48
49#include <Xpetra_Map.hpp>
50#include <Xpetra_Vector.hpp>
51#include <Xpetra_MultiVectorFactory.hpp>
52#include <Xpetra_MapFactory.hpp>
53#include <Xpetra_VectorFactory.hpp>
54
55#include "KokkosKernels_Handle.hpp"
56#include "KokkosSparse_spgemm.hpp"
57#include "KokkosSparse_spmv.hpp"
58
60
61#include "MueLu_Aggregates.hpp"
62#include "MueLu_GraphBase.hpp"
63#include "MueLu_Level.hpp"
64#include "MueLu_MasterList.hpp"
65#include "MueLu_Monitor.hpp"
66#include "MueLu_Types.hpp"
67#include "MueLu_Utilities.hpp"
68
69#include "MueLu_Utilities_kokkos.hpp"
70
71namespace MueLu {
72
73 namespace NotayUtils {
74 template <class LocalOrdinal>
76 return min + as<LocalOrdinal>((max-min+1) * (static_cast<double>(std::rand()) / (RAND_MAX + 1.0)));
77 }
78
79 template <class LocalOrdinal>
80 void RandomReorder(Teuchos::Array<LocalOrdinal> & list) {
81 typedef LocalOrdinal LO;
82 LO n = Teuchos::as<LO>(list.size());
83 for(LO i = 0; i < n-1; i++)
84 std::swap(list[i], list[RandomOrdinal(i,n-1)]);
85 }
86 }
87
88 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
90 RCP<ParameterList> validParamList = rcp(new ParameterList());
91
92
93#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
94 SET_VALID_ENTRY("aggregation: pairwise: size");
95 SET_VALID_ENTRY("aggregation: pairwise: tie threshold");
96 SET_VALID_ENTRY("aggregation: compute aggregate qualities");
97 SET_VALID_ENTRY("aggregation: Dirichlet threshold");
98 SET_VALID_ENTRY("aggregation: ordering");
99#undef SET_VALID_ENTRY
100
101 // general variables needed in AggregationFactory
102 validParamList->set< RCP<const FactoryBase> >("A", null, "Generating factory of the matrix");
103 validParamList->set< RCP<const FactoryBase> >("Graph", null, "Generating factory of the graph");
104 validParamList->set< RCP<const FactoryBase> >("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'");
105 validParamList->set< RCP<const FactoryBase> >("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'");
106
107
108 return validParamList;
109 }
110
111 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
113 const ParameterList& pL = GetParameterList();
114
115 Input(currentLevel, "A");
116 Input(currentLevel, "Graph");
117 Input(currentLevel, "DofsPerNode");
118 if (pL.get<bool>("aggregation: compute aggregate qualities")) {
119 Input(currentLevel, "AggregateQualities");
120 }
121
122
123 }
124
125
126 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
128 FactoryMonitor m(*this, "Build", currentLevel);
129 using STS = Teuchos::ScalarTraits<Scalar>;
130 using MT = typename STS::magnitudeType;
131
132 const MT MT_TWO = Teuchos::ScalarTraits<MT>::one() + Teuchos::ScalarTraits<MT>::one();
133
134 RCP<Teuchos::FancyOStream> out;
135 if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
136 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
137 out->setShowAllFrontMatter(false).setShowProcRank(true);
138 } else {
139 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
140 }
141
142 const ParameterList& pL = GetParameterList();
143
144 const MT kappa = static_cast<MT>(pL.get<double>("aggregation: Dirichlet threshold"));
145 TEUCHOS_TEST_FOR_EXCEPTION(kappa <= MT_TWO,
147 "Pairwise requires kappa > 2"
148 " otherwise all rows are considered as Dirichlet rows.");
149
150 // Parameters
151 int maxNumIter = 3;
152 if (pL.isParameter("aggregation: pairwise: size"))
153 maxNumIter = pL.get<int>("aggregation: pairwise: size");
154 TEUCHOS_TEST_FOR_EXCEPTION(maxNumIter < 1,
156 "NotayAggregationFactory::Build(): \"aggregation: pairwise: size\""
157 " must be a strictly positive integer");
158
159
160 RCP<const GraphBase> graph = Get< RCP<GraphBase> >(currentLevel, "Graph");
161 RCP<const Matrix> A = Get< RCP<Matrix> >(currentLevel, "A");
162
163 // Setup aggregates & aggStat objects
164 RCP<Aggregates> aggregates = rcp(new Aggregates(*graph));
165 aggregates->setObjectLabel("PW");
166
167 const LO numRows = graph->GetNodeNumVertices();
168
169 // construct aggStat information
170 std::vector<unsigned> aggStat(numRows, READY);
171
172
173 const int DofsPerNode = Get<int>(currentLevel,"DofsPerNode");
174 TEUCHOS_TEST_FOR_EXCEPTION(DofsPerNode != 1, Exceptions::RuntimeError,
175 "Pairwise only supports one dof per node");
176
177 // This follows the paper:
178 // Notay, "Aggregation-based algebraic multigrid for convection-diffusion equations",
179 // SISC 34(3), pp. A2288-2316.
180
181 // Handle Ordering
182 std::string orderingStr = pL.get<std::string>("aggregation: ordering");
183 enum {
184 O_NATURAL,
185 O_RANDOM,
186 O_CUTHILL_MCKEE,
187 } ordering;
188
189 ordering = O_NATURAL;
190 if (orderingStr == "random" ) ordering = O_RANDOM;
191 else if(orderingStr == "natural") {}
192 else if(orderingStr == "cuthill-mckee" || orderingStr == "cm") ordering = O_CUTHILL_MCKEE;
193 else {
194 TEUCHOS_TEST_FOR_EXCEPTION(1,Exceptions::RuntimeError,"Invalid ordering type");
195 }
196
197 // Get an ordering vector
198 // NOTE: The orderingVector only orders *rows* of the matrix. Off-proc columns
199 // will get ignored in the aggregation phases, so we don't need to worry about
200 // running off the end.
201 Array<LO> orderingVector(numRows);
202 for (LO i = 0; i < numRows; i++)
203 orderingVector[i] = i;
204 if (ordering == O_RANDOM)
205 MueLu::NotayUtils::RandomReorder(orderingVector);
206 else if (ordering == O_CUTHILL_MCKEE) {
207 RCP<Xpetra::Vector<LO,LO,GO,NO> > rcmVector = MueLu::Utilities_kokkos<SC,LO,GO,NO>::CuthillMcKee(*A);
208 auto localVector = rcmVector->getData(0);
209 for (LO i = 0; i < numRows; i++)
210 orderingVector[i] = localVector[i];
211 }
212
213 // Get the party stated
214 LO numNonAggregatedNodes = numRows, numDirichletNodes = 0;
215 BuildInitialAggregates(pL, A, orderingVector(), kappa,
216 *aggregates, aggStat, numNonAggregatedNodes, numDirichletNodes);
217 TEUCHOS_TEST_FOR_EXCEPTION(0 < numNonAggregatedNodes, Exceptions::RuntimeError,
218 "Initial pairwise aggregation failed to aggregate all nodes");
219 LO numLocalAggregates = aggregates->GetNumAggregates();
220 GetOStream(Statistics0) << "Init : " << numLocalAggregates << " - "
221 << A->getLocalNumRows() / numLocalAggregates << std::endl;
222
223 // Temporary data storage for further aggregation steps
224 local_matrix_type intermediateP;
225 local_matrix_type coarseLocalA;
226
227 // Compute the on rank part of the local matrix
228 // that the square submatrix that only contains
229 // columns corresponding to local rows.
230 LO numLocalDirichletNodes = numDirichletNodes;
231 Array<LO> localVertex2AggId(aggregates->GetVertex2AggId()->getData(0).view(0, numRows));
232 BuildOnRankLocalMatrix(A->getLocalMatrixDevice(), coarseLocalA);
233 for(LO aggregationIter = 1; aggregationIter < maxNumIter; ++aggregationIter) {
234 // Compute the intermediate prolongator
235 BuildIntermediateProlongator(coarseLocalA.numRows(), numLocalDirichletNodes, numLocalAggregates,
236 localVertex2AggId(), intermediateP);
237
238 // Compute the coarse local matrix and coarse row sum
239 BuildCoarseLocalMatrix(intermediateP, coarseLocalA);
240
241 // Directly compute rowsum from A, rather than coarseA
242 row_sum_type rowSum("rowSum", numLocalAggregates);
243 {
244 std::vector<std::vector<LO> > agg2vertex(numLocalAggregates);
245 auto vertex2AggId = aggregates->GetVertex2AggId()->getData(0);
246 for(LO i=0; i<(LO)numRows; i++) {
247 if(aggStat[i] != AGGREGATED)
248 continue;
249 LO agg=vertex2AggId[i];
250 agg2vertex[agg].push_back(i);
251 }
252
253 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
254 for(LO i = 0; i < numRows; i++) {
255 // If not aggregated already, skip this guy
256 if(aggStat[i] != AGGREGATED)
257 continue;
258 int agg = vertex2AggId[i];
259 std::vector<LO> & myagg = agg2vertex[agg];
260
261 size_t nnz = A->getNumEntriesInLocalRow(i);
262 ArrayView<const LO> indices;
263 ArrayView<const SC> vals;
264 A->getLocalRowView(i, indices, vals);
265
266 SC mysum = Teuchos::ScalarTraits<Scalar>::zero();
267 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
268 bool found = false;
269 if(indices[colidx] < numRows) {
270 for(LO j=0; j<(LO)myagg.size(); j++)
271 if (vertex2AggId[indices[colidx]] == agg)
272 found=true;
273 }
274 if(!found) {
275 *out << "- ADDING col "<<indices[colidx]<<" = "<<vals[colidx] << std::endl;
276 mysum += vals[colidx];
277 }
278 else {
279 *out << "- NOT ADDING col "<<indices[colidx]<<" = "<<vals[colidx] << std::endl;
280 }
281 }
282
283 rowSum_h[agg] = mysum;
284 }
285 Kokkos::deep_copy(rowSum, rowSum_h);
286 }
287
288 // Get local orderingVector
289 Array<LO> localOrderingVector(numRows);
290 for (LO i = 0; i < numRows; i++)
291 localOrderingVector[i] = i;
292 if (ordering == O_RANDOM)
293 MueLu::NotayUtils::RandomReorder(localOrderingVector);
294 else if (ordering == O_CUTHILL_MCKEE) {
295 RCP<Xpetra::Vector<LO,LO,GO,NO> > rcmVector = MueLu::Utilities_kokkos<SC,LO,GO,NO>::CuthillMcKee(*A);
296 auto localVector = rcmVector->getData(0);
297 for (LO i = 0; i < numRows; i++)
298 localOrderingVector[i] = localVector[i];
299 }
300
301 // Compute new aggregates
302 numLocalAggregates = 0;
303 numNonAggregatedNodes = static_cast<LO>(coarseLocalA.numRows());
304 std::vector<LO> localAggStat(numNonAggregatedNodes, READY);
305 localVertex2AggId.resize(numNonAggregatedNodes, -1);
306 BuildFurtherAggregates(pL, A, localOrderingVector, coarseLocalA, kappa, rowSum,
307 localAggStat, localVertex2AggId,
308 numLocalAggregates, numNonAggregatedNodes);
309
310 // After the first initial pairwise aggregation
311 // the Dirichlet nodes have been removed.
312 numLocalDirichletNodes = 0;
313
314 // Update the aggregates
315 RCP<LOMultiVector> vertex2AggIdMV = aggregates->GetVertex2AggId();
316 ArrayRCP<LO> vertex2AggId = vertex2AggIdMV->getDataNonConst(0);
317 for(size_t vertexIdx = 0; vertexIdx < A->getLocalNumRows(); ++vertexIdx) {
318 LO oldAggIdx = vertex2AggId[vertexIdx];
319 if(oldAggIdx != Teuchos::OrdinalTraits<LO>::invalid()) {
320 vertex2AggId[vertexIdx] = localVertex2AggId[oldAggIdx];
321 }
322 }
323
324 // We could probably print some better statistics at some point
325 GetOStream(Statistics0) << "Iter " << aggregationIter << ": " << numLocalAggregates << " - "
326 << A->getLocalNumRows() / numLocalAggregates << std::endl;
327 }
328 aggregates->SetNumAggregates(numLocalAggregates);
329 aggregates->AggregatesCrossProcessors(false);
330 aggregates->ComputeAggregateSizes(true/*forceRecompute*/);
331
332 // DO stuff
333 Set(currentLevel, "Aggregates", aggregates);
334 GetOStream(Statistics0) << aggregates->description() << std::endl;
335 }
336
337
338 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
340 BuildInitialAggregates(const Teuchos::ParameterList& params,
341 const RCP<const Matrix>& A,
342 const Teuchos::ArrayView<const LO> & orderingVector,
343 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
344 Aggregates& aggregates,
345 std::vector<unsigned>& aggStat,
346 LO& numNonAggregatedNodes,
347 LO& numDirichletNodes) const {
348
349 Monitor m(*this, "BuildInitialAggregates");
350 using STS = Teuchos::ScalarTraits<Scalar>;
351 using MT = typename STS::magnitudeType;
352 using RealValuedVector = Xpetra::Vector<MT,LocalOrdinal,GlobalOrdinal,Node>;
353
354 RCP<Teuchos::FancyOStream> out;
355 if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
356 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
357 out->setShowAllFrontMatter(false).setShowProcRank(true);
358 } else {
359 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
360 }
361
362
363 const SC SC_ZERO = Teuchos::ScalarTraits<SC>::zero();
364 const MT MT_ZERO = Teuchos::ScalarTraits<MT>::zero();
365 const MT MT_ONE = Teuchos::ScalarTraits<MT>::one();
366 const MT MT_TWO = MT_ONE + MT_ONE;
367 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
368 const LO LO_ZERO = Teuchos::OrdinalTraits<LO>::zero();
369
370 const MT kappa_init = kappa / (kappa - MT_TWO);
371 const LO numRows = aggStat.size();
372 const int myRank = A->getMap()->getComm()->getRank();
373
374 // For finding "ties" where we fall back to the ordering. Making this larger than
375 // hard zero substantially increases code robustness.
376 double tie_criterion = params.get<double>("aggregation: pairwise: tie threshold");
377 double tie_less = 1.0 - tie_criterion;
378 double tie_more = 1.0 + tie_criterion;
379
380 // NOTE: Assumes 1 dof per node. This constraint is enforced in Build(),
381 // and so we're not doing again here.
382 // This should probably be fixed at some point.
383
384 // Extract diagonal, rowsums, etc
385 // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
388 RCP<RealValuedVector> ghostedAbsRowSum = MueLu::Utilities<SC,LO,GO,NO>::GetMatrixOverlappedAbsDeletedRowsum(*A);
389 const ArrayRCP<const SC> D = ghostedDiag->getData(0);
390 const ArrayRCP<const SC> S = ghostedRowSum->getData(0);
391 const ArrayRCP<const MT> AbsRs = ghostedAbsRowSum->getData(0);
392
393 // Aggregates stuff
394 ArrayRCP<LO> vertex2AggId_rcp = aggregates.GetVertex2AggId()->getDataNonConst(0);
395 ArrayRCP<LO> procWinner_rcp = aggregates.GetProcWinner() ->getDataNonConst(0);
396 ArrayView<LO> vertex2AggId = vertex2AggId_rcp();
397 ArrayView<LO> procWinner = procWinner_rcp();
398
399 // Algorithm 4.2
400
401 // 0,1 : Initialize: Flag boundary conditions
402 // Modification: We assume symmetry here aij = aji
403 for (LO row = 0; row < Teuchos::as<LO>(A->getRowMap()->getLocalNumElements()); ++row) {
404 MT aii = STS::magnitude(D[row]);
405 MT rowsum = AbsRs[row];
406
407 if(aii >= kappa_init * rowsum) {
408 *out << "Flagging index " << row << " as dirichlet "
409 "aii >= kappa*rowsum = " << aii << " >= " << kappa_init << " " << rowsum << std::endl;
410 aggStat[row] = IGNORED;
411 --numNonAggregatedNodes;
412 ++numDirichletNodes;
413 }
414 }
415
416
417 // 2 : Iteration
418 LO aggIndex = LO_ZERO;
419 for(LO i = 0; i < numRows; i++) {
420 LO current_idx = orderingVector[i];
421 // If we're aggregated already, skip this guy
422 if(aggStat[current_idx] != READY)
423 continue;
424
425 MT best_mu = MT_ZERO;
426 LO best_idx = LO_INVALID;
427
428 size_t nnz = A->getNumEntriesInLocalRow(current_idx);
429 ArrayView<const LO> indices;
430 ArrayView<const SC> vals;
431 A->getLocalRowView(current_idx, indices, vals);
432
433 MT aii = STS::real(D[current_idx]);
434 MT si = STS::real(S[current_idx]);
435 for (LO colidx = 0; colidx < static_cast<LO>(nnz); colidx++) {
436 // Skip aggregated neighbors, off-rank neighbors, hard zeros and self
437 LO col = indices[colidx];
438 SC val = vals[colidx];
439 if(current_idx == col || col >= numRows || aggStat[col] != READY || val == SC_ZERO)
440 continue;
441
442 MT aij = STS::real(val);
443 MT ajj = STS::real(D[col]);
444 MT sj = - STS::real(S[col]); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
445 if(aii - si + ajj - sj >= MT_ZERO) {
446 // Modification: We assume symmetry here aij = aji
447 MT mu_top = MT_TWO / ( MT_ONE / aii + MT_ONE / ajj);
448 MT mu_bottom = - aij + MT_ONE / ( MT_ONE / (aii - si) + MT_ONE / (ajj - sj) );
449 MT mu = mu_top / mu_bottom;
450
451 // Modification: Explicitly check the tie criterion here
452 if (mu > MT_ZERO && (best_idx == LO_INVALID || mu < best_mu * tie_less ||
453 (mu < best_mu*tie_more && orderingVector[col] < orderingVector[best_idx]))) {
454 best_mu = mu;
455 best_idx = col;
456 *out << "[" << current_idx << "] Column UPDATED " << col << ": "
457 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
458 << " = " << aii - si + ajj - sj<< ", aij = "<<aij << ", mu = " << mu << std::endl;
459 }
460 else {
461 *out << "[" << current_idx << "] Column NOT UPDATED " << col << ": "
462 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
463 << " = " << aii - si + ajj - sj << ", aij = "<<aij<< ", mu = " << mu << std::endl;
464 }
465 }
466 else {
467 *out << "[" << current_idx << "] Column FAILED " << col << ": "
468 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
469 << " = " << aii - si + ajj - sj << std::endl;
470 }
471 }// end column for loop
472
473 if(best_idx == LO_INVALID) {
474 *out << "No node buddy found for index " << current_idx
475 << " [agg " << aggIndex << "]\n" << std::endl;
476 // We found no potential node-buddy, so let's just make this a singleton
477 // NOTE: The behavior of what to do if you have no un-aggregated neighbors is not specified in
478 // the paper
479
480 aggStat[current_idx] = ONEPT;
481 vertex2AggId[current_idx] = aggIndex;
482 procWinner[current_idx] = myRank;
483 numNonAggregatedNodes--;
484 aggIndex++;
485
486 } else {
487 // We have a buddy, so aggregate, either as a singleton or as a pair, depending on mu
488 if(best_mu <= kappa) {
489 *out << "Node buddies (" << current_idx << "," << best_idx << ") [agg " << aggIndex << "]" << std::endl;
490
491 aggStat[current_idx] = AGGREGATED;
492 aggStat[best_idx] = AGGREGATED;
493 vertex2AggId[current_idx] = aggIndex;
494 vertex2AggId[best_idx] = aggIndex;
495 procWinner[current_idx] = myRank;
496 procWinner[best_idx] = myRank;
497 numNonAggregatedNodes-=2;
498 aggIndex++;
499
500 } else {
501 *out << "No buddy found for index " << current_idx << ","
502 " but aggregating as singleton [agg " << aggIndex << "]" << std::endl;
503
504 aggStat[current_idx] = ONEPT;
505 vertex2AggId[current_idx] = aggIndex;
506 procWinner[current_idx] = myRank;
507 numNonAggregatedNodes--;
508 aggIndex++;
509 } // best_mu
510 } // best_idx
511 }// end Algorithm 4.2
512
513 *out << "vertex2aggid :";
514 for(int i = 0; i < static_cast<int>(vertex2AggId.size()); ++i) {
515 *out << i << "(" << vertex2AggId[i] << ")";
516 }
517 *out << std::endl;
518
519 // update aggregate object
520 aggregates.SetNumAggregates(aggIndex);
521 } // BuildInitialAggregates
522
523 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
525 BuildFurtherAggregates(const Teuchos::ParameterList& params,
526 const RCP<const Matrix>& A,
527 const Teuchos::ArrayView<const LO> & orderingVector,
528 const typename Matrix::local_matrix_type& coarseA,
529 const typename Teuchos::ScalarTraits<Scalar>::magnitudeType kappa,
530 const Kokkos::View<typename Kokkos::ArithTraits<Scalar>::val_type*,
531 Kokkos::LayoutLeft,
532 typename Matrix::local_matrix_type::device_type>& rowSum,
533 std::vector<LocalOrdinal>& localAggStat,
534 Teuchos::Array<LocalOrdinal>& localVertex2AggID,
535 LO& numLocalAggregates,
536 LO& numNonAggregatedNodes) const {
537 Monitor m(*this, "BuildFurtherAggregates");
538
539 // Set debug outputs based on environment variable
540 RCP<Teuchos::FancyOStream> out;
541 if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
542 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
543 out->setShowAllFrontMatter(false).setShowProcRank(true);
544 } else {
545 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
546 }
547
548 using value_type = typename local_matrix_type::value_type;
549 const value_type KAT_zero = Kokkos::ArithTraits<value_type>::zero();
550 const magnitude_type MT_zero = Teuchos::ScalarTraits<magnitude_type>::zero();
551 const magnitude_type MT_one = Teuchos::ScalarTraits<magnitude_type>::one();
552 const magnitude_type MT_two = MT_one + MT_one;
553 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid() ;
554
555 // For finding "ties" where we fall back to the ordering. Making this larger than
556 // hard zero substantially increases code robustness.
557 double tie_criterion = params.get<double>("aggregation: pairwise: tie threshold");
558 double tie_less = 1.0 - tie_criterion;
559 double tie_more = 1.0 + tie_criterion;
560
561 typename row_sum_type::HostMirror rowSum_h = Kokkos::create_mirror_view(rowSum);
562 Kokkos::deep_copy(rowSum_h, rowSum);
563
564 // Extracting the diagonal of a KokkosSparse::CrsMatrix
565 // is not currently provided in kokkos-kernels so here
566 // is an ugly way to get that done...
567 const LO numRows = static_cast<LO>(coarseA.numRows());
568 typename local_matrix_type::values_type::HostMirror diagA_h("diagA host", numRows);
569 typename local_matrix_type::row_map_type::HostMirror row_map_h
570 = Kokkos::create_mirror_view(coarseA.graph.row_map);
571 Kokkos::deep_copy(row_map_h, coarseA.graph.row_map);
572 typename local_matrix_type::index_type::HostMirror entries_h
573 = Kokkos::create_mirror_view(coarseA.graph.entries);
574 Kokkos::deep_copy(entries_h, coarseA.graph.entries);
575 typename local_matrix_type::values_type::HostMirror values_h
576 = Kokkos::create_mirror_view(coarseA.values);
577 Kokkos::deep_copy(values_h, coarseA.values);
578 for(LO rowIdx = 0; rowIdx < numRows; ++rowIdx) {
579 for(LO entryIdx = static_cast<LO>(row_map_h(rowIdx));
580 entryIdx < static_cast<LO>(row_map_h(rowIdx + 1));
581 ++entryIdx) {
582 if(rowIdx == static_cast<LO>(entries_h(entryIdx))) {
583 diagA_h(rowIdx) = values_h(entryIdx);
584 }
585 }
586 }
587
588 for(LO currentIdx = 0; currentIdx < numRows; ++currentIdx) {
589 if(localAggStat[currentIdx] != READY) {
590 continue;
591 }
592
593 LO bestIdx = Teuchos::OrdinalTraits<LO>::invalid();
594 magnitude_type best_mu = Teuchos::ScalarTraits<magnitude_type>::zero();
595 const magnitude_type aii = Teuchos::ScalarTraits<value_type>::real(diagA_h(currentIdx));
596 const magnitude_type si = Teuchos::ScalarTraits<value_type>::real(rowSum_h(currentIdx));
597 for(auto entryIdx = row_map_h(currentIdx); entryIdx < row_map_h(currentIdx + 1); ++entryIdx) {
598 const LO colIdx = static_cast<LO>(entries_h(entryIdx));
599 if(currentIdx == colIdx || colIdx >= numRows || localAggStat[colIdx] != READY || values_h(entryIdx) == KAT_zero) {
600 continue;
601 }
602
603 const magnitude_type aij = Teuchos::ScalarTraits<value_type>::real(values_h(entryIdx));
604 const magnitude_type ajj = Teuchos::ScalarTraits<value_type>::real(diagA_h(colIdx));
605 const magnitude_type sj = - Teuchos::ScalarTraits<value_type>::real(rowSum_h(colIdx)); // NOTE: The ghostedRowSum vector here has has the sign flipped from Notay's S
606 if(aii - si + ajj - sj >= MT_zero) {
607 const magnitude_type mu_top = MT_two / ( MT_one/aii + MT_one/ajj );
608 const magnitude_type mu_bottom = -aij + MT_one / (MT_one / (aii - si) + MT_one / (ajj - sj));
609 const magnitude_type mu = mu_top / mu_bottom;
610
611 // Modification: Explicitly check the tie criterion here
612 if (mu > MT_zero && (bestIdx == LO_INVALID || mu < best_mu * tie_less ||
613 (mu < best_mu*tie_more && orderingVector[colIdx] < orderingVector[bestIdx]))) {
614 best_mu = mu;
615 bestIdx = colIdx;
616 *out << "[" << currentIdx << "] Column UPDATED " << colIdx << ": "
617 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
618 << " = " << aii - si + ajj - sj << ", aij = "<<aij<<" mu = " << mu << std::endl;
619 }
620 else {
621 *out << "[" << currentIdx << "] Column NOT UPDATED " << colIdx << ": "
622 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
623 << " = " << aii - si + ajj - sj << ", aij = "<<aij<<", mu = " << mu << std::endl;
624 }
625 } else {
626 *out << "[" << currentIdx << "] Column FAILED " << colIdx << ": "
627 << "aii - si + ajj - sj = " << aii << " - " << si << " + " << ajj << " - " << sj
628 << " = " << aii - si + ajj - sj << std::endl;
629 }
630 } // end loop over row entries
631
632 if(bestIdx == Teuchos::OrdinalTraits<LO>::invalid()) {
633 localAggStat[currentIdx] = ONEPT;
634 localVertex2AggID[currentIdx] = numLocalAggregates;
635 --numNonAggregatedNodes;
636 ++numLocalAggregates;
637 } else {
638 if(best_mu <= kappa) {
639 *out << "Node buddies (" << currentIdx << "," << bestIdx << ") [agg " << numLocalAggregates << "]" << std::endl;
640
641 localAggStat[currentIdx] = AGGREGATED;
642 localVertex2AggID[currentIdx] = numLocalAggregates;
643 --numNonAggregatedNodes;
644
645 localAggStat[bestIdx] = AGGREGATED;
646 localVertex2AggID[bestIdx] = numLocalAggregates;
647 --numNonAggregatedNodes;
648
649 ++numLocalAggregates;
650 } else {
651 *out << "No buddy found for index " << currentIdx << ","
652 " but aggregating as singleton [agg " << numLocalAggregates << "]" << std::endl;
653
654 localAggStat[currentIdx] = ONEPT;
655 localVertex2AggID[currentIdx] = numLocalAggregates;
656 --numNonAggregatedNodes;
657 ++numLocalAggregates;
658 }
659 }
660 } // end loop over matrix rows
661
662 } // BuildFurtherAggregates
663
664 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
666 BuildOnRankLocalMatrix(const typename Matrix::local_matrix_type& localA,
667 typename Matrix::local_matrix_type& onrankA) const {
668 Monitor m(*this, "BuildOnRankLocalMatrix");
669
670 // Set debug outputs based on environment variable
671 RCP<Teuchos::FancyOStream> out;
672 if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
673 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
674 out->setShowAllFrontMatter(false).setShowProcRank(true);
675 } else {
676 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
677 }
678
679 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
680 using values_type = typename local_matrix_type::values_type;
681 using size_type = typename local_graph_type::size_type;
682 using col_index_type = typename local_graph_type::data_type;
683 using array_layout = typename local_graph_type::array_layout;
684 using memory_traits = typename local_graph_type::memory_traits;
685 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
686 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
687 // Extract on rank part of A
688 // Simply check that the column index is less than the number of local rows
689 // otherwise remove it.
690
691 const int numRows = static_cast<int>(localA.numRows());
692 row_pointer_type rowPtr("onrankA row pointer", numRows + 1);
693 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
694 typename local_graph_type::row_map_type::HostMirror origRowPtr_h
695 = Kokkos::create_mirror_view(localA.graph.row_map);
696 typename local_graph_type::entries_type::HostMirror origColind_h
697 = Kokkos::create_mirror_view(localA.graph.entries);
698 typename values_type::HostMirror origValues_h
699 = Kokkos::create_mirror_view(localA.values);
700 Kokkos::deep_copy(origRowPtr_h, localA.graph.row_map);
701 Kokkos::deep_copy(origColind_h, localA.graph.entries);
702 Kokkos::deep_copy(origValues_h, localA.values);
703
704 // Compute the number of nnz entries per row
705 rowPtr_h(0) = 0;
706 for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
707 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
708 if(origColind_h(entryIdx) < numRows) {rowPtr_h(rowIdx + 1) += 1;}
709 }
710 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx + 1) + rowPtr_h(rowIdx);
711 }
712 Kokkos::deep_copy(rowPtr, rowPtr_h);
713
714 const LO nnzOnrankA = rowPtr_h(numRows);
715
716 // Now use nnz per row to allocate matrix views and store column indices and values
717 col_indices_type colInd("onrankA column indices", rowPtr_h(numRows));
718 values_type values("onrankA values", rowPtr_h(numRows));
719 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
720 typename values_type::HostMirror values_h = Kokkos::create_mirror_view(values);
721 int entriesInRow;
722 for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
723 entriesInRow = 0;
724 for(size_type entryIdx = origRowPtr_h(rowIdx); entryIdx < origRowPtr_h(rowIdx + 1); ++entryIdx) {
725 if(origColind_h(entryIdx) < numRows) {
726 colInd_h(rowPtr_h(rowIdx) + entriesInRow) = origColind_h(entryIdx);
727 values_h(rowPtr_h(rowIdx) + entriesInRow) = origValues_h(entryIdx);
728 ++entriesInRow;
729 }
730 }
731 }
732 Kokkos::deep_copy(colInd, colInd_h);
733 Kokkos::deep_copy(values, values_h);
734
735 onrankA = local_matrix_type("onrankA", numRows, numRows,
736 nnzOnrankA, values, rowPtr, colInd);
737 }
738
739 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
742 const LocalOrdinal numDirichletNodes,
743 const LocalOrdinal numLocalAggregates,
744 const Teuchos::ArrayView<const LocalOrdinal>& localVertex2AggID,
745 typename Matrix::local_matrix_type& intermediateP) const {
746 Monitor m(*this, "BuildIntermediateProlongator");
747
748 // Set debug outputs based on environment variable
749 RCP<Teuchos::FancyOStream> out;
750 if(const char* dbg = std::getenv("MUELU_PAIRWISEAGGREGATION_DEBUG")) {
751 out = Teuchos::fancyOStream(Teuchos::rcpFromRef(std::cout));
752 out->setShowAllFrontMatter(false).setShowProcRank(true);
753 } else {
754 out = Teuchos::getFancyOStream(rcp(new Teuchos::oblackholestream()));
755 }
756
757 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
758 using values_type = typename local_matrix_type::values_type;
759 using size_type = typename local_graph_type::size_type;
760 using col_index_type = typename local_graph_type::data_type;
761 using array_layout = typename local_graph_type::array_layout;
762 using memory_traits = typename local_graph_type::memory_traits;
763 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
764 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
765
766 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
767
768 const int intermediatePnnz = numRows - numDirichletNodes;
769 row_pointer_type rowPtr("intermediateP row pointer", numRows + 1);
770 col_indices_type colInd("intermediateP column indices", intermediatePnnz);
771 values_type values("intermediateP values", intermediatePnnz);
772 typename row_pointer_type::HostMirror rowPtr_h = Kokkos::create_mirror_view(rowPtr);
773 typename col_indices_type::HostMirror colInd_h = Kokkos::create_mirror_view(colInd);
774
775 rowPtr_h(0) = 0;
776 for(int rowIdx = 0; rowIdx < numRows; ++rowIdx) {
777 // Skip Dirichlet nodes
778 if(localVertex2AggID[rowIdx] == LO_INVALID) {
779 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx);
780 } else {
781 rowPtr_h(rowIdx + 1) = rowPtr_h(rowIdx) + 1;
782 colInd_h(rowPtr_h(rowIdx)) = localVertex2AggID[rowIdx];
783 }
784 }
785
786 Kokkos::deep_copy(rowPtr, rowPtr_h);
787 Kokkos::deep_copy(colInd, colInd_h);
788 Kokkos::deep_copy(values, Kokkos::ArithTraits<typename values_type::value_type>::one());
789
790 intermediateP = local_matrix_type("intermediateP",
791 numRows, numLocalAggregates, intermediatePnnz,
792 values, rowPtr, colInd);
793 } // BuildIntermediateProlongator
794
795 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
797 BuildCoarseLocalMatrix(const typename Matrix::local_matrix_type& intermediateP,
798 typename Matrix::local_matrix_type& coarseA) const {
799 Monitor m(*this, "BuildCoarseLocalMatrix");
800
801 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
802 using values_type = typename local_matrix_type::values_type;
803 using size_type = typename local_graph_type::size_type;
804 using col_index_type = typename local_graph_type::data_type;
805 using array_layout = typename local_graph_type::array_layout;
806 using memory_traits = typename local_graph_type::memory_traits;
807 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
808 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
809
811 localSpGEMM(coarseA, intermediateP, "AP", AP);
812
813 // Note 03/11/20, lbv: does kh need to destroy and recreate the spgemm handle
814 // I am not sure but doing it for safety in case it stashes data from the previous
815 // spgemm computation...
816
817 // Compute Ac = Pt * AP
818 // Two steps needed:
819 // 1. compute Pt
820 // 2. perform multiplication
821
822 // Step 1 compute Pt
823 // Obviously this requires the same amount of storage as P except for the rowPtr
824 row_pointer_type rowPtrPt(Kokkos::ViewAllocateWithoutInitializing("Pt row pointer"),
825 intermediateP.numCols() + 1);
826 col_indices_type colIndPt(Kokkos::ViewAllocateWithoutInitializing("Pt column indices"),
827 intermediateP.nnz());
828 values_type valuesPt(Kokkos::ViewAllocateWithoutInitializing("Pt values"),
829 intermediateP.nnz());
830
831 typename row_pointer_type::HostMirror rowPtrPt_h = Kokkos::create_mirror_view(rowPtrPt);
832 typename col_indices_type::HostMirror entries_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
833 Kokkos::deep_copy(entries_h, intermediateP.graph.entries);
834 Kokkos::deep_copy(rowPtrPt_h, 0);
835 for(size_type entryIdx = 0; entryIdx < intermediateP.nnz(); ++entryIdx) {
836 rowPtrPt_h(entries_h(entryIdx) + 1) += 1;
837 }
838 for(LO rowIdx = 0; rowIdx < intermediateP.numCols(); ++rowIdx) {
839 rowPtrPt_h(rowIdx + 1) += rowPtrPt_h(rowIdx);
840 }
841 Kokkos::deep_copy(rowPtrPt, rowPtrPt_h);
842
843 typename row_pointer_type::HostMirror rowPtrP_h = Kokkos::create_mirror_view(intermediateP.graph.row_map);
844 Kokkos::deep_copy(rowPtrP_h, intermediateP.graph.row_map);
845 typename col_indices_type::HostMirror colIndP_h = Kokkos::create_mirror_view(intermediateP.graph.entries);
846 Kokkos::deep_copy(colIndP_h, intermediateP.graph.entries);
847 typename values_type::HostMirror valuesP_h = Kokkos::create_mirror_view(intermediateP.values);
848 Kokkos::deep_copy(valuesP_h, intermediateP.values);
849 typename col_indices_type::HostMirror colIndPt_h = Kokkos::create_mirror_view(colIndPt);
850 typename values_type::HostMirror valuesPt_h = Kokkos::create_mirror_view(valuesPt);
851 const col_index_type invalidColumnIndex = KokkosSparse::OrdinalTraits<col_index_type>::invalid();
852 Kokkos::deep_copy(colIndPt_h, invalidColumnIndex);
853
854 col_index_type colIdx = 0;
855 for(LO rowIdx = 0; rowIdx < intermediateP.numRows(); ++rowIdx) {
856 for(size_type entryIdxP = rowPtrP_h(rowIdx); entryIdxP < rowPtrP_h(rowIdx + 1); ++entryIdxP) {
857 colIdx = entries_h(entryIdxP);
858 for(size_type entryIdxPt = rowPtrPt_h(colIdx); entryIdxPt < rowPtrPt_h(colIdx + 1); ++entryIdxPt) {
859 if(colIndPt_h(entryIdxPt) == invalidColumnIndex) {
860 colIndPt_h(entryIdxPt) = rowIdx;
861 valuesPt_h(entryIdxPt) = valuesP_h(entryIdxP);
862 break;
863 }
864 } // Loop over entries in row of Pt
865 } // Loop over entries in row of P
866 } // Loop over rows of P
867
868 Kokkos::deep_copy(colIndPt, colIndPt_h);
869 Kokkos::deep_copy(valuesPt, valuesPt_h);
870
871
872 local_matrix_type intermediatePt("intermediatePt",
873 intermediateP.numCols(),
874 intermediateP.numRows(),
875 intermediateP.nnz(),
876 valuesPt, rowPtrPt, colIndPt);
877
878 // Create views for coarseA matrix
879 localSpGEMM(intermediatePt, AP, "coarseA", coarseA);
880 } // BuildCoarseLocalMatrix
881
882 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
884 localSpGEMM(const typename Matrix::local_matrix_type& A,
885 const typename Matrix::local_matrix_type& B,
886 const std::string matrixLabel,
887 typename Matrix::local_matrix_type& C) const {
888
889 using local_graph_type = typename local_matrix_type::staticcrsgraph_type;
890 using values_type = typename local_matrix_type::values_type;
891 using size_type = typename local_graph_type::size_type;
892 using col_index_type = typename local_graph_type::data_type;
893 using array_layout = typename local_graph_type::array_layout;
894 using memory_space = typename device_type::memory_space;
895 using memory_traits = typename local_graph_type::memory_traits;
896 using row_pointer_type = Kokkos::View<size_type*, array_layout, device_type, memory_traits>;
897 using col_indices_type = Kokkos::View<col_index_type*, array_layout, device_type, memory_traits>;
898
899 // Options
900 int team_work_size = 16;
901 std::string myalg("SPGEMM_KK_MEMORY");
902 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
903 KokkosKernels::Experimental::KokkosKernelsHandle<typename row_pointer_type::const_value_type,
904 typename col_indices_type::const_value_type,
905 typename values_type::const_value_type,
907 memory_space,
908 memory_space> kh;
909 kh.create_spgemm_handle(alg_enum);
910 kh.set_team_work_size(team_work_size);
911
912 // Create views for AP matrix
913 row_pointer_type rowPtrC(Kokkos::ViewAllocateWithoutInitializing("C row pointer"),
914 A.numRows() + 1);
915 col_indices_type colIndC;
916 values_type valuesC;
917
918 // Symbolic multiplication
919 KokkosSparse::Experimental::spgemm_symbolic(&kh, A.numRows(),
920 B.numRows(), B.numCols(),
921 A.graph.row_map, A.graph.entries, false,
922 B.graph.row_map, B.graph.entries, false,
923 rowPtrC);
924
925 // allocate column indices and values of AP
926 size_t nnzC = kh.get_spgemm_handle()->get_c_nnz();
927 if (nnzC) {
928 colIndC = col_indices_type(Kokkos::ViewAllocateWithoutInitializing("C column inds"), nnzC);
929 valuesC = values_type(Kokkos::ViewAllocateWithoutInitializing("C values"), nnzC);
930 }
931
932 // Numeric multiplication
933 KokkosSparse::Experimental::spgemm_numeric(&kh, A.numRows(),
934 B.numRows(), B.numCols(),
935 A.graph.row_map, A.graph.entries, A.values, false,
936 B.graph.row_map, B.graph.entries, B.values, false,
937 rowPtrC, colIndC, valuesC);
938 kh.destroy_spgemm_handle();
939
940 C = local_matrix_type(matrixLabel, A.numRows(), B.numCols(), nnzC, valuesC, rowPtrC, colIndC);
941
942 } // localSpGEMM
943
944} //namespace MueLu
945
946#endif /* MUELU_NOTAYAGGREGATIONFACTORY_DEF_HPP_ */
#define SET_VALID_ENTRY(name)
MueLu::DefaultLocalOrdinal LocalOrdinal
Container class for aggregation information.
const RCP< LOMultiVector > & GetVertex2AggId() const
Returns constant vector that maps local node IDs to local aggregates IDs.
const RCP< LOVector > & GetProcWinner() const
Returns constant vector that maps local node IDs to owning processor IDs.
void SetNumAggregates(LO nAggregates)
Set number of local aggregates on current processor.
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
Timer to be used in non-factories.
typename Kokkos::View< impl_scalar_type *, Kokkos::LayoutLeft, device_type > row_sum_type
void BuildInitialAggregates(const Teuchos::ParameterList &params, const RCP< const Matrix > &A, const ArrayView< const LO > &orderingVector, const magnitude_type kappa, Aggregates &aggregates, std::vector< unsigned > &aggStat, LO &numNonAggregatedNodes, LO &numDirichletNodes) const
Initial aggregation phase.
void BuildFurtherAggregates(const Teuchos::ParameterList &params, const RCP< const Matrix > &A, const Teuchos::ArrayView< const LO > &orderingVector, const local_matrix_type &coarseA, const magnitude_type kappa, const row_sum_type &rowSum, std::vector< LO > &localAggStat, Array< LO > &localVertex2AggID, LO &numLocalAggregates, LO &numNonAggregatedNodes) const
Further aggregation phase increases coarsening rate by a factor of ~2 per iteration.
typename Teuchos::ScalarTraits< Scalar >::magnitudeType magnitude_type
void BuildIntermediateProlongator(const LO numRows, const LO numDirichletNodes, const LO numLocalAggregates, const ArrayView< const LO > &localVertex2AggID, local_matrix_type &intermediateP) const
Construction of a local prolongator with values equal to 1.0.
void BuildCoarseLocalMatrix(const local_matrix_type &intermediateP, local_matrix_type &coarseA) const
Implementation of a local Galerkin projection called inside BuildFurtherAggregates.
void BuildOnRankLocalMatrix(const local_matrix_type &localA, local_matrix_type &onRankA) const
typename device_type::execution_space execution_space
typename Matrix::local_matrix_type local_matrix_type
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
void DeclareInput(Level &currentLevel) const
Input.
void Build(Level &currentLevel) const
Build aggregates.
void localSpGEMM(const local_matrix_type &A, const local_matrix_type &B, const std::string matrixLabel, local_matrix_type &C) const
Wrapper for kokkos-kernels' spgemm that takes in CrsMatrix.
static RCP< Vector > GetMatrixOverlappedDeletedRowsum(const Matrix &A)
Extract Overlapped Matrix Deleted Rowsum.
static RCP< Xpetra::Vector< Magnitude, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedAbsDeletedRowsum(const Matrix &A)
static RCP< Xpetra::Vector< LocalOrdinal, LocalOrdinal, GlobalOrdinal, Node > > CuthillMcKee(const Matrix &Op)
static RCP< Xpetra::Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > GetMatrixOverlappedDiagonal(const Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A)
void RandomReorder(Teuchos::Array< LocalOrdinal > &list)
LocalOrdinal RandomOrdinal(LocalOrdinal min, LocalOrdinal max)
Namespace for MueLu classes and methods.
@ Statistics0
Print statistics that do not involve significant additional computation.
@ AGGREGATED
Definition: MueLu_Types.hpp:73