MueLu Version of the Day
Loading...
Searching...
No Matches
MueLu_SemiCoarsenPFactory_def.hpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// MueLu: A package for multigrid based preconditioning
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact
39// Jonathan Hu (jhu@sandia.gov)
40// Andrey Prokopenko (aprokop@sandia.gov)
41// Ray Tuminaro (rstumin@sandia.gov)
42//
43// ***********************************************************************
44//
45// @HEADER
46#ifndef MUELU_SEMICOARSENPFACTORY_DEF_HPP
47#define MUELU_SEMICOARSENPFACTORY_DEF_HPP
48
49#include <stdlib.h>
50
51#include <Teuchos_LAPACK.hpp>
52
53#include <Xpetra_CrsMatrixWrap.hpp>
54#include <Xpetra_ImportFactory.hpp>
55#include <Xpetra_Matrix.hpp>
56#include <Xpetra_MultiVectorFactory.hpp>
57#include <Xpetra_VectorFactory.hpp>
58
61
62#include "MueLu_MasterList.hpp"
63#include "MueLu_Monitor.hpp"
64
65namespace MueLu {
66
67 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
69 RCP<ParameterList> validParamList = rcp(new ParameterList());
70
71#define SET_VALID_ENTRY(name) validParamList->setEntry(name, MasterList::getEntry(name))
72 SET_VALID_ENTRY("semicoarsen: piecewise constant");
73 SET_VALID_ENTRY("semicoarsen: piecewise linear");
74 SET_VALID_ENTRY("semicoarsen: coarsen rate");
75 SET_VALID_ENTRY("semicoarsen: calculate nonsym restriction");
76#undef SET_VALID_ENTRY
77 validParamList->set< RCP<const FactoryBase> >("A", Teuchos::null, "Generating factory of the matrix A");
78 validParamList->set< RCP<const FactoryBase> >("Nullspace", Teuchos::null, "Generating factory of the nullspace");
79 validParamList->set< RCP<const FactoryBase> >("Coordinates", Teuchos::null, "Generating factory for coordinates");
80
81 validParamList->set< RCP<const FactoryBase> >("LineDetection_VertLineIds", Teuchos::null, "Generating factory for LineDetection vertical line ids");
82 validParamList->set< RCP<const FactoryBase> >("LineDetection_Layers", Teuchos::null, "Generating factory for LineDetection layer ids");
83 validParamList->set< RCP<const FactoryBase> >("CoarseNumZLayers", Teuchos::null, "Generating factory for number of coarse z-layers");
84
85 return validParamList;
86 }
87
88 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
90 Input(fineLevel, "A");
91 Input(fineLevel, "Nullspace");
92
93 Input(fineLevel, "LineDetection_VertLineIds");
94 Input(fineLevel, "LineDetection_Layers");
95 Input(fineLevel, "CoarseNumZLayers");
96
97 // check whether fine level coordinate information is available.
98 // If yes, request the fine level coordinates and generate coarse coordinates
99 // during the Build call
100 if (fineLevel.GetLevelID() == 0) {
101 if (fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
102 fineLevel.DeclareInput("Coordinates", NoFactory::get(), this);
103 bTransferCoordinates_ = true;
104 }
105 } else if (bTransferCoordinates_ == true){
106 // on coarser levels we check the default factory providing "Coordinates"
107 // or the factory declared to provide "Coordinates"
108 // first, check which factory is providing coordinate information
109 RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
110 if (myCoordsFact == Teuchos::null) { myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates"); }
111 if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
112 fineLevel.DeclareInput("Coordinates", myCoordsFact.get(), this);
113 }
114 }
115 }
116
117 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
119 return BuildP(fineLevel, coarseLevel);
120 }
121
122 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
124 FactoryMonitor m(*this, "Build", coarseLevel);
125
126 // obtain general variables
127 RCP<Matrix> A = Get< RCP<Matrix> > (fineLevel, "A");
128 RCP<MultiVector> fineNullspace = Get< RCP<MultiVector> > (fineLevel, "Nullspace");
129
130 // get user-provided coarsening rate parameter (constant over all levels)
131 const ParameterList& pL = GetParameterList();
132 LO CoarsenRate = as<LO>(pL.get<int>("semicoarsen: coarsen rate"));
133 bool buildRestriction = pL.get<bool>("semicoarsen: calculate nonsym restriction");
134 bool doLinear = pL.get<bool>("semicoarsen: piecewise linear");
135
136 // collect general input data
137 LO BlkSize = A->GetFixedBlockSize();
138 RCP<const Map> rowMap = A->getRowMap();
139 LO Ndofs = rowMap->getLocalNumElements();
140 LO Nnodes = Ndofs/BlkSize;
141
142 // collect line detection information generated by the LineDetectionFactory instance
143 LO FineNumZLayers = Get< LO >(fineLevel, "CoarseNumZLayers");
144 Teuchos::ArrayRCP<LO> TVertLineId = Get< Teuchos::ArrayRCP<LO> > (fineLevel, "LineDetection_VertLineIds");
145 Teuchos::ArrayRCP<LO> TLayerId = Get< Teuchos::ArrayRCP<LO> > (fineLevel, "LineDetection_Layers");
146 LO* VertLineId = TVertLineId.getRawPtr();
147 LO* LayerId = TLayerId.getRawPtr();
148
149 // generate transfer operator with semicoarsening
150 RCP<const Map> theCoarseMap;
151 RCP<Matrix> P, R;
152 RCP<MultiVector> coarseNullspace;
153 GO Ncoarse = MakeSemiCoarsenP(Nnodes,FineNumZLayers,CoarsenRate,LayerId,VertLineId,
154 BlkSize, A, P, theCoarseMap, fineNullspace,coarseNullspace,R,buildRestriction,doLinear);
155
156 // set StridingInformation of P
157 if (A->IsView("stridedMaps") == true)
158 P->CreateView("stridedMaps", A->getRowMap("stridedMaps"), theCoarseMap);
159 else
160 P->CreateView("stridedMaps", P->getRangeMap(), theCoarseMap);
161
162 if (buildRestriction) {
163 if (A->IsView("stridedMaps") == true)
164 R->CreateView("stridedMaps", theCoarseMap, A->getRowMap("stridedMaps"));
165 else
166 R->CreateView("stridedMaps", theCoarseMap, R->getDomainMap());
167 }
168 if (pL.get<bool>("semicoarsen: piecewise constant")) {
169 TEUCHOS_TEST_FOR_EXCEPTION(buildRestriction, Exceptions::RuntimeError, "Cannot use calculate nonsym restriction with piecewise constant.");
170 RevertToPieceWiseConstant(P, BlkSize);
171 }
172 if (pL.get<bool>("semicoarsen: piecewise linear")) {
173 TEUCHOS_TEST_FOR_EXCEPTION(buildRestriction, Exceptions::RuntimeError, "Cannot use calculate nonsym restriction with piecewise linear.");
174 TEUCHOS_TEST_FOR_EXCEPTION(pL.get<bool>("semicoarsen: piecewise constant"), Exceptions::RuntimeError, "Cannot use piecewise constant with piecewise linear.");
175 }
176
177 // Store number of coarse z-layers on the coarse level container
178 // This information is used by the LineDetectionAlgorithm
179 // TODO get rid of the NoFactory
180
181 LO CoarseNumZLayers = FineNumZLayers*Ncoarse;
182 CoarseNumZLayers /= Ndofs;
183 coarseLevel.Set("NumZLayers", Teuchos::as<LO>(CoarseNumZLayers), MueLu::NoFactory::get());
184
185 // store semicoarsening transfer on coarse level
186 Set(coarseLevel, "P", P);
187 if (buildRestriction) Set(coarseLevel, "RfromPfactory", R);
188
189 Set(coarseLevel, "Nullspace", coarseNullspace);
190
191 // transfer coordinates
192 if(bTransferCoordinates_) {
193 typedef Xpetra::MultiVector<typename Teuchos::ScalarTraits<Scalar>::coordinateType,LO,GO,NO> xdMV;
194 RCP<xdMV> fineCoords = Teuchos::null;
195 if (fineLevel.GetLevelID() == 0 &&
196 fineLevel.IsAvailable("Coordinates", NoFactory::get())) {
197 fineCoords = fineLevel.Get< RCP<xdMV> >("Coordinates", NoFactory::get());
198 } else {
199 RCP<const FactoryBase> myCoordsFact = GetFactory("Coordinates");
200 if (myCoordsFact == Teuchos::null) { myCoordsFact = fineLevel.GetFactoryManager()->GetFactory("Coordinates"); }
201 if (fineLevel.IsAvailable("Coordinates", myCoordsFact.get())) {
202 fineCoords = fineLevel.Get< RCP<xdMV> >("Coordinates", myCoordsFact.get());
203 }
204 }
205
206 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords==Teuchos::null, Exceptions::RuntimeError, "No Coordinates found provided by the user.");
207
208 TEUCHOS_TEST_FOR_EXCEPTION(fineCoords->getNumVectors() != 3, Exceptions::RuntimeError, "Three coordinates arrays must be supplied if line detection orientation not given.");
209 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> x = fineCoords->getDataNonConst(0);
210 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> y = fineCoords->getDataNonConst(1);
211 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> z = fineCoords->getDataNonConst(2);
212
213 // determine the maximum and minimum z coordinate value on the current processor.
214 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_max = -Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() / Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
215 typename Teuchos::ScalarTraits<Scalar>::coordinateType zval_min = Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::coordinateType>::one() / Teuchos::ScalarTraits<typename Teuchos::ScalarTraits<Scalar>::coordinateType>::sfmin();
216 for ( auto it = z.begin(); it != z.end(); ++it) {
217 if(*it > zval_max) zval_max = *it;
218 if(*it < zval_min) zval_min = *it;
219 }
220
221 LO myCoarseZLayers = Teuchos::as<LO>(CoarseNumZLayers);
222
223 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> myZLayerCoords = Teuchos::arcp<typename Teuchos::ScalarTraits<Scalar>::coordinateType>(myCoarseZLayers);
224 if(myCoarseZLayers == 1) {
225 myZLayerCoords[0] = zval_min;
226 } else {
227 typename Teuchos::ScalarTraits<Scalar>::coordinateType dz = (zval_max-zval_min)/(myCoarseZLayers-1);
228 for(LO k = 0; k<myCoarseZLayers; ++k) {
229 myZLayerCoords[k] = k*dz;
230 }
231 }
232
233 // Note, that the coarse level node coordinates have to be in vertical ordering according
234 // to the numbering of the vertical lines
235
236 // number of vertical lines on current node:
237 LO numVertLines = Nnodes / FineNumZLayers;
238 LO numLocalCoarseNodes = numVertLines * myCoarseZLayers;
239
240 RCP<const Map> coarseCoordMap =
241 MapFactory::Build (fineCoords->getMap()->lib(),
242 Teuchos::OrdinalTraits<Xpetra::global_size_t>::invalid(),
243 Teuchos::as<size_t>(numLocalCoarseNodes),
244 fineCoords->getMap()->getIndexBase(),
245 fineCoords->getMap()->getComm());
246 RCP<xdMV> coarseCoords = Xpetra::MultiVectorFactory<typename Teuchos::ScalarTraits<Scalar>::coordinateType,LO,GO,NO>::Build(coarseCoordMap, fineCoords->getNumVectors());
247 coarseCoords->putScalar(-1.0);
248 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cx = coarseCoords->getDataNonConst(0);
249 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cy = coarseCoords->getDataNonConst(1);
250 ArrayRCP<typename Teuchos::ScalarTraits<Scalar>::coordinateType> cz = coarseCoords->getDataNonConst(2);
251
252 // loop over all vert line indices (stop as soon as possible)
253 LO cntCoarseNodes = 0;
254 for( LO vt = 0; vt < TVertLineId.size(); ++vt) {
255 //vertical line id in *vt
256 LO curVertLineId = TVertLineId[vt];
257
258 if(cx[curVertLineId * myCoarseZLayers] == -1.0 &&
259 cy[curVertLineId * myCoarseZLayers] == -1.0) {
260 // loop over all local myCoarseZLayers
261 for (LO n=0; n<myCoarseZLayers; ++n) {
262 cx[curVertLineId * myCoarseZLayers + n] = x[vt];
263 cy[curVertLineId * myCoarseZLayers + n] = y[vt];
264 cz[curVertLineId * myCoarseZLayers + n] = myZLayerCoords[n];
265 }
266 cntCoarseNodes += myCoarseZLayers;
267 }
268
269 TEUCHOS_TEST_FOR_EXCEPTION(cntCoarseNodes > numLocalCoarseNodes, Exceptions::RuntimeError, "number of coarse nodes is inconsistent.");
270 if(cntCoarseNodes == numLocalCoarseNodes) break;
271 }
272
273 // set coarse level coordinates
274 Set(coarseLevel, "Coordinates", coarseCoords);
275 } /* end bool bTransferCoordinates */
276
277 }
278
279 template <class Scalar,class LocalOrdinal, class GlobalOrdinal, class Node>
281 /*
282 * Given the number of points in the z direction (PtsPerLine) and a
283 * coarsening rate (CoarsenRate), determine which z-points will serve
284 * as Cpts and return the total number of Cpts.
285 *
286 * Input
287 * PtsPerLine: Number of fine level points in the z direction
288 *
289 * CoarsenRate: Roughly, number of Cpts = (PtsPerLine+1)/CoarsenRate - 1
290 *
291 * Thin: Must be either 0 or 1. Thin decides what to do when
292 * (PtsPerLine+1)/CoarsenRate is not an integer.
293 *
294 * Thin == 0 ==> ceil() the above fraction
295 * Thin == 1 ==> floor() the above fraction
296 *
297 * Output
298 * LayerCpts Array where LayerCpts[i] indicates that the
299 * LayerCpts[i]th fine level layer is a Cpt Layer.
300 * Note: fine level layers are assumed to be numbered starting
301 * a one.
302 */
303 typename Teuchos::ScalarTraits<Scalar>::coordinateType temp, RestStride, di;
304 LO NCpts, i;
305 LO NCLayers = -1;
306 LO FirstStride;
307
308 temp = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (PtsPerLine+1))/((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (CoarsenRate)) - 1.0;
309 if (Thin == 1) NCpts = (LO) ceil(temp);
310 else NCpts = (LO) floor(temp);
311
312 TEUCHOS_TEST_FOR_EXCEPTION(PtsPerLine == 1, Exceptions::RuntimeError, "SemiCoarsenPFactory::FindCpts: cannot coarsen further.");
313
314 if (NCpts < 1) NCpts = 1;
315
316 FirstStride= (LO) ceil( ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) PtsPerLine+1)/( (typename Teuchos::ScalarTraits<Scalar>::coordinateType) (NCpts+1)));
317 RestStride = ((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (PtsPerLine-FirstStride+1))/((typename Teuchos::ScalarTraits<Scalar>::coordinateType) NCpts);
318
319 NCLayers = (LO) floor((((typename Teuchos::ScalarTraits<Scalar>::coordinateType) (PtsPerLine-FirstStride+1))/RestStride)+.00001);
320
321 TEUCHOS_TEST_FOR_EXCEPTION(NCLayers != NCpts, Exceptions::RuntimeError, "sizes do not match.");
322
323 di = (typename Teuchos::ScalarTraits<Scalar>::coordinateType) FirstStride;
324 for (i = 1; i <= NCpts; i++) {
325 (*LayerCpts)[i] = (LO) floor(di);
326 di += RestStride;
327 }
328
329 return(NCLayers);
330 }
331
332#define MaxHorNeighborNodes 75
333
334 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
336 MakeSemiCoarsenP(LO const Ntotal, LO const nz, LO const CoarsenRate, LO const LayerId[],
337 LO const VertLineId[], LO const DofsPerNode, RCP<Matrix> & Amat, RCP<Matrix>& P,
338 RCP<const Map>& coarseMap, const RCP<MultiVector> fineNullspace,
339 RCP<MultiVector>& coarseNullspace, RCP<Matrix>& R, bool buildRestriction, bool doLinear) const {
340
341 /*
342 * Given a CSR matrix (OrigARowPtr, OrigAcols, OrigAvals), information
343 * describing the z-layer and vertical line (LayerId and VertLineId)
344 * of each matrix block row, a coarsening rate, and dofs/node information,
345 * construct a prolongator that coarsening to semicoarsening in the z-direction
346 * using something like an operator-dependent grid transfer. In particular,
347 * matrix stencils are collapsed to vertical lines. Thus, each vertical line
348 * gives rise to a block tridiagonal matrix. BlkRows corresponding to
349 * Cpts are replaced by identity matrices. This tridiagonal is solved
350 * to determine each interpolation basis functions. Each Blk Rhs corresponds
351 * to all zeros except at the corresponding C-pt which has an identity
352 *
353 * On termination, return the number of local prolongator columns owned by
354 * this processor.
355 *
356 * Note: This code was adapted from a matlab code where offsets/arrays
357 * start from 1. In most parts of the code, this 1 offset is kept
358 * (in some cases wasting the first element of the array). The
359 * input and output matrices of this function has been changed to
360 * have offsets/rows/columns which start from 0. LayerId[] and
361 * VertLineId[] currently start from 1.
362 *
363 * Input
364 * =====
365 * Ntotal Number of fine level Blk Rows owned by this processor
366 *
367 * nz Number of vertical layers. Note: partitioning must be done
368 * so that each processor owns an entire vertical line. This
369 * means that nz is the global number of layers, which should
370 * be equal to the local number of layers.
371 * CoarsenRate Rate of z semicoarsening. Smoothed aggregation-like coarsening
372 * would correspond to CoarsenRate = 3.
373 * LayerId Array from 0 to Ntotal-1 + Ghost. LayerId(BlkRow) gives the
374 * layer number associated with the dofs within BlkRow.
375 * VertLineId Array from 1 to Ntotal, VertLineId(BlkRow) gives a unique
376 * vertical line id (from 0 to Ntotal/nz-1) of BlkRow. All
377 * BlkRows associated with nodes along the same vertical line
378 * in the mesh should have the same LineId.
379 * DofsPerNode Number of degrees-of-freedom per mesh node.
380 *
381 * OrigARowPtr, CSR arrays corresponding to the fine level matrix.
382 * OrigAcols,
383 * OrigAvals
384 *
385 * Output
386 * =====
387 * ParamPptr, CSR arrays corresponding to the final prolongation matrix.
388 * ParamPcols,
389 * ParamsPvals
390 */
391 int NLayers, NVertLines, MaxNnz, NCLayers, MyLine, MyLayer;
392 int *InvLineLayer=NULL, *CptLayers=NULL, StartLayer, NStencilNodes;
393 int BlkRow=-1, dof_j, node_k, *Sub2FullMap=NULL, RowLeng;
394 int i, j, iii, col, count, index, loc, PtRow, PtCol;
395 SC *BandSol=NULL, *BandMat=NULL, TheSum, *RestrictBandMat=NULL, *RestrictBandSol=NULL;
396 int *IPIV=NULL, KL, KU, KLU, N, NRHS, LDAB,INFO;
397 int *Pcols;
398 size_t *Pptr;
399 SC *Pvals;
400 LO *Rcols=NULL;
401 size_t *Rptr=NULL;
402 SC *Rvals=NULL;
403 int MaxStencilSize, MaxNnzPerRow;
404 LO *LayDiff=NULL;
405 int CurRow, LastGuy = -1, NewPtr, RLastGuy = -1;
406 int Ndofs;
407 int Nghost;
408 LO *Layerdofs = NULL, *Col2Dof = NULL;
409
410 Teuchos::LAPACK<LO,SC> lapack;
411
412 char notrans[3];
413 notrans[0] = 'N';
414 notrans[1] = 'N';
415 char trans[3];
416 trans[0] = 'T';
417 trans[1] = 'T';
418
419
420 MaxNnzPerRow = MaxHorNeighborNodes*DofsPerNode*3;
421 Teuchos::ArrayRCP<LO> TLayDiff = Teuchos::arcp<LO>(1+MaxNnzPerRow); LayDiff = TLayDiff.getRawPtr();
422
423 Nghost = Amat->getColMap()->getLocalNumElements() - Amat->getDomainMap()->getLocalNumElements();
424 if (Nghost < 0) Nghost = 0;
425 Teuchos::ArrayRCP<LO> TLayerdofs= Teuchos::arcp<LO>(Ntotal*DofsPerNode+Nghost+1); Layerdofs = TLayerdofs.getRawPtr();
426 Teuchos::ArrayRCP<LO> TCol2Dof= Teuchos::arcp<LO>(Ntotal*DofsPerNode+Nghost+1); Col2Dof= TCol2Dof.getRawPtr();
427
428 RCP<Xpetra::Vector<LO,LO,GO,NO> > localdtemp = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(Amat->getDomainMap());
429 RCP<Xpetra::Vector<LO,LO,GO,NO> > dtemp = Xpetra::VectorFactory<LO,LO,GO,NO>::Build(Amat->getColMap());
430 ArrayRCP<LO> valptr= localdtemp->getDataNonConst(0);
431
432 for (i = 0; i < Ntotal*DofsPerNode; i++)
433 valptr[i]= LayerId[i/DofsPerNode];
434 valptr=ArrayRCP<LO>();
435
436 RCP< const Import> importer;
437 importer = Amat->getCrsGraph()->getImporter();
438 if (importer == Teuchos::null) {
439 importer = ImportFactory::Build(Amat->getDomainMap(), Amat->getColMap());
440 }
441 dtemp->doImport(*localdtemp, *(importer), Xpetra::INSERT);
442
443 valptr= dtemp->getDataNonConst(0);
444 for (i = 0; i < Ntotal*DofsPerNode+Nghost; i++) Layerdofs[i]= valptr[i];
445 valptr= localdtemp->getDataNonConst(0);
446 for (i = 0; i < Ntotal*DofsPerNode; i++) valptr[i]= i%DofsPerNode;
447 valptr=ArrayRCP<LO>();
448 dtemp->doImport(*localdtemp, *(importer), Xpetra::INSERT);
449
450 valptr= dtemp->getDataNonConst(0);
451 for (i = 0; i < Ntotal*DofsPerNode+Nghost; i++) Col2Dof[i]= valptr[i];
452 valptr=ArrayRCP<LO>();
453
454 if (Ntotal != 0) {
455 NLayers = LayerId[0];
456 NVertLines= VertLineId[0];
457 }
458 else { NLayers = -1; NVertLines = -1; }
459
460 for (i = 1; i < Ntotal; i++) {
461 if ( VertLineId[i] > NVertLines ) NVertLines = VertLineId[i];
462 if ( LayerId[i] > NLayers ) NLayers = LayerId[i];
463 }
464 NLayers++;
465 NVertLines++;
466
467 /*
468 * Make an inverse map so that we can quickly find the dof
469 * associated with a particular vertical line and layer.
470 */
471
472 Teuchos::ArrayRCP<LO> TInvLineLayer= Teuchos::arcp<LO>(1+NVertLines*NLayers); InvLineLayer = TInvLineLayer.getRawPtr();
473 for (i=0; i < Ntotal; i++) {
474 InvLineLayer[ VertLineId[i]+1+LayerId[i]*NVertLines ] = i;
475 }
476
477 /*
478 * Determine coarse layers where injection will be applied.
479 */
480
481 Teuchos::ArrayRCP<LO> TCptLayers = Teuchos::arcp<LO>(nz+1); CptLayers = TCptLayers.getRawPtr();
482 NCLayers = FindCpts(nz,CoarsenRate,0, &CptLayers);
483
484 /*
485 * Compute the largest possible interpolation stencil width based
486 * on the location of the Clayers. This stencil width is actually
487 * nodal (i.e. assuming 1 dof/node). To get the true max stencil width
488 * one needs to multiply this by DofsPerNode.
489 */
490
491 if (NCLayers < 2) MaxStencilSize = nz;
492 else MaxStencilSize = CptLayers[2];
493
494 for (i = 3; i <= NCLayers; i++) {
495 if (MaxStencilSize < CptLayers[i]- CptLayers[i-2])
496 MaxStencilSize = CptLayers[i]- CptLayers[i-2];
497 }
498 if (NCLayers > 1) {
499 if (MaxStencilSize < nz - CptLayers[NCLayers-1]+1)
500 MaxStencilSize = nz - CptLayers[NCLayers-1]+1;
501 }
502
503 /*
504 * Allocate storage associated with solving a banded sub-matrix needed to
505 * determine the interpolation stencil. Note: we compute interpolation stencils
506 * for all dofs within a node at the same time, and so the banded solution
507 * must be large enough to hold all DofsPerNode simultaneously.
508 */
509
510 Teuchos::ArrayRCP<LO> TSub2FullMap= Teuchos::arcp<LO>((MaxStencilSize+1)*DofsPerNode); Sub2FullMap= TSub2FullMap.getRawPtr();
511 Teuchos::ArrayRCP<SC> TBandSol= Teuchos::arcp<SC>((MaxStencilSize+1)*DofsPerNode*DofsPerNode); BandSol = TBandSol.getRawPtr();
512 Teuchos::ArrayRCP<SC> TResBandSol;
513 if (buildRestriction) {
514 TResBandSol= Teuchos::arcp<SC>((MaxStencilSize+1)*DofsPerNode*DofsPerNode); RestrictBandSol = TResBandSol.getRawPtr();
515 }
516 /*
517 * Lapack variables. See comments for dgbsv().
518 */
519 KL = 2*DofsPerNode-1;
520 KU = 2*DofsPerNode-1;
521 KLU = KL+KU;
522 LDAB = 2*KL+KU+1;
523 NRHS = DofsPerNode;
524 Teuchos::ArrayRCP<SC> TBandMat= Teuchos::arcp<SC>(LDAB*MaxStencilSize*DofsPerNode+1); BandMat = TBandMat.getRawPtr();
525 Teuchos::ArrayRCP<LO> TIPIV= Teuchos::arcp<LO>((MaxStencilSize+1)*DofsPerNode); IPIV = TIPIV.getRawPtr();
526
527 Teuchos::ArrayRCP<SC> TResBandMat;
528 if (buildRestriction) {
529 TResBandMat= Teuchos::arcp<SC>(LDAB*MaxStencilSize*DofsPerNode+1); RestrictBandMat = TResBandMat.getRawPtr();
530 }
531
532 /*
533 * Allocate storage for the final interpolation matrix. Note: each prolongator
534 * row might have entries corresponding to at most two nodes.
535 * Note: the total fine level dofs equals DofsPerNode*Ntotal and the max
536 * nnz per prolongator row is DofsPerNode*2.
537 */
538
539 Ndofs = DofsPerNode*Ntotal;
540 MaxNnz = 2*DofsPerNode*Ndofs;
541
542 RCP<const Map> rowMap = Amat->getRowMap();
543 Xpetra::global_size_t GNdofs= rowMap->getGlobalNumElements();
544
545 std::vector<size_t> stridingInfo_;
546 stridingInfo_.push_back(DofsPerNode);
547
548 Xpetra::global_size_t itemp = GNdofs/nz;
549 coarseMap = StridedMapFactory::Build(rowMap->lib(),
550 NCLayers*itemp,
551 NCLayers*NVertLines*DofsPerNode,
552 0, /* index base */
553 stridingInfo_,
554 rowMap->getComm(),
555 -1, /* strided block id */
556 0 /* domain gid offset */);
557
558
559 //coarseMap = MapFactory::createContigMapWithNode(rowMap->lib(),(NCLayers*GNdofs)/nz, NCLayers*NVertLines*DofsPerNode,(rowMap->getComm()), rowMap->getNode());
560
561
562 P = rcp(new CrsMatrixWrap(rowMap, coarseMap , 0));
563 coarseNullspace = MultiVectorFactory::Build(coarseMap, fineNullspace->getNumVectors());
564
565
566 Teuchos::ArrayRCP<SC> TPvals= Teuchos::arcp<SC>(1+MaxNnz); Pvals= TPvals.getRawPtr();
567 Teuchos::ArrayRCP<size_t> TPptr = Teuchos::arcp<size_t>(DofsPerNode*(2+Ntotal)); Pptr = TPptr.getRawPtr();
568 Teuchos::ArrayRCP<LO> TPcols= Teuchos::arcp<LO>(1+MaxNnz); Pcols= TPcols.getRawPtr();
569
570 TEUCHOS_TEST_FOR_EXCEPTION(Pcols == NULL, Exceptions::RuntimeError, "MakeSemiCoarsenP: Not enough space \n");
571 Pptr[0] = 0; Pptr[1] = 0;
572
573
574 Teuchos::ArrayRCP<SC> TRvals;
575 Teuchos::ArrayRCP<size_t> TRptr;
576 Teuchos::ArrayRCP<LO> TRcols;
577 LO RmaxNnz=-1, RmaxPerRow=-1;
578 if (buildRestriction) {
579 RmaxPerRow = ((LO) ceil(((double) (MaxNnz+7))/((double) (coarseMap->getLocalNumElements()))));
580 RmaxNnz = RmaxPerRow*coarseMap->getLocalNumElements();
581 TRvals= Teuchos::arcp<SC>(1+RmaxNnz); Rvals= TRvals.getRawPtr();
582 TRptr = Teuchos::arcp<size_t>((2+coarseMap->getLocalNumElements())); Rptr = TRptr.getRawPtr();
583 TRcols= Teuchos::arcp<LO>(1+RmaxNnz); Rcols= TRcols.getRawPtr();
584 TEUCHOS_TEST_FOR_EXCEPTION(Rcols == NULL, Exceptions::RuntimeError, "MakeSemiCoarsenP: Not enough space \n");
585 Rptr[0] = 0; Rptr[1] = 0;
586 }
587 /*
588 * Setup P's rowptr as if each row had its maximum of 2*DofsPerNode nonzeros.
589 * This will be useful while filling up P, and then later we will squeeze out
590 * the unused nonzeros locations.
591 */
592
593 for (i = 1; i <= MaxNnz; i++) Pcols[i] = -1; /* mark all entries as unused */
594 count = 1;
595 for (i = 1; i <= DofsPerNode*Ntotal+1; i++) {
596 Pptr[i] = count;
597 count += (2*DofsPerNode);
598 }
599 if (buildRestriction) {
600 for (i = 1; i <= RmaxNnz; i++) Rcols[i] = -1; /* mark all entries as unused */
601 count = 1;
602 for (i = 1; i <= (int) (coarseMap->getLocalNumElements()+1); i++) {
603 Rptr[i] = count;
604 count += RmaxPerRow;
605 }
606 }
607
608 /*
609 * Build P column by column. The 1st block column corresponds to the 1st coarse
610 * layer and the first line. The 2nd block column corresponds to the 2nd coarse
611 * layer and the first line. The NCLayers+1 block column corresponds to the
612 * 1st coarse layer and the 2nd line, etc.
613 */
614
615
616 col = 0;
617 for (MyLine=1; MyLine <= NVertLines; MyLine += 1) {
618 for (iii=1; iii <= NCLayers; iii+= 1) {
619 col = col+1;
620 MyLayer = CptLayers[iii];
621
622 /*
623 * StartLayer gives the layer number of the lowest layer that
624 * is nonzero in the interpolation stencil that is currently
625 * being computed. Normally, if we are not near a boundary this
626 * is simply CptsLayers[iii-1]+1.
627 *
628 * NStencilNodes indicates the number of nonzero nodes in the
629 * interpolation stencil that is currently being computed. Normally,
630 * if we are not near a boundary this is CptLayers[iii+1]-StartLayer.
631 */
632
633 if (iii != 1 ) StartLayer = CptLayers[iii-1]+1;
634 else StartLayer = 1;
635
636 if (iii != NCLayers) NStencilNodes= CptLayers[iii+1]-StartLayer;
637 else NStencilNodes= NLayers - StartLayer + 1;
638
639
640 N = NStencilNodes*DofsPerNode;
641
642 /*
643 * dgbsv() does not require that the first KL rows be initialized,
644 * so we could avoid zeroing out some entries?
645 */
646
647 for (i = 0; i < NStencilNodes*DofsPerNode*DofsPerNode; i++) BandSol[ i] = 0.0;
648 for (i = 0; i < LDAB*N; i++) BandMat[ i] = 0.0;
649 if (buildRestriction) {
650 for (i = 0; i < NStencilNodes*DofsPerNode*DofsPerNode; i++) RestrictBandSol[ i] = 0.0;
651 for (i = 0; i < LDAB*N; i++) RestrictBandMat[ i] = 0.0;
652 }
653
654 /*
655 * Fill BandMat and BandSol (which is initially the rhs) for each
656 * node in the interpolation stencil that is being computed.
657 */
658
659 if (!doLinear) {
660 for (node_k=1; node_k <= NStencilNodes ; node_k++) {
661
662 /* Map a Line and Layer number to a BlkRow in the fine level matrix
663 * and record the mapping from the sub-system to the BlkRow of the
664 * fine level matrix.
665 */
666 BlkRow = InvLineLayer[MyLine+(StartLayer+node_k-2)*NVertLines]+1;
667 Sub2FullMap[node_k] = BlkRow;
668
669 /* Two cases:
670 * 1) the current layer is not a Cpoint layer. In this case we
671 * want to basically stick the matrix couplings to other
672 * nonzero stencil rows into the band matrix. One way to do
673 * this is to include couplings associated with only MyLine
674 * and ignore all the other couplings. However, what we do
675 * instead is to sum all the coupling at each layer participating
676 * in this interpolation stencil and stick this sum into BandMat.
677 * 2) the current layer is a Cpoint layer and so we
678 * stick an identity block in BandMat and rhs.
679 */
680 for (int dof_i=0; dof_i < DofsPerNode; dof_i++) {
681
682 j = (BlkRow-1)*DofsPerNode+dof_i;
683 ArrayView<const LO> AAcols;
684 ArrayView<const SC> AAvals;
685 Amat->getLocalRowView(j, AAcols, AAvals);
686 const int *Acols = AAcols.getRawPtr();
687 const SC *Avals = AAvals.getRawPtr();
688 RowLeng = AAvals.size();
689
690 TEUCHOS_TEST_FOR_EXCEPTION(RowLeng >= MaxNnzPerRow, Exceptions::RuntimeError, "MakeSemiCoarsenP: recompile with larger Max(HorNeighborNodes)\n");
691
692 for (i = 0; i < RowLeng; i++) {
693 LayDiff[i] = Layerdofs[Acols[i]]-StartLayer-node_k+2;
694
695 /* This is the main spot where there might be off- */
696 /* processor communication. That is, when we */
697 /* average the stencil in the horizontal direction,*/
698 /* we need to know the layer id of some */
699 /* neighbors that might reside off-processor. */
700 }
701 PtRow = (node_k-1)*DofsPerNode+dof_i+1;
702 for (dof_j=0; dof_j < DofsPerNode; dof_j++) {
703 PtCol = (node_k-1)*DofsPerNode+dof_j + 1;
704 /* Stick in entry corresponding to Mat(PtRow,PtCol) */
705 /* see dgbsv() comments for matrix format. */
706 TheSum = 0.0;
707 for (i = 0; i < RowLeng; i++) {
708 if ((LayDiff[i] == 0) && (Col2Dof[Acols[i]] == dof_j))
709 TheSum += Avals[i];
710 }
711 index = LDAB*(PtCol-1)+KLU+PtRow-PtCol;
712 BandMat[index] = TheSum;
713 if (buildRestriction) RestrictBandMat[index] = TheSum;
714 if (node_k != NStencilNodes) {
715 /* Stick Mat(PtRow,PtCol+DofsPerNode) entry */
716 /* see dgbsv() comments for matrix format. */
717 TheSum = 0.0;
718 for (i = 0; i < RowLeng; i++) {
719 if ((LayDiff[i] == 1) &&(Col2Dof[Acols[i]]== dof_j))
720 TheSum += Avals[i];
721 }
722 j = PtCol+DofsPerNode;
723 index=LDAB*(j-1)+KLU+PtRow-j;
724 BandMat[index] = TheSum;
725 if (buildRestriction) RestrictBandMat[index] = TheSum;
726 }
727 if (node_k != 1) {
728 /* Stick Mat(PtRow,PtCol-DofsPerNode) entry */
729 /* see dgbsv() comments for matrix format. */
730 TheSum = 0.0;
731 for (i = 0; i < RowLeng; i++) {
732 if ((LayDiff[i]== -1) &&(Col2Dof[Acols[i]]== dof_j))
733 TheSum += Avals[i];
734 }
735 j = PtCol-DofsPerNode;
736 index=LDAB*(j-1)+KLU+PtRow-j;
737 BandMat[index] = TheSum;
738 if (buildRestriction) RestrictBandMat[index] = TheSum;
739 }
740 }
741 }
742 }
743 node_k = MyLayer - StartLayer+1;
744 for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
745 /* Stick Mat(PtRow,PtRow) and Rhs(PtRow,dof_i+1) */
746 /* see dgbsv() comments for matrix format. */
747 PtRow = (node_k-1)*DofsPerNode+dof_i+1;
748 BandSol[(dof_i)*DofsPerNode*NStencilNodes+PtRow-1] = 1.;
749 if (buildRestriction) RestrictBandSol[(dof_i)*DofsPerNode*NStencilNodes+PtRow-1] = 1.;
750
751 for (dof_j = 0; dof_j < DofsPerNode; dof_j++) {
752 PtCol = (node_k-1)*DofsPerNode+dof_j+1;
753 index = LDAB*(PtCol-1)+KLU+PtRow-PtCol;
754 if (PtCol == PtRow) BandMat[index] = 1.0;
755 else BandMat[index] = 0.0;
756 if (buildRestriction) {
757 index = LDAB*(PtRow-1)+KLU+PtCol-PtRow;
758 if (PtCol == PtRow) RestrictBandMat[index] = 1.0;
759 else RestrictBandMat[index] = 0.0;
760 }
761 if (node_k != NStencilNodes) {
762 PtCol = (node_k )*DofsPerNode+dof_j+1;
763 index = LDAB*(PtCol-1)+KLU+PtRow-PtCol;
764 BandMat[index] = 0.0;
765 if (buildRestriction) {
766 index = LDAB*(PtRow-1)+KLU+PtCol-PtRow;
767 RestrictBandMat[index] = 0.0;
768 }
769 }
770 if (node_k != 1) {
771 PtCol = (node_k-2)*DofsPerNode+dof_j+1;
772 index = LDAB*(PtCol-1)+KLU+PtRow-PtCol;
773 BandMat[index] = 0.0;
774 if (buildRestriction) {
775 index = LDAB*(PtRow-1)+KLU+PtCol-PtRow;
776 RestrictBandMat[index] = 0.0;
777 }
778 }
779 }
780 }
781
782 /* Solve banded system and then stick result in Pmatrix arrays */
783
784 lapack.GBTRF( N, N, KL, KU, BandMat, LDAB, IPIV, &INFO);
785
786 TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band factorization failed");
787
788 lapack.GBTRS(notrans[0], N, KL, KU, NRHS, BandMat, LDAB, IPIV,
789 BandSol, N, &INFO );
790
791 TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band solve back substitution failed");
792
793 for (dof_j=0; dof_j < DofsPerNode; dof_j++) {
794 for (int dof_i=0; dof_i < DofsPerNode; dof_i++) {
795 for (i =1; i <= NStencilNodes ; i++) {
796 index = (Sub2FullMap[i]-1)*DofsPerNode+dof_i+1;
797 loc = Pptr[index];
798 Pcols[loc] = (col-1)*DofsPerNode+dof_j+1;
799 Pvals[loc] = BandSol[dof_j*DofsPerNode*NStencilNodes +
800 (i-1)*DofsPerNode + dof_i];
801 Pptr[index]= Pptr[index] + 1;
802 }
803 }
804 }
805 if (buildRestriction) {
806 lapack.GBTRF( N, N, KL, KU, RestrictBandMat, LDAB, IPIV, &INFO);
807 TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band factorization failed");
808 lapack.GBTRS(trans[0], N, KL, KU, NRHS, RestrictBandMat, LDAB, IPIV, RestrictBandSol, N, &INFO );
809 TEUCHOS_TEST_FOR_EXCEPTION(INFO != 0, Exceptions::RuntimeError, "Lapack band solve back substitution failed");
810 for (dof_j=0; dof_j < DofsPerNode; dof_j++) {
811 for (int dof_i=0; dof_i < DofsPerNode; dof_i++) {
812 for (i =1; i <= NStencilNodes ; i++) {
813 index = (col-1)*DofsPerNode+dof_j+1;
814 loc = Rptr[index];
815 Rcols[loc] = (Sub2FullMap[i]-1)*DofsPerNode+dof_i+1;
816 Rvals[loc] = RestrictBandSol[dof_j*DofsPerNode*NStencilNodes +
817 (i-1)*DofsPerNode + dof_i];
818 Rptr[index]= Rptr[index] + 1;
819 }
820 }
821 }
822 }
823 }
824 else {
825 int denom1 = MyLayer-StartLayer+1;
826 int denom2 = StartLayer+NStencilNodes-MyLayer;
827 for (int dof_i=0; dof_i < DofsPerNode; dof_i++) {
828 for (i =1; i <= NStencilNodes ; i++) {
829 index = (InvLineLayer[MyLine+(StartLayer+i-2)*NVertLines])*DofsPerNode+dof_i+1;
830 loc = Pptr[index];
831 Pcols[loc] = (col-1)*DofsPerNode+dof_i+1;
832 if (i > denom1) Pvals[loc] = 1.0 + ((double)( denom1 - i))/((double) denom2);
833 else Pvals[loc] = ((double)( i))/((double) denom1);
834 Pptr[index]= Pptr[index] + 1;
835 }
836 }
837 }
838 /* inject the null space */
839 // int FineStride = Ntotal*DofsPerNode;
840 // int CoarseStride= NVertLines*NCLayers*DofsPerNode;
841
842 BlkRow = InvLineLayer[MyLine+(MyLayer-1)*NVertLines]+1;
843 for (int k = 0; k < static_cast<int>(fineNullspace->getNumVectors()); k++) {
844 Teuchos::ArrayRCP<SC> OneCNull = coarseNullspace->getDataNonConst(k);
845 Teuchos::ArrayRCP<SC> OneFNull = fineNullspace->getDataNonConst(k);
846 for (int dof_i = 0; dof_i < DofsPerNode; dof_i++) {
847 OneCNull[( col-1)*DofsPerNode+dof_i] = OneFNull[ (BlkRow-1)*DofsPerNode+dof_i];
848 }
849 }
850
851 }
852 }
853
854 /*
855 * Squeeze the -1's out of the columns. At the same time convert Pcols
856 * so that now the first column is numbered '0' as opposed to '1'.
857 * Also, the arrays Pcols and Pvals should now use the zeroth element
858 * as opposed to just starting with the first element. Pptr will be
859 * fixed in the for loop below so that Pptr[0] = 0, etc.
860 */
861 CurRow = 1;
862 NewPtr = 1;
863 for (size_t ii=1; ii <= Pptr[Ntotal*DofsPerNode]-1; ii++) {
864 if (ii == Pptr[CurRow]) {
865 Pptr[CurRow] = LastGuy;
866 CurRow++;
867 while (ii > Pptr[CurRow]) {
868 Pptr[CurRow] = LastGuy;
869 CurRow++;
870 }
871 }
872 if (Pcols[ii] != -1) {
873 Pcols[NewPtr-1] = Pcols[ii]-1; /* these -1's fix the offset and */
874 Pvals[NewPtr-1] = Pvals[ii]; /* start using the zeroth element */
875 LastGuy = NewPtr;
876 NewPtr++;
877 }
878 }
879 for (i = CurRow; i <= Ntotal*DofsPerNode; i++) Pptr[CurRow] = LastGuy;
880
881 /* Now move the pointers so that they now point to the beginning of each
882 * row as opposed to the end of each row
883 */
884 for (i=-Ntotal*DofsPerNode+1; i>= 2 ; i--) {
885 Pptr[i-1] = Pptr[i-2]; /* extra -1 added to start from 0 */
886 }
887 Pptr[0] = 0;
888
889 // do the same for R
890 if (buildRestriction) {
891 CurRow = 1;
892 NewPtr = 1;
893 for (size_t ii=1; ii <= Rptr[coarseMap->getLocalNumElements()]-1; ii++) {
894 if (ii == Rptr[CurRow]) {
895 Rptr[CurRow] = RLastGuy;
896 CurRow++;
897 while (ii > Rptr[CurRow]) {
898 Rptr[CurRow] = RLastGuy;
899 CurRow++;
900 }
901 }
902 if (Rcols[ii] != -1) {
903 Rcols[NewPtr-1] = Rcols[ii]-1; /* these -1's fix the offset and */
904 Rvals[NewPtr-1] = Rvals[ii]; /* start using the zeroth element */
905 RLastGuy = NewPtr;
906 NewPtr++;
907 }
908 }
909 for (i = CurRow; i <= ((int) coarseMap->getLocalNumElements()); i++) Rptr[CurRow] = RLastGuy;
910 for (i=-coarseMap->getLocalNumElements()+1; i>= 2 ; i--) {
911 Rptr[i-1] = Rptr[i-2]; /* extra -1 added to start from 0 */
912 }
913 Rptr[0] = 0;
914 }
915 ArrayRCP<size_t> rcpRowPtr;
916 ArrayRCP<LO> rcpColumns;
917 ArrayRCP<SC> rcpValues;
918
919 RCP<CrsMatrix> PCrs = rcp_dynamic_cast<CrsMatrixWrap>(P)->getCrsMatrix();
920 LO nnz = Pptr[Ndofs];
921 PCrs->allocateAllValues(nnz, rcpRowPtr, rcpColumns, rcpValues);
922
923 ArrayView<size_t> rowPtr = rcpRowPtr();
924 ArrayView<LO> columns = rcpColumns();
925 ArrayView<SC> values = rcpValues();
926
927 // copy data over
928
929 for (LO ii = 0; ii <= Ndofs; ii++) rowPtr[ii] = Pptr[ii];
930 for (LO ii = 0; ii < nnz; ii++) columns[ii] = Pcols[ii];
931 for (LO ii = 0; ii < nnz; ii++) values[ii] = Pvals[ii];
932 PCrs->setAllValues(rcpRowPtr, rcpColumns, rcpValues);
933 PCrs->expertStaticFillComplete(coarseMap, Amat->getDomainMap());
934 ArrayRCP<size_t> RrcpRowPtr;
935 ArrayRCP<LO> RrcpColumns;
936 ArrayRCP<SC> RrcpValues;
937 RCP<CrsMatrix> RCrs;
938 if (buildRestriction) {
939 R = rcp(new CrsMatrixWrap(coarseMap, rowMap, 0));
940 RCrs = rcp_dynamic_cast<CrsMatrixWrap>(R)->getCrsMatrix();
941 nnz = Rptr[coarseMap->getLocalNumElements()];
942 RCrs->allocateAllValues(nnz, RrcpRowPtr, RrcpColumns, RrcpValues);
943
944 ArrayView<size_t> RrowPtr = RrcpRowPtr();
945 ArrayView<LO> Rcolumns = RrcpColumns();
946 ArrayView<SC> Rvalues = RrcpValues();
947
948 // copy data over
949
950 for (LO ii = 0; ii <= ((LO)coarseMap->getLocalNumElements()); ii++) RrowPtr[ii] = Rptr[ii];
951 for (LO ii = 0; ii < nnz; ii++) Rcolumns[ii] = Rcols[ii];
952 for (LO ii = 0; ii < nnz; ii++) Rvalues[ii] = Rvals[ii];
953 RCrs->setAllValues(RrcpRowPtr, RrcpColumns, RrcpValues);
954 RCrs->expertStaticFillComplete(Amat->getRangeMap(), coarseMap);
955 }
956
957 return NCLayers*NVertLines*DofsPerNode;
958 }
959 template <class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
961 // This function is a bit of a hack. We basically compute the semi-coarsening transfers and then throw
962 // away all the interpolation coefficients, instead replacing them by piecewise constants. The reason for this
963 // is that SemiCoarsening has no notion of aggregates so defining piecewise constants in the "usual way" is
964 // not possible. Instead, we look for the largest entry in each row, make it a one, and zero out the other
965 // non-zero entries
966
967 ArrayView<const LocalOrdinal> inds;
968 ArrayView<const Scalar> vals1;
969 ArrayView< Scalar> vals;
970 Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero();
971 Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
972
973 for (size_t i = 0; i < P->getRowMap()->getLocalNumElements(); i++) {
974 P->getLocalRowView(i, inds, vals1);
975
976 size_t nnz = inds.size();
977 if (nnz != 0) vals = ArrayView<Scalar>(const_cast<Scalar*>(vals1.getRawPtr()), nnz);
978
979 LO largestIndex = -1;
980 Scalar largestValue = ZERO;
981 /* find largest value in row and change that one to a 1 while the others are set to 0 */
982
983 LO rowDof = i%BlkSize;
984 for (size_t j =0; j < nnz; j++) {
985 if (Teuchos::ScalarTraits<SC>::magnitude(vals[ j ]) >= Teuchos::ScalarTraits<SC>::magnitude(largestValue)) {
986 if ( inds[j]%BlkSize == rowDof ) {
987 largestValue = vals[j];
988 largestIndex = (int) j;
989 }
990 }
991 vals[j] = ZERO;
992 }
993 if (largestIndex != -1) vals[largestIndex] = ONE;
994 else
995 TEUCHOS_TEST_FOR_EXCEPTION(nnz > 0, Exceptions::RuntimeError, "no nonzero column associated with a proper dof within node.");
996
997 if (Teuchos::ScalarTraits<SC>::magnitude(largestValue) == Teuchos::ScalarTraits<SC>::magnitude(ZERO)) vals[largestIndex] = ZERO;
998 }
999 }
1000
1001} //namespace MueLu
1002
1003#define MUELU_SEMICOARSENPFACTORY_SHORT
1004#endif // MUELU_SEMICOARSENPFACTORY_DEF_HPP
#define SET_VALID_ENTRY(name)
#define MaxHorNeighborNodes
MueLu::DefaultLocalOrdinal LocalOrdinal
MueLu::DefaultScalar Scalar
Exception throws to report errors in the internal logical of the program.
Timer to be used in factories. Similar to Monitor but with additional timers.
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
bool IsAvailable(const std::string &ename, const FactoryBase *factory=NoFactory::get()) const
Test whether a need's value has been saved.
void DeclareInput(const std::string &ename, const FactoryBase *factory, const FactoryBase *requestedBy=NoFactory::get())
Callback from FactoryBase::CallDeclareInput() and FactoryBase::DeclareInput()
const RCP< const FactoryManagerBase > GetFactoryManager()
returns the current factory manager
Definition: MueLu_Level.cpp:96
int GetLevelID() const
Return level number.
Definition: MueLu_Level.cpp:76
T & Get(const std::string &ename, const FactoryBase *factory=NoFactory::get())
Get data without decrementing associated storage counter (i.e., read-only access)....
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static const NoFactory * get()
LO FindCpts(LO const PtsPerLine, LO const CoarsenRate, LO const Thin, LO **LayerCpts) const
void DeclareInput(Level &fineLevel, Level &coarseLevel) const
Input.
RCP< const ParameterList > GetValidParameterList() const
Return a const parameter list of valid parameters that setParameterList() will accept.
LO MakeSemiCoarsenP(LO const Ntotal, LO const nz, LO const CoarsenRate, LO const LayerId[], LO const VertLineId[], LO const DofsPerNode, RCP< Matrix > &Amat, RCP< Matrix > &P, RCP< const Map > &coarseMap, const RCP< MultiVector > fineNullspace, RCP< MultiVector > &coarseNullspace, RCP< Matrix > &R, bool buildRestriction, bool doLinear) const
void BuildP(Level &fineLevel, Level &coarseLevel) const
Abstract Build method.
void Build(Level &fineLevel, Level &coarseLevel) const
Build an object with this factory.
void RevertToPieceWiseConstant(RCP< Matrix > &P, LO BlkSize) const
Namespace for MueLu classes and methods.