Amesos Package Browser (Single Doxygen Collection) Development
Loading...
Searching...
No Matches
SuperludistOO.cpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Amesos: Direct Sparse Solver Package
5// Copyright (2004) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// This library is free software; you can redistribute it and/or modify
11// it under the terms of the GNU Lesser General Public License as
12// published by the Free Software Foundation; either version 2.1 of the
13// License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23// USA
24// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
25//
26// ***********************************************************************
27// @HEADER
28
29#include "SuperludistOO.h"
30#ifdef EPETRA_MPI
31#include "Epetra_MpiComm.h"
32#else
33 This code cannot be compiled without mpi.h.
34#include "Epetra_Comm.h"
35#endif
36#include "Epetra_Map.h"
37#include "Epetra_LocalMap.h"
38#include "Epetra_Vector.h"
39#include "Epetra_RowMatrix.h"
40#include "Epetra_Operator.h"
41#include "Epetra_Import.h"
42#include "Epetra_Export.h"
43#include "Epetra_CrsMatrix.h"
44#include "supermatrix.h"
45#include "superlu_ddefs.h"
46#include "CrsMatrixTranspose.h"
47#include <vector>
48
49#ifdef DEBUG
50#include "Comm_assert_equal.h"
51 // #include "CrsMatricesAreIdentical.h"
52#endif
53#ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
54#include "ExtractCrsFromRowMatrix.h"
55#endif
56/*
57 Returns the largest number of rows that allows NumProcs to be used within a
58 rectangular grid with no more rows than columns.
59
60 i.e. max i such that there exists j > i such that i*j = NumProcs
61*/
62int SLU_NumRows( int NumProcs ) {
63#ifdef TFLOP
64 // Else, parameter 6 of DTRSV CTNLU is incorrect
65 return 1;
66#else
67 int i;
68 int numrows ;
69 for ( i = 1; i*i <= NumProcs; i++ )
70 ;
71 bool done = false ;
72 for ( numrows = i-1 ; done == false ; ) {
73 int NumCols = NumProcs / numrows ;
74 if ( numrows * NumCols == NumProcs )
75 done = true;
76 else
77 numrows-- ;
78 }
79 return numrows;
80#endif
81}
82
83//=============================================================================
84SuperludistOO::SuperludistOO(const Epetra_LinearProblem &prob ) {
85 // AllocAzArrays();
86
87 Problem_ = &prob ;
88 Transpose_ = false;
89 A_and_LU_built = false ;
90 Factored_ = false ;
91 FirstCallToSolve_ = true ;
92 //
93 // The following are initialized just on general principle
94 //
95 numprocs = -13 ;
96 nprow = -13 ;
97 npcol = -13 ;
98 numrows = -13 ;
99
100}
101
102//=============================================================================
104 // DeleteMemory();
105 // DeleteAzArrays();
106
107 if ( A_and_LU_built ) {
108 // Destroy_CompCol_Matrix_dist(&A);
109 SUPERLU_FREE(A.Store);
110 Destroy_LU(numrows, &grid, &LUstruct);
111 ScalePermstructFree(&ScalePermstruct);
112 LUstructFree(&LUstruct);
113 SUPERLU_FREE(berr);
114 }
115
116}
117
118//=============================================================================
119
120//
121// Solve() uses several intermediate matrices to convert the input matrix
122// to one that we can pass to the Sparse Direct Solver
123//
124// Epetra_RowMatrix *RowMatrixA - The input matrix
125// Epetra_CrsMatrix *CastCrsMatrixA - The input matrix casted to a crs matrix
126// Epetra_CrsMatrix ExtractCrsMatrixA - Converted to a Crs matrix
127// (Unused if RowMatrix is an Epetra_CrsMatrix)
128// Epetra_CrsMatrix *Phase2Mat - Guaranteed to be a CrsMatrix
129// Epetra_CrsMatrix SerialCrsMatrixA - Phase2Mat coalesced to one process
130// Epetra_CrsMatrix *Phase3Mat - A pseudo-distributed CrsMatrix
131// (i.e. stored exclusively on one process)
132// Epetra_CrsMatrix Phase3MatTrans - The transpose of Phase3Mat
133// Epetra_CrsMatrix *Phase4Mat - A pseudo-serial CrsMatrix with the
134// proper transposition
135// Epetra_CrsMatrix Phase5Mat - A replicated CrsMatrix with the
136// proper transposition
137//
138// This is what the code does:
139// Step 1) Convert the matrix to an Epetra_CrsMatrix
140// Step 2) Coalesce the matrix onto process 0
141// Step 3) Transpose the matrix
142// Step 4) Replicate the matrix
143// Step 5) Convert vector b to a replicated vector
144// Step 6) Convert the matrix to Ap, Ai, Aval
145// Step 7) Call SuperLUdist
146// Step 8) Convert vector x back to a distributed vector
147//
148// Remaining tasks:
149// 1) I still need to make it possible for SuperludistOO to accept a
150// replicated matrix without using any additional memory.
151// I believe that all that I need is to move the definition
152// of ExtractCrsMatrixA, SerialCrsMatrixA and Phase3MatTrans up
153// to the top of the code and add a conditional which tests to
154// see if RowMatrixA is actually in the exact format that we need,
155// and if so, skip all the matrix transformations. Also, Phase5Mat
156// needs to be changed to Phase4Replicated with an added pointer, named
157// *Phase5Mat.
158// 2) Can we handle a transposed matrix any cleaner? We build Ap,
159// Ai and Avalues - can we build that as a transpose more efficiently
160// than doing a full CrsMatrix to CrsMatrix transpose?
161//
162// Memory usage:
163// ExtractCrsMatrixA - 1 if RowMAtrixA is not a CrsMatrix
164// SerialCrsMatrixA - 1 if RowMatrixA is not a serial matrix
165// Phase3MatTrans - 1 if RowMatrixA unless a transpose solve is requested
166// Phase5Mat - 1
167// If we need SerialCrsMAttrixA, then ExtractCrsMatrixA will not be a
168// serial matrix, hence three extra serial copies is the maximum.
169//
170int SuperludistOO::Solve(bool factor) {
171 //
172 // I am going to put these here until I determine that I need them in
173 // SuperludistOO.h
174 //
175
176 bool CheckExtraction = false; // Set to true to force extraction for unit test
177
178 Epetra_RowMatrix *RowMatrixA =
179 dynamic_cast<Epetra_RowMatrix *>(Problem_->GetOperator());
180
181 EPETRA_CHK_ERR( RowMatrixA == 0 ) ;
182
183 Epetra_CrsMatrix *CastCrsMatrixA = dynamic_cast<Epetra_CrsMatrix*>(RowMatrixA) ;
184#ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
185 Epetra_CrsMatrix *ExtractCrsMatrixA = 0;
186#endif
187 Epetra_CrsMatrix *Phase2Mat = 0 ;
188 const Epetra_Comm &Comm = RowMatrixA->Comm();
189
190 int iam = Comm.MyPID() ;
191#if 0
192 //
193 // The following lines allow us time to attach the debugger
194 //
195 int hatever;
196 if ( iam == 0 ) cin >> hatever ;
197 Comm.Barrier();
198#endif
199
200 //
201 // Step 1) Convert the matrix to an Epetra_CrsMatrix
202 //
203 // If RowMatrixA is not a CrsMatrix, i.e. the dynamic cast fails,
204 // extract a CrsMatrix from the RowMatrix.
205 //
206 if ( CastCrsMatrixA != 0 && ! CheckExtraction ) {
207 Phase2Mat = CastCrsMatrixA ;
208 } else {
209#ifdef EPETRA_CRSMATRIX_CONSTRUCT_FROM_ROWMATRIX
210 ExtractCrsMatrixA = new Epetra_CrsMatrix( *RowMatrixA ) ;
211
212 Phase2Mat = ExtractCrsMatrixA ;
213#ifdef DEBUG
214 if ( CheckExtraction )
215 assert( CrsMatricesAreIdentical( CastCrsMatrixA, ExtractCrsMatrixA ) ) ;
216#endif
217#else
218 assert( false ) ;
219#endif
220 }
221
222 assert( Phase2Mat != NULL ) ;
223 const Epetra_Map &Phase2Matmap = Phase2Mat->RowMap() ;
224
225 //
226 // Step 2) Coalesce the matrix onto process 0
227 //
228 const Epetra_MpiComm & comm1 = dynamic_cast<const Epetra_MpiComm &> (Comm);
229 MPI_Comm MPIC = comm1.Comm() ;
230
231 int IsLocal = ( Phase2Matmap.NumMyElements() ==
232 Phase2Matmap.NumGlobalElements() )?1:0;
233 Comm.Broadcast( &IsLocal, 1, 0 ) ;
234#ifdef DEBUG
235 assert( Comm_assert_equal( &Comm, IsLocal ) );
236#endif
237
238 Epetra_CrsMatrix *Phase3Mat = 0 ;
239
240 int NumGlobalElements_ = Phase2Matmap.NumGlobalElements() ;
241 // Create a serial map in case we end up needing it
242 // If it is created inside the else block below it would have to
243 // be with a call to new().
244 int NumMyElements_ = 0 ;
245 if (iam==0) NumMyElements_ = NumGlobalElements_;
246 Epetra_Map SerialMap( NumGlobalElements_, NumMyElements_, 0, Comm );
247 Epetra_CrsMatrix SerialCrsMatrixA(Copy, SerialMap, 0);
248
249
250 if ( IsLocal==1 ) {
251 Phase3Mat = Phase2Mat ;
252 } else {
253
254 Epetra_Export export_to_serial( Phase2Matmap, SerialMap);
255
256 SerialCrsMatrixA.Export( *Phase2Mat, export_to_serial, Add );
257
258 SerialCrsMatrixA.FillComplete() ;
259 Phase3Mat = &SerialCrsMatrixA ;
260
261 }
262 Comm.Barrier() ;
263
264
265
266 //
267 // Step 3) Transpose the matrix
268 //
269 const Epetra_Map &Phase3Matmap = Phase3Mat->RowMap() ;
270
271 numrows = Phase3Mat->NumGlobalRows();
272 int numcols = Phase3Mat->NumGlobalCols();
273 int numentries = Phase3Mat->NumGlobalNonzeros();
274
275 Epetra_CrsMatrix Phase3MatTrans(Copy, Phase3Matmap, 0);
276 Epetra_CrsMatrix *Phase4Mat;
277
278 if ( GetTrans() ) {
279 Phase4Mat = Phase3Mat ;
280 } else {
281 assert( CrsMatrixTranspose( Phase3Mat, &Phase3MatTrans ) == 0 ) ;
282 Phase4Mat = &Phase3MatTrans ;
283 }
284
285
286 //
287 // Step 4) Replicate the matrix
288 //
289 int * AllIDs = new int[numrows];
290 for ( int i = 0; i < numrows ; i++ ) AllIDs[i] = i ;
291
292 // Create a replicated map and matrix
293 Epetra_Map ReplicatedMap( -1, numrows, AllIDs, 0, Comm);
294 // Epetra_LocalMap ReplicatedMap( numrows, 0, Comm); // Compiles,runs, fails to replicate
295
296 delete [] AllIDs;
297
298 Epetra_Import importer( ReplicatedMap, Phase3Matmap );
299
300 Epetra_CrsMatrix Phase5Mat(Copy, ReplicatedMap, 0);
301 EPETRA_CHK_ERR( Phase5Mat.Import( *Phase4Mat, importer, Insert) );
302 EPETRA_CHK_ERR( Phase5Mat.FillComplete() ) ;
303
304 assert( Phase5Mat.NumMyRows() == Phase4Mat->NumGlobalRows() ) ;
305
306 //
307 // Step 5) Convert vector b to a replicated vector
308 //
309 Epetra_MultiVector *vecX = Problem_->GetLHS() ;
310 Epetra_MultiVector *vecB = Problem_->GetRHS() ;
311
312 int nrhs;
313 if ( vecX == 0 ) {
314 nrhs = 0 ;
315 EPETRA_CHK_ERR( vecB != 0 ) ;
316 } else {
317 nrhs = vecX->NumVectors() ;
318 EPETRA_CHK_ERR( vecB->NumVectors() != nrhs ) ;
319 }
320
321 int nArows = Phase3Mat->NumGlobalRows() ;
322 int nAcols = Phase3Mat->NumGlobalCols() ;
323
324#ifdef ONE_VECTOR_ONLY
325 assert( vecX->NumVectors() == 1 ) ;
326 assert( vecB->NumVectors() == 1 ) ;
327
328 Epetra_Vector *vecXvector = dynamic_cast<Epetra_Vector*>(vecX) ;
329 Epetra_Vector *vecBvector = dynamic_cast<Epetra_Vector*>(vecB) ;
330
331 assert( vecXvector != 0 ) ;
332 assert( vecBvector != 0 ) ;
333
334 Epetra_Vector vecXreplicated( ReplicatedMap ) ;
335 Epetra_Vector vecBreplicated( ReplicatedMap ) ;
336#else
337 Epetra_MultiVector *vecXvector = (vecX) ;
338 Epetra_MultiVector *vecBvector = (vecB) ;
339
340 Epetra_MultiVector vecXreplicated( ReplicatedMap, nrhs ) ;
341 Epetra_MultiVector vecBreplicated( ReplicatedMap, nrhs ) ;
342#endif
343
344 Epetra_Import ImportToReplicated( ReplicatedMap, Phase2Matmap);
345
346 vecXreplicated.Import( *vecXvector, ImportToReplicated, Insert ) ;
347 vecBreplicated.Import( *vecBvector, ImportToReplicated, Insert ) ;
348
349 assert( nArows == vecXreplicated.MyLength() ) ;
350 assert( nAcols == vecBreplicated.MyLength() ) ;
351
352 double *bValues ;
353 double *xValues ;
354 int bLda, xLda ;
355
356 assert( vecBreplicated.ExtractView( &bValues, &bLda ) == 0 ) ;
357 assert( vecXreplicated.ExtractView( &xValues, &xLda ) == 0 ) ;
358
359 if ( factor ) {
360 //
361 // Step 6) Convert the matrix to Ap, Ai, Aval
362 //
363 Ap.resize( numrows+1 );
364 Ai.resize( EPETRA_MAX( numrows, numentries) ) ;
365 Aval.resize( EPETRA_MAX( numrows, numentries) ) ;
366
367 int NumEntries ;
368 double *RowValues;
369 int *ColIndices;
370 int Ai_index = 0 ;
371 int MyRow;
372 for ( MyRow = 0; MyRow <numrows; MyRow++ ) {
373 int status = Phase5Mat.ExtractMyRowView( MyRow, NumEntries, RowValues, ColIndices ) ;
374 assert( status == 0 ) ;
375 Ap[MyRow] = Ai_index ;
376 for ( int j = 0; j < NumEntries; j++ ) {
377 Ai[Ai_index] = ColIndices[j] ;
378 Aval[Ai_index] = RowValues[j] ;
379 Ai_index++;
380 }
381 }
382 assert( numrows == MyRow );
383 Ap[ numrows ] = Ai_index ;
384 }
385
386 //
387 // Step 7) Call SuperLUdist
388 //
389
390 //
391 // This really belongs somewhere else - perhaps in the constructor
392 //
393 if ( factor ) {
394 numprocs = Comm.NumProc() ;
396 npcol = numprocs / nprow ;
397 assert ( nprow * npcol == numprocs ) ;
398 superlu_gridinit( MPIC, nprow, npcol, &grid);
399
400#ifdef DEBUG
401 assert( Comm_assert_equal( &Comm, numentries ) );
402 assert( Comm_assert_equal( &Comm, numrows ) );
403 assert( Comm_assert_equal( &Comm, numcols ) );
404#endif
405
406 } else {
407 assert( numprocs == Comm.NumProc() ) ;
408 }
409 int ldb = numrows ;
410 /* Bail out if I do not belong in the grid. */
411 if ( iam < nprow * npcol ) {
412 //
413 // All processes need to have identical values of:
414 // numrows(m), numcols(n), nnz(NumEntries),
415 // Aval(a), Ap(xa), Ai(asub)
416 // double(numentries), int(n+1), int(numentries)
417
418#ifdef DEBUG
419 for ( int ii = 0; ii < min( numentries, 10 ) ; ii++ ) {
420 assert( Comm_assert_equal( &Comm, Aval[ii] ) ) ;
421 }
422 for ( int ii = 0; ii < min( numcols+1, 10 ) ; ii++ ) {
423 assert( Comm_assert_equal( &Comm, Ai[ii] ) );
424 assert( Comm_assert_equal( &Comm, Ap[ii] ) );
425 }
426 for ( int ii = 0; ii < min( numrows, 10 ) ; ii++ ) {
427 assert( Comm_assert_equal( &Comm, bValues[ii] ) );
428 }
429#endif
430
431 if ( factor ) {
432 set_default_options(&options);
433
434 if ( !(berr = doubleMalloc_dist(nrhs)) )
435 EPETRA_CHK_ERR( -1 ) ;
436
437 /* Create compressed column matrix for A. */
438 dCreate_CompCol_Matrix_dist(&A, numrows, numcols,
439 numentries, &Aval[0], &Ai[0],
440 &Ap[0], SLU_NC, SLU_D, SLU_GE);
441 A_and_LU_built = true;
442
443#if 0
444 cout << " Here is A " << endl ;
445 dPrint_CompCol_Matrix_dist( &A );
446 cout << " That was A " << "numrows = " << numrows << endl ;
447 cout << "numcols = " << numcols << endl ;
448#endif
449
450 /* Initialize ScalePermstruct and LUstruct. */
451 ScalePermstructInit(numrows, numcols, &ScalePermstruct);
452 LUstructInit(numrows, numcols, &LUstruct);
453
454 assert( options.Fact == DOFACT );
455 options.Fact = DOFACT ;
456
457 Factored_ = true;
458 } else {
459 assert( Factored_ == true ) ;
460 EPETRA_CHK_ERR( Factored_ == false ) ;
461 options.Fact = FACTORED ;
462 }
463 //
464 // pdgssvx_ABglobal returns x in b, so we copy b into x and pass x as b.
465 //
466 for ( int j = 0 ; j < nrhs; j++ )
467 for ( int i = 0 ; i < numrows; i++ ) xValues[i+j*xLda] = bValues[i+j*xLda];
468
469 /* Initialize the statistics variables. */
470 PStatInit(&stat);
471
472 /* Call the linear equation solver. */
473 int info ;
474 pdgssvx_ABglobal(&options, &A, &ScalePermstruct, &xValues[0],
475 ldb, nrhs, &grid, &LUstruct, berr, &stat, &info);
476 EPETRA_CHK_ERR( info ) ;
477
478 PStatFree(&stat);
479
480 }
481
482
483 //
484 // Step 8) Convert vector x back to a distributed vector
485 //
486 // This is an ugly hack - it should be cleaned up someday
487 //
488 for (int i = 0 ; i < numrows; i++ ) {
489 int lid[1] ;
490 lid[0] = Phase2Matmap.LID( i ) ;
491 if ( lid[0] >= 0 ) {
492 for (int j = 0 ; j < nrhs; j++ ) {
493 vecXvector->ReplaceMyValue( lid[0], j, xValues[i + ldb * j] ) ;
494 }
495 }
496 }
497
498 return(0) ;
499}
int CrsMatrixTranspose(Epetra_CrsMatrix *In, Epetra_CrsMatrix *Out)
#define EPETRA_CHK_ERR(xxx)
int SLU_NumRows(int NumProcs)
SuperludistOO(const Epetra_LinearProblem &LinearProblem)
SuperludistOO Constructor.
vector< int > Ap
virtual ~SuperludistOO(void)
SuperludistOO Destructor.
vector< int > Ai
int Solve(bool Factor)
All computation is performed during the call to Solve()
bool GetTrans() const
Return the transpose flag.
SuperMatrix A
bool FirstCallToSolve_
LUstruct_t LUstruct
ScalePermstruct_t ScalePermstruct
gridinfo_t grid
const Epetra_LinearProblem * Problem_
SuperLUStat_t stat
vector< double > Aval
superlu_options_t options