44#include "Petra_Comm.h"
46#include "Petra_Time.h"
47#include "Petra_BlockMap.h"
48#include "Petra_RDP_MultiVector.h"
49#include "Petra_RDP_Vector.h"
50#include "Petra_RDP_DVBR_Matrix.h"
51#include "Petra_RDP_CRS_Matrix.h"
52#include "Ifpack_ILUK_Graph.h"
53#include "Ifpack_RDP_CRS_RILUK.h"
55#define perror(str) { fprintf(stderr,"%s\n",str); exit(-1); }
56#define perror1(str,ierr) { fprintf(stderr,"%s %d\n",str,ierr); exit(-1); }
57#define double_quote '"'
58void BiCGSTAB(Petra_RDP_CRS_Matrix &A,
61 Ifpack_RDP_CRS_RILUK *M,
64 double *residual, double & FLOPS, bool verbose);
66int main(int argc, char *argv[])
71 int *update; /* vector elements updated on this node. */
72 int *indx; /* MSR format of real and imag parts */
79 double *xguess, *b, *bt, *xexact, *xsolve;
80 int n_nonzeros, n_blk_nonzeros, ierr;
81 int N_update; /* # of block unknowns updated on this node */
82 int numLocalEquations;
83 /* Number scalar equations on this node */
84 int numGlobalEquations, numGlobalBlocks; /* Total number of equations */
86 int *blockSizes, *numNzBlks, *blkColInds;
88 int N_external, N_blk_eqns;
89 int blk_row, *blk_col_inds;
90 int row, *col_inds, numEntries;
100 int has_global_indices, option;
103 int *proc_config = new int[PAZ_PROC_SIZE];
107 MPI_Init(&argc,&argv);
110 /* get number of processors and the name of this processor */
113 Petra_Comm& Comm = *new Petra_Comm(MPI_COMM_WORLD);
115 Petra_Comm& Comm = *new Petra_Comm();
118 printf("proc %d of %d is alive\n",
119 Comm.MyPID(),Comm.NumProc());
121 int MyPID = Comm.MyPID();
123 bool verbose = false;
124 if (MyPID==0) verbose = true;
127/* Still need Aztec proc info for the HB routines, can switch to Petra later */
130 PAZ_set_proc_config(proc_config,MPI_COMM_WORLD);
132 PAZ_set_proc_config(proc_config,0);
139 perror("error: enter name of data and partition file on command line, followed by levelfill and shift value") ;
141 if(argc != 5) perror("error: enter name of data file on command line, followed by levelfill and shift value") ;
143 /* Set exact solution to NULL */
146 /* Read matrix file and distribute among processors.
147 Returns with this processor's set of rows */
151 read_hb(argv[1], &numGlobalEquations, &n_nonzeros,
152 &val_msr, &bindx_msr, &xguess, &b, &bt, &xexact);
154 create_vbr(argv[2], proc_config, &numGlobalEquations, &numGlobalBlocks,
155 &n_nonzeros, &n_blk_nonzeros, &N_update, &update,
156 bindx_msr, val_msr, &val, &indx,
157 &rpntr, &cpntr, &bpntr, &bindx);
159 if(proc_config[PAZ_node] == 0)
161 free ((void *) val_msr);
162 free ((void *) bindx_msr);
163 free ((void *) cpntr);
165 matrix_type = PAZ_VBR_MATRIX;
169 distrib_vbr_matrix( proc_config, numGlobalEquations, numGlobalBlocks,
170 &n_nonzeros, &n_blk_nonzeros,
172 &val, &indx, &rpntr, &cpntr, &bpntr, &bindx,
173 &xguess, &b, &bt, &xexact);
177 read_hb(argv[1], &numGlobalEquations, &n_nonzeros,
178 &val, &bindx, &xguess, &b, &bt, &xexact);
182 distrib_msr_matrix(proc_config, &numGlobalEquations, &n_nonzeros, &N_update,
183 &update, &val, &bindx, &xguess, &b, &bt, &xexact);
186 for (i = 0; i<N_update; i++)
187 if (val[i] == 0.0 ) printf("Zero diagonal at row %d\n",i);
189 matrix_type = PAZ_MSR_MATRIX;
193 numLocalEquations = rpntr[N_update];
195 numLocalEquations = N_update;
200 /******** Make integer data structures needed for this interface *********/
202 /* Make blockSizes - number of equations in each block row on this proc */
204 numLocalBlocks = N_update;
206 blockSizes = new int[numLocalBlocks];
207 for (i=0; i<numLocalBlocks; i++)
208 blockSizes[i] = rpntr[i+1]-rpntr[i];
210 /* Make numNzBlks - number of block entries in each block row */
212 numNzBlks = new int[numLocalBlocks];
213 for (i=0; i<numLocalBlocks; i++)
214 numNzBlks[i] = bpntr[i+1] - bpntr[i];
216 /* Make blkColInds - Exactly bindx (just copy pointer) */
220 Petra_BlockMap& map = *new Petra_BlockMap(numGlobalEquations, numLocalEquations,
221 update, indexBase, Comm, numGlobalBlocks, numLocalBlocks,
224 Petra_RDP_DVBR_Matrix& A = *new Petra_RDP_DVBR_Matrix(map);
226 if ((ierr=A.allocate(numNzBlks, blkColInds)))
227 perror1("Error in DVBR_Matrix_allocate:",ierr);
229 /* Add block rows one-at-a-time */
231 for (blk_row=0; blk_row<numLocalBlocks; blk_row++)
233 row_vals = val + indx[bpntr[blk_row]];
234 blk_col_inds = bindx + bpntr[blk_row];
235 if ((ierr=A.putBlockRow(update[blk_row], numNzBlks[blk_row], row_vals,
238 printf("Row %d:",update[row]);
239 perror1("Error putting block row:",ierr);
243 if ((ierr=A.FillComplete()))
244 perror1("Error in DVBR_Matrix_FillComplete:",ierr);
247 /* Make numNzBlks - number of block entries in each block row */
249 numNz = new int[numLocalEquations];
250 for (i=0; i<numLocalEquations; i++) numNz[i] = bindx[i+1] - bindx[i];
252 /* Make ColInds - Exactly bindx, offset by diag (just copy pointer) */
253 ColInds = bindx+numLocalEquations+1;
255 Petra_Map& map = *new Petra_Map(numGlobalEquations, numLocalEquations,
259 Petra_Time & FillTimer = *new Petra_Time(Comm);
260 Petra_RDP_CRS_Matrix& A = *new Petra_RDP_CRS_Matrix(Copy, map, numNz);
262 /* Add rows one-at-a-time */
264 for (row=0; row<numLocalEquations; row++)
266 row_vals = val + bindx[row];
267 col_inds = bindx + bindx[row];
268 numEntries = bindx[row+1] - bindx[row];
269 if ((ierr = A.InsertGlobalValues(update[row], numEntries, row_vals, col_inds)))
271 printf("Row %d:",update[row]);
272 perror1("Error putting row:",ierr);
274 if ((ierr=(A.InsertGlobalValues(update[row], 1, val+row, update+row)<0)))
275 perror1("Error putting diagonal",ierr);
278 double FillTime = FillTimer.ElapsedTime();
279 if ((ierr=A.FillComplete()))
280 perror1("Error in Petra_RDP_Vector_fillComplete",ierr);
281 double FillCompleteTime = FillTimer.ElapsedTime() - FillTime;
282 if (Comm.MyPID()==0) {
283 cout << "\n\n****************************************************" << endl;
284 cout << "\n\nMatrix construction time (sec) = " << FillTime<< endl;
285 cout << " Matrix FillComplete time (sec) = " << FillCompleteTime << endl;
286 cout << " Total construction time (sec) = " << FillTime+FillCompleteTime << endl<< endl;
295 // Make a second x and b vector that are 2x original x and b; make into a 2-vector.
298 double **allx = new double*[nrhs];
299 double *xexact2 = new double[numLocalEquations];
300 for (i = 0;i<numLocalEquations; i++) xexact2[i] = 2.0 * xexact[i];
301 allx[0] = xexact; allx[1] = xexact2;
303 double **allb = new double*[nrhs];
304 double *b2 = new double[numLocalEquations];
305 for (i = 0;i<numLocalEquations; i++) b2[i] = 2.0 * b[i];
306 allb[0] = b; allb[1] = b2;
308 double **allbt = new double*[nrhs];
309 double *bt2 = new double[numLocalEquations];
310 for (i = 0;i<numLocalEquations; i++) bt2[i] = 2.0 * bt[i];
311 allbt[0] = bt; allbt[1] = bt2;
313 Petra_RDP_MultiVector& xtrue = *new Petra_RDP_MultiVector(Copy, map, allx, nrhs);
314 Petra_RDP_MultiVector& bexact = *new Petra_RDP_MultiVector(Copy, map, allb, nrhs);
315 Petra_RDP_MultiVector& btexact = *new Petra_RDP_MultiVector(Copy, map, allbt, nrhs);
316 Petra_RDP_MultiVector& bcomp = *new Petra_RDP_MultiVector(map, nrhs);
317 Petra_RDP_MultiVector& xcomp = *new Petra_RDP_MultiVector(map, nrhs);
318 Petra_RDP_MultiVector& resid = *new Petra_RDP_MultiVector(map, nrhs);
322 Petra_RDP_Vector& xtrue = *new Petra_RDP_Vector(Copy, map, xexact);
323 Petra_RDP_Vector& bexact = *new Petra_RDP_Vector(Copy, map, b);
324 Petra_RDP_Vector& btexact = *new Petra_RDP_Vector(Copy, map, bt);
325 Petra_RDP_Vector& bcomp = *new Petra_RDP_Vector(map);
326 Petra_RDP_Vector& xcomp = *new Petra_RDP_Vector(map);
327 Petra_RDP_Vector& resid = *new Petra_RDP_Vector(map);
329#endif /* MULTI_VECTOR */
333 // Construct ILU preconditioner
335 double elapsed_time, total_flops, MFLOPs;
336 Petra_Time & timer = *new Petra_Time(Comm);
338 int LevelFill = atoi(argv[argc-3]);
339 if (verbose) cout << "Using Level Fill = " << LevelFill << endl;
340 double ShiftValue = atof(argv[argc-2]);
341 if (verbose) cout << "Using Diagonal Shift Value of = " << ShiftValue << endl;
342 int NumThreads = atoi(argv[argc-1]);
343 if (verbose) cout << "Using " << NumThreads << " Threads " << endl;
345 Ifpack_RDP_CRS_RILUK * ILUK = 0;
346 Ifpack_ILUK_Graph * ILUK_Graph = 0;
348 elapsed_time = timer.ElapsedTime();
349 ILUK_Graph = new Ifpack_ILUK_Graph(A.Graph(), LevelFill);
350 assert(ILUK_Graph->ConstructFilledGraph()==0);
351 assert(ILUK_Graph->ComputeLevels(NumThreads)==0);
352 elapsed_time = timer.ElapsedTime() - elapsed_time;
353 if (verbose) cout << "Time to construct ILUK graph = " << elapsed_time << endl;
357 elapsed_time = timer.ElapsedTime();
358 ILUK = new Ifpack_RDP_CRS_RILUK(A, *ILUK_Graph);
359 ILUK->SetShiftValue(ShiftValue);
360 assert(ILUK->InitValues()==0);
361 assert(ILUK->Factor()==0);
362 elapsed_time = timer.ElapsedTime() - elapsed_time;
363 total_flops = ILUK->Flops();
364 MFLOPs = total_flops/elapsed_time/1000000.0;
365 if (verbose) cout << "Time to compute preconditioner values = "
366 << elapsed_time << endl
367 << "MFLOPS for Factorization = " << MFLOPs << endl;
371 double Tolerance = 1.0E-14;
372 double * residual = new double[nrhs];
375 elapsed_time = timer.ElapsedTime();
377 BiCGSTAB(A, xcomp, bexact, ILUK, Maxiter, Tolerance, residual, total_flops, verbose);
379 elapsed_time = timer.ElapsedTime() - elapsed_time;
380 MFLOPs = total_flops/elapsed_time/1000000.0;
381 if (verbose) cout << "Time to compute solution = "
382 << elapsed_time << endl
383 << "Number of operations in solve = " << total_flops << endl
384 << "MFLOPS for Solve = " << MFLOPs<< endl << endl;
390 for (ii=0; ii<2; ii++) {
391 bool TransA = (ii==1);
393 elapsed_time = timer.ElapsedTime();
394 for (iii=0; iii<NumMVs; iii++)
395 if ((ierr=A.Multiply(TransA, xcomp, bcomp))) perror1("Error in matvec",ierr);
396 elapsed_time = timer.ElapsedTime() - elapsed_time;
397 total_flops = A.Flops();
398 MFLOPs = total_flops/elapsed_time/1000000.0;
399 if (Comm.MyPID()==0) {
401 cout << "\n\n****************************************************" << endl;
402 cout << "\n\nResults for transpose multiply with standard storage" << endl;
405 cout << "\n\nMatrix Fill cost = " << (FillTime+FillCompleteTime)/elapsed_time*NumMVs
406 << " Matrix-Multiplies " << endl;
407 cout << "\n\n*******************************************************" << endl;
408 cout << "\n\nResults for no transpose multiply with standard storage" << endl;
411 cout << "\n\nMatrix Fill cost = " << (FillTime+FillCompleteTime)/elapsed_time*NumMVs
412 << " Matrix-Multiplies " << endl;
413 cout << "\n\nTotal FLOPS for standard Storage (" <<NumMVs<< " Multiplies) = "
414 << total_flops<< endl;
415 cout << " Total time for standard Storage = " << elapsed_time<< endl;
416 cout << " Total MFLOPs for standard matrix multiply = " << MFLOPs << endl<< endl;
419 // cout << "Vector bcomp" << bcomp << endl;
422 if ((ierr=resid.Update(-1.0, btexact, 1.0, bcomp, 0.0))) perror1("Error in Update",ierr);}
424 if ((ierr=resid.Update(-1.0, bexact, 1.0, bcomp, 0.0))) perror1("Error in Update",ierr);}
426 if ((ierr = resid.Norm2(residual))) perror1("Error in Norm2",ierr);
429 for (i=0; i< nrhs; i++) printf("Residual[%d] = %22.16g\n",i,residual[i]);
433 // Optimize data layout for memory access
435 if ((ierr=A.OptimizeStorage())) perror1("Error in Petra_RDP_CRS_Matrix: OptimizeStorage",ierr);
437 for (ii=0; ii<2; ii++) {
438 bool TransA = (ii==1);
440 elapsed_time = timer.ElapsedTime();
441 for (iii=0; iii<NumMVs; iii++)
442 if ((ierr=A.Multiply(TransA, xcomp, bcomp)))
443 perror1("Error in Multiply",ierr);
444 elapsed_time = timer.ElapsedTime() - elapsed_time;
445 total_flops = A.Flops();
446 MFLOPs = total_flops/elapsed_time/1000000.0;
447 if (Comm.MyPID()==0) {
448 cout << "\n\n****************************************************" << endl;
449 if (TransA) cout << "\n\nResults for transpose multiply with optimized storage" << endl;
450 else cout << "\n\nResults for no transpose multiply with optimized storage"<< endl;
451 cout << "\n\nTotal FLOPS for optimized storage (" <<NumMVs<< " Multiplies) = "
452 << total_flops<< endl;
453 cout << " Total time for optimized Storage = " << elapsed_time<< endl;
454 cout << " Total MFLOPs for optimized matrix multiply = " << MFLOPs << endl<< endl;
458 if ((ierr=resid.Update(-1.0, btexact, 1.0, bcomp, 0.0))) perror1("Error in Update",ierr);}
460 if ((ierr=resid.Update(-1.0, bexact, 1.0, bcomp, 0.0))) perror1("Error in Update",ierr); }
462 if ((ierr = resid.Norm2(residual))) perror1("Error in Norm2",ierr);
465 for (i=0; i< nrhs; i++) printf("Residual[%d] = %22.16g\n",i,residual[i]);
469 free ((void *) xguess);
472 free ((void *) xexact);
474 free ((void *) bindx);
475 free ((void *) update);
478 free ((void *) indx);
479 free ((void *) rpntr);
480 free ((void *) bpntr);
481 delete [] blockSizes;
514void BiCGSTAB(Petra_RDP_CRS_Matrix &A,
517 Ifpack_RDP_CRS_RILUK *M,
520 double *residual, double & FLOPS, bool verbose) {
522 // Allocate vectors needed for iterations
523 Petra_RDP_Vector& phat = *new Petra_RDP_Vector(x); phat.PutScalar(0.0);
524 Petra_RDP_Vector& p = *new Petra_RDP_Vector(x); p.PutScalar(0.0);
525 Petra_RDP_Vector& shat = *new Petra_RDP_Vector(x); shat.PutScalar(0.0);
526 Petra_RDP_Vector& s = *new Petra_RDP_Vector(x); s.PutScalar(0.0);
527 Petra_RDP_Vector& r = *new Petra_RDP_Vector(x); r.PutScalar(0.0);
528 Petra_RDP_Vector& rtilde = *new Petra_RDP_Vector(x); rtilde.Random();
529 Petra_RDP_Vector& v = *new Petra_RDP_Vector(x); v.PutScalar(0.0);
532 A.Multiply(false, x, r); // r = A*x
534 r.Update(1.0, b, -1.0); // r = b - A*x
536 double r_norm, b_norm, scaled_r_norm, rhon, rhonm1 = 1.0;
537 double alpha = 1.0, omega = 1.0, sigma;
538 double omega_num, omega_den;
541 scaled_r_norm = r_norm/b_norm;
544 if (verbose) cout << "Initial residual = " << r_norm
545 << " Scaled residual = " << scaled_r_norm << endl;
548 for (int i=0; i<Maxiter; i++) { // Main iteration loop
550 double beta = (rhon/rhonm1) * (alpha/omega);
553 /* p = r + beta*(p - omega*v) */
557 double dtemp = - beta*omega;
559 p.Update(1.0, r, dtemp, v, beta);
563 M->LevelSolve(false, p, phat);
564 A.Multiply(false, phat, v);
567 rtilde.Dot(v,&sigma);
570 /* s = r - alpha*v */
572 /* r = A shat (r is a tmp here for t ) */
574 s.Update(-alpha, v, 1.0, r, 0.0);
578 M->LevelSolve(false, s, shat);
579 A.Multiply(false, shat, r);
581 r.Dot(s, &omega_num);
582 r.Dot(r, &omega_den);
583 omega = omega_num / omega_den;
585 /* x = x + alpha*phat + omega*shat */
586 /* r = s - omega*r */
588 x.Update(alpha, phat, omega, shat, 1.0);
589 r.Update(1.0, s, -omega);
592 scaled_r_norm = r_norm/b_norm;
595 if (verbose) cout << "Iter "<< i << " residual = " << r_norm
596 << " Scaled residual = " << scaled_r_norm << endl;
598 if (scaled_r_norm < Tolerance) break;
601 FLOPS = phat.Flops()+p.Flops()+shat.Flops()+s.Flops()+r.Flops()+rtilde.Flops()+
602 v.Flops()+A.Flops()+x.Flops()+b.Flops();
603 if (M!=0) FLOPS += M->Flops();