Amesos2 - Direct Sparse Solver Interfaces Version of the Day
klu2_analyze.hpp
1/* ========================================================================== */
2/* === klu_analyze ========================================================== */
3/* ========================================================================== */
4// @HEADER
5// ***********************************************************************
6//
7// KLU2: A Direct Linear Solver package
8// Copyright 2011 Sandia Corporation
9//
10// Under terms of Contract DE-AC04-94AL85000, with Sandia Corporation, the
11// U.S. Government retains certain rights in this software.
12//
13// This library is free software; you can redistribute it and/or modify
14// it under the terms of the GNU Lesser General Public License as
15// published by the Free Software Foundation; either version 2.1 of the
16// License, or (at your option) any later version.
17//
18// This library is distributed in the hope that it will be useful, but
19// WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21// Lesser General Public License for more details.
22//
23// You should have received a copy of the GNU Lesser General Public
24// License along with this library; if not, write to the Free Software
25// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26// USA
27// Questions? Contact Mike A. Heroux (maherou@sandia.gov)
28//
29// KLU2 is derived work from KLU, licensed under LGPL, and copyrighted by
30// University of Florida. The Authors of KLU are Timothy A. Davis and
31// Eka Palamadai. See Doc/KLU_README.txt for the licensing and copyright
32// information for KLU.
33//
34// ***********************************************************************
35// @HEADER
36
37/* Order the matrix using BTF (or not), and then AMD, COLAMD, the natural
38 * ordering, or the user-provided-function on the blocks. Does not support
39 * using a given ordering (use klu_analyze_given for that case). */
40
41#ifndef KLU2_ANALYZE_HPP
42#define KLU2_ANALYZE_HPP
43
44#include "klu2_internal.h"
45#include "klu2_analyze_given.hpp"
46#include "klu2_memory.hpp"
47
48/* ========================================================================== */
49/* === analyze_worker ======================================================= */
50/* ========================================================================== */
51
52template <typename Entry, typename Int>
53static Int analyze_worker /* returns KLU_OK or < 0 if error */
54(
55 /* inputs, not modified */
56 Int n, /* A is n-by-n */
57 Int Ap [ ], /* size n+1, column pointers */
58 Int Ai [ ], /* size nz, row indices */
59 Int nblocks, /* # of blocks */
60 Int Pbtf [ ], /* BTF row permutation */
61 Int Qbtf [ ], /* BTF col permutation */
62 Int R [ ], /* size n+1, but only Rbtf [0..nblocks] is used */
63 Int ordering, /* what ordering to use (0, 1, or 3 for this routine) */
64
65 /* output only, not defined on input */
66 Int P [ ], /* size n */
67 Int Q [ ], /* size n */
68 double Lnz [ ], /* size n, but only Lnz [0..nblocks-1] is used */
69
70 /* workspace, not defined on input or output */
71 Int Pblk [ ], /* size maxblock */
72 Int Cp [ ], /* size maxblock+1 */
73 Int Ci [ ], /* size MAX (nz+1, Cilen) */
74 Int Cilen, /* nz+1, or COLAMD_recommend(nz,n,n) for COLAMD */
75 Int Pinv [ ], /* size maxblock */
76
77 /* input/output */
78 KLU_symbolic<Entry, Int> *Symbolic,
79 KLU_common<Entry, Int> *Common
80)
81{
82 double amd_Info [TRILINOS_AMD_INFO], lnz, lnz1, flops, flops1 ;
83 Int k1, k2, nk, k, block, oldcol, pend, newcol, result, pc, p, newrow,
84 maxnz, nzoff, cstats [TRILINOS_COLAMD_STATS], ok, err = KLU_INVALID ;
85
86 /* ---------------------------------------------------------------------- */
87 /* initializations */
88 /* ---------------------------------------------------------------------- */
89 for (k = 0 ; k < TRILINOS_AMD_INFO ; k++)
90 {
91 amd_Info[k] = 0;
92 }
93
94 /* compute the inverse of Pbtf */
95#ifndef NDEBUGKLU2
96 for (k = 0 ; k < n ; k++)
97 {
98 P [k] = AMESOS2_KLU2_EMPTY ;
99 Q [k] = AMESOS2_KLU2_EMPTY ;
100 Pinv [k] = AMESOS2_KLU2_EMPTY ;
101 }
102#endif
103 for (k = 0 ; k < n ; k++)
104 {
105 ASSERT (Pbtf [k] >= 0 && Pbtf [k] < n) ;
106 Pinv [Pbtf [k]] = k ;
107 }
108#ifndef NDEBUGKLU2
109 for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != AMESOS2_KLU2_EMPTY) ;
110#endif
111 nzoff = 0 ;
112 lnz = 0 ;
113 maxnz = 0 ;
114 flops = 0 ;
115 Symbolic->symmetry = AMESOS2_KLU2_EMPTY ; /* only computed by AMD */
116
117 /* ---------------------------------------------------------------------- */
118 /* order each block */
119 /* ---------------------------------------------------------------------- */
120
121 for (block = 0 ; block < nblocks ; block++)
122 {
123
124 /* ------------------------------------------------------------------ */
125 /* the block is from rows/columns k1 to k2-1 */
126 /* ------------------------------------------------------------------ */
127
128 k1 = R [block] ;
129 k2 = R [block+1] ;
130 nk = k2 - k1 ;
131 PRINTF (("BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1, k2-1, nk)) ;
132
133 /* ------------------------------------------------------------------ */
134 /* construct the kth block, C */
135 /* ------------------------------------------------------------------ */
136
137 Lnz [block] = AMESOS2_KLU2_EMPTY ;
138 pc = 0 ;
139 for (k = k1 ; k < k2 ; k++)
140 {
141 newcol = k-k1 ;
142 Cp [newcol] = pc ;
143 oldcol = Qbtf [k] ;
144 pend = Ap [oldcol+1] ;
145 for (p = Ap [oldcol] ; p < pend ; p++)
146 {
147 newrow = Pinv [Ai [p]] ;
148 if (newrow < k1)
149 {
150 nzoff++ ;
151 }
152 else
153 {
154 /* (newrow,newcol) is an entry in the block */
155 ASSERT (newrow < k2) ;
156 newrow -= k1 ;
157 Ci [pc++] = newrow ;
158 }
159 }
160 }
161 Cp [nk] = pc ;
162 maxnz = MAX (maxnz, pc) ;
163 ASSERT (KLU_valid (nk, Cp, Ci, NULL)) ;
164
165 /* ------------------------------------------------------------------ */
166 /* order the block C */
167 /* ------------------------------------------------------------------ */
168
169 if (nk <= 3)
170 {
171
172 /* -------------------------------------------------------------- */
173 /* use natural ordering for tiny blocks (3-by-3 or less) */
174 /* -------------------------------------------------------------- */
175
176 for (k = 0 ; k < nk ; k++)
177 {
178 Pblk [k] = k ;
179 }
180 lnz1 = nk * (nk + 1) / 2 ;
181 flops1 = nk * (nk - 1) / 2 + (nk-1)*nk*(2*nk-1) / 6 ;
182 ok = TRUE ;
183
184 }
185 else if (ordering == 0)
186 {
187
188 /* -------------------------------------------------------------- */
189 /* order the block with AMD (C+C') */
190 /* -------------------------------------------------------------- */
191
192 /*result = AMD_order (nk, Cp, Ci, Pblk, NULL, amd_Info) ;*/
193 result = KLU_OrdinalTraits<Int>::amd_order (nk, Cp, Ci, Pblk,
194 NULL, amd_Info) ;
195 ok = (result >= TRILINOS_AMD_OK) ;
196 if (result == TRILINOS_AMD_OUT_OF_MEMORY)
197 {
198 err = KLU_OUT_OF_MEMORY ;
199 }
200
201 /* account for memory usage in AMD */
202 Common->mempeak = ( size_t) (MAX (Common->mempeak,
203 Common->memusage + amd_Info [TRILINOS_AMD_MEMORY])) ;
204
205 /* get the ordering statistics from AMD */
206 lnz1 = (Int) (amd_Info [TRILINOS_AMD_LNZ]) + nk ;
207 flops1 = 2 * amd_Info [TRILINOS_AMD_NMULTSUBS_LU] + amd_Info [TRILINOS_AMD_NDIV] ;
208 if (pc == maxnz)
209 {
210 /* get the symmetry of the biggest block */
211 Symbolic->symmetry = amd_Info [TRILINOS_AMD_SYMMETRY] ;
212 }
213
214 }
215 else if (ordering == 1)
216 {
217
218 /* -------------------------------------------------------------- */
219 /* order the block with COLAMD (C) */
220 /* -------------------------------------------------------------- */
221
222 /* order (and destroy) Ci, returning column permutation in Cp.
223 * COLAMD "cannot" fail since the matrix has already been checked,
224 * and Ci allocated. */
225
226 /*ok = COLAMD (nk, nk, Cilen, Ci, Cp, NULL, cstats) ;*/
227 ok = KLU_OrdinalTraits<Int>::colamd (nk, nk, Cilen, Ci, Cp,
228 NULL, cstats) ;
229 lnz1 = AMESOS2_KLU2_EMPTY ;
230 flops1 = AMESOS2_KLU2_EMPTY ;
231
232 /* copy the permutation from Cp to Pblk */
233 for (k = 0 ; k < nk ; k++)
234 {
235 Pblk [k] = Cp [k] ;
236 }
237
238 }
239 else
240 {
241
242 /* -------------------------------------------------------------- */
243 /* pass the block to the user-provided ordering function */
244 /* -------------------------------------------------------------- */
245
246 lnz1 = (Common->user_order) (nk, Cp, Ci, Pblk, Common) ;
247 flops1 = AMESOS2_KLU2_EMPTY ;
248 ok = (lnz1 != 0) ;
249 }
250
251 if (!ok)
252 {
253 return (err) ; /* ordering method failed */
254 }
255
256 /* ------------------------------------------------------------------ */
257 /* keep track of nnz(L) and flops statistics */
258 /* ------------------------------------------------------------------ */
259
260 Lnz [block] = lnz1 ;
261 lnz = (lnz == AMESOS2_KLU2_EMPTY || lnz1 == AMESOS2_KLU2_EMPTY) ? AMESOS2_KLU2_EMPTY : (lnz + lnz1) ;
262 flops = (flops == AMESOS2_KLU2_EMPTY || flops1 == AMESOS2_KLU2_EMPTY) ? AMESOS2_KLU2_EMPTY : (flops + flops1) ;
263
264 /* ------------------------------------------------------------------ */
265 /* combine the preordering with the BTF ordering */
266 /* ------------------------------------------------------------------ */
267
268 PRINTF (("Pblk, 1-based:\n")) ;
269 for (k = 0 ; k < nk ; k++)
270 {
271 ASSERT (k + k1 < n) ;
272 ASSERT (Pblk [k] + k1 < n) ;
273 Q [k + k1] = Qbtf [Pblk [k] + k1] ;
274 }
275 for (k = 0 ; k < nk ; k++)
276 {
277 ASSERT (k + k1 < n) ;
278 ASSERT (Pblk [k] + k1 < n) ;
279 P [k + k1] = Pbtf [Pblk [k] + k1] ;
280 }
281 }
282
283 PRINTF (("nzoff %d Ap[n] %d\n", nzoff, Ap [n])) ;
284 ASSERT (nzoff >= 0 && nzoff <= Ap [n]) ;
285
286 /* return estimates of # of nonzeros in L including diagonal */
287 Symbolic->lnz = lnz ; /* AMESOS2_KLU2_EMPTY if COLAMD used */
288 Symbolic->unz = lnz ;
289 Symbolic->nzoff = nzoff ;
290 Symbolic->est_flops = flops ; /* AMESOS2_KLU2_EMPTY if COLAMD or user-ordering used */
291 return (KLU_OK) ;
292}
293
294
295/* ========================================================================== */
296/* === order_and_analyze ==================================================== */
297/* ========================================================================== */
298
299/* Orders the matrix with or with BTF, then orders each block with AMD, COLAMD,
300 * or the user ordering function. Does not handle the natural or given
301 * ordering cases. */
302
303template <typename Entry, typename Int>
304static KLU_symbolic<Entry, Int> *order_and_analyze /* returns NULL if error, or a valid
305 KLU_symbolic object if successful */
306(
307 /* inputs, not modified */
308 Int n, /* A is n-by-n */
309 Int Ap [ ], /* size n+1, column pointers */
310 Int Ai [ ], /* size nz, row indices */
311 /* --------------------- */
312 KLU_common<Entry, Int> *Common
313)
314{
315 double work ;
316 KLU_symbolic<Entry, Int> *Symbolic ;
317 double *Lnz ;
318 Int *Qbtf, *Cp, *Ci, *Pinv, *Pblk, *Pbtf, *P, *Q, *R ;
319 Int nblocks, nz, block, maxblock, k1, k2, nk, do_btf, ordering, k, Cilen,
320 *Work ;
321
322 /* ---------------------------------------------------------------------- */
323 /* allocate the Symbolic object, and check input matrix */
324 /* ---------------------------------------------------------------------- */
325
326 Symbolic = KLU_alloc_symbolic (n, Ap, Ai, Common) ;
327 if (Symbolic == NULL)
328 {
329 return (NULL) ;
330 }
331 P = Symbolic->P ;
332 Q = Symbolic->Q ;
333 R = Symbolic->R ;
334 Lnz = Symbolic->Lnz ;
335 nz = Symbolic->nz ;
336
337 ordering = Common->ordering ;
338 if (ordering == 1)
339 {
340 /* COLAMD TODO : Should call the right version of COLAMD and other
341 external functions below */
342 /*Cilen = COLAMD_recommended (nz, n, n) ;*/
343 Cilen = KLU_OrdinalTraits<Int>::colamd_recommended (nz, n, n) ;
344 }
345 else if (ordering == 0 || (ordering == 3 && Common->user_order != NULL))
346 {
347 /* AMD or user ordering function */
348 Cilen = nz+1 ;
349 }
350 else
351 {
352 /* invalid ordering */
353 Common->status = KLU_INVALID ;
354 KLU_free_symbolic (&Symbolic, Common) ;
355 return (NULL) ;
356 }
357
358 /* AMD memory management routines */
359 trilinos_amd_malloc = Common->malloc_memory ;
360 trilinos_amd_free = Common->free_memory ;
361 trilinos_amd_calloc = Common->calloc_memory ;
362 trilinos_amd_realloc = Common->realloc_memory ;
363
364 /* ---------------------------------------------------------------------- */
365 /* allocate workspace for BTF permutation */
366 /* ---------------------------------------------------------------------- */
367
368 Pbtf = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
369 Qbtf = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
370 if (Common->status < KLU_OK)
371 {
372 KLU_free (Pbtf, n, sizeof (Int), Common) ;
373 KLU_free (Qbtf, n, sizeof (Int), Common) ;
374 KLU_free_symbolic (&Symbolic, Common) ;
375 return (NULL) ;
376 }
377
378 /* ---------------------------------------------------------------------- */
379 /* get the common parameters for BTF and ordering method */
380 /* ---------------------------------------------------------------------- */
381
382 do_btf = Common->btf ;
383 do_btf = (do_btf) ? TRUE : FALSE ;
384 Symbolic->ordering = ordering ;
385 Symbolic->do_btf = do_btf ;
386 Symbolic->structural_rank = AMESOS2_KLU2_EMPTY ;
387
388 /* ---------------------------------------------------------------------- */
389 /* find the block triangular form (if requested) */
390 /* ---------------------------------------------------------------------- */
391
392 Common->work = 0 ;
393 work = 0;
394
395 if (do_btf)
396 {
397 Work = (Int *) KLU_malloc (5*n, sizeof (Int), Common) ;
398 if (Common->status < KLU_OK)
399 {
400 /* out of memory */
401 KLU_free (Pbtf, n, sizeof (Int), Common) ;
402 KLU_free (Qbtf, n, sizeof (Int), Common) ;
403 KLU_free_symbolic (&Symbolic, Common) ;
404 return (NULL) ;
405 }
406
407 /* TODO : Correct version of BTF */
408 /*nblocks = BTF_order (n, Ap, Ai, Common->maxwork, &work, Pbtf, Qbtf, R,
409 &(Symbolic->structural_rank), Work) ;*/
410 nblocks = KLU_OrdinalTraits<Int>::btf_order (n, Ap, Ai,
411 Common->maxwork, &work, Pbtf, Qbtf, R,
412 &(Symbolic->structural_rank), Work) ;
413 Common->structural_rank = Symbolic->structural_rank ;
414 Common->work += work ;
415
416 KLU_free (Work, 5*n, sizeof (Int), Common) ;
417
418 /* unflip Qbtf if the matrix does not have full structural rank */
419 if (Symbolic->structural_rank < n)
420 {
421 for (k = 0 ; k < n ; k++)
422 {
423 Qbtf [k] = TRILINOS_BTF_UNFLIP (Qbtf [k]) ;
424 }
425 }
426
427 /* find the size of the largest block */
428 maxblock = 1 ;
429 for (block = 0 ; block < nblocks ; block++)
430 {
431 k1 = R [block] ;
432 k2 = R [block+1] ;
433 nk = k2 - k1 ;
434 PRINTF (("block %d size %d\n", block, nk)) ;
435 maxblock = MAX (maxblock, nk) ;
436 }
437 }
438 else
439 {
440 /* BTF not requested */
441 nblocks = 1 ;
442 maxblock = n ;
443 R [0] = 0 ;
444 R [1] = n ;
445 for (k = 0 ; k < n ; k++)
446 {
447 Pbtf [k] = k ;
448 Qbtf [k] = k ;
449 }
450 }
451
452 Symbolic->nblocks = nblocks ;
453
454 PRINTF (("maxblock size %d\n", maxblock)) ;
455 Symbolic->maxblock = maxblock ;
456
457 /* ---------------------------------------------------------------------- */
458 /* allocate more workspace, for analyze_worker */
459 /* ---------------------------------------------------------------------- */
460
461 Pblk = (Int *) KLU_malloc (maxblock, sizeof (Int), Common) ;
462 Cp = (Int *) KLU_malloc (maxblock + 1, sizeof (Int), Common) ;
463 Ci = (Int *) KLU_malloc (MAX (Cilen, nz+1), sizeof (Int), Common) ;
464 Pinv = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
465
466 /* ---------------------------------------------------------------------- */
467 /* order each block of the BTF ordering, and a fill-reducing ordering */
468 /* ---------------------------------------------------------------------- */
469
470 if (Common->status == KLU_OK)
471 {
472 PRINTF (("calling analyze_worker\n")) ;
473 Common->status = analyze_worker (n, Ap, Ai, nblocks, Pbtf, Qbtf, R,
474 ordering, P, Q, Lnz, Pblk, Cp, Ci, Cilen, Pinv, Symbolic, Common) ;
475 PRINTF (("analyze_worker done\n")) ;
476 }
477
478 /* ---------------------------------------------------------------------- */
479 /* free all workspace */
480 /* ---------------------------------------------------------------------- */
481
482 KLU_free (Pblk, maxblock, sizeof (Int), Common) ;
483 KLU_free (Cp, maxblock+1, sizeof (Int), Common) ;
484 KLU_free (Ci, MAX (Cilen, nz+1), sizeof (Int), Common) ;
485 KLU_free (Pinv, n, sizeof (Int), Common) ;
486 KLU_free (Pbtf, n, sizeof (Int), Common) ;
487 KLU_free (Qbtf, n, sizeof (Int), Common) ;
488
489 /* ---------------------------------------------------------------------- */
490 /* return the symbolic object */
491 /* ---------------------------------------------------------------------- */
492
493 if (Common->status < KLU_OK)
494 {
495 KLU_free_symbolic (&Symbolic, Common) ;
496 }
497 return (Symbolic) ;
498}
499
500
501/* ========================================================================== */
502/* === KLU_analyze ========================================================== */
503/* ========================================================================== */
504
505template <typename Entry, typename Int>
506KLU_symbolic<Entry, Int> *KLU_analyze /* returns NULL if error, or a valid
507 KLU_symbolic object if successful */
508(
509 /* inputs, not modified */
510 Int n, /* A is n-by-n */
511 Int Ap [ ], /* size n+1, column pointers */
512 Int Ai [ ], /* size nz, row indices */
513 /* -------------------- */
514 KLU_common<Entry, Int> *Common
515)
516{
517
518 /* ---------------------------------------------------------------------- */
519 /* get the control parameters for BTF and ordering method */
520 /* ---------------------------------------------------------------------- */
521
522 if (Common == NULL)
523 {
524 return (NULL) ;
525 }
526 Common->status = KLU_OK ;
527 Common->structural_rank = AMESOS2_KLU2_EMPTY ;
528
529 /* ---------------------------------------------------------------------- */
530 /* order and analyze */
531 /* ---------------------------------------------------------------------- */
532
533 if (Common->ordering == 2)
534 {
535 /* natural ordering */
536 /* TODO : c++ sees Ap, Ai as int *& Why ? */
537 /* TODO : c++ does not allow NULL to be passed directly */
538 Int *dummy = NULL ;
539 return (KLU_analyze_given (n, Ap, Ai, dummy, dummy, Common)) ;
540 }
541 else
542 {
543 /* order with P and Q */
544 return (order_and_analyze (n, Ap, Ai, Common)) ;
545 }
546}
547
548#endif