Amesos2 - Direct Sparse Solver Interfaces Version of the Day
klu2_factor.hpp
1/* ========================================================================== */
2/* === KLU_factor =========================================================== */
3/* ========================================================================== */
4// @HEADER
5// ***********************************************************************
6//
7// KLU2: A Direct Linear Solver package
8// Copyright 2011 Sandia Corporation
9//
10// Under terms of Contract DE-AC04-94AL85000, with Sandia Corporation, the
11// U.S. Government retains certain rights in this software.
12//
13// This library is free software; you can redistribute it and/or modify
14// it under the terms of the GNU Lesser General Public License as
15// published by the Free Software Foundation; either version 2.1 of the
16// License, or (at your option) any later version.
17//
18// This library is distributed in the hope that it will be useful, but
19// WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21// Lesser General Public License for more details.
22//
23// You should have received a copy of the GNU Lesser General Public
24// License along with this library; if not, write to the Free Software
25// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26// USA
27// Questions? Contact Mike A. Heroux (maherou@sandia.gov)
28//
29// KLU2 is derived work from KLU, licensed under LGPL, and copyrighted by
30// University of Florida. The Authors of KLU are Timothy A. Davis and
31// Eka Palamadai. See Doc/KLU_README.txt for the licensing and copyright
32// information for KLU.
33//
34// ***********************************************************************
35// @HEADER
36
37/* Factor the matrix, after ordering and analyzing it with KLU_analyze
38 * or KLU_analyze_given.
39 */
40
41#ifndef KLU2_FACTOR_HPP
42#define KLU2_FACTOR_HPP
43
44#include "klu2_internal.h"
45#include "klu2.hpp"
46#include "klu2_memory.hpp"
47#include "klu2_scale.hpp"
48
49
50/* ========================================================================== */
51/* === KLU_factor2 ========================================================== */
52/* ========================================================================== */
53
54template <typename Entry, typename Int>
55static void factor2
56(
57 /* inputs, not modified */
58 Int Ap [ ], /* size n+1, column pointers */
59 Int Ai [ ], /* size nz, row indices */
60 Entry Ax [ ],
61 KLU_symbolic<Entry, Int> *Symbolic,
62
63 /* inputs, modified on output: */
64 KLU_numeric<Entry, Int> *Numeric,
65 KLU_common<Entry, Int> *Common
66)
67{
68 double lsize ;
69 double *Lnz, *Rs ;
70 Int *P, *Q, *R, *Pnum, *Offp, *Offi, *Pblock, *Pinv, *Iwork,
71 *Lip, *Uip, *Llen, *Ulen ;
72 Entry *Offx, *X, s, *Udiag ;
73 Unit **LUbx ;
74 Int k1, k2, nk, k, block, oldcol, pend, oldrow, n, lnz, unz, p, newrow,
75 nblocks, poff, nzoff, lnz_block, unz_block, scale, max_lnz_block,
76 max_unz_block ;
77
78 /* ---------------------------------------------------------------------- */
79 /* initializations */
80 /* ---------------------------------------------------------------------- */
81
82 /* get the contents of the Symbolic object */
83 n = Symbolic->n ;
84 P = Symbolic->P ;
85 Q = Symbolic->Q ;
86 R = Symbolic->R ;
87 Lnz = Symbolic->Lnz ;
88 nblocks = Symbolic->nblocks ;
89 nzoff = Symbolic->nzoff ;
90
91 Pnum = Numeric->Pnum ;
92 Offp = Numeric->Offp ;
93 Offi = Numeric->Offi ;
94 Offx = (Entry *) Numeric->Offx ;
95
96 Lip = Numeric->Lip ;
97 Uip = Numeric->Uip ;
98 Llen = Numeric->Llen ;
99 Ulen = Numeric->Ulen ;
100 LUbx = (Unit **) Numeric->LUbx ;
101 Udiag = (Entry *) Numeric->Udiag ;
102
103 Rs = Numeric->Rs ;
104 Pinv = Numeric->Pinv ;
105 X = (Entry *) Numeric->Xwork ; /* X is of size n */
106 Iwork = Numeric->Iwork ; /* 5*maxblock for KLU_factor */
107 /* 1*maxblock for Pblock */
108 Pblock = Iwork + 5*((size_t) Symbolic->maxblock) ;
109 Common->nrealloc = 0 ;
110 scale = Common->scale ;
111 max_lnz_block = 1 ;
112 max_unz_block = 1 ;
113
114 /* compute the inverse of P from symbolic analysis. Will be updated to
115 * become the inverse of the numerical factorization when the factorization
116 * is done, for use in KLU_refactor */
117#ifndef NDEBUGKLU2
118 for (k = 0 ; k < n ; k++)
119 {
120 Pinv [k] = AMESOS2_KLU2_EMPTY ;
121 }
122#endif
123 for (k = 0 ; k < n ; k++)
124 {
125 ASSERT (P [k] >= 0 && P [k] < n) ;
126 Pinv [P [k]] = k ;
127 }
128#ifndef NDEBUGKLU2
129 for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != AMESOS2_KLU2_EMPTY) ;
130#endif
131
132 lnz = 0 ;
133 unz = 0 ;
134 Common->noffdiag = 0 ;
135 Offp [0] = 0 ;
136
137 /* ---------------------------------------------------------------------- */
138 /* optionally check input matrix and compute scale factors */
139 /* ---------------------------------------------------------------------- */
140
141 if (scale >= 0)
142 {
143 /* use Pnum as workspace. NOTE: scale factors are not yet permuted
144 * according to the final pivot row ordering, so Rs [oldrow] is the
145 * scale factor for A (oldrow,:), for the user's matrix A. Pnum is
146 * used as workspace in KLU_scale. When the factorization is done,
147 * the scale factors are permuted according to the final pivot row
148 * permutation, so that Rs [k] is the scale factor for the kth row of
149 * A(p,q) where p and q are the final row and column permutations. */
150 /*KLU_scale (scale, n, Ap, Ai, (double *) Ax, Rs, Pnum, Common) ;*/
151 KLU_scale (scale, n, Ap, Ai, Ax, Rs, Pnum, Common) ;
152 if (Common->status < KLU_OK)
153 {
154 /* matrix is invalid */
155 return ;
156 }
157 }
158
159#ifndef NDEBUGKLU2
160 if (scale > 0)
161 {
162 for (k = 0 ; k < n ; k++) PRINTF (("Rs [%d] %g\n", k, Rs [k])) ;
163 }
164#endif
165
166 /* ---------------------------------------------------------------------- */
167 /* factor each block using klu */
168 /* ---------------------------------------------------------------------- */
169
170 for (block = 0 ; block < nblocks ; block++)
171 {
172
173 /* ------------------------------------------------------------------ */
174 /* the block is from rows/columns k1 to k2-1 */
175 /* ------------------------------------------------------------------ */
176
177 k1 = R [block] ;
178 k2 = R [block+1] ;
179 nk = k2 - k1 ;
180 PRINTF (("FACTOR BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
181
182 if (nk == 1)
183 {
184
185 /* -------------------------------------------------------------- */
186 /* singleton case */
187 /* -------------------------------------------------------------- */
188
189 poff = Offp [k1] ;
190 oldcol = Q [k1] ;
191 pend = Ap [oldcol+1] ;
192 CLEAR (s) ;
193
194 if (scale <= 0)
195 {
196 /* no scaling */
197 for (p = Ap [oldcol] ; p < pend ; p++)
198 {
199 oldrow = Ai [p] ;
200 newrow = Pinv [oldrow] ;
201 if (newrow < k1)
202 {
203 Offi [poff] = oldrow ;
204 Offx [poff] = Ax [p] ;
205 poff++ ;
206 }
207 else
208 {
209 ASSERT (newrow == k1) ;
210 PRINTF (("singleton block %d", block)) ;
211 PRINT_ENTRY (Ax [p]) ;
212 s = Ax [p] ;
213 }
214 }
215 }
216 else
217 {
218 /* row scaling. NOTE: scale factors are not yet permuted
219 * according to the pivot row permutation, so Rs [oldrow] is
220 * used below. When the factorization is done, the scale
221 * factors are permuted, so that Rs [newrow] will be used in
222 * klu_solve, klu_tsolve, and klu_rgrowth */
223 for (p = Ap [oldcol] ; p < pend ; p++)
224 {
225 oldrow = Ai [p] ;
226 newrow = Pinv [oldrow] ;
227 if (newrow < k1)
228 {
229 Offi [poff] = oldrow ;
230 /* Offx [poff] = Ax [p] / Rs [oldrow] ; */
231 SCALE_DIV_ASSIGN (Offx [poff], Ax [p], Rs [oldrow]) ;
232 poff++ ;
233 }
234 else
235 {
236 ASSERT (newrow == k1) ;
237 PRINTF (("singleton block %d ", block)) ;
238 PRINT_ENTRY (Ax[p]) ;
239 SCALE_DIV_ASSIGN (s, Ax [p], Rs [oldrow]) ;
240 }
241 }
242 }
243
244 Udiag [k1] = s ;
245
246 if (IS_ZERO (s))
247 {
248 /* singular singleton */
249 Common->status = KLU_SINGULAR ;
250 Common->numerical_rank = k1 ;
251 Common->singular_col = oldcol ;
252 if (Common->halt_if_singular)
253 {
254 return ;
255 }
256 }
257
258 Offp [k1+1] = poff ;
259 Pnum [k1] = P [k1] ;
260 lnz++ ;
261 unz++ ;
262
263 }
264 else
265 {
266
267 /* -------------------------------------------------------------- */
268 /* construct and factorize the kth block */
269 /* -------------------------------------------------------------- */
270
271 if (Lnz [block] < 0)
272 {
273 /* COLAMD was used - no estimate of fill-in */
274 /* use 10 times the nnz in A, plus n */
275 lsize = -(Common->initmem) ;
276 }
277 else
278 {
279 lsize = Common->initmem_amd * Lnz [block] + nk ;
280 }
281
282 /* allocates 1 arrays: LUbx [block] */
283 Numeric->LUsize [block] = KLU_kernel_factor (nk, Ap, Ai, Ax, Q,
284 lsize, &LUbx [block], Udiag + k1, Llen + k1, Ulen + k1,
285 Lip + k1, Uip + k1, Pblock, &lnz_block, &unz_block,
286 X, Iwork, k1, Pinv, Rs, Offp, Offi, Offx, Common) ;
287
288 if (Common->status < KLU_OK ||
289 (Common->status == KLU_SINGULAR && Common->halt_if_singular))
290 {
291 /* out of memory, invalid inputs, or singular */
292 return ;
293 }
294
295 PRINTF (("\n----------------------- L %d:\n", block)) ;
296 ASSERT (KLU_valid_LU (nk, TRUE, Lip+k1, Llen+k1, LUbx [block])) ;
297 PRINTF (("\n----------------------- U %d:\n", block)) ;
298 ASSERT (KLU_valid_LU (nk, FALSE, Uip+k1, Ulen+k1, LUbx [block])) ;
299
300 /* -------------------------------------------------------------- */
301 /* get statistics */
302 /* -------------------------------------------------------------- */
303
304 lnz += lnz_block ;
305 unz += unz_block ;
306 max_lnz_block = MAX (max_lnz_block, lnz_block) ;
307 max_unz_block = MAX (max_unz_block, unz_block) ;
308
309 if (Lnz [block] == AMESOS2_KLU2_EMPTY)
310 {
311 /* revise estimate for subsequent factorization */
312 Lnz [block] = MAX (lnz_block, unz_block) ;
313 }
314
315 /* -------------------------------------------------------------- */
316 /* combine the klu row ordering with the symbolic pre-ordering */
317 /* -------------------------------------------------------------- */
318
319 PRINTF (("Pnum, 1-based:\n")) ;
320 for (k = 0 ; k < nk ; k++)
321 {
322 ASSERT (k + k1 < n) ;
323 ASSERT (Pblock [k] + k1 < n) ;
324 Pnum [k + k1] = P [Pblock [k] + k1] ;
325 PRINTF (("Pnum (%d + %d + 1 = %d) = %d + 1 = %d\n",
326 k, k1, k+k1+1, Pnum [k+k1], Pnum [k+k1]+1)) ;
327 }
328
329 /* the local pivot row permutation Pblock is no longer needed */
330 }
331 }
332 ASSERT (nzoff == Offp [n]) ;
333 PRINTF (("\n------------------- Off diagonal entries:\n")) ;
334 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
335
336 Numeric->lnz = lnz ;
337 Numeric->unz = unz ;
338 Numeric->max_lnz_block = max_lnz_block ;
339 Numeric->max_unz_block = max_unz_block ;
340
341 /* compute the inverse of Pnum */
342#ifndef NDEBUGKLU2
343 for (k = 0 ; k < n ; k++)
344 {
345 Pinv [k] = AMESOS2_KLU2_EMPTY ;
346 }
347#endif
348 for (k = 0 ; k < n ; k++)
349 {
350 ASSERT (Pnum [k] >= 0 && Pnum [k] < n) ;
351 Pinv [Pnum [k]] = k ;
352 }
353#ifndef NDEBUGKLU2
354 for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != AMESOS2_KLU2_EMPTY) ;
355#endif
356
357 /* permute scale factors Rs according to pivotal row order */
358 if (scale > 0)
359 {
360 for (k = 0 ; k < n ; k++)
361 {
362 /* TODO : Check. REAL(X[k]) Can be just X[k] */
363 /* REAL (X [k]) = Rs [Pnum [k]] ; */
364 X [k] = Rs [Pnum [k]] ;
365 }
366 for (k = 0 ; k < n ; k++)
367 {
368 Rs [k] = REAL (X [k]) ;
369 }
370 }
371
372 PRINTF (("\n------------------- Off diagonal entries, old:\n")) ;
373 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
374
375 /* apply the pivot row permutations to the off-diagonal entries */
376 for (p = 0 ; p < nzoff ; p++)
377 {
378 ASSERT (Offi [p] >= 0 && Offi [p] < n) ;
379 Offi [p] = Pinv [Offi [p]] ;
380 }
381
382 PRINTF (("\n------------------- Off diagonal entries, new:\n")) ;
383 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
384
385#ifndef NDEBUGKLU2
386 {
387 PRINTF (("\n ############# KLU_BTF_FACTOR done, nblocks %d\n",nblocks));
388 Entry ss, *Udiag = Numeric->Udiag ;
389 for (block = 0 ; block < nblocks && Common->status == KLU_OK ; block++)
390 {
391 k1 = R [block] ;
392 k2 = R [block+1] ;
393 nk = k2 - k1 ;
394 PRINTF (("\n======================KLU_factor output: k1 %d k2 %d nk %d\n",k1,k2,nk)) ;
395 if (nk == 1)
396 {
397 PRINTF (("singleton ")) ;
398 /* ENTRY_PRINT (singleton [block]) ; */
399 ss = Udiag [k1] ;
400 PRINT_ENTRY (ss) ;
401 }
402 else
403 {
404 Int *Lip, *Uip, *Llen, *Ulen ;
405 Unit *LU ;
406 Lip = Numeric->Lip + k1 ;
407 Llen = Numeric->Llen + k1 ;
408 LU = (Unit *) Numeric->LUbx [block] ;
409 PRINTF (("\n---- L block %d\n", block));
410 ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ;
411 Uip = Numeric->Uip + k1 ;
412 Ulen = Numeric->Ulen + k1 ;
413 PRINTF (("\n---- U block %d\n", block)) ;
414 ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ;
415 }
416 }
417 }
418#endif
419}
420
421
422
423/* ========================================================================== */
424/* === KLU_factor =========================================================== */
425/* ========================================================================== */
426
427template <typename Entry, typename Int>
428KLU_numeric<Entry, Int> *KLU_factor /* returns NULL if error, or a valid
429 KLU_numeric object if successful */
430(
431 /* --- inputs --- */
432 Int Ap [ ], /* size n+1, column pointers */
433 Int Ai [ ], /* size nz, row indices */
434 /* TODO : Checked, switch from double to Entry , still make sure works in all cases */
435 Entry Ax [ ],
436 KLU_symbolic<Entry, Int> *Symbolic,
437 /* -------------- */
438 KLU_common<Entry, Int> *Common
439)
440{
441 Int n, nzoff, nblocks, maxblock, k, ok = TRUE ;
442
443 KLU_numeric<Entry, Int> *Numeric ;
444 size_t n1, nzoff1, s, b6, n3 ;
445
446 if (Common == NULL)
447 {
448 return (NULL) ;
449 }
450 Common->status = KLU_OK ;
451 Common->numerical_rank = AMESOS2_KLU2_EMPTY ;
452 Common->singular_col = AMESOS2_KLU2_EMPTY ;
453
454 /* ---------------------------------------------------------------------- */
455 /* get the contents of the Symbolic object */
456 /* ---------------------------------------------------------------------- */
457
458 /* check for a valid Symbolic object */
459 if (Symbolic == NULL)
460 {
461 Common->status = KLU_INVALID ;
462 return (NULL) ;
463 }
464
465 n = Symbolic->n ;
466 nzoff = Symbolic->nzoff ;
467 nblocks = Symbolic->nblocks ;
468 maxblock = Symbolic->maxblock ;
469
470 PRINTF (("KLU_factor: n %d nzoff %d nblocks %d maxblock %d\n",
471 n, nzoff, nblocks, maxblock)) ;
472
473 /* ---------------------------------------------------------------------- */
474 /* get control parameters and make sure they are in the proper range */
475 /* ---------------------------------------------------------------------- */
476
477 Common->initmem_amd = MAX (1.0, Common->initmem_amd) ;
478 Common->initmem = MAX (1.0, Common->initmem) ;
479 Common->tol = MIN (Common->tol, 1.0) ;
480 Common->tol = MAX (0.0, Common->tol) ;
481 Common->memgrow = MAX (1.0, Common->memgrow) ;
482
483 /* ---------------------------------------------------------------------- */
484 /* allocate the Numeric object */
485 /* ---------------------------------------------------------------------- */
486
487 /* this will not cause size_t overflow (already checked by KLU_symbolic) */
488 n1 = ((size_t) n) + 1 ;
489 nzoff1 = ((size_t) nzoff) + 1 ;
490
491 Numeric = (KLU_numeric<Entry, Int> *) KLU_malloc (sizeof (KLU_numeric<Entry, Int>), 1, Common) ;
492 if (Common->status < KLU_OK)
493 {
494 /* out of memory */
495 Common->status = KLU_OUT_OF_MEMORY ;
496 return (NULL) ;
497 }
498 Numeric->n = n ;
499 Numeric->nblocks = nblocks ;
500 Numeric->nzoff = nzoff ;
501 Numeric->Pnum = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
502 Numeric->Offp = (Int *) KLU_malloc (n1, sizeof (Int), Common) ;
503 Numeric->Offi = (Int *) KLU_malloc (nzoff1, sizeof (Int), Common) ;
504 Numeric->Offx = KLU_malloc (nzoff1, sizeof (Entry), Common) ;
505
506 Numeric->Lip = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
507 Numeric->Uip = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
508 Numeric->Llen = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
509 Numeric->Ulen = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
510
511 Numeric->LUsize = (size_t *) KLU_malloc (nblocks, sizeof (size_t), Common) ;
512
513 Numeric->LUbx = (void **) KLU_malloc (nblocks, sizeof (Unit *), Common) ;
514 if (Numeric->LUbx != NULL)
515 {
516 for (k = 0 ; k < nblocks ; k++)
517 {
518 Numeric->LUbx [k] = NULL ;
519 }
520 }
521
522 Numeric->Udiag = KLU_malloc (n, sizeof (Entry), Common) ;
523
524 if (Common->scale > 0)
525 {
526 Numeric->Rs = (double *) KLU_malloc (n, sizeof (double), Common) ;
527 }
528 else
529 {
530 /* no scaling */
531 Numeric->Rs = NULL ;
532 }
533
534 Numeric->Pinv = (Int *) KLU_malloc (n, sizeof (Int), Common) ;
535
536 /* allocate permanent workspace for factorization and solve. Note that the
537 * solver will use an Xwork of size 4n, whereas the factorization codes use
538 * an Xwork of size n and integer space (Iwork) of size 6n. KLU_condest
539 * uses an Xwork of size 2n. Total size is:
540 *
541 * n*sizeof(Entry) + max (6*maxblock*sizeof(Int), 3*n*sizeof(Entry))
542 */
543 s = KLU_mult_size_t (n, sizeof (Entry), &ok) ;
544 n3 = KLU_mult_size_t (n, 3 * sizeof (Entry), &ok) ;
545 b6 = KLU_mult_size_t (maxblock, 6 * sizeof (Int), &ok) ;
546 Numeric->worksize = KLU_add_size_t (s, MAX (n3, b6), &ok) ;
547 Numeric->Work = KLU_malloc (Numeric->worksize, 1, Common) ;
548 Numeric->Xwork = Numeric->Work ;
549 Numeric->Iwork = (Int *) ((Entry *) Numeric->Xwork + n) ;
550 if (!ok || Common->status < KLU_OK)
551 {
552 /* out of memory or problem too large */
553 Common->status = ok ? KLU_OUT_OF_MEMORY : KLU_TOO_LARGE ;
554 KLU_free_numeric (&Numeric, Common) ;
555 return (NULL) ;
556 }
557
558 /* ---------------------------------------------------------------------- */
559 /* factorize the blocks */
560 /* ---------------------------------------------------------------------- */
561
562 factor2 (Ap, Ai, (Entry *) Ax, Symbolic, Numeric, Common) ;
563
564 /* ---------------------------------------------------------------------- */
565 /* return or free the Numeric object */
566 /* ---------------------------------------------------------------------- */
567
568 if (Common->status < KLU_OK)
569 {
570 /* out of memory or inputs invalid */
571 KLU_free_numeric (&Numeric, Common) ;
572 }
573 else if (Common->status == KLU_SINGULAR)
574 {
575 if (Common->halt_if_singular)
576 {
577 /* Matrix is singular, and the Numeric object is only partially
578 * defined because we halted early. This is the default case for
579 * a singular matrix. */
580 KLU_free_numeric (&Numeric, Common) ;
581 }
582 }
583 else if (Common->status == KLU_OK)
584 {
585 /* successful non-singular factorization */
586 Common->numerical_rank = n ;
587 Common->singular_col = n ;
588 }
589 return (Numeric) ;
590}
591
592#endif
void scale(ArrayView< Scalar1 > vals, size_t l, size_t ld, ArrayView< Scalar2 > s)
Scales a 1-D representation of a multivector.