Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Teuchos_BLAS_MP_Vector.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Teuchos: Common Tools Package
5// Copyright (2004) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ***********************************************************************
40// @HEADER
41
42#ifndef _TEUCHOS_BLAS_MP_VECTOR_HPP_
43#define _TEUCHOS_BLAS_MP_VECTOR_HPP_
44
45#include "Teuchos_BLAS.hpp"
46#include "Sacado_MP_Vector.hpp"
47
48namespace Teuchos
49{
50
51namespace details
52{
53
54template <typename Storage>
55class GivensRotator<Sacado::MP::Vector<Storage>, false>
56{
57public:
60
61 void
63 ScalarType *db,
64 ScalarType *c,
65 ScalarType *s) const
66 {
67 typedef ScalarTraits<ScalarType> STS;
68
69 ScalarType r, roe, scale, z, da_scaled, db_scaled;
70 auto m_da = (STS::magnitude(*da) > STS::magnitude(*db));
71 mask_assign(m_da, roe) = {*da, *db};
72
73 scale = STS::magnitude(*da) + STS::magnitude(*db);
74
75 auto m_scale = scale != STS::zero();
76
77 da_scaled = *da;
78 db_scaled = *db;
79
80 *c = *da;
81 *s = *db;
82
83 ScalarType tmp = STS::one();
84 mask_assign(m_scale, tmp) /= scale;
85
86 mask_assign(m_scale, da_scaled) *= tmp;
87 mask_assign(m_scale, db_scaled) *= tmp;
88
89 r = scale * STS::squareroot(da_scaled * da_scaled + db_scaled * db_scaled);
90 auto m_roe = roe < 0;
91 mask_assign(m_roe, r) = -r;
92
93 tmp = STS::one();
94 mask_assign(m_scale, tmp) /= r;
95
96 mask_assign(m_scale, *c) *= tmp;
97 mask_assign(m_scale, *s) *= tmp;
98
99 mask_assign(!m_scale, *c) = STS::one();
100 mask_assign(!m_scale, *s) = STS::zero();
101
102 mask_assign(*c != STS::zero(), z) /= {STS::one(), *c, STS::zero()};
103 mask_assign(!m_scale, z) = STS::zero();
104 mask_assign(m_da, z) = *s;
105
106 *da = r;
107 *db = z;
108 }
109};
110} // namespace details
111} // namespace Teuchos
112
113//namespace Sacado {
114// namespace MP {
115namespace Teuchos
116{
118template <typename OrdinalType, typename Storage>
119class BLAS<OrdinalType, Sacado::MP::Vector<Storage>> : public Teuchos::DefaultBLASImpl<OrdinalType, Sacado::MP::Vector<Storage>>
120{
121
122 typedef typename Teuchos::ScalarTraits<Sacado::MP::Vector<Storage>>::magnitudeType MagnitudeType;
123 typedef typename Sacado::ValueType<Sacado::MP::Vector<Storage>>::type ValueType;
124 typedef typename Sacado::ScalarType<Sacado::MP::Vector<Storage>>::type scalar_type;
126 typedef Teuchos::DefaultBLASImpl<OrdinalType, Sacado::MP::Vector<Storage>> BLASType;
127
128public:
129 template <typename alpha_type, typename A_type>
130 void TRSM(Teuchos::ESide side, Teuchos::EUplo uplo,
131 Teuchos::ETransp transa, Teuchos::EDiag diag,
132 const OrdinalType m, const OrdinalType n,
133 const alpha_type &alpha,
134 const A_type *A, const OrdinalType lda,
135 ScalarType *B, const OrdinalType ldb) const
136 {
138
139 OrdinalType izero = OrdinalTraits<OrdinalType>::zero();
140 OrdinalType ione = OrdinalTraits<OrdinalType>::one();
141 alpha_type alpha_zero = ScalarTraits<alpha_type>::zero();
142 A_type A_zero = ScalarTraits<A_type>::zero();
143 ScalarType B_zero = ScalarTraits<ScalarType>::zero();
144 alpha_type alpha_one = ScalarTraits<alpha_type>::one();
145 ScalarType B_one = ScalarTraits<ScalarType>::one();
146 ScalarType temp;
147 OrdinalType NRowA = m;
148 bool BadArgument = false;
149 bool noUnit = (EDiagChar[diag] == 'N');
150 bool noConj = (ETranspChar[transa] == 'T');
151
152 if (!(ESideChar[side] == 'L'))
153 {
154 NRowA = n;
155 }
156
157 // Quick return.
158 if (n == izero || m == izero)
159 {
160 return;
161 }
162 if (m < izero)
163 {
164 std::cout << "BLAS::TRSM Error: M == " << m << std::endl;
165 BadArgument = true;
166 }
167 if (n < izero)
168 {
169 std::cout << "BLAS::TRSM Error: N == " << n << std::endl;
170 BadArgument = true;
171 }
172 if (lda < NRowA)
173 {
174 std::cout << "BLAS::TRSM Error: LDA < " << NRowA << std::endl;
175 BadArgument = true;
176 }
177 if (ldb < m)
178 {
179 std::cout << "BLAS::TRSM Error: LDB < MAX(1,M)" << std::endl;
180 BadArgument = true;
181 }
182
183 if (!BadArgument)
184 {
185 int i, j, k;
186 // Set the solution to the zero vector.
187 auto alpha_is_zero = (alpha == alpha_zero);
188 for (j = izero; j < n; j++)
189 {
190 for (i = izero; i < m; i++)
191 {
192 mask_assign(alpha_is_zero, B[j * ldb + i]) = B_zero;
193 }
194 }
195
196 auto alpha_is_not_one = (alpha != alpha_one);
197
198 { // Start the operations.
199 if (ESideChar[side] == 'L')
200 {
201 //
202 // Perform computations for OP(A)*X = alpha*B
203 //
204 if (ETranspChar[transa] == 'N')
205 {
206 //
207 // Compute B = alpha*inv( A )*B
208 //
209 if (EUploChar[uplo] == 'U')
210 {
211 // A is upper triangular.
212 for (j = izero; j < n; j++)
213 {
214 // Perform alpha*B if alpha is not 1.
215 for (i = izero; i < m; i++)
216 {
217 mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
218 }
219
220 // Perform a backsolve for column j of B.
221 for (k = (m - ione); k > -ione; k--)
222 {
223 // If this entry is zero, we don't have to do anything.
224 auto B_is_not_zero = (B[j * ldb + k] != B_zero);
225
226 if (noUnit)
227 {
228 mask_assign(B_is_not_zero, B[j * ldb + k]) /= A[k * lda + k];
229 }
230 for (i = izero; i < k; i++)
231 {
232 mask_assign(B_is_not_zero, B[j * ldb + i]) -= B[j * ldb + k] * A[k * lda + i];
233 }
234 }
235 }
236 }
237 else
238 { // A is lower triangular.
239 for (j = izero; j < n; j++)
240 {
241 // Perform alpha*B if alpha is not 1.
242 for (i = izero; i < m; i++)
243 {
244 mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
245 }
246 // Perform a forward solve for column j of B.
247 for (k = izero; k < m; k++)
248 {
249 // If this entry is zero, we don't have to do anything.
250 auto B_is_not_zero = (B[j * ldb + k] != B_zero);
251 if (noUnit)
252 {
253 mask_assign(B_is_not_zero, B[j * ldb + k]) /= A[k * lda + k];
254 }
255 for (i = k + ione; i < m; i++)
256 {
257 mask_assign(B_is_not_zero, B[j * ldb + i]) -= B[j * ldb + k] * A[k * lda + i];
258 }
259 }
260 }
261 } // end if (uplo == 'U')
262 } // if (transa =='N')
263 else
264 {
265 //
266 // Compute B = alpha*inv( A' )*B
267 // or B = alpha*inv( conj(A') )*B
268 //
269 if (EUploChar[uplo] == 'U')
270 {
271 // A is upper triangular.
272 for (j = izero; j < n; j++)
273 {
274 for (i = izero; i < m; i++)
275 {
276 temp = alpha * B[j * ldb + i];
277 if (noConj)
278 {
279 for (k = izero; k < i; k++)
280 {
281 temp -= A[i * lda + k] * B[j * ldb + k];
282 }
283 if (noUnit)
284 {
285 temp /= A[i * lda + i];
286 }
287 }
288 else
289 {
290 for (k = izero; k < i; k++)
291 {
292 temp -= ScalarTraits<A_type>::conjugate(A[i * lda + k]) * B[j * ldb + k];
293 }
294 if (noUnit)
295 {
296 temp /= ScalarTraits<A_type>::conjugate(A[i * lda + i]);
297 }
298 }
299 B[j * ldb + i] = temp;
300 }
301 }
302 }
303 else
304 { // A is lower triangular.
305 for (j = izero; j < n; j++)
306 {
307 for (i = (m - ione); i > -ione; i--)
308 {
309 temp = alpha * B[j * ldb + i];
310 if (noConj)
311 {
312 for (k = i + ione; k < m; k++)
313 {
314 temp -= A[i * lda + k] * B[j * ldb + k];
315 }
316 if (noUnit)
317 {
318 temp /= A[i * lda + i];
319 }
320 }
321 else
322 {
323 for (k = i + ione; k < m; k++)
324 {
325 temp -= ScalarTraits<A_type>::conjugate(A[i * lda + k]) * B[j * ldb + k];
326 }
327 if (noUnit)
328 {
329 temp /= ScalarTraits<A_type>::conjugate(A[i * lda + i]);
330 }
331 }
332 B[j * ldb + i] = temp;
333 }
334 }
335 }
336 }
337 } // if (side == 'L')
338 else
339 {
340 // side == 'R'
341 //
342 // Perform computations for X*OP(A) = alpha*B
343 //
344 if (ETranspChar[transa] == 'N')
345 {
346 //
347 // Compute B = alpha*B*inv( A )
348 //
349 if (EUploChar[uplo] == 'U')
350 {
351 // A is upper triangular.
352 // Perform a backsolve for column j of B.
353 for (j = izero; j < n; j++)
354 {
355 // Perform alpha*B if alpha is not 1.
356 for (i = izero; i < m; i++)
357 {
358 mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
359 }
360 for (k = izero; k < j; k++)
361 {
362 // If this entry is zero, we don't have to do anything.
363 auto A_is_not_zero = (A[j * lda + k] != A_zero);
364 for (i = izero; i < m; i++)
365 {
366 mask_assign(A_is_not_zero, B[j * ldb + i]) -= A[j * lda + k] * B[k * ldb + i];
367 }
368 }
369 if (noUnit)
370 {
371 temp = B_one / A[j * lda + j];
372 for (i = izero; i < m; i++)
373 {
374 B[j * ldb + i] *= temp;
375 }
376 }
377 }
378 }
379 else
380 { // A is lower triangular.
381 for (j = (n - ione); j > -ione; j--)
382 {
383 // Perform alpha*B if alpha is not 1.
384 for (i = izero; i < m; i++)
385 {
386 mask_assign(alpha_is_not_one, B[j * ldb + i]) *= alpha;
387 }
388
389 // Perform a forward solve for column j of B.
390 for (k = j + ione; k < n; k++)
391 {
392 // If this entry is zero, we don't have to do anything.
393 auto A_is_not_zero = (A[j * lda + k] != A_zero);
394 for (i = izero; i < m; i++)
395 {
396 mask_assign(A_is_not_zero, B[j * ldb + i]) -= A[j * lda + k] * B[k * ldb + i];
397 }
398 }
399 if (noUnit)
400 {
401 temp = B_one / A[j * lda + j];
402 for (i = izero; i < m; i++)
403 {
404 B[j * ldb + i] *= temp;
405 }
406 }
407 }
408 } // end if (uplo == 'U')
409 } // if (transa =='N')
410 else
411 {
412 //
413 // Compute B = alpha*B*inv( A' )
414 // or B = alpha*B*inv( conj(A') )
415 //
416 if (EUploChar[uplo] == 'U')
417 {
418 // A is upper triangular.
419 for (k = (n - ione); k > -ione; k--)
420 {
421 if (noUnit)
422 {
423 if (noConj)
424 temp = B_one / A[k * lda + k];
425 else
426 temp = B_one / ScalarTraits<A_type>::conjugate(A[k * lda + k]);
427 for (i = izero; i < m; i++)
428 {
429 B[k * ldb + i] *= temp;
430 }
431 }
432 for (j = izero; j < k; j++)
433 {
434 auto A_is_not_zero = (A[k * lda + j] != A_zero);
435 if (noConj)
436 mask_assign(A_is_not_zero, temp) = A[k * lda + j];
437 else
438 mask_assign(A_is_not_zero, temp) = ScalarTraits<A_type>::conjugate(A[k * lda + j]);
439 for (i = izero; i < m; i++)
440 {
441 mask_assign(A_is_not_zero, B[j * ldb + i]) -= temp * B[k * ldb + i];
442 }
443 }
444 for (i = izero; i < m; i++)
445 {
446 mask_assign(alpha_is_not_one, B[k * ldb + i]) *= alpha;
447 }
448 }
449 }
450 else
451 { // A is lower triangular.
452 for (k = izero; k < n; k++)
453 {
454 if (noUnit)
455 {
456 if (noConj)
457 temp = B_one / A[k * lda + k];
458 else
459 temp = B_one / ScalarTraits<A_type>::conjugate(A[k * lda + k]);
460 for (i = izero; i < m; i++)
461 {
462 B[k * ldb + i] *= temp;
463 }
464 }
465 for (j = k + ione; j < n; j++)
466 {
467 auto A_is_not_zero = (A[k * lda + j] != A_zero);
468 if (noConj)
469 mask_assign(A_is_not_zero, temp) = A[k * lda + j];
470 else
471 mask_assign(A_is_not_zero, temp) = ScalarTraits<A_type>::conjugate(A[k * lda + j]);
472 for (i = izero; i < m; i++)
473 {
474 mask_assign(A_is_not_zero, B[j * ldb + i]) -= temp * B[k * ldb + i];
475 }
476 }
477 for (i = izero; i < m; i++)
478 {
479 mask_assign(alpha_is_not_one, B[k * ldb + i]) *= alpha;
480 }
481 }
482 }
483 }
484 }
485 }
486 }
487 }
488}; // class BLAS
489// }
490//}
491
492} // namespace Teuchos
493
494#endif // _TEUCHOS_BLAS__MP_VECTOR_HPP_
KOKKOS_INLINE_FUNCTION MaskedAssign< scalar > mask_assign(bool b, scalar *s)
Teuchos::DefaultBLASImpl< OrdinalType, Sacado::MP::Vector< Storage > > BLASType
Sacado::ScalarType< Sacado::MP::Vector< Storage > >::type scalar_type
Sacado::ValueType< Sacado::MP::Vector< Storage > >::type ValueType
Teuchos::ScalarTraits< Sacado::MP::Vector< Storage > >::magnitudeType MagnitudeType
void TRSM(Teuchos::ESide side, Teuchos::EUplo uplo, Teuchos::ETransp transa, Teuchos::EDiag diag, const OrdinalType m, const OrdinalType n, const alpha_type &alpha, const A_type *A, const OrdinalType lda, ScalarType *B, const OrdinalType ldb) const
void ROTG(ScalarType *da, ScalarType *db, ScalarType *c, ScalarType *s) const