Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_BlockView.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ************************************************************************
40// @HEADER
41
42#ifndef TPETRA_BLOCKVIEW_HPP
43#define TPETRA_BLOCKVIEW_HPP
44
52
53#include "TpetraCore_config.h"
54#include "Kokkos_ArithTraits.hpp"
55#include "Kokkos_Complex.hpp"
56
57namespace Tpetra {
58
63
64namespace Impl {
65
72template<class ViewType1,
73 class ViewType2,
74 const int rank1 = ViewType1::rank>
75struct AbsMax {
76 static KOKKOS_INLINE_FUNCTION void
77 run (const ViewType2& Y, const ViewType1& X);
78};
79
84template<class ViewType1,
85 class ViewType2>
86struct AbsMax<ViewType1, ViewType2, 2> {
89 static KOKKOS_INLINE_FUNCTION void
90 run (const ViewType2& Y, const ViewType1& X)
91 {
92 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
93 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
94 typedef typename std::remove_reference<decltype (Y(0,0)) >::type STY;
95 static_assert(! std::is_const<STY>::value,
96 "AbsMax: The type of each entry of Y must be nonconst.");
97 typedef typename std::decay<decltype (X(0,0)) >::type STX;
98 static_assert( std::is_same<STX, STY>::value,
99 "AbsMax: The type of each entry of X and Y must be the same.");
100 typedef Kokkos::Details::ArithTraits<STY> KAT;
101
102 const int numCols = Y.extent (1);
103 const int numRows = Y.extent (0);
104 for (int j = 0; j < numCols; ++j) {
105 for (int i = 0; i < numRows; ++i) {
106 STY& Y_ij = Y(i,j); // use ref here to avoid 2nd op() call on Y
107 const STX X_ij = X(i,j);
108 // NOTE: no std::max (not a CUDA __device__ function); must
109 // cast back up to complex.
110 const auto Y_ij_abs = KAT::abs (Y_ij);
111 const auto X_ij_abs = KAT::abs (X_ij);
112 Y_ij = (Y_ij_abs >= X_ij_abs) ?
113 static_cast<STY> (Y_ij_abs) :
114 static_cast<STY> (X_ij_abs);
115 }
116 }
117 }
118};
119
124template<class ViewType1,
125 class ViewType2>
126struct AbsMax<ViewType1, ViewType2, 1> {
129 static KOKKOS_INLINE_FUNCTION void
130 run (const ViewType2& Y, const ViewType1& X)
131 {
132 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
133 "AbsMax: ViewType1 and ViewType2 must have the same rank.");
134
135 typedef typename std::remove_reference<decltype (Y(0)) >::type STY;
136 static_assert(! std::is_const<STY>::value,
137 "AbsMax: The type of each entry of Y must be nonconst.");
138 typedef typename std::remove_const<typename std::remove_reference<decltype (X(0)) >::type>::type STX;
139 static_assert( std::is_same<STX, STY>::value,
140 "AbsMax: The type of each entry of X and Y must be the same.");
141 typedef Kokkos::Details::ArithTraits<STY> KAT;
142
143 const int numRows = Y.extent (0);
144 for (int i = 0; i < numRows; ++i) {
145 STY& Y_i = Y(i); // use ref here to avoid 2nd op() call on Y
146 const STX X_i = X(i);
147 // NOTE: no std::max (not a CUDA __device__ function); must
148 // cast back up to complex.
149 const auto Y_i_abs = KAT::abs (Y_i);
150 const auto X_i_abs = KAT::abs (X_i);
151 Y_i = (Y_i_abs >= X_i_abs) ?
152 static_cast<STY> (Y_i_abs) :
153 static_cast<STY> (X_i_abs);
154 }
155 }
156};
157
164template<class ViewType1, class ViewType2, const int rank = ViewType1::rank>
165KOKKOS_INLINE_FUNCTION void
166absMax (const ViewType2& Y, const ViewType1& X)
167{
168 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
169 "absMax: ViewType1 and ViewType2 must have the same rank.");
171}
172
177template<class ViewType,
178 class CoefficientType,
179 class IndexType = int,
180 const bool is_contiguous = false,
181 const int rank = ViewType::rank>
182struct SCAL {
183 static KOKKOS_INLINE_FUNCTION void
184 run (const CoefficientType& alpha, const ViewType& x);
185};
186
189template<class ViewType,
190 class CoefficientType,
191 class IndexType>
192struct SCAL<ViewType, CoefficientType, IndexType, false, 1> {
194 static KOKKOS_INLINE_FUNCTION void
195 run (const CoefficientType& alpha, const ViewType& x)
196 {
197 const IndexType numRows = static_cast<IndexType> (x.extent (0));
198
200#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
201#pragma unroll
202#endif
203 for (IndexType i = 0; i < numRows; ++i)
204 x(i) = alpha * x(i);
205 }
206};
209template<class ViewType,
210 class CoefficientType,
211 class IndexType>
212struct SCAL<ViewType, CoefficientType, IndexType, false, 2> {
214 static KOKKOS_INLINE_FUNCTION void
215 run (const CoefficientType& alpha, const ViewType& A)
216 {
217 const IndexType numRows = static_cast<IndexType> (A.extent (0));
218 const IndexType numCols = static_cast<IndexType> (A.extent (1));
219
220 for (IndexType j = 0; j < numCols; ++j)
221 for (IndexType i = 0; i < numRows; ++i)
222 A(i,j) = alpha * A(i,j);
223 }
224};
225template<class ViewType,
226 class CoefficientType,
227 class IndexType,
228 const int rank>
229struct SCAL<ViewType, CoefficientType, IndexType, true, rank> {
231 static KOKKOS_INLINE_FUNCTION void
232 run (const CoefficientType& alpha, const ViewType& x)
233 {
234 using x_value_type = typename std::decay<decltype (*x.data()) >::type;
235 const IndexType span = static_cast<IndexType> (x.span());
236 x_value_type *__restrict__ x_ptr(x.data());
237#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
238#pragma unroll
239#endif
240 for (IndexType i = 0; i < span; ++i)
241 x_ptr[i] = alpha * x_ptr[i];
242 }
243};
244
249template<class ViewType,
250 class InputType,
251 class IndexType = int,
252 const bool is_contiguous = false,
253 const int rank = ViewType::rank>
254struct FILL {
255 static KOKKOS_INLINE_FUNCTION void
256 run (const ViewType& x, const InputType& val);
257};
258
261template<class ViewType,
262 class InputType,
263 class IndexType>
264struct FILL<ViewType, InputType, IndexType, false, 1> {
265 static KOKKOS_INLINE_FUNCTION void
266 run (const ViewType& x, const InputType& val)
267 {
268 const IndexType numRows = static_cast<IndexType> (x.extent (0));
269 for (IndexType i = 0; i < numRows; ++i)
270 x(i) = val;
271 }
272};
275template<class ViewType,
276 class InputType,
277 class IndexType>
278struct FILL<ViewType, InputType, IndexType, false, 2> {
279 static KOKKOS_INLINE_FUNCTION void
280 run (const ViewType& X, const InputType& val)
281 {
282 const IndexType numRows = static_cast<IndexType> (X.extent (0));
283 const IndexType numCols = static_cast<IndexType> (X.extent (1));
284 for (IndexType j = 0; j < numCols; ++j)
285 for (IndexType i = 0; i < numRows; ++i)
286 X(i,j) = val;
287 }
288};
289template<class ViewType,
290 class InputType,
291 class IndexType,
292 const int rank>
293struct FILL<ViewType, InputType, IndexType, true, rank> {
294 static KOKKOS_INLINE_FUNCTION void
295 run (const ViewType& x, const InputType& val)
296 {
297 const IndexType span = static_cast<IndexType> (x.span());
298 auto x_ptr = x.data();
299#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
300#pragma unroll
301#endif
302 for (IndexType i = 0; i < span; ++i)
303 x_ptr[i] = val;
304 }
305};
306
307
312template<class CoefficientType,
313 class ViewType1,
314 class ViewType2,
315 class IndexType = int,
316 const bool is_contiguous = false,
317 const int rank = ViewType1::rank>
318struct AXPY {
319 static KOKKOS_INLINE_FUNCTION void
320 run (const CoefficientType& alpha,
321 const ViewType1& x,
322 const ViewType2& y);
323};
324
327template<class CoefficientType,
328 class ViewType1,
329 class ViewType2,
330 class IndexType>
331struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, false, 1> {
333 static KOKKOS_INLINE_FUNCTION void
334 run (const CoefficientType& alpha,
335 const ViewType1& x,
336 const ViewType2& y)
337 {
338 using Kokkos::Details::ArithTraits;
339 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
340 "AXPY: x and y must have the same rank.");
341
342 const IndexType numRows = static_cast<IndexType> (y.extent (0));
343 if (alpha != ArithTraits<CoefficientType>::zero ()) {
345 for (IndexType i = 0; i < numRows; ++i)
346 y(i) += alpha * x(i);
347 }
348 }
349};
350
353template<class CoefficientType,
354 class ViewType1,
355 class ViewType2,
356 class IndexType>
357struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, false, 2> {
359 static KOKKOS_INLINE_FUNCTION void
360 run (const CoefficientType& alpha,
361 const ViewType1& X,
362 const ViewType2& Y)
363 {
364 using Kokkos::Details::ArithTraits;
365 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
366 "AXPY: X and Y must have the same rank.");
367 const IndexType numRows = static_cast<IndexType> (Y.extent (0));
368 const IndexType numCols = static_cast<IndexType> (Y.extent (1));
369
370 if (alpha != ArithTraits<CoefficientType>::zero ()) {
371 for (IndexType j = 0; j < numCols; ++j)
372 for (IndexType i = 0; i < numRows; ++i)
373 Y(i,j) += alpha * X(i,j);
374 }
375 }
376};
377
378template<class CoefficientType,
379 class ViewType1,
380 class ViewType2,
381 class IndexType,
382 const int rank>
383struct AXPY<CoefficientType, ViewType1, ViewType2, IndexType, true, rank> {
385 static KOKKOS_INLINE_FUNCTION void
386 run (const CoefficientType& alpha,
387 const ViewType1& x,
388 const ViewType2& y)
389 {
390 using Kokkos::Details::ArithTraits;
391 static_assert (static_cast<int> (ViewType1::rank) == static_cast<int> (ViewType2::rank),
392 "AXPY: x and y must have the same rank.");
393
394 if (alpha != ArithTraits<CoefficientType>::zero ()) {
395 using x_value_type = typename std::decay<decltype (*x.data()) >::type;
396 using y_value_type = typename std::decay<decltype (*y.data()) >::type;
397 const IndexType span = static_cast<IndexType> (y.span());
398 const x_value_type *__restrict__ x_ptr(x.data());
399 y_value_type *__restrict__ y_ptr(y.data());
400#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
401#pragma unroll
402#endif
403 for (IndexType i = 0; i < span; ++i)
404 y_ptr[i] += alpha * x_ptr[i];
405 }
406 }
407};
408
413template<class ViewType1,
414 class ViewType2,
415 class IndexType = int,
416 const bool is_contiguous = false,
417 const int rank = ViewType1::rank>
418struct COPY {
419 static KOKKOS_INLINE_FUNCTION void
420 run (const ViewType1& x, const ViewType2& y);
421};
422
425template<class ViewType1,
426 class ViewType2,
427 class IndexType>
428struct COPY<ViewType1, ViewType2, IndexType, false, 1> {
430 static KOKKOS_INLINE_FUNCTION void
431 run (const ViewType1& x, const ViewType2& y)
432 {
433 const IndexType numRows = static_cast<IndexType> (x.extent (0));
435 for (IndexType i = 0; i < numRows; ++i)
436 y(i) = x(i);
437 }
438};
439
442template<class ViewType1,
443 class ViewType2,
444 class IndexType>
445struct COPY<ViewType1, ViewType2, IndexType, false, 2> {
447 static KOKKOS_INLINE_FUNCTION void
448 run (const ViewType1& X, const ViewType2& Y)
449 {
450 const IndexType numRows = static_cast<IndexType> (Y.extent (0));
451 const IndexType numCols = static_cast<IndexType> (Y.extent (1));
453 for (IndexType j = 0; j < numCols; ++j)
454 for (IndexType i = 0; i < numRows; ++i)
455 Y(i,j) = X(i,j);
456 }
457};
458
459template<class ViewType1,
460 class ViewType2,
461 class IndexType,
462 const int rank>
463struct COPY<ViewType1, ViewType2, IndexType, true, rank> {
465 static KOKKOS_INLINE_FUNCTION void
466 run (const ViewType1& x, const ViewType2& y)
467 {
468 const IndexType span = static_cast<IndexType> (x.span());
469 using x_value_type = typename std::decay<decltype (*x.data()) >::type;
470 using y_value_type = typename std::decay<decltype (*y.data()) >::type;
471 const x_value_type *__restrict__ x_ptr(x.data());
472 y_value_type *__restrict__ y_ptr(y.data());
473
474#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
475#pragma unroll
476#endif
477 for (IndexType i = 0; i < span; ++i)
478 y_ptr[i] = x_ptr[i];
479 }
480};
481
482template<class VecType1,
483 class BlkType,
484 class VecType2,
485 class CoeffType,
486 class IndexType = int,
487 bool is_contiguous = false,
488 class BlkLayoutType = typename BlkType::array_layout>
489struct GEMV {
490 static KOKKOS_INLINE_FUNCTION void
491 run (const CoeffType& alpha,
492 const BlkType& A,
493 const VecType1& x,
494 const VecType2& y)
495 {
496 static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
497 static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
498 static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
499
500 const IndexType numRows = static_cast<IndexType> (A.extent (0));
501 const IndexType numCols = static_cast<IndexType> (A.extent (1));
502
504 for (IndexType i = 0; i < numRows; ++i)
505 for (IndexType j = 0; j < numCols; ++j)
506 y(i) += alpha * A(i,j) * x(j);
507 }
508};
509
510template<class VecType1,
511 class BlkType,
512 class VecType2,
513 class CoeffType,
514 class IndexType>
515struct GEMV<VecType1,BlkType,VecType2,CoeffType,IndexType,true,Kokkos::LayoutLeft> {
516 static KOKKOS_INLINE_FUNCTION void
517 run (const CoeffType& alpha,
518 const BlkType& A,
519 const VecType1& x,
520 const VecType2& y)
521 {
522 static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
523 static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
524 static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
525
526 using A_value_type = typename std::decay<decltype (A(0,0)) >::type;
527 using x_value_type = typename std::decay<decltype (x(0)) >::type;
528 using y_value_type = typename std::decay<decltype (y(0)) >::type;
529
530 const IndexType numRows = static_cast<IndexType> (A.extent (0));
531 const IndexType numCols = static_cast<IndexType> (A.extent (1));
532
533 const A_value_type *__restrict__ A_ptr(A.data()); const IndexType as1(A.stride(1));
534 const x_value_type *__restrict__ x_ptr(x.data());
535 y_value_type *__restrict__ y_ptr(y.data());
536
537#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
538#pragma unroll
539#endif
540 for (IndexType j=0;j<numCols;++j) {
541 const x_value_type x_at_j = alpha*x_ptr[j];
542 const A_value_type *__restrict__ A_at_j = A_ptr + j*as1;
543#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
544#pragma unroll
545#endif
546 for (IndexType i=0;i<numRows;++i)
547 y_ptr[i] += A_at_j[i] * x_at_j;
548 }
549 }
550};
551
552template<class VecType1,
553 class BlkType,
554 class VecType2,
555 class CoeffType,
556 class IndexType>
557struct GEMV<VecType1,BlkType,VecType2,CoeffType,IndexType,true,Kokkos::LayoutRight> {
558 static KOKKOS_INLINE_FUNCTION void
559 run (const CoeffType& alpha,
560 const BlkType& A,
561 const VecType1& x,
562 const VecType2& y)
563 {
564 static_assert (VecType1::rank == 1, "GEMV: VecType1 must have rank 1.");
565 static_assert (BlkType::rank == 2, "GEMV: BlkType must have rank 2.");
566 static_assert (VecType2::rank == 1, "GEMV: VecType2 must have rank 1.");
567
568 using A_value_type = typename std::decay<decltype (A(0,0)) >::type;
569 using x_value_type = typename std::decay<decltype (x(0)) >::type;
570 using y_value_type = typename std::decay<decltype (y(0)) >::type;
571
572 const IndexType numRows = static_cast<IndexType> (A.extent (0));
573 const IndexType numCols = static_cast<IndexType> (A.extent (1));
574
575 const A_value_type *__restrict__ A_ptr(A.data()); const IndexType as0(A.stride(0));
576 const x_value_type *__restrict__ x_ptr(x.data());
577 y_value_type *__restrict__ y_ptr(y.data());
578
579#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
580#pragma unroll
581#endif
582 for (IndexType i=0;i<numRows;++i) {
583 y_value_type y_at_i(0);
584 const auto A_at_i = A_ptr + i*as0;
585#if defined(KOKKOS_ENABLE_PRAGMA_UNROLL)
586#pragma unroll
587#endif
588 for (IndexType j=0;j<numCols;++j)
589 y_at_i += A_at_i[j] * x_ptr[j];
590 y_ptr[i] += alpha*y_at_i;
591 }
592 }
593};
594
595} // namespace Impl
596
599template<class ViewType,
600 class CoefficientType,
601 class IndexType = int,
602 const int rank = ViewType::rank>
603KOKKOS_INLINE_FUNCTION void
604SCAL (const CoefficientType& alpha, const ViewType& x)
605{
606 using LayoutType = typename ViewType::array_layout;
607 constexpr bool is_contiguous = (std::is_same<LayoutType,Kokkos::LayoutLeft>::value ||
608 std::is_same<LayoutType,Kokkos::LayoutRight>::value);
610}
611
613template<class ViewType,
614 class InputType,
615 class IndexType = int,
616 const int rank = ViewType::rank>
617KOKKOS_INLINE_FUNCTION void
618FILL (const ViewType& x, const InputType& val)
619{
620 using LayoutType = typename ViewType::array_layout;
621 constexpr bool is_contiguous = (std::is_same<LayoutType,Kokkos::LayoutLeft>::value ||
622 std::is_same<LayoutType,Kokkos::LayoutRight>::value);
624}
625
631template<class CoefficientType,
632 class ViewType1,
633 class ViewType2,
634 class IndexType = int,
635 const int rank = ViewType1::rank>
636KOKKOS_INLINE_FUNCTION void
637AXPY (const CoefficientType& alpha,
638 const ViewType1& x,
639 const ViewType2& y)
640{
641 static_assert (static_cast<int> (ViewType1::rank) ==
642 static_cast<int> (ViewType2::rank),
643 "AXPY: x and y must have the same rank.");
644 using LayoutType1 = typename ViewType1::array_layout;
645 using LayoutType2 = typename ViewType2::array_layout;
646 constexpr bool is_layout_same = std::is_same<LayoutType1,LayoutType2>::value;
647 constexpr bool is_x_contiguous = (std::is_same<LayoutType1,Kokkos::LayoutLeft>::value ||
648 std::is_same<LayoutType1,Kokkos::LayoutRight>::value);
649 constexpr bool is_contiguous = is_layout_same && is_x_contiguous;
651}
652
661template<class ViewType1,
662 class ViewType2,
663 class IndexType = int,
664 const int rank = ViewType1::rank>
665KOKKOS_INLINE_FUNCTION void
666COPY (const ViewType1& x, const ViewType2& y)
667{
668 static_assert (static_cast<int> (ViewType1::rank) ==
669 static_cast<int> (ViewType2::rank),
670 "COPY: x and y must have the same rank.");
671 using LayoutType1 = typename ViewType1::array_layout;
672 using LayoutType2 = typename ViewType2::array_layout;
673 constexpr bool is_layout_same = std::is_same<LayoutType1,LayoutType2>::value;
674 constexpr bool is_x_contiguous = (std::is_same<LayoutType1,Kokkos::LayoutLeft>::value ||
675 std::is_same<LayoutType1,Kokkos::LayoutRight>::value);
676 constexpr bool is_contiguous = is_layout_same && is_x_contiguous;
678}
679
691template<class VecType1,
692 class BlkType,
693 class VecType2,
694 class CoeffType,
695 class IndexType = int>
696KOKKOS_INLINE_FUNCTION void
697GEMV (const CoeffType& alpha,
698 const BlkType& A,
699 const VecType1& x,
700 const VecType2& y)
701{
702 constexpr bool is_A_contiguous = (std::is_same<typename BlkType::array_layout, Kokkos::LayoutLeft>::value ||
703 std::is_same<typename BlkType::array_layout, Kokkos::LayoutRight>::value);
704 constexpr bool is_x_contiguous = (std::is_same<typename VecType1::array_layout, Kokkos::LayoutLeft>::value ||
705 std::is_same<typename VecType1::array_layout, Kokkos::LayoutRight>::value);
706 constexpr bool is_y_contiguous = (std::is_same<typename VecType2::array_layout, Kokkos::LayoutLeft>::value ||
707 std::is_same<typename VecType2::array_layout, Kokkos::LayoutRight>::value);
708 constexpr bool is_contiguous = is_A_contiguous && is_x_contiguous && is_y_contiguous;
709 Impl::GEMV<VecType1, BlkType, VecType2, CoeffType, IndexType, is_contiguous>::run (alpha, A, x, y);
710}
711
719template<class ViewType1,
720 class ViewType2,
721 class ViewType3,
722 class CoefficientType,
723 class IndexType = int>
724KOKKOS_INLINE_FUNCTION void
725GEMM (const char transA[],
726 const char transB[],
727 const CoefficientType& alpha,
728 const ViewType1& A,
729 const ViewType2& B,
730 const CoefficientType& beta,
731 const ViewType3& C)
732{
733 // Assert that A, B, and C are in fact matrices
734 static_assert (ViewType1::rank == 2, "GEMM: A must have rank 2 (be a matrix).");
735 static_assert (ViewType2::rank == 2, "GEMM: B must have rank 2 (be a matrix).");
736 static_assert (ViewType3::rank == 2, "GEMM: C must have rank 2 (be a matrix).");
737
738 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
739 typedef Kokkos::Details::ArithTraits<Scalar> STS;
740 const Scalar ZERO = STS::zero();
741 const Scalar ONE = STS::one();
742
743 // Get the dimensions
744 IndexType m, n, k;
745 if(transA[0] == 'N' || transA[0] == 'n') {
746 m = static_cast<IndexType> (A.extent (0));
747 n = static_cast<IndexType> (A.extent (1));
748 }
749 else {
750 m = static_cast<IndexType> (A.extent (1));
751 n = static_cast<IndexType> (A.extent (0));
752 }
753 k = static_cast<IndexType> (C.extent (1));
754
755 // quick return if possible
756 if(alpha == ZERO && beta == ONE)
757 return;
758
759 // And if alpha equals zero...
760 if(alpha == ZERO) {
761 if(beta == ZERO) {
762 for(IndexType i=0; i<m; i++) {
763 for(IndexType j=0; j<k; j++) {
764 C(i,j) = ZERO;
765 }
766 }
767 }
768 else {
769 for(IndexType i=0; i<m; i++) {
770 for(IndexType j=0; j<k; j++) {
771 C(i,j) = beta*C(i,j);
772 }
773 }
774 }
775 }
776
777 // Start the operations
778 if(transB[0] == 'n' || transB[0] == 'N') {
779 if(transA[0] == 'n' || transA[0] == 'N') {
780 // Form C = alpha*A*B + beta*C
781 for(IndexType j=0; j<n; j++) {
782 if(beta == ZERO) {
783 for(IndexType i=0; i<m; i++) {
784 C(i,j) = ZERO;
785 }
786 }
787 else if(beta != ONE) {
788 for(IndexType i=0; i<m; i++) {
789 C(i,j) = beta*C(i,j);
790 }
791 }
792 for(IndexType l=0; l<k; l++) {
793 Scalar temp = alpha*B(l,j);
794 for(IndexType i=0; i<m; i++) {
795 C(i,j) = C(i,j) + temp*A(i,l);
796 }
797 }
798 }
799 }
800 else {
801 // Form C = alpha*A**T*B + beta*C
802 for(IndexType j=0; j<n; j++) {
803 for(IndexType i=0; i<m; i++) {
804 Scalar temp = ZERO;
805 for(IndexType l=0; l<k; l++) {
806 temp = temp + A(l,i)*B(l,j);
807 }
808 if(beta == ZERO) {
809 C(i,j) = alpha*temp;
810 }
811 else {
812 C(i,j) = alpha*temp + beta*C(i,j);
813 }
814 }
815 }
816 }
817 }
818 else {
819 if(transA[0] == 'n' || transA[0] == 'N') {
820 // Form C = alpha*A*B**T + beta*C
821 for(IndexType j=0; j<n; j++) {
822 if(beta == ZERO) {
823 for(IndexType i=0; i<m; i++) {
824 C(i,j) = ZERO;
825 }
826 }
827 else if(beta != ONE) {
828 for(IndexType i=0; i<m; i++) {
829 C(i,j) = beta*C(i,j);
830 }
831 }
832 for(IndexType l=0; l<k; l++) {
833 Scalar temp = alpha*B(j,l);
834 for(IndexType i=0; i<m; i++) {
835 C(i,j) = C(i,j) + temp*A(i,l);
836 }
837 }
838 }
839 }
840 else {
841 // Form C = alpha*A**T*B**T + beta*C
842 for(IndexType j=0; j<n; j++) {
843 for(IndexType i=0; i<m; i++) {
844 Scalar temp = ZERO;
845 for(IndexType l=0; l<k; l++) {
846 temp = temp + A(l,i)*B(j,l);
847 }
848 if(beta == ZERO) {
849 C(i,j) = alpha*temp;
850 }
851 else {
852 C(i,j) = alpha*temp + beta*C(i,j);
853 }
854 }
855 }
856 }
857 }
858}
859
861template<class LittleBlockType,
862 class LittleVectorType>
863KOKKOS_INLINE_FUNCTION void
864GETF2 (const LittleBlockType& A, const LittleVectorType& ipiv, int& info)
865{
866 // The type of an entry of ipiv is the index type.
867 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
868 static_assert (std::is_integral<IndexType>::value,
869 "GETF2: The type of each entry of ipiv must be an integer type.");
870 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
871 static_assert (! std::is_const<Scalar>::value,
872 "GETF2: A must not be a const View (or LittleBlock).");
873 static_assert (! std::is_const<std::remove_reference<decltype (ipiv(0))>>::value,
874 "GETF2: ipiv must not be a const View (or LittleBlock).");
875 static_assert (LittleBlockType::rank == 2, "GETF2: A must have rank 2 (be a matrix).");
876 typedef Kokkos::Details::ArithTraits<Scalar> STS;
877 const Scalar ZERO = STS::zero();
878
879 const IndexType numRows = static_cast<IndexType> (A.extent (0));
880 const IndexType numCols = static_cast<IndexType> (A.extent (1));
881 const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
882
883 // std::min is not a CUDA device function
884 const IndexType minPivDim = (numRows < numCols) ? numRows : numCols;
885 if (pivDim < minPivDim) {
886 info = -2;
887 return;
888 }
889
890 // Initialize info
891 info = 0;
892
893 for(IndexType j=0; j < pivDim; j++)
894 {
895 // Find pivot and test for singularity
896 IndexType jp = j;
897 for(IndexType i=j+1; i<numRows; i++)
898 {
899 if(STS::abs(A(i,j)) > STS::abs(A(jp,j))) {
900 jp = i;
901 }
902 }
903 ipiv(j) = jp+1;
904
905 if(A(jp,j) != ZERO)
906 {
907 // Apply the interchange to columns 1:N
908 if(jp != j)
909 {
910 for(IndexType i=0; i < numCols; i++)
911 {
912 Scalar temp = A(jp,i);
913 A(jp,i) = A(j,i);
914 A(j,i) = temp;
915 }
916 }
917
918 // Compute elements J+1:M of J-th column
919 for(IndexType i=j+1; i<numRows; i++) {
920 A(i,j) = A(i,j) / A(j,j);
921 }
922 }
923 else if(info == 0) {
924 info = j;
925 }
926
927 // Update trailing submatrix
928 for(IndexType r=j+1; r < numRows; r++)
929 {
930 for(IndexType c=j+1; c < numCols; c++) {
931 A(r,c) = A(r,c) - A(r,j) * A(j,c);
932 }
933 }
934 }
935}
936
937namespace Impl {
938
942template<class LittleBlockType,
943 class LittleIntVectorType,
944 class LittleScalarVectorType,
945 const int rank = LittleScalarVectorType::rank>
946struct GETRS {
947 static KOKKOS_INLINE_FUNCTION void
948 run (const char mode[],
949 const LittleBlockType& A,
950 const LittleIntVectorType& ipiv,
951 const LittleScalarVectorType& B,
952 int& info);
953};
954
956template<class LittleBlockType,
957 class LittleIntVectorType,
958 class LittleScalarVectorType>
959struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 1> {
960 static KOKKOS_INLINE_FUNCTION void
961 run (const char mode[],
962 const LittleBlockType& A,
963 const LittleIntVectorType& ipiv,
964 const LittleScalarVectorType& B,
965 int& info)
966 {
967 // The type of an entry of ipiv is the index type.
968 typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
969 // IndexType must be signed, because this code does a countdown loop
970 // to zero. Unsigned integers are always >= 0, even on underflow.
971 static_assert (std::is_integral<IndexType>::value &&
972 std::is_signed<IndexType>::value,
973 "GETRS: The type of each entry of ipiv must be a signed integer.");
974 typedef typename std::decay<decltype (A(0,0))>::type Scalar;
975 static_assert (! std::is_const<std::remove_reference<decltype (B(0))>>::value,
976 "GETRS: B must not be a const View (or LittleBlock).");
977 static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
978 static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
979 static_assert (LittleScalarVectorType::rank == 1, "GETRS: For this specialization, B must have rank 1.");
980
981 typedef Kokkos::Details::ArithTraits<Scalar> STS;
982 const Scalar ZERO = STS::zero();
983 const IndexType numRows = static_cast<IndexType> (A.extent (0));
984 const IndexType numCols = static_cast<IndexType> (A.extent (1));
985 const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
986
987 info = 0;
988
989 // Ensure that the matrix is square
990 if (numRows != numCols) {
991 info = -2;
992 return;
993 }
994
995 // Ensure that the pivot array is sufficiently large
996 if (pivDim < numRows) {
997 info = -3;
998 return;
999 }
1000
1001 // No transpose case
1002 if(mode[0] == 'n' || mode[0] == 'N') {
1003 // Apply row interchanges to the RHS
1004 for(IndexType i=0; i<numRows; i++) {
1005 if(ipiv(i) != i+1) {
1006 Scalar temp = B(i);
1007 B(i) = B(ipiv(i)-1);
1008 B(ipiv(i)-1) = temp;
1009 }
1010 }
1011
1012 // Solve Lx=b, overwriting b with x
1013 for(IndexType r=1; r < numRows; r++) {
1014 for(IndexType c=0; c < r; c++) {
1015 B(r) = B(r) - A(r,c)*B(c);
1016 }
1017 }
1018
1019 // Solve Ux=b, overwriting b with x
1020 for(IndexType r=numRows-1; r >= 0; r--) {
1021 // Check whether U is singular
1022 if(A(r,r) == ZERO) {
1023 info = r+1;
1024 return;
1025 }
1026
1027 for(IndexType c=r+1; c < numCols; c++) {
1028 B(r) = B(r) - A(r,c)*B(c);
1029 }
1030 B(r) = B(r) / A(r,r);
1031 }
1032 }
1033 // Transpose case
1034 else if(mode[0] == 't' || mode[0] == 'T') {
1035 info = -1; // NOT YET IMPLEMENTED
1036 return;
1037 }
1038 // Conjugate transpose case
1039 else if (mode[0] == 'c' || mode[0] == 'C') {
1040 info = -1; // NOT YET IMPLEMENTED
1041 return;
1042 }
1043 else { // invalid mode
1044 info = -1;
1045 return;
1046 }
1047 }
1048};
1049
1050
1052template<class LittleBlockType,
1053 class LittleIntVectorType,
1054 class LittleScalarVectorType>
1055struct GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType, 2> {
1056 static KOKKOS_INLINE_FUNCTION void
1057 run (const char mode[],
1058 const LittleBlockType& A,
1059 const LittleIntVectorType& ipiv,
1060 const LittleScalarVectorType& B,
1061 int& info)
1062 {
1063 // The type of an entry of ipiv is the index type.
1064 typedef typename std::decay<decltype (ipiv(0)) >::type IndexType;
1065 static_assert (std::is_integral<IndexType>::value,
1066 "GETRS: The type of each entry of ipiv must be an integer type.");
1067 static_assert (! std::is_const<std::remove_reference<decltype (B(0)) > >::value,
1068 "GETRS: B must not be a const View (or LittleBlock).");
1069 static_assert (LittleBlockType::rank == 2, "GETRS: A must have rank 2 (be a matrix).");
1070 static_assert (LittleIntVectorType::rank == 1, "GETRS: ipiv must have rank 1.");
1071 static_assert (LittleScalarVectorType::rank == 2, "GETRS: For this specialization, B must have rank 2.");
1072
1073 // The current implementation iterates over one right-hand side at
1074 // a time. It might be faster to do this differently, but this
1075 // should work for now.
1076 const IndexType numRhs = B.extent (1);
1077 info = 0;
1078
1079 for (IndexType rhs = 0; rhs < numRhs; ++rhs) {
1080 auto B_cur = Kokkos::subview (B, Kokkos::ALL (), rhs);
1081 GETRS<LittleBlockType, LittleIntVectorType, decltype (B_cur), 1>::run (mode, A, ipiv, B_cur, info);
1082 if (info != 0) {
1083 return;
1084 }
1085 }
1086 }
1087};
1088
1089} // namespace Impl
1090
1094template<class LittleBlockType,
1095 class LittleIntVectorType,
1096 class LittleScalarVectorType>
1097KOKKOS_INLINE_FUNCTION void
1098GETRS (const char mode[],
1099 const LittleBlockType& A,
1100 const LittleIntVectorType& ipiv,
1101 const LittleScalarVectorType& B,
1102 int& info)
1103{
1104 Impl::GETRS<LittleBlockType, LittleIntVectorType, LittleScalarVectorType,
1105 LittleScalarVectorType::rank>::run (mode, A, ipiv, B, info);
1106}
1107
1108
1123template<class LittleBlockType,
1124 class LittleIntVectorType,
1125 class LittleScalarVectorType>
1126KOKKOS_INLINE_FUNCTION void
1127GETRI (const LittleBlockType& A,
1128 const LittleIntVectorType& ipiv,
1129 const LittleScalarVectorType& work,
1130 int& info)
1131{
1132 // The type of an entry of ipiv is the index type.
1133 typedef typename std::decay<decltype (ipiv(0))>::type IndexType;
1134 // IndexType must be signed, because this code does a countdown loop
1135 // to zero. Unsigned integers are always >= 0, even on underflow.
1136 static_assert (std::is_integral<IndexType>::value &&
1137 std::is_signed<IndexType>::value,
1138 "GETRI: The type of each entry of ipiv must be a signed integer.");
1139 typedef typename std::remove_reference<decltype (A(0,0))>::type Scalar;
1140 static_assert (! std::is_const<std::remove_reference<decltype (A(0,0))>>::value,
1141 "GETRI: A must not be a const View (or LittleBlock).");
1142 static_assert (! std::is_const<std::remove_reference<decltype (work(0))>>::value,
1143 "GETRI: work must not be a const View (or LittleBlock).");
1144 static_assert (LittleBlockType::rank == 2, "GETRI: A must have rank 2 (be a matrix).");
1145 typedef Kokkos::Details::ArithTraits<Scalar> STS;
1146 const Scalar ZERO = STS::zero();
1147 const Scalar ONE = STS::one();
1148
1149 const IndexType numRows = static_cast<IndexType> (A.extent (0));
1150 const IndexType numCols = static_cast<IndexType> (A.extent (1));
1151 const IndexType pivDim = static_cast<IndexType> (ipiv.extent (0));
1152 const IndexType workDim = static_cast<IndexType> (work.extent (0));
1153
1154 info = 0;
1155
1156 // Ensure that the matrix is square
1157 if (numRows != numCols) {
1158 info = -1;
1159 return;
1160 }
1161
1162 // Ensure that the pivot array is sufficiently large
1163 if (pivDim < numRows) {
1164 info = -2;
1165 return;
1166 }
1167
1168 // Ensure that the work array is sufficiently large
1169 if (workDim < numRows) {
1170 info = -3;
1171 return;
1172 }
1173
1174 // Form Uinv in place
1175 for(IndexType j=0; j < numRows; j++) {
1176 if(A(j,j) == ZERO) {
1177 info = j+1;
1178 return;
1179 }
1180
1181 A(j,j) = ONE / A(j,j);
1182
1183 // Compute elements 1:j-1 of j-th column
1184 for(IndexType r=0; r < j; r++) {
1185 A(r,j) = A(r,r)*A(r,j);
1186 for(IndexType c=r+1; c < j; c++) {
1187 A(r,j) = A(r,j) + A(r,c)*A(c,j);
1188 }
1189 }
1190 for(IndexType r=0; r < j; r++) {
1191 A(r,j) = -A(j,j)*A(r,j);
1192 }
1193 }
1194
1195 // Compute Ainv by solving A\L = Uinv
1196 for(IndexType j = numCols-2; j >= 0; j--) {
1197 // Copy lower triangular data to work array and replace with 0
1198 for(IndexType r=j+1; r < numRows; r++) {
1199 work(r) = A(r,j);
1200 A(r,j) = 0;
1201 }
1202
1203 for(IndexType r=0; r < numRows; r++) {
1204 for(IndexType i=j+1; i < numRows; i++) {
1205 A(r,j) = A(r,j) - work(i)*A(r,i);
1206 }
1207 }
1208 }
1209
1210 // Apply column interchanges
1211 for(IndexType j=numRows-1; j >= 0; j--) {
1212 IndexType jp = ipiv(j)-1;
1213 if(j != jp) {
1214 for(IndexType r=0; r < numRows; r++) {
1215 Scalar temp = A(r,j);
1216 A(r,j) = A(r,jp);
1217 A(r,jp) = temp;
1218 }
1219 }
1220 }
1221}
1222
1223
1224// mfh 08 Nov 2015: I haven't tested this overload yet. It also needs
1225// an implementation for trans != 'N' (the transpose and conjugate
1226// transpose cases).
1227#if 0
1228template<class LittleBlockType,
1229 class LittleVectorTypeX,
1230 class LittleVectorTypeY,
1231 class CoefficientType,
1232 class IndexType = int>
1233KOKKOS_INLINE_FUNCTION void
1234GEMV (const char trans,
1235 const CoefficientType& alpha,
1236 const LittleBlockType& A,
1237 const LittleVectorTypeX& x,
1238 const CoefficientType& beta,
1239 const LittleVectorTypeY& y)
1240{
1241 // y(0) returns a reference to the 0-th entry of y. Remove that
1242 // reference to get the type of each entry of y. It's OK if y has
1243 // zero entries -- this doesn't actually do y(i), it just returns
1244 // the type of that expression.
1245 typedef typename std::remove_reference<decltype (y(0)) >::type y_value_type;
1246 const IndexType numRows = static_cast<IndexType> (A.extent (0));
1247 const IndexType numCols = static_cast<IndexType> (A.extent (1));
1248
1249 if (beta == 0.0) {
1250 if (alpha == 0.0) {
1251 for (IndexType i = 0; i < numRows; ++i) {
1252 y(i) = 0.0;
1253 }
1254 }
1255 else {
1256 for (IndexType i = 0; i < numRows; ++i) {
1257 y_value_type y_i = 0.0;
1258 for (IndexType j = 0; j < numCols; ++j) {
1259 y_i += A(i,j) * x(j);
1260 }
1261 y(i) = y_i;
1262 }
1263 }
1264 }
1265 else { // beta != 0
1266 if (alpha == 0.0) {
1267 if (beta == 0.0) {
1268 for (IndexType i = 0; i < numRows; ++i) {
1269 y(i) = 0.0;
1270 }
1271 }
1272 else {
1273 for (IndexType i = 0; i < numRows; ++i) {
1274 y(i) *= beta;
1275 }
1276 }
1277 }
1278 else {
1279 for (IndexType i = 0; i < numRows; ++i) {
1280 y_value_type y_i = beta * y(i);
1281 for (IndexType j = 0; j < numCols; ++j) {
1282 y_i += alpha * A(i,j) * x(j);
1283 }
1284 y(i) = y_i;
1285 }
1286 }
1287 }
1288}
1289
1290#endif // 0
1291
1292} // namespace Tpetra
1293
1294#endif // TPETRA_BLOCKVIEW_HPP
KOKKOS_INLINE_FUNCTION void absMax(const ViewType2 &Y, const ViewType1 &X)
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void GETF2(const LittleBlockType &A, const LittleVectorType &ipiv, int &info)
Computes A = P*L*U.
KOKKOS_INLINE_FUNCTION void GETRI(const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &work, int &info)
Compute inverse of A, using result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
KOKKOS_INLINE_FUNCTION void GETRS(const char mode[], const LittleBlockType &A, const LittleIntVectorType &ipiv, const LittleScalarVectorType &B, int &info)
Solve the linear system(s) AX=B, using the result of GETRF or GETF2.
KOKKOS_INLINE_FUNCTION void GEMM(const char transA[], const char transB[], const CoefficientType &alpha, const ViewType1 &A, const ViewType2 &B, const CoefficientType &beta, const ViewType3 &C)
Small dense matrix-matrix multiply: C := alpha*A*B + beta*C
@ ZERO
Replace old values with zero.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha*x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType1 &X, const ViewType2 &Y)
Y := Y + alpha*X (rank-2 X and Y, i.e., matrices)
Implementation of Tpetra::AXPY function.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i) := max(abs((*this)(i)), abs(X(i))) for all i.
static KOKKOS_INLINE_FUNCTION void run(const ViewType2 &Y, const ViewType1 &X)
(*this)(i,j) := max(abs((*this)(i,j)), abs(X(i,j))) for all (i,j).
Implementation of Tpetra's ABSMAX CombineMode for the small dense blocks in BlockCrsMatrix,...
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &x, const ViewType2 &y)
y := x (rank-1 x and y, i.e., vectors)
static KOKKOS_INLINE_FUNCTION void run(const ViewType1 &X, const ViewType2 &Y)
Y := X (rank-2 X and Y, i.e., matrices)
Implementation of Tpetra::COPY function.
Implementation of Tpetra::FILL function.
Computes the solution to Ax=b.
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &x)
x := alpha*x (rank-1 x, i.e., a vector)
static KOKKOS_INLINE_FUNCTION void run(const CoefficientType &alpha, const ViewType &A)
A := alpha*A (rank-2 A, i.e., a matrix)
Implementation of Tpetra::SCAL function.