44#ifndef TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_UQ_PCE_HPP
45#define TPETRA_KOKKOS_REFACTOR_DETAILS_MULTI_VECTOR_DIST_OBJECT_KERNELS_UQ_PCE_HPP
51#include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
56namespace KokkosRefactor {
62 template <
typename DstView,
typename SrcView,
typename IdxView>
63 struct PackArraySingleColumn<
64 DstView, SrcView, IdxView,
65 typename
std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
66 Kokkos::is_view_uq_pce<SrcView>::value >::type >
69 typedef typename execution_space::size_type
size_type;
71 static const unsigned BlockSize = 32;
82 dst(dst_), src(src_), idx(idx_), col(col_) {}
84 KOKKOS_INLINE_FUNCTION
86 dst(k) = src(idx(k), col);
89 KOKKOS_INLINE_FUNCTION
92 dst(k).fastAccessCoeff(i) = src(idx(k), col).fastAccessCoeff(i);
95 static void pack(
const DstView& dst,
101 PackArraySingleColumn(dst,src,idx,col) );
105 template <
typename DstView,
typename SrcView,
typename IdxView>
106 struct PackArrayMultiColumn<
107 DstView, SrcView, IdxView,
108 typename
std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
109 Kokkos::is_view_uq_pce<SrcView>::value >::type >
114 static const unsigned BlockSize = 32;
125 dst(dst_), src(src_), idx(idx_), numCols(numCols_) {}
127 KOKKOS_INLINE_FUNCTION
129 const typename IdxView::value_type localRow = idx(k);
130 const size_t offset = k*numCols;
131 for (
size_t j = 0;
j < numCols; ++
j)
132 dst(offset +
j) = src(localRow,
j);
135 KOKKOS_INLINE_FUNCTION
137 const typename IdxView::value_type localRow = idx(k);
138 const size_t offset = k*numCols;
139 for (
size_t j = 0;
j < numCols; ++
j)
141 dst(offset +
j).fastAccessCoeff(i) =
142 src(localRow,
j).fastAccessCoeff(i);
145 static void pack(
const DstView& dst,
149 Kokkos::parallel_for(
151 PackArrayMultiColumn(dst,src,idx,numCols) );
155 template <
typename DstView,
typename SrcView,
typename IdxView,
157 struct PackArrayMultiColumnVariableStride<
158 DstView, SrcView, IdxView, ColView,
159 typename
std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
160 Kokkos::is_view_uq_pce<SrcView>::value >::type >
165 static const unsigned BlockSize = 32;
178 dst(dst_), src(src_), idx(idx_), col(col_), numCols(numCols_) {}
180 KOKKOS_INLINE_FUNCTION
182 const typename IdxView::value_type localRow = idx(k);
183 const size_t offset = k*numCols;
184 for (
size_t j = 0;
j < numCols; ++
j)
185 dst(offset +
j) = src(localRow, col(
j));
188 KOKKOS_INLINE_FUNCTION
190 const typename IdxView::value_type localRow = idx(k);
191 const size_t offset = k*numCols;
192 for (
size_t j = 0;
j < numCols; ++
j)
194 dst(offset +
j).fastAccessCoeff(i) =
195 src(localRow, col(
j)).fastAccessCoeff(i);
198 static void pack(
const DstView& dst,
203 Kokkos::parallel_for(
205 PackArrayMultiColumnVariableStride(dst,src,idx,col,numCols) );
209 template <
typename ExecutionSpace,
214 struct UnpackArrayMultiColumn<
215 ExecutionSpace, DstView, SrcView, IdxView, Op,
216 typename
std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
217 Kokkos::is_view_uq_pce<SrcView>::value >::type >
222 static const unsigned BlockSize = 32;
235 const size_t numCols_) :
243 template <
class TagType>
244 KOKKOS_INLINE_FUNCTION
void
247 const typename IdxView::value_type localRow = idx(k);
248 const size_t offset = k*numCols;
249 for (
size_t j = 0;
j < numCols; ++
j) {
250 op (tag, dst(localRow,
j), src(offset+
j));
254 template <
class TagType>
255 KOKKOS_INLINE_FUNCTION
void
258 const typename IdxView::value_type localRow = idx(k);
259 const size_t offset = k*numCols;
260 for (
size_t j = 0;
j < numCols; ++
j) {
274 const size_t numCols,
275 const bool use_atomic_updates)
277 if (use_atomic_updates) {
279 (
"Tpetra::MultiVector (Sacado::UQ::PCE) unpack (constant stride)",
281 idx.size (), BlockSize),
282 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
286 (
"Tpetra::MultiVector (Sacado::UQ::PCE) unpack (constant stride)",
288 idx.size (), BlockSize),
289 UnpackArrayMultiColumn (execSpace, dst, src, idx, op, numCols));
294 template <
typename ExecutionSpace,
300 struct UnpackArrayMultiColumnVariableStride<
301 ExecutionSpace, DstView, SrcView, IdxView, ColView, Op,
302 typename
std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
303 Kokkos::is_view_uq_pce<SrcView>::value >::type>
308 static const unsigned BlockSize = 32;
324 dst(dst_), src(src_), idx(idx_), col(col_), op(op_), numCols(numCols_) {}
326 template <
class TagType>
327 KOKKOS_INLINE_FUNCTION
void
329 const typename IdxView::value_type localRow = idx(k);
330 const size_t offset = k*numCols;
331 for (
size_t j = 0;
j < numCols; ++
j)
332 op( tag, dst(localRow,col(
j)), src(offset+
j) );
335 template <
class TagType>
336 KOKKOS_INLINE_FUNCTION
void
338 const typename IdxView::value_type localRow = idx(k);
339 const size_t offset = k*numCols;
340 for (
size_t j = 0;
j < numCols; ++
j)
342 op( tag, dst(localRow,col(
j)).fastAccessCoeff(i),
343 src(offset+
j).fastAccessCoeff(i) );
353 const size_t numCols,
354 const bool use_atomic_updates)
356 if (use_atomic_updates) {
358 (
"Tpetra::MultiVector unpack (Sacado::UQ::PCE) (nonconstant stride)",
360 idx.size (), BlockSize),
361 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
362 idx, col, op, numCols));
366 (
"Tpetra::MultiVector unpack (Sacado::UQ::PCE) (nonconstant stride)",
368 idx.size (), BlockSize),
369 UnpackArrayMultiColumnVariableStride (execSpace, dst, src,
370 idx, col, op, numCols));
375 template <
typename DstView,
typename SrcView,
376 typename DstIdxView,
typename SrcIdxView,
typename Op>
377 struct PermuteArrayMultiColumn<
378 DstView, SrcView, DstIdxView, SrcIdxView, Op,
379 typename
std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
380 Kokkos::is_view_uq_pce<SrcView>::value >::type>
385 static const unsigned BlockSize = 32;
396 const DstIdxView& dst_idx_,
397 const SrcIdxView& src_idx_,
398 size_t numCols_,
const Op& op_) :
399 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
400 numCols(numCols_), op(op_) {}
402 KOKKOS_INLINE_FUNCTION
404 const typename DstIdxView::value_type toRow = dst_idx(k);
405 const typename SrcIdxView::value_type fromRow = src_idx(k);
407 for (
size_t j = 0;
j < numCols; ++
j)
408 op(tag, dst(toRow,
j),src(fromRow,
j));
411 KOKKOS_INLINE_FUNCTION
413 const typename DstIdxView::value_type toRow = dst_idx(k);
414 const typename SrcIdxView::value_type fromRow = src_idx(k);
416 for (
size_t j = 0;
j < numCols; ++
j)
418 op(tag, dst(toRow,
j).fastAccessCoeff(i),
419 src(fromRow,
j).fastAccessCoeff(i));
424 const DstIdxView& dst_idx,
425 const SrcIdxView& src_idx,
428 const size_type n = std::min( dst_idx.size(), src_idx.size() );
429 Kokkos::parallel_for(
431 PermuteArrayMultiColumn(dst,src,dst_idx,src_idx,numCols,op) );
435 template <
typename DstView,
typename SrcView,
436 typename DstIdxView,
typename SrcIdxView,
437 typename DstColView,
typename SrcColView,
typename Op>
438 struct PermuteArrayMultiColumnVariableStride<
439 DstView, SrcView, DstIdxView, SrcIdxView, DstColView, SrcColView, Op,
440 typename
std::enable_if< Kokkos::is_view_uq_pce<DstView>::value &&
441 Kokkos::is_view_uq_pce<SrcView>::value >::type >
446 static const unsigned BlockSize = 32;
459 const DstIdxView& dst_idx_,
460 const SrcIdxView& src_idx_,
461 const DstColView& dst_col_,
462 const SrcColView& src_col_,
465 dst(dst_), src(src_), dst_idx(dst_idx_), src_idx(src_idx_),
466 dst_col(dst_col_), src_col(src_col_),
467 numCols(numCols_), op(op_) {}
469 KOKKOS_INLINE_FUNCTION
471 const typename DstIdxView::value_type toRow = dst_idx(k);
472 const typename SrcIdxView::value_type fromRow = src_idx(k);
474 for (
size_t j = 0;
j < numCols; ++
j)
475 op(tag, dst(toRow, dst_col(
j)),src(fromRow, src_col(
j)));
478 KOKKOS_INLINE_FUNCTION
480 const typename DstIdxView::value_type toRow = dst_idx(k);
481 const typename SrcIdxView::value_type fromRow = src_idx(k);
483 for (
size_t j = 0;
j < numCols; ++
j)
485 op(tag, dst(toRow, dst_col(
j)).fastAccessCoeff(i),
486 src(fromRow, src_col(
j)).fastAccessCoeff(i));
491 const DstIdxView& dst_idx,
492 const SrcIdxView& src_idx,
493 const DstColView& dst_col,
494 const SrcColView& src_col,
497 const size_type n = std::min( dst_idx.size(), src_idx.size() );
498 Kokkos::parallel_for(
500 PermuteArrayMultiColumnVariableStride(
501 dst,src,dst_idx,src_idx,dst_col,src_col,numCols,op) );
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c fastAccessCoeff(j) - expr2.val(j)
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< View< T, P... > >::value, unsigned >::type dimension_scalar(const View< T, P... > &view)
Team-based parallel work configuration for Sacado::MP::Vector.
PackArrayMultiColumnVariableStride(const DstView &dst_, const SrcView &src_, const IdxView &idx_, const ColView &col_, size_t numCols_)
KOKKOS_INLINE_FUNCTION void operator()(const size_type k) const
DstView::execution_space execution_space
KOKKOS_INLINE_FUNCTION void operator()(const size_type k, const size_type tidx) const
execution_space::size_type size_type
static void pack(const DstView &dst, const SrcView &src, const IdxView &idx, const ColView &col, size_t numCols)
PackArrayMultiColumn(const DstView &dst_, const SrcView &src_, const IdxView &idx_, size_t numCols_)
KOKKOS_INLINE_FUNCTION void operator()(const size_type k, const size_type tidx) const
static void pack(const DstView &dst, const SrcView &src, const IdxView &idx, size_t numCols)
KOKKOS_INLINE_FUNCTION void operator()(const size_type k) const
DstView::execution_space execution_space
execution_space::size_type size_type
execution_space::size_type size_type
DstView::execution_space execution_space
KOKKOS_INLINE_FUNCTION void operator()(const size_type k) const
static void pack(const DstView &dst, const SrcView &src, const IdxView &idx, size_t col)
KOKKOS_INLINE_FUNCTION void operator()(const size_type k, const size_type tidx) const
PackArraySingleColumn(const DstView &dst_, const SrcView &src_, const IdxView &idx_, size_t col_)
execution_space::size_type size_type
KOKKOS_INLINE_FUNCTION void operator()(const size_type k, const size_type tidx) const
KOKKOS_INLINE_FUNCTION void operator()(const size_type k) const
DstView::execution_space execution_space
static void permute(const DstView &dst, const SrcView &src, const DstIdxView &dst_idx, const SrcIdxView &src_idx, const DstColView &dst_col, const SrcColView &src_col, size_t numCols, const Op &op)
PermuteArrayMultiColumnVariableStride(const DstView &dst_, const SrcView &src_, const DstIdxView &dst_idx_, const SrcIdxView &src_idx_, const DstColView &dst_col_, const SrcColView &src_col_, size_t numCols_, const Op &op_)
execution_space::size_type size_type
PermuteArrayMultiColumn(const DstView &dst_, const SrcView &src_, const DstIdxView &dst_idx_, const SrcIdxView &src_idx_, size_t numCols_, const Op &op_)
DstView::execution_space execution_space
KOKKOS_INLINE_FUNCTION void operator()(const size_type k) const
KOKKOS_INLINE_FUNCTION void operator()(const size_type k, const size_type tidx) const
static void permute(const DstView &dst, const SrcView &src, const DstIdxView &dst_idx, const SrcIdxView &src_idx, size_t numCols, const Op &op)
KOKKOS_INLINE_FUNCTION void operator()(TagType tag, const size_type k) const
ExecutionSpace::execution_space execution_space
KOKKOS_INLINE_FUNCTION void operator()(TagType tag, const size_type k, const size_type tidx) const
execution_space::size_type size_type
static void unpack(const ExecutionSpace &execSpace, const DstView &dst, const SrcView &src, const IdxView &idx, const ColView &col, const Op &op, const size_t numCols, const bool use_atomic_updates)
UnpackArrayMultiColumnVariableStride(const ExecutionSpace &, const DstView &dst_, const SrcView &src_, const IdxView &idx_, const ColView &col_, const Op &op_, size_t numCols_)
ExecutionSpace::execution_space execution_space
execution_space::size_type size_type
static void unpack(const ExecutionSpace &execSpace, const DstView &dst, const SrcView &src, const IdxView &idx, const Op &op, const size_t numCols, const bool use_atomic_updates)
UnpackArrayMultiColumn(const ExecutionSpace &, const DstView &dst_, const SrcView &src_, const IdxView &idx_, const Op &op_, const size_t numCols_)