42#ifndef STOKHOS_MULTIPLY_HPP
43#define STOKHOS_MULTIPLY_HPP
49#include "Kokkos_Core.hpp"
60template <size_t N, bool OK = is_power_of_two<N>::value>
90template <
typename MatrixType,
91 typename InputVectorType,
92 typename OutputVectorType,
93 typename ColumnIndicesType = void,
// Function template over the matrix, input-vector and output-vector types.
// The only visible statement forwards (A, x, y) to the static member
// `multiply_type::apply`; `x` is taken by const reference and `y` by
// non-const reference.
// NOTE(review): the lines carrying the function name and the first parameter
// `A`, the definition of `multiply_type`, and the closing brace are absent
// from this listing (gaps at 101, 104, 106) -- restore them from the upstream
// header before compiling.
98template <
typename MatrixType,
99 typename InputVectorType,
100 typename OutputVectorType>
102 const InputVectorType& x,
103 OutputVectorType& y) {
105 multiply_type::apply( A, x, y );
/// Maps an arbitrary pack of types to `void`.
///
/// Building block for a pre-C++17 `void_t`: routing the pack through a class
/// template (rather than aliasing `void` directly) makes every type in `Ts`
/// participate in substitution, so an ill-formed `Ts` is a SFINAE failure
/// instead of being silently ignored.
///
/// @tparam Ts  Any number of types (possibly none); they are only named,
///             never instantiated or used.
template <class... Ts>
struct make_void {
  using type = void;
};
114using replace_me_with_void_t_in_cxx17 =
115 typename make_void<Ts...>::type;
117template<
class T,
class = replace_me_with_
void_t_in_cxx17<> >
118struct const_type_impl {
123struct const_type_impl<T,
124 replace_me_with_void_t_in_cxx17<typename T::const_type> > {
125 using type =
typename T::const_type;
129using const_type_t =
typename const_type_impl<T>::type;
// Function template over the matrix, input-vector and output-vector types.
// Unlike a plain forward, the input vector type handed to the dispatch class
// is first passed through `const_type_t`, i.e. `InputVectorType::const_type`
// is used when the input type declares one. The call itself still forwards
// (A, x, y) to the static `multiply_type::apply`.
// NOTE(review): the lines carrying the function name and first parameter `A`,
// the remaining parameters, the right-hand side of `multiply_type`, and the
// closing brace are absent from this listing (gaps at 136, 138-142, 145,
// 147) -- restore them from the upstream header before compiling.
133template <
typename MatrixType,
134 typename InputVectorType,
135 typename OutputVectorType>
137 const InputVectorType& x,
// Normalize the input vector type before selecting the implementation.
143 using input_vector_type = const_type_t<InputVectorType>;
144 using multiply_type =
146 multiply_type::apply( A, x, y );
// Function template over the matrix, input-vector, output-vector and
// column-indices types. The visible statement forwards (A, x, y, col) to the
// static `multiply_type::apply`; `col` is taken by const reference.
// NOTE(review): the lines carrying the function name and first parameter `A`,
// the output parameter `y`, the definition of `multiply_type`, and the
// closing brace are absent from this listing (gaps at 153, 155, 157, 159) --
// restore them from the upstream header before compiling.
149template <
typename MatrixType,
150 typename InputVectorType,
151 typename OutputVectorType,
152 typename ColumnIndicesType>
154 const InputVectorType& x,
156 const ColumnIndicesType& col) {
158 multiply_type::apply( A, x, y, col );
// Function template over the matrix, input-vector, output-vector and
// column-indices types. The visible statement forwards (A, x, y, col) to the
// static `multiply_type::apply`.
// The trailing comma after `col` shows that at least one further parameter
// follows; it is not visible here and is not passed on to `apply`.
// NOTE(review): the lines carrying the function name and first parameter `A`,
// the output parameter `y`, the parameter(s) after `col`, the definition of
// `multiply_type`, and the closing brace are absent from this listing (gaps
// at 165, 167, 169-170, 172) -- restore them from the upstream header.
161template <
typename MatrixType,
162 typename InputVectorType,
163 typename OutputVectorType,
164 typename ColumnIndicesType>
166 const InputVectorType& x,
168 const ColumnIndicesType& col,
171 multiply_type::apply( A, x, y, col );
// Computes the half-open index range [work_begin, work_end) of the
// `work_count` items assigned to thread `thread_rank` out of `thread_count`.
// Every thread gets the same `work_per_thread`, which is a multiple of
// `work_align`; both ends of the range are clamped to `work_count`, so
// trailing threads may receive a short or empty range.
//
// Template parameters:
//   scalar_type      element type; only its size is used.
//   execution_space  compared against Kokkos::Cuda to pick `cache_line`.
//   size_type        integer type of the counts and of the returned pair.
// Returns: Kokkos::pair<size_type, size_type>(work_begin, work_end).
//
// NOTE(review): the line carrying the function name and first parameter, the
// braces, the `enum { cache_line =` opener of the CUDA branch, the
// `#else`/`#endif`, and the definition of `work_shift` are absent from this
// listing (gaps at 185, 189, 191, 193, 195, 198, 213) -- restore them from
// the upstream header before compiling.
182template <
typename scalar_type,
typename execution_space,
typename size_type>
183KOKKOS_INLINE_FUNCTION
184Kokkos::pair<size_type, size_type>
186 const size_type work_count,
187 const size_type thread_count,
188 const size_type thread_rank)
// With CUDA enabled, `cache_line` is 128 for Kokkos::Cuda and 64 for any other
// execution space; the other visible definition is a fixed 64 (presumably the
// non-CUDA build branch).
190#if defined( KOKKOS_ENABLE_CUDA )
192 std::is_same<execution_space,Kokkos::Cuda>::value ? 128 : 64 };
194 enum { cache_line = 64 };
// Number of scalar_type elements per `cache_line` (dividing by sizeof implies
// `cache_line` is in bytes).
197 enum { work_align = cache_line /
sizeof(scalar_type) };
// Adding `work_mask` before the shift rounds up; this assumes `work_align` is
// a power of two and `work_shift` is its log2 -- TODO confirm, definition of
// `work_shift` is not visible.
199 enum { work_mask = work_align - 1 };
// Count aligned chunks (rounded up), split them across threads with a ceiling
// division, then shift back to a per-thread item count.
201 const size_type work_per_thread =
202 ( ( ( ( work_count + work_mask ) >> work_shift ) + thread_count - 1 ) /
203 thread_count ) << work_shift ;
// Range of this thread, clamped so that neither end exceeds work_count.
205 size_type work_begin = thread_rank * work_per_thread;
206 size_type work_end = work_begin + work_per_thread;
207 if (work_begin > work_count)
208 work_begin = work_count;
209 if (work_end > work_count)
210 work_end = work_count;
212 return Kokkos::make_pair(work_begin, work_end);
217 template <
typename Scalar>
218 KOKKOS_INLINE_FUNCTION
224 template <
typename Scalar>
225 KOKKOS_INLINE_FUNCTION
230template <
typename Value>
234 template <
typename Scalar>
235 KOKKOS_INLINE_FUNCTION
240template <
typename Value>
244 template <
typename Scalar>
245 KOKKOS_INLINE_FUNCTION
250template <
typename Value>
255 template <
typename Scalar>
256 KOKKOS_INLINE_FUNCTION
Kokkos::DefaultExecutionSpace execution_space
KOKKOS_INLINE_FUNCTION Kokkos::pair< size_type, size_type > compute_work_range(const execution_space device, const size_type work_count, const size_type thread_count, const size_type thread_rank)
Top-level namespace for Stokhos classes and functions.
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
IntegralRank< T::Rank > type
IntegralRank< T::Rank > type
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
MultiplyScaledAssign(const Value &a_)
MultiplyScaledUpdate2(const Value &a_, const Value &b_)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
MultiplyScaledUpdate(const Value &a_)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const