17#include <cusp/array1d.h>
19#include <cusp/detail/format_utils.h>
// Kernel fragment for the row-ordered CSR "scalar" SpMM path
// (row_spmm_csr_scalar_kernel, judging by the launch site in this file).
// Only part of the definition is visible here; the accumulation and the
// store into y are not shown, so they are not described.
//
// Visible behaviour:
//   * Rows of A are distributed over threads with a grid-stride loop: a thread
//     starts at its flat global index and advances by the total number of
//     launched threads until it passes Anum_rows, so any grid size covers
//     every row.
//   * For each row, the CSR row-offset array Ar gives the half-open range
//     [Ar[row], Ar[row+1]) that the inner loop walks with jj.
//     Ar must therefore hold at least Anum_rows + 1 readable entries.
31template <
typename IndexType,
// NOTE(review): spelled `xnum_row` here, while the column kernel's signature
// spells it `xnum_rows`. No use of it is visible in this fragment — confirm
// whether it is referenced at all.
35 const IndexType xnum_row,
39 const ValueType * Aval,
// Flat 1-D global thread index and total thread count of the launch.
43 const IndexType
thread_id = blockDim.x * blockIdx.x + threadIdx.x;
44 const IndexType grid_size = gridDim.x * blockDim.x;
// Grid-stride loop over the rows of A: one thread handles one row at a time.
47 for(IndexType row =
thread_id; row < Anum_rows; row += grid_size)
// Extent of this row's entries in the CSR arrays.
49 const IndexType row_start = Ar[row];
50 const IndexType row_end = Ar[row+1];
// Number of stored entries in this row.
// NOTE(review): no use of `r` is visible in this fragment — confirm it is needed.
51 const IndexType r = row_end - row_start;
// Walk the stored entries of the current row (loop body not visible here).
56 for (IndexType jj = row_start; jj < row_end; jj++)
// Kernel fragment for the column-ordered CSR "scalar" SpMM path
// (column_spmm_csr_scalar_kernel, judging by the launch site in this file).
// Only part of the definition is visible here; the declarations of `j` and
// `sum` and the accumulation itself are not shown.
//
// Visible behaviour:
//   * Rows of A are distributed over threads with a grid-stride loop, so any
//     grid size covers all Anum_rows rows.
//   * For each row, the inner loop walks the CSR range [Ar[row], Ar[row+1]).
//     Ar must therefore hold at least Anum_rows + 1 readable entries.
//   * The result is stored at y[j*Anum_rows + row], i.e. consecutive rows are
//     adjacent in memory and each step of j advances by Anum_rows elements.
67template <
typename IndexType,
75 const ValueType * Aval,
// Flat 1-D global thread index and total thread count of the launch.
79 const IndexType
thread_id = blockDim.x * blockIdx.x + threadIdx.x;
80 const IndexType grid_size = gridDim.x * blockDim.x;
// Grid-stride loop over the rows of A: one thread handles one row at a time.
81 for(IndexType row =
thread_id; row < Anum_rows; row += grid_size){
// Extent of this row's entries in the CSR arrays.
82 const IndexType row_start = Ar[row];
83 const IndexType row_end = Ar[row+1];
// Walk the stored entries of the current row (loop body not visible here).
88 for (IndexType jj = row_start; jj < row_end; jj++)
// Store the accumulated value for (row, j).
// Presumably `j` indexes a column of the dense operand and y is laid out
// column-major with Anum_rows rows — TODO confirm against the missing lines.
90 y[
j*Anum_rows+row]=sum;
// Host-side launcher for row_spmm_csr_scalar_kernel. This overload is the one
// taking an unnamed cusp::row_major tag as its last parameter.
//
// It derives IndexType/ValueType from Vector3 (the type of y), sizes a 1-D
// launch, and passes the kernel raw pointers to A's row offsets, column
// indices and values and to the value storage of x and y.
//
// NOTE(review): if A.num_rows is 0, NUM_BLOCKS is presumably 0, and a launch
// with a zero-sized grid is rejected by CUDA as an invalid configuration.
// No empty-input guard and no launch error check are visible in this
// fragment — confirm against the lines not shown.
106template <
typename Matrix1,
111 Vector3&
y, cusp::row_major)
113 CUSP_PROFILE_SCOPED();
114 typedef typename Vector3::index_type IndexType;
115 typedef typename Vector3::value_type ValueType;
// NOTE(review): MemorySpace is not referenced in any visible line.
116 typedef typename Vector3::memory_space MemorySpace;
// Threads per block used for the launch.
117 const size_t BLOCK_SIZE = 256;
// Upper bound on the grid size, obtained from CUSP's max_active_blocks helper
// for this kernel instantiation and block size. The trailing 0 is presumably
// the dynamic shared-memory size in bytes — TODO confirm.
118 const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(row_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (
size_t) 0);
// Launch no more blocks than that bound or than DIVIDE_INTO(A.num_rows,
// BLOCK_SIZE) yields (presumably a ceiling division, i.e. enough blocks for
// one thread per row — TODO confirm). The kernel's grid-stride loop covers
// the remaining rows when the grid is capped.
119 const size_t NUM_BLOCKS = std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
// Launch with no explicit shared-memory size or stream argument.
123 row_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE >>>
124 (A.num_rows,
x.num_rows,
x.num_cols,
125 thrust::raw_pointer_cast(&A.row_offsets[0]),
126 thrust::raw_pointer_cast(&A.column_indices[0]),
127 thrust::raw_pointer_cast(&A.values[0]),
128 thrust::raw_pointer_cast(&(
x.values)[0]),
129 thrust::raw_pointer_cast(&(
y.values)[0]));
// Host-side launcher for column_spmm_csr_scalar_kernel. This overload is the
// one taking an unnamed cusp::column_major tag as its last parameter.
//
// It derives IndexType/ValueType from Vector3 (the type of y), sizes a 1-D
// launch, and passes the kernel raw pointers to A's row offsets, column
// indices and values and to the value storage of x and y.
//
// NOTE(review): if A.num_rows is 0, NUM_BLOCKS is presumably 0, and a launch
// with a zero-sized grid is rejected by CUDA as an invalid configuration.
// No empty-input guard and no launch error check are visible in this
// fragment — confirm against the lines not shown.
133template <
typename Matrix1,
138 Vector3&
y, cusp::column_major)
140 CUSP_PROFILE_SCOPED();
141 typedef typename Vector3::index_type IndexType;
142 typedef typename Vector3::value_type ValueType;
// NOTE(review): MemorySpace is not referenced in any visible line.
143 typedef typename Vector3::memory_space MemorySpace;
// Threads per block used for the launch.
144 const size_t BLOCK_SIZE = 256;
// Upper bound on the grid size, obtained from CUSP's max_active_blocks helper
// for this kernel instantiation and block size. The trailing 0 is presumably
// the dynamic shared-memory size in bytes — TODO confirm.
145 const size_t MAX_BLOCKS = cusp::detail::device::arch::max_active_blocks(column_spmm_csr_scalar_kernel<IndexType, ValueType>, BLOCK_SIZE, (
size_t) 0);
// Launch no more blocks than that bound or than DIVIDE_INTO(A.num_rows,
// BLOCK_SIZE) yields (presumably a ceiling division, i.e. enough blocks for
// one thread per row — TODO confirm). The kernel's grid-stride loop covers
// the remaining rows when the grid is capped.
146 const size_t NUM_BLOCKS = std::min(MAX_BLOCKS, DIVIDE_INTO(A.num_rows, BLOCK_SIZE));
// Launch with no explicit shared-memory size or stream argument.
147 column_spmm_csr_scalar_kernel<IndexType,ValueType> <<<NUM_BLOCKS, BLOCK_SIZE>>>
148 (A.num_rows,
x.num_rows,
x.num_cols,
149 thrust::raw_pointer_cast(&A.row_offsets[0]),
150 thrust::raw_pointer_cast(&A.column_indices[0]),
151 thrust::raw_pointer_cast(&A.values[0]),
152 thrust::raw_pointer_cast(&(
x.values)[0]),
153 thrust::raw_pointer_cast(&(
y.values)[0]));
158template <
typename Matrix1,
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
void spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y)
__global__ void row_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_row, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)
void __spmm_csr_scalar(const Matrix1 &A, const Vector2 &x, Vector3 &y, cusp::row_major)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
const IndexType xnum_rows
const IndexType const IndexType xnum_cols
const IndexType thread_id
__global__ void column_spmm_csr_scalar_kernel(const IndexType Anum_rows, const IndexType xnum_rows, const IndexType xnum_cols, const IndexType *Ar, const IndexType *Ac, const ValueType *Aval, const ValueType *x, ValueType *y)