Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Stokhos_Cuda_SymmetricDiagonalSpec.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41
42#ifndef KOKKOS_CUDA_SYMMETRIC_DIAGONAL_SPEC_HPP
43#define KOKKOS_CUDA_SYMMETRIC_DIAGONAL_SPEC_HPP
44
45#include <stdexcept>
46
47#include "Kokkos_Core.hpp"
48
49#include "Stokhos_Multiply.hpp"
52
53//----------------------------------------------------------------------------
54
55namespace Stokhos {
56
57template<>
59{
60public:
61 typedef Kokkos::Cuda execution_space ;
62 typedef execution_space::size_type size_type ;
64
65 __host__
66 static dim3 thread_block( const block_type & block )
67 {
68 const int d = block.dimension();
69 const int warp_size = Kokkos::Impl::CudaTraits::WarpSize;
70 const int y = ( Kokkos::Impl::cuda_internal_maximum_warp_count() * warp_size ) / d ;
71
72 if ( 0 == y ) {
73 throw std::runtime_error( std::string("Stokhos::Multiply< SymmetricDiagonalSpec<Cuda> > ERROR: block too large") );
74 }
75
76 // dimension X #diagonals to concurrently process
77 return dim3( d , std::min( y , ( 1 + d ) / 2 ) , 1 );
78 }
79
80 template< typename VectorValue >
81 __host__
82 static size_type shmem_size( const block_type & block )
83 {
84 const dim3 d = thread_block( block );
85
86 return sizeof(VectorValue) * d.x * d.y ;
87 }
88
89 __host__
90 static size_type matrix_size( const block_type & block )
91 { return block.matrix_size(); }
92
93 // Required: blockDim.x == block.dimension()
94 // Required: blockDim.y > 1
95 // Computing: Y[ threadIdx.x ]
96 template< typename MatrixValue , typename VectorValue >
97 __device__
98 static VectorValue apply( const block_type & block ,
99 const MatrixValue * const a ,
100 const VectorValue * const x )
101 {
102 const int dimension = block.dimension();
103 const int dim_half = ( dimension + 1 ) >> 1 ;
104
105 VectorValue * const shX = kokkos_impl_cuda_shared_memory<VectorValue>();
106
107 int ia = -1 ;
108 int ix = -1 ;
109
110 VectorValue y = 0 ;
111
112 if ( 0 == threadIdx.y ) {
113 // Load vector and multiply the main diagonal (first diagonal)
114
115 shX[ threadIdx.x ] = x[ threadIdx.x ]; // Load 'x'
116
117 y = shX[ threadIdx.x ] * a[ threadIdx.x ];
118 }
119
120 __syncthreads(); // Wait for load of 'x'
121
122 if ( 0 == threadIdx.y && ! ( dimension & 01 ) ) {
123
124 // If even number of diagonals then the last diagonal is half-length
125 // and only used once.
126
127 ix = threadIdx.x ;
128 ia = threadIdx.x + dim_half * dimension ;
129
130 if ( threadIdx.x < dim_half ) {
131 ix += dim_half ;
132 }
133 else {
134 ix -= dim_half ;
135 ia -= dim_half ;
136 }
137 y += shX[ ix ] * a[ ia ];
138 }
139
140 //------------------------------------
141
142 const int A_stride = blockDim.y * dimension ;
143
144 int d = 1 + threadIdx.y ;
145
146 const MatrixValue * A = a + d * dimension ;
147
148 for ( ; d < dim_half ; d += blockDim.y , A += A_stride ) {
149
150 ix = threadIdx.x + d ; if ( dimension <= ix ) ix -= dimension ;
151 ia = threadIdx.x - d ; if ( ia < 0 ) ia += dimension ;
152
153 // A 'threadIdx.y' group accesses the matrix diagonal
154 // A[ 0 .. dimension - 1 ] twice in the following statement.
155 // Spatial and temporal locality are excellent for L1 cache reuse
156 // for the second access.
157
158 y += shX[ ix ] * A[ threadIdx.x ] +
159 shX[ ia ] * A[ ia ];
160 }
161
162 if ( 0 < threadIdx.y ) {
163 shX[ threadIdx.x + threadIdx.y * dimension ] = y ;
164 }
165
166 __syncthreads(); // Wait for load of contributions to 'y'
167
168 for ( ix = 1 ; ix < blockDim.y ; ++ix ) {
169 y += shX[ threadIdx.x + ix * dimension ];
170 }
171
172 return y ;
173 }
174};
175
176} /* namespace Stokhos */
177
178//----------------------------------------------------------------------------
179
180#endif /* #ifndef STOKHOS_CUDA_SYMMETRIC_DIAGONAL_SPEC_HPP */
static __device__ VectorValue apply(const block_type &block, const MatrixValue *const a, const VectorValue *const x)
Symmetric diagonal storage for a dense matrix.
KOKKOS_INLINE_FUNCTION unsigned dimension() const
Dimension of vector block.
KOKKOS_INLINE_FUNCTION unsigned matrix_size() const
Storage size for block coefficients.
Top-level namespace for Stokhos classes and functions.