Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
TestSpMv.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41#include <iostream>
42
43// MP::Vector and Matrix
45#include "KokkosSparse_CrsMatrix.hpp"
46#include "KokkosSparse_spmv.hpp"
48
49// Compile-time loops
50#include "Sacado_mpl_range_c.hpp"
51#include "Sacado_mpl_for_each.hpp"
52#include "Sacado_mpl_integral_c.hpp"
53
54// Utilities
55#include "Kokkos_Timer.hpp"
56
// Flatten a 3-D grid coordinate (i,j,k) on an N x N x N box into a single
// linear index, with k varying fastest and i slowest:
//
//   index = ( i * N + j ) * N + k
template< typename IntType >
inline
IntType map_fem_graph_coord( const IntType & N ,
                             const IntType & i ,
                             const IntType & j ,
                             const IntType & k )
{
  // Horner evaluation of i*N*N + j*N + k.
  return ( ( i * N ) + j ) * N + k ;
}
66
67inline
68size_t generate_fem_graph( size_t N ,
69 std::vector< std::vector<size_t> > & graph )
70{
71 graph.resize( N * N * N , std::vector<size_t>() );
72
73 size_t total = 0 ;
74
75 for ( int i = 0 ; i < (int) N ; ++i ) {
76 for ( int j = 0 ; j < (int) N ; ++j ) {
77 for ( int k = 0 ; k < (int) N ; ++k ) {
78
79 const size_t row = map_fem_graph_coord((int)N,i,j,k);
80
81 graph[row].reserve(27);
82
83 for ( int ii = -1 ; ii < 2 ; ++ii ) {
84 for ( int jj = -1 ; jj < 2 ; ++jj ) {
85 for ( int kk = -1 ; kk < 2 ; ++kk ) {
86 if ( 0 <= i + ii && i + ii < (int) N &&
87 0 <= j + jj && j + jj < (int) N &&
88 0 <= k + kk && k + kk < (int) N ) {
89 size_t col = map_fem_graph_coord((int)N,i+ii,j+jj,k+kk);
90
91 graph[row].push_back(col);
92 }
93 }}}
94 total += graph[row].size();
95 }}}
96
97 return total ;
98}
99
100template <typename StorageType, typename MultiplyTag>
101std::vector<double>
102test_mpvector_spmv(const int ensemble_length,
103 const int nGrid,
104 const int iterCount,
105 KokkosSparse::DeviceConfig dev_config,
106 MultiplyTag tag)
107{
108 typedef StorageType storage_type;
109 typedef typename storage_type::value_type value_type;
110 typedef typename storage_type::ordinal_type ordinal_type;
111 typedef typename storage_type::execution_space execution_space;
112 typedef Kokkos::Device<execution_space, typename execution_space::memory_space> device_type;
113 typedef Sacado::MP::Vector<StorageType> VectorType;
114 typedef Kokkos::LayoutRight Layout;
115 typedef Kokkos::View< VectorType*, Layout, execution_space > vector_type;
116 typedef KokkosSparse::CrsMatrix< VectorType, ordinal_type, device_type > matrix_type;
117 typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
118 typedef typename matrix_type::values_type matrix_values_type;
119
120 //------------------------------
121 // Generate graph for "FEM" box structure:
122
123 std::vector< std::vector<size_t> > fem_graph;
124 const size_t fem_length = nGrid * nGrid * nGrid;
125 const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
126
127 //------------------------------
128 // Generate input multivector:
129
130 vector_type x =
131 vector_type(Kokkos::ViewAllocateWithoutInitializing("x"), fem_length, ensemble_length);
132 vector_type y =
133 vector_type(Kokkos::ViewAllocateWithoutInitializing("y"), fem_length, ensemble_length);
134
135 //------------------------------
136
137 matrix_graph_type matrix_graph =
138 Kokkos::create_staticcrsgraph<matrix_graph_type>(
139 std::string("test crs graph"), fem_graph);
140 matrix_values_type matrix_values =
141 matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length, ensemble_length);
142 matrix_type matrix("block_matrix", fem_length, matrix_values, matrix_graph);
143 matrix.dev_config = dev_config;
144
145 //------------------------------
146 // Fill:
147
148 {
149 // The VectorType may be dynamic (with allocated memory)
150 // so cannot pass a VectorType value to the device.
151 // Get an array-of-intrinsic View and fill that view.
152 typename vector_type::array_type xx( x );
153 typename vector_type::array_type yy( y );
154 typename matrix_values_type::array_type mm( matrix_values );
155
156 Kokkos::deep_copy( xx , value_type(1.0) );
157 Kokkos::deep_copy( yy , value_type(1.0) );
158 Kokkos::deep_copy( mm , value_type(1.0) );
159 }
160
161 //------------------------------
162
163 // One iteration to warm up
164 Stokhos::multiply( matrix, x, y, tag );
165
166 execution_space().fence();
167 Kokkos::Timer clock ;
168 for (int iter = 0; iter < iterCount; ++iter) {
169 Stokhos::multiply( matrix, x, y, tag );
170 }
171 execution_space().fence();
172
173 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
174 const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
175
176 std::vector<double> perf(5);
177 perf[0] = fem_length;
178 perf[1] = ensemble_length;
179 perf[2] = graph_length;
180 perf[3] = seconds_per_iter;
181 perf[4] = flops / seconds_per_iter;
182 return perf;
183}
184
185template <typename ScalarType, typename OrdinalType, typename Device>
186std::vector<double>
187test_scalar_spmv(const int ensemble_length,
188 const int nGrid,
189 const int iterCount,
190 KokkosSparse::DeviceConfig dev_config)
191{
192 typedef ScalarType value_type;
193 typedef OrdinalType ordinal_type;
194 typedef Device execution_space;
195 typedef Kokkos::Device<execution_space, typename execution_space::memory_space> device_type;
196 typedef Kokkos::View< value_type*, execution_space > vector_type;
197 typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, device_type > matrix_type;
198 typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
199 typedef typename matrix_type::values_type matrix_values_type;
200
201 //------------------------------
202 // Generate graph for "FEM" box structure:
203
204 std::vector< std::vector<size_t> > fem_graph;
205 const size_t fem_length = nGrid * nGrid * nGrid;
206 const size_t graph_length = generate_fem_graph( nGrid , fem_graph );
207
208 //------------------------------
209 // Generate input multivector:
210
211 std::vector<vector_type> x(ensemble_length);
212 std::vector<vector_type> y(ensemble_length);
213 for (int e=0; e<ensemble_length; ++e) {
214 x[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("x"), fem_length);
215 y[e] = vector_type(Kokkos::ViewAllocateWithoutInitializing("y"), fem_length);
216
217 Kokkos::deep_copy( x[e] , value_type(1.0) );
218 Kokkos::deep_copy( y[e] , value_type(0.0) );
219 }
220
221 //------------------------------
222
223 std::vector<matrix_type> matrix(ensemble_length);
224 for (int e=0; e<ensemble_length; ++e) {
225 matrix_graph_type matrix_graph =
226 Kokkos::create_staticcrsgraph<matrix_graph_type>(
227 std::string("test crs graph"), fem_graph);
228 matrix_values_type matrix_values =
229 matrix_values_type(Kokkos::ViewAllocateWithoutInitializing("matrix"), graph_length);
230 matrix[e] = matrix_type("matrix", fem_length, matrix_values, matrix_graph);
231
232 Kokkos::deep_copy( matrix[e].values , value_type(1.0) );
233 }
234
235 //------------------------------
236
237 // One iteration to warm up
238 for (int iter = 0; iter < iterCount; ++iter) {
239 for (int e=0; e<ensemble_length; ++e) {
240 KokkosSparse::spmv( "N" , value_type(1.0), matrix[e], x[e] , value_type(0.0), y[e]);
241 }
242 }
243
244 execution_space().fence();
245 Kokkos::Timer clock ;
246 for (int iter = 0; iter < iterCount; ++iter) {
247 for (int e=0; e<ensemble_length; ++e) {
248 KokkosSparse::spmv( "N" , value_type(1.0), matrix[e], x[e] , value_type(0.0), y[e]);
249 }
250 }
251 execution_space().fence();
252
253 const double seconds_per_iter = clock.seconds() / ((double) iterCount );
254 const double flops = 1.0e-9 * 2.0 * graph_length * ensemble_length;
255
256 std::vector<double> perf(5);
257 perf[0] = fem_length;
258 perf[1] = ensemble_length;
259 perf[2] = graph_length;
260 perf[3] = seconds_per_iter;
261 perf[4] = flops / seconds_per_iter;
262 return perf;
263}
264
265template <class Storage>
266struct PerformanceDriverOp {
267 typedef typename Storage::value_type Scalar;
268 typedef typename Storage::ordinal_type Ordinal;
269 typedef typename Storage::execution_space Device;
270 const int nGrid, nIter;
271 KokkosSparse::DeviceConfig dev_config;
272
273 PerformanceDriverOp(const int nGrid_, const int nIter_,
274 KokkosSparse::DeviceConfig dev_config_) :
275 nGrid(nGrid_), nIter(nIter_), dev_config(dev_config_) {}
276
277 template <typename ArgT>
278 void operator() (ArgT arg) const {
279 const int ensemble = ArgT::value;
280 typedef typename Storage::template apply_N<ensemble> NewStorageApply;
281 typedef typename NewStorageApply::type storage_type;
282
283 const std::vector<double> perf_scalar =
284 test_scalar_spmv<Scalar,Ordinal,Device>(
285 ensemble, nGrid, nIter, dev_config );
286
287 const std::vector<double> perf_mpvector =
288 test_mpvector_spmv<storage_type>(
290
291 std::cout << nGrid << " , "
292 << perf_scalar[0] << " , "
293 << perf_scalar[2] << " , "
294 << perf_scalar[1] << " , "
295 << perf_scalar[3] << " , "
296 << perf_scalar[4] / perf_scalar[4] << " , "
297 << perf_scalar[4] << " , "
298 << perf_mpvector[4]/ perf_scalar[4] << " , "
299 << perf_mpvector[4] << " , "
300 << std::endl;
301 }
302};
303
304template <class Storage, int entry_min, int entry_max, int entry_step>
305void performance_test_driver( const int nGrid,
306 const int nIter,
307 KokkosSparse::DeviceConfig dev_config)
308{
309 std::cout.precision(8);
310 std::cout << std::endl
311 << "\"Grid Size\" , "
312 << "\"FEM Size\" , "
313 << "\"FEM Graph Size\" , "
314 << "\"Ensemble Size\" , "
315 << "\"Scalar SpMv Time\" , "
316 << "\"Scalar SpMv Speedup\" , "
317 << "\"Scalar SpMv GFLOPS\" , "
318 << "\"MPVector SpMv Speedup\" , "
319 << "\"MPVector SpMv GFLOPS\" , "
320 << std::endl;
321
322 // Loop over [entry_min, entry_max] vector entries per thread
323 typedef Sacado::mpl::range_c< int, entry_min, entry_max+1, entry_step > Range;
324 PerformanceDriverOp<Storage> op(nGrid, nIter, dev_config);
325 Sacado::mpl::for_each_no_kokkos<Range> f(op);
326}
Stokhos::StandardStorage< int, double > storage_type
Kokkos::DefaultExecutionSpace execution_space
void performance_test_driver(const int nGrid, const int nIter, KokkosSparse::DeviceConfig dev_config)
Definition: TestSpMv.hpp:305
size_t generate_fem_graph(size_t N, std::vector< std::vector< size_t > > &graph)
Definition: TestSpMv.hpp:68
std::vector< double > test_scalar_spmv(const int ensemble_length, const int nGrid, const int iterCount, KokkosSparse::DeviceConfig dev_config)
Definition: TestSpMv.hpp:187
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
Definition: TestSpMv.hpp:59
std::vector< double > test_mpvector_spmv(const int ensemble_length, const int nGrid, const int iterCount, KokkosSparse::DeviceConfig dev_config, MultiplyTag tag)
Definition: TestSpMv.hpp:102
ScalarType f(const Teuchos::Array< ScalarType > &x, double a, double b)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
Kokkos::Example::FENL::DeviceConfig dev_config
Storage::value_type Scalar
Definition: TestSpMv.hpp:267
Storage::execution_space Device
Definition: TestSpMv.hpp:269
KokkosSparse::DeviceConfig dev_config
Definition: TestSpMv.hpp:271
Storage::ordinal_type Ordinal
Definition: TestSpMv.hpp:268
PerformanceDriverOp(const int nGrid_, const int nIter_, KokkosSparse::DeviceConfig dev_config_)
Definition: TestSpMv.hpp:273