Sacado Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
advection_const_basis/advection.cpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Sacado Package
5// Copyright (2006) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// This library is free software; you can redistribute it and/or modify
11// it under the terms of the GNU Lesser General Public License as
12// published by the Free Software Foundation; either version 2.1 of the
13// License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23// USA
24// Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25// (etphipp@sandia.gov).
26//
27// ***********************************************************************
28// @HEADER
29
30#include "Sacado.hpp"
31#include "advection.hpp"
32#include "common.hpp"
33
34#include "Kokkos_Timer.hpp"
35
36template<typename FluxView, typename WgbView, typename SrcView,
37 typename WbsView, typename ResidualView>
38void run_fad_flat(const FluxView& flux, const WgbView& wgb,
39 const SrcView& src, const WbsView& wbs,
40 const ResidualView& residual)
41{
42 typedef typename ResidualView::execution_space execution_space;
43 typedef typename ResidualView::non_const_value_type scalar_type;
44
45 const size_t num_cells = wgb.extent(0);
46 const int num_basis = wgb.extent(1);
47 const int num_points = wgb.extent(2);
48 const int num_dim = wgb.extent(3);
49
50 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
51 KOKKOS_LAMBDA (const size_t cell)
52 {
53 scalar_type value, value2;
54 for (int basis=0; basis<num_basis; ++basis) {
55 value = 0.0;
56 value2 = 0.0;
57 for (int qp=0; qp<num_points; ++qp) {
58 for (int dim=0; dim<num_dim; ++dim)
59 value += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
60 value2 += src(cell,qp)*wbs(cell,basis,qp);
61 }
62 residual(cell,basis) = value+value2;
63 }
64 });
65}
66
67template<typename FluxView, typename WgbView, typename SrcView,
68 typename WbsView, typename ResidualView>
69void run_fad_scratch(const FluxView& flux, const WgbView& wgb,
70 const SrcView& src, const WbsView& wbs,
71 const ResidualView& residual)
72{
73 typedef typename ResidualView::execution_space execution_space;
74 typedef typename ResidualView::non_const_value_type scalar_type;
75 typedef Kokkos::TeamPolicy<execution_space> policy_type;
76 typedef typename policy_type::member_type team_member;
77 typedef Kokkos::View<scalar_type*, typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
78
79 const size_t num_cells = wgb.extent(0);
80 const int num_basis = wgb.extent(1);
81 const int num_points = wgb.extent(2);
82 const int num_dim = wgb.extent(3);
83
84 const int vector_size = 1;
85 const int team_size = is_cuda_space<execution_space>::value ? 32 : 1;
86 const int fad_size = Kokkos::dimension_scalar(residual);
87 const size_t range = (num_cells+team_size-1)/team_size;
88 const size_t bytes = 2*tmp_scratch_type::shmem_size(team_size,fad_size);
89 policy_type policy(range,team_size,vector_size);
90
91 Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerTeam(bytes)),
92 KOKKOS_LAMBDA (const team_member& team)
93 {
94 const int team_rank = team.team_rank();
95 tmp_scratch_type value(team.team_scratch(0), team_size, fad_size);
96 tmp_scratch_type value2(team.team_scratch(0), team_size, fad_size);
97 const size_t cell = team.league_rank()*team_size + team_rank;
98 if (cell < num_cells) {
99 for (int basis=0; basis<num_basis; ++basis) {
100 value(team_rank) = 0.0;
101 value2(team_rank) = 0.0;
102 for (int qp=0; qp<num_points; ++qp) {
103 for (int dim=0; dim<num_dim; ++dim)
104 value(team_rank) += flux(cell,qp,dim)*wgb(cell,basis,qp,dim);
105 value2(team_rank) += src(cell,qp)*wbs(cell,basis,qp);
106 }
107 residual(cell,basis) = value(team_rank)+value2(team_rank);
108 }
109 }
110 });
111}
112
113template<int N, typename FluxView, typename WgbView, typename SrcView,
114 typename WbsView, typename ResidualView>
115void run_analytic_flat(const FluxView& flux, const WgbView& wgb,
116 const SrcView& src, const WbsView& wbs,
117 const ResidualView& residual)
118{
119 typedef typename ResidualView::execution_space execution_space;
120 typedef typename ResidualView::non_const_value_type scalar_type;
121
122 const size_t num_cells = wgb.extent(0);
123 const int num_basis = wgb.extent(1);
124 const int num_points = wgb.extent(2);
125 const int num_dim = wgb.extent(3);
126
127 Kokkos::parallel_for(Kokkos::RangePolicy<execution_space>( 0,num_cells ),
128 KOKKOS_LAMBDA (const size_t cell)
129 {
130 scalar_type value[N+1],value2[N+1];
131 for (int basis=0; basis<num_basis; ++basis) {
132 for (int k=0; k<N+1; ++k) {
133 value[k] = 0.0;
134 value2[k] = 0.0;
135 }
136 for (int qp=0; qp<num_points; ++qp) {
137 for (int dim=0; dim<num_dim; ++dim) {
138 const scalar_type flux_val = flux(cell,qp,dim,N);
139 const scalar_type wgb_val = wgb(cell,basis,qp,dim);
140 value[N] += flux_val*wgb_val;
141 for(int k=0; k<N; k++)
142 value[k] += flux(cell,qp,dim,k)*wgb_val;
143 }
144 const scalar_type src_val = src(cell,qp,N);
145 const scalar_type wbs_val = wbs(cell,basis,qp);
146 value2[N] += src_val*wbs_val;
147 for(int k=0; k<N; k++)
148 value2[k] += src(cell,qp,k)*wbs_val;
149 }
150 for(int k=0; k<=N; k++)
151 residual(cell,basis,k) = value[k]+value2[k];
152 }
153 });
154}
155
156template<int N, typename FluxView, typename WgbView, typename SrcView,
157 typename WbsView, typename ResidualView>
158void run_analytic_team(const FluxView& flux, const WgbView& wgb,
159 const SrcView& src, const WbsView& wbs,
160 const ResidualView& residual)
161{
162 typedef typename ResidualView::execution_space execution_space;
163 typedef typename ResidualView::non_const_value_type scalar_type;
164 typedef Kokkos::TeamPolicy<execution_space> policy_type;
165 typedef typename policy_type::member_type team_member;
166 typedef Kokkos::View<scalar_type[N+1], typename execution_space::scratch_memory_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> > tmp_scratch_type;
167
168 const size_t num_cells = wgb.extent(0);
169 const int num_basis = wgb.extent(1);
170 /*const*/ int num_points = wgb.extent(2);
171 /*const*/ int num_dim = wgb.extent(3);
172
173 const size_t bytes = 2*tmp_scratch_type::shmem_size();
174 policy_type policy(num_cells,num_basis,32);
175 Kokkos::parallel_for(policy.set_scratch_size(0,Kokkos::PerThread(bytes)),
176 KOKKOS_LAMBDA (const team_member& team)
177 {
178 tmp_scratch_type value(team.thread_scratch(0));
179 tmp_scratch_type value2(team.thread_scratch(0));
180 const size_t cell = team.league_rank();
181 Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,num_basis),
182 [&] (const int& basis)
183 {
184 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N+1),
185 [&] (const int& k)
186 {
187 value(k) = 0;
188 value2(k) = 0;
189 });
190 for (int qp=0; qp<num_points; ++qp) {
191 for (int dim=0; dim<num_dim; ++dim) {
192 const scalar_type flux_val = flux(cell,qp,dim,N);
193 const scalar_type wgb_val = wgb(cell,basis,qp,dim);
194 Kokkos::single(Kokkos::PerThread(team), [&] () {
195 value[N] += flux_val*wgb_val;
196 });
197 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N),
198 [&] (const int& k)
199 {
200 value[k] += flux(cell,qp,dim,k)*wgb_val;
201 });
202 }
203 const scalar_type src_val = src(cell,qp,N);
204 const scalar_type wbs_val = wbs(cell,basis,qp);
205 Kokkos::single(Kokkos::PerThread(team), [&] () {
206 value2[N] += src_val*wbs_val;
207 });
208 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N),
209 [&] (const int& k)
210 {
211 value2[k] += src(cell,qp,k)*wbs_val;
212 });
213 }
214 Kokkos::parallel_for(Kokkos::ThreadVectorRange(team,N+1),
215 [&] (const int& k)
216 {
217 residual(cell,basis,k) = value[k]+value2[k];
218 });
219 });
220 });
221}
222
223template <typename FadType, int N, typename ExecSpace>
224double time_fad_flat(int ncells, int num_basis, int num_points, int ndim,
225 int ntrial, bool check)
226{
227 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
228 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
229 typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
230 typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
231
232 t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
233 t_3DView_d wbs("",ncells,num_basis,num_points);
234 t_3DView flux("",ncells,num_points,ndim,N+1);
235 t_2DView src("",ncells,num_points,N+1);
236 t_2DView residual("",ncells,num_basis,N+1);
237 init_fad(wgb, wbs, flux, src, residual);
238
239 // Run once to warm up, complete any UVM transfers
240 run_fad_flat(flux, wgb, src, wbs, residual);
241
242 // Time execution
243 Kokkos::fence();
244 Kokkos::Timer timer;
245 for (int i=0; i<ntrial; ++i)
246 run_fad_flat(flux, wgb, src, wbs, residual);
247 Kokkos::fence();
248 double time = timer.seconds() / ntrial / ncells;
249
250 // Check result
251 if (check)
252 check_residual(flux, wgb, src, wbs, residual);
253
254 return time;
255}
256
257template <typename FadType, int N, typename ExecSpace>
258double time_fad_scratch(int ncells, int num_basis, int num_points, int ndim,
259 int ntrial, bool check)
260{
261 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
262 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
263 typedef Kokkos::View<FadType***,ExecSpace> t_3DView;
264 typedef Kokkos::View<FadType**,ExecSpace> t_2DView;
265
266 t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
267 t_3DView_d wbs("",ncells,num_basis,num_points);
268 t_3DView flux("",ncells,num_points,ndim,N+1);
269 t_2DView src("",ncells,num_points,N+1);
270 t_2DView residual("",ncells,num_basis,N+1);
271 init_fad(wgb, wbs, flux, src, residual);
272
273 // Run once to warm up, complete any UVM transfers
274 run_fad_scratch(flux, wgb, src, wbs, residual);
275
276 // Time execution
277 Kokkos::fence();
278 Kokkos::Timer timer;
279 for (int i=0; i<ntrial; ++i)
280 run_fad_scratch(flux, wgb, src, wbs, residual);
281 Kokkos::fence();
282 double time = timer.seconds() / ntrial / ncells;
283
284 // Check result
285 if (check)
286 check_residual(flux, wgb, src, wbs, residual);
287
288 return time;
289}
290
291template <int N, typename ExecSpace>
292double time_analytic_flat(int ncells, int num_basis, int num_points, int ndim,
293 int ntrial, bool check)
294{
295 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
296 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
297 typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
298 typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
299
300 t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
301 t_3DView_d wbs("",ncells,num_basis,num_points);
302 t_3DView flux("",ncells,num_points,ndim);
303 t_2DView src("",ncells,num_points);
304 t_2DView residual("",ncells,num_basis);
305 init_array(wgb, wbs, flux, src, residual);
306
307 // Run once to warm up, complete any UVM transfers
308 run_analytic_flat<N>(flux, wgb, src, wbs, residual);
309
310 // Time execution
311 Kokkos::fence();
312 Kokkos::Timer timer;
313 for (int i=0; i<ntrial; ++i)
314 run_analytic_flat<N>(flux, wgb, src, wbs, residual);
315 Kokkos::fence();
316 double time = timer.seconds() / ntrial / ncells;
317
318 // Check result
319 if (check)
320 check_residual(flux, wgb, src, wbs, residual);
321
322 return time;
323}
324
325template <int N, typename ExecSpace>
326double time_analytic_const(int ncells, int num_basis, int num_points, int ndim,
327 int ntrial, bool check)
328{
329 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
330 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
331 typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
332 typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
333
334 t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
335 t_3DView_d wbs("",ncells,num_basis,num_points);
336 t_3DView flux("",ncells,num_points,ndim);
337 t_2DView src("",ncells,num_points);
338 t_2DView residual("",ncells,num_basis);
339 init_array(wgb, wbs, flux, src, residual);
340
341 typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
342 t_3DView_const flux_const = flux;
343
344 // Run once to warm up, complete any UVM transfers
345 run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
346
347 // Time execution
348 Kokkos::fence();
349 Kokkos::Timer timer;
350 for (int i=0; i<ntrial; ++i)
351 run_analytic_flat<N>(flux_const, wgb, src, wbs, residual);
352 Kokkos::fence();
353 double time = timer.seconds() / ntrial / ncells;
354
355 // Check result
356 if (check)
357 check_residual(flux, wgb, src, wbs, residual);
358
359 return time;
360}
361
362template <int N, typename ExecSpace>
363double time_analytic_team(int ncells, int num_basis, int num_points, int ndim,
364 int ntrial, bool check)
365{
366 typedef Kokkos::View<double****,ExecSpace> t_4DView_d;
367 typedef Kokkos::View<double***,ExecSpace> t_3DView_d;
368 typedef Kokkos::View<double***[N+1],ExecSpace> t_3DView;
369 typedef Kokkos::View<double**[N+1],ExecSpace> t_2DView;
370
371 t_4DView_d wgb("",ncells,num_basis,num_points,ndim);
372 t_3DView_d wbs("",ncells,num_basis,num_points);
373 t_3DView flux("",ncells,num_points,ndim);
374 t_2DView src("",ncells,num_points);
375 t_2DView residual("",ncells,num_basis);
376 init_array(wgb, wbs, flux, src, residual);
377
378 typedef Kokkos::View<const double***[N+1],ExecSpace,Kokkos::MemoryTraits<Kokkos::RandomAccess> > t_3DView_const;
379 t_3DView_const flux_const = flux;
380
381 // Run once to warm up, complete any UVM transfers
382 run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
383
384 // Time execution
385 Kokkos::fence();
386 Kokkos::Timer timer;
387 for (int i=0; i<ntrial; ++i)
388 run_analytic_team<N>(flux_const, wgb, src, wbs, residual);
389 Kokkos::fence();
390 double time = timer.seconds() / ntrial / ncells;
391
392 // Check result
393 if (check)
394 check_residual(flux, wgb, src, wbs, residual);
395
396 return time;
397}
398
399#define INST_FUNC_FAD_N_DEV(FAD,N,DEV) \
400 template double time_fad_flat< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
401 template double time_fad_scratch< FAD, N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
402
403#define INST_FUNC_N_DEV(N,DEV) \
404 INST_FUNC_FAD_N_DEV(SFadType,N,DEV) \
405 INST_FUNC_FAD_N_DEV(SLFadType,N,DEV) \
406 INST_FUNC_FAD_N_DEV(DFadType,N,DEV) \
407 template double time_analytic_flat< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
408 template double time_analytic_const< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check); \
409 template double time_analytic_team< N, DEV >(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check);
410
411#define INST_FUNC_DEV(DEV) \
412 INST_FUNC_N_DEV( fad_dim, DEV )
413
414#ifdef KOKKOS_ENABLE_SERIAL
415INST_FUNC_DEV(Kokkos::Serial)
416#endif
417
418#ifdef KOKKOS_ENABLE_OPENMP
419INST_FUNC_DEV(Kokkos::OpenMP)
420#endif
421
422#ifdef KOKKOS_ENABLE_THREADS
423INST_FUNC_DEV(Kokkos::Threads)
424#endif
425
426#ifdef KOKKOS_ENABLE_CUDA
427INST_FUNC_DEV(Kokkos::Cuda)
428#endif
const int N
void init_fad(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
void check_residual(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
void init_array(const V1 &v1, const V2 &v2, const V3 &v3, const V4 &v4, const V5 &v5)
void run_fad_scratch(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
#define INST_FUNC_DEV(DEV)
double time_analytic_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void run_fad_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
double time_analytic_team(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void run_analytic_flat(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
double time_fad_flat(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
double time_analytic_const(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
double time_fad_scratch(int ncells, int num_basis, int num_points, int ndim, int ntrial, bool check)
void run_analytic_team(const FluxView &flux, const WgbView &wgb, const SrcView &src, const WbsView &wbs, const ResidualView &residual)
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)
int value