Sacado Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Fad_KokkosAtomicTests.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Sacado Package
5// Copyright (2006) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// This library is free software; you can redistribute it and/or modify
11// it under the terms of the GNU Lesser General Public License as
12// published by the Free Software Foundation; either version 2.1 of the
13// License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23// USA
24// Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25// (etphipp@sandia.gov).
26//
27// ***********************************************************************
28// @HEADER
29#include "Teuchos_TestingHelpers.hpp"
30
31#include "Sacado.hpp"
32
// Compile-time trait: is_dfad<T>::value tells whether T is a
// Sacado::Fad::Exp::DFad instantiation.  This primary template handles every
// type that has no dedicated specialization and answers false.
template <class Candidate>
struct is_dfad {
  static constexpr bool value = false;
};
37
38template <typename T>
39struct is_dfad< Sacado::Fad::Exp::DFad<T> > {
40 static const bool value = true;
41};
42
43template <typename FadType1, typename FadType2>
44bool checkFads(const FadType1& x, const FadType2& x2,
45 Teuchos::FancyOStream& out, double tol = 1.0e-15)
46{
47 bool success = true;
48
49 // Check sizes match
50 TEUCHOS_TEST_EQUALITY(x.size(), x2.size(), out, success);
51
52 // Check values match
53 TEUCHOS_TEST_FLOATING_EQUALITY(x.val(), x2.val(), tol, out, success);
54
55 // Check derivatives match
56 for (int i=0; i<x.size(); ++i)
57 TEUCHOS_TEST_FLOATING_EQUALITY(x.dx(i), x2.dx(i), tol, out, success);
58
59 return success;
60}
61
// Build a deterministic Fad value for the view entry at (row, col).
//
// The value is
//   (100 + num_rows/(row+1)) + (10 + num_cols/(col+1))
// and derivative component i is that value plus (1 + fad_size/(i+1)).
// The returned object is constructed with fad_size derivative components.
template <typename fadtype, typename ordinal>
inline
fadtype generate_fad( const ordinal num_rows,
                      const ordinal num_cols,
                      const ordinal fad_size,
                      const ordinal row,
                      const ordinal col )
{
  using scalar = typename fadtype::value_type;

  // Row and column contributions to the value.
  const scalar row_term = 100.0 + scalar(num_rows) / scalar(row+1);
  const scalar col_term = 10.0 + scalar(num_cols) / scalar(col+1);

  fadtype result(fad_size, scalar(0.0));
  result.val() = row_term + col_term;

  ordinal k = 0;
  while (k < fad_size) {
    const scalar deriv_term = 1.0 + scalar(fad_size) / scalar(k+1);
    result.fastAccessDx(k) = row_term + col_term + deriv_term;
    ++k;
  }

  return result;
}
82
// Number of derivative components used by the tests.  A build may predefine
// GLOBAL_FAD_SIZE; otherwise it defaults to 5.
#ifndef GLOBAL_FAD_SIZE
#define GLOBAL_FAD_SIZE 5
#endif
// View dimensions used when generating test data.
const int global_num_rows = 11;
const int global_num_cols = 7;
// testAtomic() reads global_fad_size; its definition was absent from this
// listing and is restored here from the macro above.
const int global_fad_size = GLOBAL_FAD_SIZE;
89
// Tag selecting the addition variant of the atomic test.  It supplies the
// accumulator's starting value and the operation used to compute the
// expected result.
struct AddTag {
  // Starting value of the accumulator.
  static double init() { return 0.0; }

  // Returns lhs + rhs.
  template <typename Lhs, typename Rhs>
  static auto apply(const Lhs& lhs, const Rhs& rhs) -> decltype(lhs+rhs)
  {
    return lhs+rhs;
  }
};
// Tag selecting the subtraction variant of the atomic test.  It supplies the
// accumulator's starting value and the operation used to compute the
// expected result.
struct SubTag {
  // Starting value of the accumulator.
  static double init() { return 0.0; }

  // Returns lhs - rhs.
  template <typename Lhs, typename Rhs>
  static auto apply(const Lhs& lhs, const Rhs& rhs) -> decltype(lhs-rhs)
  {
    return lhs-rhs;
  }
};
// Tag selecting the multiplication variant of the atomic test.  It supplies
// the accumulator's starting value and the operation used to compute the
// expected result.
struct MulTag {
  // Starting value of the accumulator.
  static double init() { return 1.0; }

  // Returns lhs * rhs.
  template <typename Lhs, typename Rhs>
  static auto apply(const Lhs& lhs, const Rhs& rhs) -> decltype(lhs*rhs)
  {
    return lhs*rhs;
  }
};
// Tag selecting the division variant of the atomic test.  It supplies the
// accumulator's starting value and the operation used to compute the
// expected result.
struct DivTag {
  // Starting value of the accumulator.
  static double init() { return 1.0; }

  // Returns lhs / rhs.
  template <typename Lhs, typename Rhs>
  static auto apply(const Lhs& lhs, const Rhs& rhs) -> decltype(lhs/rhs)
  {
    return lhs/rhs;
  }
};
// Tag selecting the maximum variant of the atomic test.  It supplies the
// accumulator's starting value and the operation used to compute the
// expected result.
struct MaxTag {
  // Starting value of the accumulator.
  static double init() { return 1.0; }

  // Returns max(lhs, rhs).  The call is unqualified, so the overload is
  // chosen by the argument types.
  template <typename Lhs, typename Rhs>
  static auto apply(const Lhs& lhs, const Rhs& rhs) -> decltype(max(lhs,rhs))
  {
    return max(lhs,rhs);
  }
};
// Tag selecting the minimum variant of the atomic test.  It supplies the
// accumulator's starting value and the operation used to compute the
// expected result.
struct MinTag {
  // Starting value of the accumulator.
  // NOTE(review): generate_fad() in this file produces values above 100, so
  // with a seed of 1.0 the minimum presumably never changes -- confirm this
  // is intended.
  static double init() { return 1.0; }

  // Returns min(lhs, rhs).  The call is unqualified, so the overload is
  // chosen by the argument types.
  template <typename Lhs, typename Rhs>
  static auto apply(const Lhs& lhs, const Rhs& rhs) -> decltype(min(lhs,rhs))
  {
    return min(lhs,rhs);
  }
};
138
139// Kernel to test atomic_add
140template <typename ViewType, typename ScalarViewType, bool OperFetch>
142 typedef typename ViewType::execution_space execution_space;
143 typedef typename ViewType::size_type size_type;
144 typedef typename Kokkos::TeamPolicy< execution_space>::member_type team_handle;
145 typedef typename Kokkos::ThreadLocalScalarType<ViewType>::type local_scalar_type;
146 static const size_type stride = Kokkos::ViewScalarStride<ViewType>::stride;
147
148 const ViewType m_v;
149 const ScalarViewType m_s;
150
151 AtomicKernel(const ViewType& v, const ScalarViewType& s) :
152 m_v(v), m_s(s) {};
153
154 KOKKOS_INLINE_FUNCTION
155 void operator() (AddTag tag, const size_type i) const {
157 if (OperFetch)
158 Kokkos::atomic_add_fetch(&(m_s()), x);
159 else
160 Kokkos::atomic_fetch_add(&(m_s()), x);
161 }
162
163 KOKKOS_INLINE_FUNCTION
164 void operator() (SubTag tag, const size_type i) const {
166 if (OperFetch)
167 Kokkos::atomic_sub_fetch(&(m_s()), x);
168 else
169 Kokkos::atomic_fetch_sub(&(m_s()), x);
170 }
171
172 KOKKOS_INLINE_FUNCTION
173 void operator() (MulTag tag, const size_type i) const {
175 if (OperFetch)
176 Kokkos::atomic_mul_fetch(&(m_s()), x);
177 else
178 Kokkos::atomic_fetch_mul(&(m_s()), x);
179 }
180
181 KOKKOS_INLINE_FUNCTION
182 void operator() (DivTag tag, const size_type i) const {
184 if (OperFetch)
185 Kokkos::atomic_div_fetch(&(m_s()), x);
186 else
187 Kokkos::atomic_fetch_div(&(m_s()), x);
188 }
189
190 KOKKOS_INLINE_FUNCTION
191 void operator() (MaxTag tag, const size_type i) const {
193 if (OperFetch)
194 Kokkos::atomic_max_fetch(&(m_s()), x);
195 else
196 Kokkos::atomic_fetch_max(&(m_s()), x);
197 }
198
199 KOKKOS_INLINE_FUNCTION
200 void operator() (MinTag tag, const size_type i) const {
202 if (OperFetch)
203 Kokkos::atomic_min_fetch(&(m_s()), x);
204 else
205 Kokkos::atomic_fetch_min(&(m_s()), x);
206 }
207
208 template <typename Tag>
209 KOKKOS_INLINE_FUNCTION
210 void operator()( Tag tag, const team_handle& team ) const
211 {
212 const size_type i = team.league_rank()*team.team_size() + team.team_rank();
213 if (i < m_v.extent(0))
214 (*this)(tag, i);
215 }
216
217 // Kernel launch
218 template <typename Tag>
219 static void apply(Tag tag, const ViewType& v, const ScalarViewType& s) {
220 const size_type nrow = v.extent(0);
221
222#if defined (KOKKOS_ENABLE_CUDA) && defined (SACADO_VIEW_CUDA_HIERARCHICAL)
223 const bool use_team =
224 std::is_same<execution_space, Kokkos::Cuda>::value &&
225 Kokkos::is_view_fad_contiguous<ViewType>::value &&
226 ( stride > 1 );
227#elif defined (KOKKOS_ENABLE_CUDA) && defined (SACADO_VIEW_CUDA_HIERARCHICAL_DFAD)
228 const bool use_team =
229 std::is_same<execution_space, Kokkos::Cuda>::value &&
230 Kokkos::is_view_fad_contiguous<ViewType>::value &&
232#elif defined (KOKKOS_ENABLE_HIP) && defined (SACADO_VIEW_CUDA_HIERARCHICAL)
233 const bool use_team =
234 std::is_same<execution_space, Kokkos::Experimental::HIP>::value &&
235 Kokkos::is_view_fad_contiguous<ViewType>::value &&
236 ( stride > 1 );
237#elif defined (KOKKOS_ENABLE_HIP) && defined (SACADO_VIEW_CUDA_HIERARCHICAL_DFAD)
238 const bool use_team =
239 std::is_same<execution_space, Kokkos::Experimental::HIP>::value &&
240 Kokkos::is_view_fad_contiguous<ViewType>::value &&
242#else
243 const bool use_team = false;
244#endif
245
246 if (use_team) {
247 const size_type team_size = 256 / stride;
248 Kokkos::TeamPolicy<execution_space, Tag> policy(
249 (nrow+team_size-1)/team_size, team_size, stride );
250 Kokkos::parallel_for( policy, AtomicKernel(v,s) );
251 }
252 else {
253 Kokkos::RangePolicy<execution_space, Tag> policy( 0, nrow );
254 Kokkos::parallel_for( policy, AtomicKernel(v,s) );
255 }
256 }
257};
258
259template <typename FadType, typename Layout, typename Device, bool OperFetch,
260 typename TagType>
261bool testAtomic(const TagType& tag, Teuchos::FancyOStream& out)
262{
263 typedef Kokkos::View<FadType*,Layout,Device> ViewType;
264 typedef Kokkos::View<FadType,Layout,Device> ScalarViewType;
265 typedef typename ViewType::size_type size_type;
266 typedef typename ViewType::HostMirror host_view_type;
267 typedef typename ScalarViewType::HostMirror host_scalar_view_type;
268
269 const size_type num_rows = global_num_rows;
270 const size_type fad_size = global_fad_size;
271
272 // Create and fill view
273 ViewType v;
274 ScalarViewType s0;
275#if defined (SACADO_DISABLE_FAD_VIEW_SPEC)
276 v = ViewType ("view", num_rows);
277 s0 = ScalarViewType ("");
278#else
279 v = ViewType ("view", num_rows, fad_size+1);
280 s0 = ScalarViewType ("", fad_size+1);
281#endif
282 host_view_type h_v = Kokkos::create_mirror_view(v);
283 for (size_type i=0; i<num_rows; ++i)
284 h_v(i) =
285 generate_fad<FadType>(num_rows, size_type(1), fad_size, i, size_type(0));
286 Kokkos::deep_copy(v, h_v);
287
288 Kokkos::deep_copy(s0, tag.init());
289
290 // Create scalar view
291 ScalarViewType s;
292#if defined (SACADO_DISABLE_FAD_VIEW_SPEC)
293 s = ScalarViewType ("scalar view");
294#else
295 s = ScalarViewType ("scalar view", fad_size+1);
296#endif
297 Kokkos::deep_copy( s, tag.init() );
298
299 // Call atomic_add kernel, which adds up entries in v
301
302 // Copy to host
303 host_scalar_view_type hs = Kokkos::create_mirror_view(s);
304 Kokkos::deep_copy(hs, s);
305
306 // Compute correct result
307 auto b = Kokkos::create_mirror_view(s0);
308 Kokkos::deep_copy(b, s0);
309
310 for (size_type i=0; i<num_rows; ++i)
311 b() = tag.apply(b(), h_v(i));
312
313 // Check
314 bool success = checkFads(b(), hs(), out);
315
316 return success;
317}
318
319// Test atomic_oper_fetch form
320
322 Kokkos_View_Fad, AtomicAddFetch, FadType, Layout, Device )
323{
324 success = testAtomic<FadType, Layout, Device, true>(AddTag(), out);
325}
326
328 Kokkos_View_Fad, AtomicSubFetch, FadType, Layout, Device )
329{
330 success = testAtomic<FadType, Layout, Device, true>(SubTag(), out);
331}
332
334 Kokkos_View_Fad, AtomicMulFetch, FadType, Layout, Device )
335{
336 success = testAtomic<FadType, Layout, Device, true>(MulTag(), out);
337}
338
340 Kokkos_View_Fad, AtomicDivFetch, FadType, Layout, Device )
341{
342 success = testAtomic<FadType, Layout, Device, true>(DivTag(), out);
343}
344
346 Kokkos_View_Fad, AtomicMaxFetch, FadType, Layout, Device )
347{
348 success = testAtomic<FadType, Layout, Device, true>(MaxTag(), out);
349}
350
352 Kokkos_View_Fad, AtomicMinFetch, FadType, Layout, Device )
353{
354 success = testAtomic<FadType, Layout, Device, true>(MinTag(), out);
355}
356
357// Test atomic_fetch_oper form
358
360 Kokkos_View_Fad, AtomicFetchAdd, FadType, Layout, Device )
361{
362 success = testAtomic<FadType, Layout, Device, false>(AddTag(), out);
363}
364
366 Kokkos_View_Fad, AtomicFetchSub, FadType, Layout, Device )
367{
368 success = testAtomic<FadType, Layout, Device, false>(SubTag(), out);
369}
370
372 Kokkos_View_Fad, AtomicFetchMul, FadType, Layout, Device )
373{
374 success = testAtomic<FadType, Layout, Device, false>(MulTag(), out);
375}
376
378 Kokkos_View_Fad, AtomicFetchDiv, FadType, Layout, Device )
379{
380 success = testAtomic<FadType, Layout, Device, false>(DivTag(), out);
381}
382
384 Kokkos_View_Fad, AtomicFetchMax, FadType, Layout, Device )
385{
386 success = testAtomic<FadType, Layout, Device, false>(MaxTag(), out);
387}
388
390 Kokkos_View_Fad, AtomicFetchMin, FadType, Layout, Device )
391{
392 success = testAtomic<FadType, Layout, Device, false>(MinTag(), out);
393}
394
// Instantiate all twelve atomic tests of the Kokkos_View_Fad group for one
// combination of Fad type F, layout L and device D: first the six
// atomic_<op>_fetch tests, then the six atomic_fetch_<op> tests.
#define VIEW_FAD_TESTS_FLD( F, L, D )                                              \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicAddFetch, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicSubFetch, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicMulFetch, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicDivFetch, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicMaxFetch, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicMinFetch, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchAdd, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchSub, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchMul, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchDiv, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchMax, F, L, D ) \
  TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT( Kokkos_View_Fad, AtomicFetchMin, F, L, D )
408
409using Kokkos::LayoutLeft;
410using Kokkos::LayoutRight;
413
414#define VIEW_FAD_TESTS_FD( F, D ) \
415 VIEW_FAD_TESTS_FLD( F, LayoutLeft, D ) \
416 VIEW_FAD_TESTS_FLD( F, LayoutRight, D ) \
417 VIEW_FAD_TESTS_FLD( F, LeftContiguous, D ) \
418 VIEW_FAD_TESTS_FLD( F, RightContiguous, D )
419
// Full set of atomics only implemented for new design
#if SACADO_ENABLE_NEW_DESIGN
// Fad types exercised by the tests.
// NOTE(review): these three typedefs were absent from the listing and are
// reconstructed; the Exp namespace matches the is_dfad specialization in this
// file, but the exact size arguments should be confirmed against upstream.
typedef Sacado::Fad::Exp::DFad<double> DFadType;
typedef Sacado::Fad::Exp::SLFad<double,2*global_fad_size> SLFadType;
typedef Sacado::Fad::Exp::SFad<double,global_fad_size> SFadType;

// VIEW_FAD_TESTS_D( D ) instantiates every test for device D; DFadType is
// included only when SACADO_TEST_DFAD is set.
#if SACADO_TEST_DFAD
#define VIEW_FAD_TESTS_D( D )                   \
  VIEW_FAD_TESTS_FD( SFadType, D )              \
  VIEW_FAD_TESTS_FD( SLFadType, D )             \
  VIEW_FAD_TESTS_FD( DFadType, D )
#else
#define VIEW_FAD_TESTS_D( D )                   \
  VIEW_FAD_TESTS_FD( SFadType, D )              \
  VIEW_FAD_TESTS_FD( SLFadType, D )
#endif

#else

// Without the new design no atomic tests are instantiated.
#define VIEW_FAD_TESTS_D( D ) /* */

#endif
bool checkFads(const FadType1 &x, const FadType2 &x2, Teuchos::FancyOStream &out, double tol=1.0e-15)
const int global_num_rows
Kokkos::LayoutContiguous< Kokkos::LayoutRight > RightContiguous
fadtype generate_fad(const ordinal num_rows, const ordinal num_cols, const ordinal fad_size, const ordinal row, const ordinal col)
const int global_num_cols
#define GLOBAL_FAD_SIZE
TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL(Kokkos_View_Fad, AtomicAddFetch, FadType, Layout, Device)
bool testAtomic(const TagType &tag, Teuchos::FancyOStream &out)
const int global_fad_size
Kokkos::LayoutContiguous< Kokkos::LayoutLeft > LeftContiguous
Sacado::Fad::DFad< double > DFadType
#define T1(r, f)
Definition: Sacado_rad.hpp:603
#define T2(r, f)
Definition: Sacado_rad.hpp:578
Sacado::Fad::SFad< double, fad_dim > SFadType
Sacado::Fad::SLFad< double, fad_dim > SLFadType
Sacado::Fad::DFad< double > FadType
Forward-mode AD class templated on the storage for the derivative array.
static double init()
static auto apply(const T1 &a, const T2 &b) -> decltype(a+b)
ViewType::execution_space execution_space
KOKKOS_INLINE_FUNCTION void operator()(Tag tag, const team_handle &team) const
KOKKOS_INLINE_FUNCTION void operator()(AddTag tag, const size_type i) const
static void apply(Tag tag, const ViewType &v, const ScalarViewType &s)
Kokkos::ThreadLocalScalarType< ViewType >::type local_scalar_type
ViewType::size_type size_type
Kokkos::TeamPolicy< execution_space >::member_type team_handle
const ScalarViewType m_s
AtomicKernel(const ViewType &v, const ScalarViewType &s)
static const size_type stride
static auto apply(const T1 &a, const T2 &b) -> decltype(a/b)
static double init()
static auto apply(const T1 &a, const T2 &b) -> decltype(max(a, b))
static double init()
static auto apply(const T1 &a, const T2 &b) -> decltype(min(a, b))
static double init()
static auto apply(const T1 &a, const T2 &b) -> decltype(a *b)
static double init()
static double init()
static auto apply(const T1 &a, const T2 &b) -> decltype(a-b)
static const bool value
const double tol