Sacado Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Fad_CommTests.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Sacado Package
5// Copyright (2006) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// This library is free software; you can redistribute it and/or modify
11// it under the terms of the GNU Lesser General Public License as
12// published by the Free Software Foundation; either version 2.1 of the
13// License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful, but
16// WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
23// USA
24// Questions? Contact David M. Gay (dmgay@sandia.gov) or Eric T. Phipps
25// (etphipp@sandia.gov).
26//
27// ***********************************************************************
28// @HEADER
29#include "Teuchos_TestingHelpers.hpp"
30#include "Teuchos_CommHelpers.hpp"
31#include "Teuchos_DefaultComm.hpp"
32#include "Teuchos_Array.hpp"
33#include "Teuchos_Comm.hpp"
34
35#include "Sacado_mpl_apply.hpp"
36#include "Sacado_Random.hpp"
37
38using Teuchos::RCP;
39using Teuchos::rcp;
40using Teuchos::ValueTypeSerializer;
41
42template <typename ArrayType>
43bool checkFadArrays(const ArrayType& x,
44 const ArrayType& x2,
45 const std::string& tag,
46 Teuchos::FancyOStream& out) {
47 typedef typename ArrayType::value_type FadType;
48
49 // Check sizes match
50 bool success = (x.size() == x2.size());
51 out << tag << " Fad array size test";
52 if (success)
53 out << " passed";
54 else
55 out << " failed";
56 out << ": \n\tExpected: " << x.size() << ", \n\tGot: " << x2.size()
57 << "." << std::endl;
58
59 // Check Fads match
60 const int sz = x.size();
61 for (int i=0; i<sz; i++) {
62 bool success2 = Sacado::IsEqual<FadType>::eval(x[i], x2[i]);
63 out << tag << " Fad array comparison test " << i;
64 if (success2)
65 out << " passed";
66 else
67 out << " failed";
68 out << ": \n\tExpected: " << x[i] << ", \n\tGot: " << x2[i] << "."
69 << std::endl;
70 success = success && success2;
71 }
72
73 return success;
74}
75
76template<typename Ordinal>
78 const Teuchos::Comm<Ordinal> &comm,
79 Teuchos::FancyOStream &out,
80 const bool result
81 )
82{
83 out << "\nChecking that the above test passed in all processes ...";
84 int thisResult = ( result ? 1 : 0 );
85 int sumResult = -1;
86 Teuchos::reduceAll(comm,Teuchos::REDUCE_SUM,Ordinal(1),&thisResult,
87 &sumResult);
88 const bool passed = sumResult==Teuchos::size(comm);
89 if(passed)
90 out << " passed\n";
91 else
92 out << " (sumResult="<<sumResult<<"!=numProcs="<<Teuchos::size(comm)<<") failed\n";
93 return passed;
94}
95
// Defines Teuchos unit tests (test group FAD##_Comm) that exercise Teuchos
// communication routines on arrays of FadType and of nested Fad-of-Fad
// values.  The test bodies use `Ordinal` and an `rnd` object with a number()
// method without declaring them, so both must be visible where the macro is
// expanded; `out` and `success` are presumably supplied by TEUCHOS_UNIT_TEST.
#define FAD_BASE_COMM_TESTS(FadType, FAD) \
TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_Broadcast ) { \
  /* Broadcast an array of Fads from rank 0 and compare it with the */ \
  /* locally generated array, without and with an explicit serializer. */ \
  const int numEntries = 7; \
  const int numDerivs = 5; \
  const std::string label(#FAD); \
  Teuchos::RCP<const Teuchos::Comm<Ordinal> > comm = \
    Teuchos::DefaultComm<Ordinal>::getComm(); \
  ValueTypeSerializer<int,FadType> fts( \
    rcp(new ValueTypeSerializer<int,double>), numDerivs); \
  \
  Teuchos::Array<FadType> ref(numEntries), recv(numEntries), \
    recvFts(numEntries); \
  for (int row=0; row<numEntries; ++row) { \
    ref[row] = FadType(numDerivs, rnd.number()); \
    for (int d=0; d<numDerivs; ++d) \
      ref[row].fastAccessDx(d) = rnd.number(); \
    recv[row] = FadType(numDerivs, 0.0); \
  } \
  /* Only the root starts out with the data in its buffers. */ \
  if (comm->getRank() == 0) { \
    recv = ref; \
    recvFts = ref; \
  } \
  \
  Teuchos::broadcast(*comm, 0, numEntries, &recv[0]); \
  const bool plainLocal = \
    checkFadArrays(ref, recv, label+" Broadcast", out); \
  const bool plainOk = checkResultOnAllProcs(*comm, out, plainLocal); \
  \
  Teuchos::broadcast(*comm, fts, 0, numEntries, &recvFts[0]); \
  const bool ftsLocal = \
    checkFadArrays(ref, recvFts, label+" Broadcast FTS", out); \
  const bool ftsOk = checkResultOnAllProcs(*comm, out, ftsLocal); \
  \
  success = plainOk && ftsOk; \
} \
132 \
133TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_GatherAll ) { \
134 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
135 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
136 \
137 int n = 7; \
138 int p = 5; \
139 int size = comm->getSize(); \
140 int rank = comm->getRank(); \
141 int N = n*size; \
142 ValueTypeSerializer<int,FadType> fts( \
143 rcp(new ValueTypeSerializer<int,double>), p); \
144 \
145 Teuchos::Array<FadType> x(n), x2(N), x3(N), x4(N); \
146 for (int i=0; i<n; i++) { \
147 x[i] = FadType(p, (rank+1)*(i+1)); \
148 for (int j=0; j<p; j++) \
149 x[i].fastAccessDx(j) = (rank+1)*(i+1)*(j+1); \
150 } \
151 for (int i=0; i<N; i++) { \
152 x2[i] = FadType(p, 0.0); \
153 } \
154 for (int j=0; j<size; j++) { \
155 for (int i=0; i<n; i++) { \
156 x3[n*j+i] = FadType(p, (j+1)*(i+1)); \
157 for (int k=0; k<p; k++) \
158 x3[n*j+i].fastAccessDx(k) = (j+1)*(i+1)*(k+1); \
159 } \
160 } \
161 \
162 Teuchos::gatherAll(*comm, n, &x[0], N, &x2[0]); \
163 bool success1 = checkFadArrays( \
164 x3, x2, std::string(#FAD)+" Gather All", out); \
165 success1 = checkResultOnAllProcs(*comm, out, success1); \
166 \
167 Teuchos::gatherAll(*comm, fts, n, &x[0], N, &x4[0]); \
168 bool success2 = checkFadArrays( \
169 x3, x4, std::string(#FAD)+" Gather All FTS", out); \
170 success2 = checkResultOnAllProcs(*comm, out, success2); \
171 \
172 success = success1 && success2; \
173} \
174 \
175TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_SumAll ) { \
176 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
177 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
178 \
179 int n = 7; \
180 int p = 5; \
181 int num_proc = comm->getSize(); \
182 ValueTypeSerializer<int,FadType> fts( \
183 rcp(new ValueTypeSerializer<int,double>), p); \
184 \
185 Teuchos::Array<FadType> x(n), sums(n), sums2(n), sums3(n); \
186 for (int i=0; i<n; i++) { \
187 x[i] = FadType(p, 1.0*(i+1)); \
188 for (int j=0; j<p; j++) \
189 x[i].fastAccessDx(j) = 2.0*(i+1); \
190 } \
191 for (int i=0; i<n; i++) { \
192 sums[i] = FadType(p, 1.0*(i+1)*num_proc); \
193 for (int j=0; j<p; j++) \
194 sums[i].fastAccessDx(j) = 2.0*(i+1)*num_proc; \
195 } \
196 for (int i=0; i<n; i++) { \
197 sums2[i] = FadType(p, 0.0); \
198 } \
199 \
200 Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, n, &x[0], &sums2[0]); \
201 bool success1 = checkFadArrays( \
202 sums, sums2, std::string(#FAD)+" Sum All", out); \
203 success1 = checkResultOnAllProcs(*comm, out, success1); \
204 \
205 Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_SUM, n, &x[0], &sums3[0]); \
206 bool success2 = checkFadArrays( \
207 sums, sums3, std::string(#FAD)+" Sum All FTS", out); \
208 success2 = checkResultOnAllProcs(*comm, out, success2); \
209 \
210 success = success1 && success2; \
211} \
212 \
213TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_MaxAll ) { \
214 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
215 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
216 \
217 int n = 7; \
218 int p = 5; \
219 int rank = comm->getRank(); \
220 int num_proc = comm->getSize(); \
221 ValueTypeSerializer<int,FadType> fts( \
222 rcp(new ValueTypeSerializer<int,double>), p); \
223 \
224 Teuchos::Array<FadType> x(n), maxs(n), maxs2(n), maxs3(n); \
225 for (int i=0; i<n; i++) { \
226 x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
227 for (int j=0; j<p; j++) \
228 x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
229 } \
230 for (int i=0; i<n; i++) { \
231 maxs[i] = FadType(p, 1.0*(i+1)*num_proc); \
232 for (int j=0; j<p; j++) \
233 maxs[i].fastAccessDx(j) = 2.0*(i+1)*num_proc; \
234 } \
235 for (int i=0; i<n; i++) { \
236 maxs2[i] = FadType(p, 0.0); \
237 } \
238 \
239 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, n, &x[0], &maxs2[0]); \
240 bool success1 = checkFadArrays( \
241 maxs, maxs2, std::string(#FAD)+" Max All", out); \
242 success1 = checkResultOnAllProcs(*comm, out, success1); \
243 \
244 Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MAX, n, &x[0], &maxs3[0]); \
245 bool success2 = checkFadArrays( \
246 maxs, maxs3, std::string(#FAD)+" Max All FTS", out); \
247 success2 = checkResultOnAllProcs(*comm, out, success2); \
248 \
249 success = success1 && success2; \
250} \
251 \
252TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_MinAll ) { \
253 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
254 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
255 \
256 int n = 7; \
257 int p = 5; \
258 int rank = comm->getRank(); \
259 ValueTypeSerializer<int,FadType> fts( \
260 rcp(new ValueTypeSerializer<int,double>), p); \
261 \
262 Teuchos::Array<FadType> x(n), mins(n), mins2(n), mins3(n); \
263 for (int i=0; i<n; i++) { \
264 x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
265 for (int j=0; j<p; j++) \
266 x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
267 } \
268 for (int i=0; i<n; i++) { \
269 mins[i] = FadType(p, 1.0*(i+1)); \
270 for (int j=0; j<p; j++) \
271 mins[i].fastAccessDx(j) = 2.0*(i+1); \
272 } \
273 for (int i=0; i<n; i++) { \
274 mins2[i] = FadType(p, 0.0); \
275 } \
276 \
277 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, n, &x[0], &mins2[0]); \
278 bool success1 = checkFadArrays( \
279 mins, mins2, std::string(#FAD)+" Min All", out); \
280 success1 = checkResultOnAllProcs(*comm, out, success1); \
281 \
282 Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MIN, n, &x[0], &mins3[0]); \
283 bool success2 = checkFadArrays( \
284 mins, mins3, std::string(#FAD)+" Min All FTS", out); \
285 success2 = checkResultOnAllProcs(*comm, out, success2); \
286 \
287 success = success1 && success2; \
288} \
289 \
290TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_ScanSum ) { \
291 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
292 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
293 \
294 int n = 7; \
295 int p = 5; \
296 int rank = comm->getRank(); \
297 ValueTypeSerializer<int,FadType> fts( \
298 rcp(new ValueTypeSerializer<int,double>), p); \
299 \
300 Teuchos::Array<FadType> x(n), sums(n), sums2(n), sums3(n); \
301 for (int i=0; i<n; i++) { \
302 x[i] = FadType(p, 1.0*(i+1)); \
303 for (int j=0; j<p; j++) \
304 x[i].fastAccessDx(j) = 2.0*(i+1); \
305 } \
306 for (int i=0; i<n; i++) { \
307 sums[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
308 for (int j=0; j<p; j++) \
309 sums[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
310 } \
311 for (int i=0; i<n; i++) { \
312 sums2[i] = FadType(p, 0.0); \
313 } \
314 \
315 Teuchos::scan(*comm, Teuchos::REDUCE_SUM, n, &x[0], &sums2[0]); \
316 bool success1 = checkFadArrays( \
317 sums, sums2, std::string(#FAD)+" Scan Sum", out); \
318 success1 = checkResultOnAllProcs(*comm, out, success1); \
319 \
320 Teuchos::scan(*comm, fts, Teuchos::REDUCE_SUM, n, &x[0], &sums3[0]); \
321 bool success2 = checkFadArrays( \
322 sums, sums3, std::string(#FAD)+" Scan Sum FTS", out); \
323 success2 = checkResultOnAllProcs(*comm, out, success2); \
324 \
325 success = success1 && success2; \
326} \
327 \
328TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_ScanMax ) { \
329 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
330 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
331 \
332 int n = 7; \
333 int p = 5; \
334 int rank = comm->getRank(); \
335 ValueTypeSerializer<int,FadType> fts( \
336 rcp(new ValueTypeSerializer<int,double>), p); \
337 \
338 Teuchos::Array<FadType> x(n), maxs(n), maxs2(n), maxs3(n); \
339 for (int i=0; i<n; i++) { \
340 x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
341 for (int j=0; j<p; j++) \
342 x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
343 } \
344 for (int i=0; i<n; i++) { \
345 maxs[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
346 for (int j=0; j<p; j++) \
347 maxs[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
348 } \
349 for (int i=0; i<n; i++) { \
350 maxs2[i] = FadType(p, 0.0); \
351 } \
352 \
353 Teuchos::scan(*comm, Teuchos::REDUCE_MAX, n, &x[0], &maxs2[0]); \
354 bool success1 = checkFadArrays( \
355 maxs, maxs2, std::string(#FAD)+" Scan Max", out); \
356 success1 = checkResultOnAllProcs(*comm, out, success1); \
357 \
358 Teuchos::scan(*comm, fts, Teuchos::REDUCE_MAX, n, &x[0], &maxs3[0]); \
359 bool success2 = checkFadArrays( \
360 maxs, maxs3, std::string(#FAD)+" Scan Max FTS", out); \
361 success2 = checkResultOnAllProcs(*comm, out, success2); \
362 \
363 success = success1 && success2; \
364} \
365 \
366TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_ScanMin ) { \
367 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
368 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
369 \
370 int n = 7; \
371 int p = 5; \
372 int rank = comm->getRank(); \
373 ValueTypeSerializer<int,FadType> fts( \
374 rcp(new ValueTypeSerializer<int,double>), p); \
375 \
376 Teuchos::Array<FadType> x(n), mins(n), mins2(n), mins3(n); \
377 for (int i=0; i<n; i++) { \
378 x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
379 for (int j=0; j<p; j++) \
380 x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
381 } \
382 for (int i=0; i<n; i++) { \
383 mins[i] = FadType(p, 1.0*(i+1)); \
384 for (int j=0; j<p; j++) \
385 mins[i].fastAccessDx(j) = 2.0*(i+1); \
386 } \
387 for (int i=0; i<n; i++) { \
388 mins2[i] = FadType(p, 0.0); \
389 } \
390 \
391 Teuchos::scan(*comm, Teuchos::REDUCE_MIN, n, &x[0], &mins2[0]); \
392 bool success1 = checkFadArrays( \
393 mins, mins2, std::string(#FAD)+" Scan Min", out); \
394 success1 = checkResultOnAllProcs(*comm, out, success1); \
395 \
396 Teuchos::scan(*comm, fts, Teuchos::REDUCE_MIN, n, &x[0], &mins3[0]); \
397 bool success2 = checkFadArrays( \
398 mins, mins3, std::string(#FAD)+" Scan Min FTS", out); \
399 success2 = checkResultOnAllProcs(*comm, out, success2); \
400 \
401 success = success1 && success2; \
402} \
403 \
404TEUCHOS_UNIT_TEST( FAD##_Comm, Fad_SendReceive ) { \
405 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
406 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
407 \
408 int num_proc = comm->getSize(); \
409 if (num_proc > 1) { \
410 int rank = comm->getRank(); \
411 int n = 7; \
412 int p = 5; \
413 ValueTypeSerializer<int,FadType> fts( \
414 rcp(new ValueTypeSerializer<int,double>), p); \
415 \
416 Teuchos::Array<FadType> x(n), x2(n), x3(n); \
417 for (int i=0; i<n; i++) { \
418 x[i] = FadType(p, 1.0*(i+1)); \
419 for (int j=0; j<p; j++) \
420 x[i].fastAccessDx(j) = 2.0*(i+1)*(j+1); \
421 } \
422 for (int i=0; i<n; i++) { \
423 x2[i] = FadType(p, 0.0); \
424 } \
425 if (rank != 1) { \
426 x2 = x; \
427 x3 = x; \
428 } \
429 \
430 if (rank == 0) Teuchos::send(*comm, n, &x[0], 1); \
431 if (rank == 1) Teuchos::receive(*comm, 0, n, &x2[0]); \
432 bool success1 = checkFadArrays( \
433 x, x2, std::string(#FAD)+" Send/Receive", out); \
434 success1 = checkResultOnAllProcs(*comm, out, success1); \
435 \
436 if (rank == 0) Teuchos::send(*comm, fts, n, &x[0], 1); \
437 if (rank == 1) Teuchos::receive(*comm, fts, 0, n, &x3[0]); \
438 bool success2 = checkFadArrays( \
439 x, x3, std::string(#FAD)+" Send/Receive FTS", out); \
440 success2 = checkResultOnAllProcs(*comm, out, success2); \
441 \
442 success = success1 && success2; \
443 } \
444 else \
445 success = true; \
446} \
447 \
448TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_Broadcast ) { \
449 typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
450 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
451 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
452 \
453 int n = 7; \
454 int p1 = 5; \
455 int p2 = 5; \
456 RCP< ValueTypeSerializer<int,FadType> > fts = \
457 rcp(new ValueTypeSerializer<int,FadType>( \
458 rcp(new ValueTypeSerializer<int,double>), p1)); \
459 ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
460 \
461 Teuchos::Array<FadFadType> x(n), x2(n), x3(n); \
462 for (int i=0; i<n; i++) { \
463 FadType f(p1, rnd.number()); \
464 for (int k=0; k<p1; k++) \
465 f.fastAccessDx(k) = rnd.number(); \
466 x[i] = FadFadType(p2, f); \
467 for (int j=0; j<p2; j++) { \
468 FadType g(p1, rnd.number()); \
469 for (int k=0; k<p1; k++) \
470 g.fastAccessDx(k) = rnd.number(); \
471 x[i].fastAccessDx(j) = g; \
472 } \
473 } \
474 for (int i=0; i<n; i++) { \
475 x2[i] = FadFadType(p2, FadType(p1, 0.0)); \
476 for (int j=0; j<p2; j++) \
477 x2[i].fastAccessDx(j) = FadType(p1, 0.0); \
478 } \
479 if (comm->getRank() == 0) { \
480 x2 = x; \
481 x3 = x; \
482 } \
483 \
484 Teuchos::broadcast(*comm, 0, n, &x2[0]); \
485 bool success1 = checkFadArrays( \
486 x, x2, std::string(#FAD)+"<"+#FAD+"> Broadcast", out); \
487 success1 = checkResultOnAllProcs(*comm, out, success1); \
488 \
489 Teuchos::broadcast(*comm, ffts, 0, n, &x3[0]); \
490 bool success2 = checkFadArrays( \
491 x, x3, std::string(#FAD)+"<"+#FAD+"> Broadcast FTS", out); \
492 success2 = checkResultOnAllProcs(*comm, out, success2); \
493 \
494 success = success1 && success2; \
495} \
496 \
497TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_GatherAll ) { \
498 typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
499 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
500 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
501 \
502 int n = 7; \
503 int p1 = 5; \
504 int p2 = 5; \
505 int size = comm->getSize(); \
506 int rank = comm->getRank(); \
507 int N = n*size; \
508 RCP< ValueTypeSerializer<int,FadType> > fts = \
509 rcp(new ValueTypeSerializer<int,FadType>( \
510 rcp(new ValueTypeSerializer<int,double>), p1)); \
511 ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
512 \
513 Teuchos::Array<FadFadType> x(n), x2(N), x3(N), x4(N); \
514 for (int i=0; i<n; i++) { \
515 FadType f(p1, (rank+1)*(i+1)); \
516 for (int k=0; k<p1; k++) \
517 f.fastAccessDx(k) = (rank+1)*(i+1)*(k+1); \
518 x[i] = FadFadType(p2, f); \
519 for (int j=0; j<p2; j++) { \
520 x[i].fastAccessDx(j) = f; \
521 } \
522 } \
523 for (int i=0; i<N; i++) { \
524 x2[i] = FadFadType(p2, FadType(p1, 0.0)); \
525 for (int j=0; j<p2; j++) \
526 x2[i].fastAccessDx(j) = FadType(p1, 0.0); \
527 } \
528 for (int j=0; j<size; j++) { \
529 for (int i=0; i<n; i++) { \
530 FadType f(p1, (j+1)*(i+1)); \
531 for (int k=0; k<p1; k++) \
532 f.fastAccessDx(k) = (j+1)*(i+1)*(k+1); \
533 x3[n*j+i] = FadFadType(p2, f); \
534 for (int k=0; k<p2; k++) \
535 x3[n*j+i].fastAccessDx(k) = f; \
536 } \
537 } \
538 \
539 Teuchos::gatherAll(*comm, n, &x[0], N, &x2[0]); \
540 bool success1 = checkFadArrays( \
541 x3, x2, std::string(#FAD)+"<"+#FAD+"> Gather All", out); \
542 success1 = checkResultOnAllProcs(*comm, out, success1); \
543 \
544 Teuchos::gatherAll(*comm, ffts, n, &x[0], N, &x4[0]); \
545 bool success2 = checkFadArrays( \
546 x3, x4, std::string(#FAD)+"<"+#FAD+"> Gather All FTS", out); \
547 success2 = checkResultOnAllProcs(*comm, out, success2); \
548 \
549 success = success1 && success2; \
550} \
551 \
552TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_SumAll ) { \
553 typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
554 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
555 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
556 \
557 int n = 7; \
558 int p1 = 5; \
559 int p2 = 5; \
560 int num_proc = comm->getSize(); \
561 RCP< ValueTypeSerializer<int,FadType> > fts = \
562 rcp(new ValueTypeSerializer<int,FadType>( \
563 rcp(new ValueTypeSerializer<int,double>), p1)); \
564 ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
565 \
566 Teuchos::Array<FadFadType> x(n), sums(n), sums2(n), sums3(n); \
567 for (int i=0; i<n; i++) { \
568 FadType f(p1, 1.0*(i+1)); \
569 for (int k=0; k<p1; k++) \
570 f.fastAccessDx(k) = 2.0*(i+1); \
571 x[i] = FadFadType(p2, f); \
572 for (int j=0; j<p2; j++) { \
573 x[i].fastAccessDx(j) = f; \
574 } \
575 } \
576 for (int i=0; i<n; i++) { \
577 FadType f(p1, 1.0*(i+1)*num_proc); \
578 for (int k=0; k<p1; k++) \
579 f.fastAccessDx(k) = 2.0*(i+1)*num_proc; \
580 sums[i] = FadFadType(p2, f); \
581 for (int j=0; j<p2; j++) \
582 sums[i].fastAccessDx(j) = f; \
583 } \
584 for (int i=0; i<n; i++) { \
585 sums2[i] = FadFadType(p2, FadType(p1, 0.0)); \
586 for (int j=0; j<p2; j++) \
587 sums2[i].fastAccessDx(j) = FadType(p1, 0.0); \
588 } \
589 \
590 Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, n, &x[0], &sums2[0]); \
591 bool success1 = checkFadArrays( \
592 sums, sums2, std::string(#FAD)+"<"+#FAD+"> Sum All", out); \
593 success1 = checkResultOnAllProcs(*comm, out, success1); \
594 \
595 Teuchos::reduceAll(*comm, ffts, Teuchos::REDUCE_SUM, n, &x[0], &sums3[0]); \
596 bool success2 = checkFadArrays( \
597 sums, sums3, std::string(#FAD)+"<"+#FAD+"> Sum All", out); \
598 success2 = checkResultOnAllProcs(*comm, out, success2); \
599 \
600 success = success1 && success2; \
601} \
602 \
603TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_MaxAll ) { \
604 typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
605 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
606 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
607 \
608 int n = 7; \
609 int p1 = 5; \
610 int p2 = 5; \
611 int rank = comm->getRank(); \
612 int num_proc = comm->getSize(); \
613 RCP< ValueTypeSerializer<int,FadType> > fts = \
614 rcp(new ValueTypeSerializer<int,FadType>( \
615 rcp(new ValueTypeSerializer<int,double>), p1)); \
616 ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
617 \
618 Teuchos::Array<FadFadType> x(n), maxs(n), maxs2(n), maxs3(n); \
619 for (int i=0; i<n; i++) { \
620 FadType f(p1, 1.0*(i+1)*(rank+1)); \
621 for (int k=0; k<p1; k++) \
622 f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
623 x[i] = FadFadType(p2, f); \
624 for (int j=0; j<p2; j++) { \
625 x[i].fastAccessDx(j) = f; \
626 } \
627 } \
628 for (int i=0; i<n; i++) { \
629 FadType f(p1, 1.0*(i+1)*num_proc); \
630 for (int k=0; k<p1; k++) \
631 f.fastAccessDx(k) = 2.0*(i+1)*num_proc; \
632 maxs[i] = FadFadType(p2, f); \
633 for (int j=0; j<p2; j++) \
634 maxs[i].fastAccessDx(j) = f; \
635 } \
636 for (int i=0; i<n; i++) { \
637 maxs2[i] = FadFadType(p2, FadType(p1, 0.0)); \
638 for (int j=0; j<p2; j++) \
639 maxs2[i].fastAccessDx(j) = FadType(p1, 0.0); \
640 } \
641 \
642 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, n, &x[0], &maxs2[0]); \
643 bool success1 = checkFadArrays( \
644 maxs, maxs2, std::string(#FAD)+"<"+#FAD+"> Max All", out); \
645 success1 = checkResultOnAllProcs(*comm, out, success1); \
646 \
647 Teuchos::reduceAll(*comm, ffts, Teuchos::REDUCE_MAX, n, &x[0], &maxs3[0]); \
648 bool success2 = checkFadArrays( \
649 maxs, maxs3, std::string(#FAD)+"<"+#FAD+"> Max All FTS", out); \
650 success2 = checkResultOnAllProcs(*comm, out, success2); \
651 \
652 success = success1 && success2; \
653} \
654 \
655TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_MinAll ) { \
656 typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
657 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
658 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
659 \
660 int n = 7; \
661 int p1 = 5; \
662 int p2 = 5; \
663 int rank = comm->getRank(); \
664 RCP< ValueTypeSerializer<int,FadType> > fts = \
665 rcp(new ValueTypeSerializer<int,FadType>( \
666 rcp(new ValueTypeSerializer<int,double>), p1)); \
667 ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
668 \
669 Teuchos::Array<FadFadType> x(n), mins(n), mins2(n), mins3(n); \
670 for (int i=0; i<n; i++) { \
671 FadType f(p1, 1.0*(i+1)*(rank+1)); \
672 for (int k=0; k<p1; k++) \
673 f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
674 x[i] = FadFadType(p2, f); \
675 for (int j=0; j<p2; j++) { \
676 x[i].fastAccessDx(j) = f; \
677 } \
678 } \
679 for (int i=0; i<n; i++) { \
680 FadType f(p1, 1.0*(i+1)); \
681 for (int k=0; k<p1; k++) \
682 f.fastAccessDx(k) = 2.0*(i+1); \
683 mins[i] = FadFadType(p2, f); \
684 for (int j=0; j<p2; j++) \
685 mins[i].fastAccessDx(j) = f; \
686 } \
687 for (int i=0; i<n; i++) { \
688 mins2[i] = FadFadType(p2, FadType(p1, 0.0)); \
689 for (int j=0; j<p2; j++) \
690 mins2[i].fastAccessDx(j) = FadType(p1, 0.0); \
691 } \
692 \
693 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, n, &x[0], &mins2[0]); \
694 bool success1 = checkFadArrays( \
695 mins, mins2, std::string(#FAD)+"<"+#FAD+"> Min All", out); \
696 success1 = checkResultOnAllProcs(*comm, out, success1); \
697 \
698 Teuchos::reduceAll(*comm, ffts, Teuchos::REDUCE_MIN, n, &x[0], &mins3[0]); \
699 bool success2 = checkFadArrays( \
700 mins, mins3, std::string(#FAD)+"<"+#FAD+"> Min All FTS", out); \
701 success2 = checkResultOnAllProcs(*comm, out, success2); \
702 \
703 success = success1 && success2; \
704} \
705 \
706TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_ScanSum ) { \
707 typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
708 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
709 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
710 \
711 int n = 7; \
712 int p1 = 5; \
713 int p2 = 5; \
714 int rank = comm->getRank(); \
715 RCP< ValueTypeSerializer<int,FadType> > fts = \
716 rcp(new ValueTypeSerializer<int,FadType>( \
717 rcp(new ValueTypeSerializer<int,double>), p1)); \
718 ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
719 \
720 Teuchos::Array<FadFadType> x(n), sums(n), sums2(n), sums3(n); \
721 for (int i=0; i<n; i++) { \
722 FadType f(p1, 1.0*(i+1)); \
723 for (int k=0; k<p1; k++) \
724 f.fastAccessDx(k) = 2.0*(i+1); \
725 x[i] = FadFadType(p2, f); \
726 for (int j=0; j<p2; j++) { \
727 x[i].fastAccessDx(j) = f; \
728 } \
729 } \
730 for (int i=0; i<n; i++) { \
731 FadType f(p1, 1.0*(i+1)*(rank+1)); \
732 for (int k=0; k<p1; k++) \
733 f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
734 sums[i] = FadFadType(p2, f); \
735 for (int j=0; j<p2; j++) \
736 sums[i].fastAccessDx(j) = f; \
737 } \
738 for (int i=0; i<n; i++) { \
739 sums2[i] = FadFadType(p2, FadType(p1, 0.0)); \
740 for (int j=0; j<p2; j++) \
741 sums2[i].fastAccessDx(j) = FadType(p1, 0.0); \
742 } \
743 \
744 Teuchos::scan(*comm, Teuchos::REDUCE_SUM, n, &x[0], &sums2[0]); \
745 bool success1 = checkFadArrays( \
746 sums, sums2, std::string(#FAD)+"<"+#FAD+"> Scan Sum", out); \
747 success1 = checkResultOnAllProcs(*comm, out, success1); \
748 \
749 Teuchos::scan(*comm, ffts, Teuchos::REDUCE_SUM, n, &x[0], &sums3[0]); \
750 bool success2 = checkFadArrays( \
751 sums, sums3, std::string(#FAD)+"<"+#FAD+"> Scan Sum FTS", out); \
752 success2 = checkResultOnAllProcs(*comm, out, success2); \
753 \
754 success = success1 && success2; \
755} \
756 \
757TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_ScanMax ) { \
758 typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
759 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
760 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
761 \
762 int n = 7; \
763 int p1 = 5; \
764 int p2 = 5; \
765 int rank = comm->getRank(); \
766 RCP< ValueTypeSerializer<int,FadType> > fts = \
767 rcp(new ValueTypeSerializer<int,FadType>( \
768 rcp(new ValueTypeSerializer<int,double>), p1)); \
769 ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
770 \
771 Teuchos::Array<FadFadType> x(n), maxs(n), maxs2(n), maxs3(n); \
772 for (int i=0; i<n; i++) { \
773 FadType f(p1, 1.0*(i+1)*(rank+1)); \
774 for (int k=0; k<p1; k++) \
775 f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
776 x[i] = FadFadType(p2, f); \
777 for (int j=0; j<p2; j++) { \
778 x[i].fastAccessDx(j) = f; \
779 } \
780 } \
781 for (int i=0; i<n; i++) { \
782 FadType f(p1, 1.0*(i+1)*(rank+1)); \
783 for (int k=0; k<p1; k++) \
784 f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
785 maxs[i] = FadFadType(p2, f); \
786 for (int j=0; j<p2; j++) \
787 maxs[i].fastAccessDx(j) = f; \
788 } \
789 for (int i=0; i<n; i++) { \
790 maxs2[i] = FadFadType(p2, FadType(p1, 0.0)); \
791 for (int j=0; j<p2; j++) \
792 maxs2[i].fastAccessDx(j) = FadType(p1, 0.0); \
793 } \
794 \
795 Teuchos::scan(*comm, Teuchos::REDUCE_MAX, n, &x[0], &maxs2[0]); \
796 bool success1 = checkFadArrays( \
797 maxs, maxs2, std::string(#FAD)+"<"+#FAD+"> Scan Max", out); \
798 success1 = checkResultOnAllProcs(*comm, out, success1); \
799 \
800 Teuchos::scan(*comm, ffts, Teuchos::REDUCE_MAX, n, &x[0], &maxs3[0]); \
801 bool success2 = checkFadArrays( \
802 maxs, maxs3, std::string(#FAD)+"<"+#FAD+"> Scan Max FTS", out); \
803 success2 = checkResultOnAllProcs(*comm, out, success2); \
804 \
805 success = success1 && success2; \
806} \
807 \
808TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_ScanMin ) { \
809 typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
810 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
811 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
812 \
813 int n = 7; \
814 int p1 = 5; \
815 int p2 = 5; \
816 int rank = comm->getRank(); \
817 RCP< ValueTypeSerializer<int,FadType> > fts = \
818 rcp(new ValueTypeSerializer<int,FadType>( \
819 rcp(new ValueTypeSerializer<int,double>), p1)); \
820 ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
821 \
822 Teuchos::Array<FadFadType> x(n), mins(n), mins2(n), mins3(n); \
823 for (int i=0; i<n; i++) { \
824 FadType f(p1, 1.0*(i+1)*(rank+1)); \
825 for (int k=0; k<p1; k++) \
826 f.fastAccessDx(k) = 2.0*(i+1)*(rank+1); \
827 x[i] = FadFadType(p2, f); \
828 for (int j=0; j<p2; j++) { \
829 x[i].fastAccessDx(j) = f; \
830 } \
831 } \
832 for (int i=0; i<n; i++) { \
833 FadType f(p1, 1.0*(i+1)); \
834 for (int k=0; k<p1; k++) \
835 f.fastAccessDx(k) = 2.0*(i+1); \
836 mins[i] = FadFadType(p2, f); \
837 for (int j=0; j<p2; j++) \
838 mins[i].fastAccessDx(j) = f; \
839 } \
840 for (int i=0; i<n; i++) { \
841 mins2[i] = FadFadType(p2, FadType(p1, 0.0)); \
842 for (int j=0; j<p2; j++) \
843 mins2[i].fastAccessDx(j) = FadType(p1, 0.0); \
844 } \
845 \
846 Teuchos::scan(*comm, Teuchos::REDUCE_MIN, n, &x[0], &mins2[0]); \
847 bool success1 = checkFadArrays( \
848 mins, mins2, std::string(#FAD)+"<"+#FAD+"> Scan Min", out); \
849 success1 = checkResultOnAllProcs(*comm, out, success1); \
850 \
851 Teuchos::scan(*comm, ffts, Teuchos::REDUCE_MIN, n, &x[0], &mins3[0]); \
852 bool success2 = checkFadArrays( \
853 mins, mins3, std::string(#FAD)+"<"+#FAD+"> Scan Min FTS", out); \
854 success2 = checkResultOnAllProcs(*comm, out, success2); \
855 \
856 success = success1 && success2; \
857} \
858 \
859TEUCHOS_UNIT_TEST( FAD##_Comm, FadFad_SendReceive ) { \
860 typedef Sacado::mpl::apply<FadType,FadType>::type FadFadType; \
861 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
862 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
863 \
864 int num_proc = comm->getSize(); \
865 if (num_proc > 1) { \
866 int rank = comm->getRank(); \
867 int n = 7; \
868 int p1 = 5; \
869 int p2 = 5; \
870 RCP< ValueTypeSerializer<int,FadType> > fts = \
871 rcp(new ValueTypeSerializer<int,FadType>( \
872 rcp(new ValueTypeSerializer<int,double>), p1)); \
873 ValueTypeSerializer<int,FadFadType> ffts(fts, p2); \
874 \
875 Teuchos::Array<FadFadType> x(n), x2(n), x3(n); \
876 for (int i=0; i<n; i++) { \
877 FadType f(p1, 1.0*(i+1)); \
878 for (int k=0; k<p1; k++) \
879 f.fastAccessDx(k) = 2.0*(i+1)*(k+1); \
880 x[i] = FadFadType(p2, f); \
881 for (int j=0; j<p2; j++) \
882 x[i].fastAccessDx(j) = f; \
883 } \
884 for (int i=0; i<n; i++) { \
885 x2[i] = FadFadType(p2, FadType(p1, 0.0)); \
886 for (int j=0; j<p2; j++) \
887 x2[i].fastAccessDx(j) = FadType(p1, 0.0); \
888 } \
889 if (rank != 1) { \
890 x2 = x; \
891 x3 = x; \
892 } \
893 \
894 if (rank == 0) Teuchos::send(*comm, n, &x[0], 1); \
895 if (rank == 1) Teuchos::receive(*comm, 0, n, &x2[0]); \
896 bool success1 = checkFadArrays( \
897 x, x2, std::string(#FAD)+"<"+#FAD+"> Send/Receive", out); \
898 success1 = checkResultOnAllProcs(*comm, out, success1); \
899 \
900 if (rank == 0) Teuchos::send(*comm, ffts, n, &x[0], 1); \
901 if (rank == 1) Teuchos::receive(*comm, ffts, 0, n, &x3[0]); \
902 bool success2 = checkFadArrays( \
903 x, x3, std::string(#FAD)+"<"+#FAD+"> Send/Receive FTS", out); \
904 success2 = checkResultOnAllProcs(*comm, out, success2); \
905 \
906 success = success1 && success2; \
907 } \
908 else \
909 success = true; \
910}
911
912#if defined(HAVE_SACADO_KOKKOSCORE) && defined(HAVE_SACADO_TEUCHOSKOKKOSCOMM)
913
914#include "Kokkos_Core.hpp"
915
// FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Device)
//
// Expands to four Teuchos unit tests in the group FAD##_Comm_Kokkos_##Device
// (Fad_Broadcast, Fad_SumAll, Fad_MaxAll, Fad_MinAll) that run Teuchos
// collectives on Kokkos::View<FadType*,Device> data.
//   FadType - Fad scalar type stored in the views.
//   FAD     - identifier used in the test-group name and, stringified, in the
//             tag string handed to checkFadArrays.
//   Device  - Kokkos device type; it is token-pasted into the test-group name,
//             so it has to be a single unqualified identifier.
// Each test performs its collective twice -- once without and once with the
// ValueTypeSerializer `fts` -- and compares host mirrors against the expected
// values through checkFadArrays, then combines the verdict across ranks with
// checkResultOnAllProcs.
// NOTE(review): `rnd` (used by Fad_Broadcast) is not declared inside this
// macro; presumably a file-scope random-number generator -- confirm.
// NOTE(review): the extra p+1 view extent is presumably the Fad storage
// dimension (p derivative components plus the value) -- confirm.
916#define FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Device) \
917TEUCHOS_UNIT_TEST( FAD##_Comm_Kokkos_##Device, Fad_Broadcast ) { \
 /* Broadcast n Fad values from rank 0 and check that the receive buffers */ \
 /* match the source data h_x. */ \
918 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
919 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
920 \
921 \
922 int n = 7; \
923 int p = 5; \
924 ValueTypeSerializer<int,FadType> fts( \
925 rcp(new ValueTypeSerializer<int,double>), p); \
926 \
927 typedef Kokkos::View<FadType*,Device> ViewType; \
928 typedef ViewType::HostMirror HostViewType; \
929 ViewType x("x",n,p+1), x2("x2",n,p+1), x3("x3",n,p+1); \
930 HostViewType h_x = Kokkos::create_mirror_view(x); \
931 HostViewType h_x2 = Kokkos::create_mirror_view(x2); \
932 HostViewType h_x3 = Kokkos::create_mirror_view(x3); \
 /* Source data: value and all p derivative components come from rnd. */ \
933 for (int i=0; i<n; i++) { \
934 h_x[i] = FadType(p, rnd.number()); \
935 for (int j=0; j<p; j++) \
936 h_x[i].fastAccessDx(j) = rnd.number(); \
937 } \
938 for (int i=0; i<n; i++) { \
939 h_x2[i] = FadType(p, 0.0); \
940 } \
941 Kokkos::deep_copy(x, h_x); \
942 Kokkos::deep_copy(x2, h_x2); \
 /* On rank 0 the receive views are assigned from the source views. */ \
943 if (comm->getRank() == 0) { \
944 x2 = x; \
945 x3 = x; \
946 h_x2 = h_x; \
947 h_x3 = h_x; \
948 } \
949 \
950 /* The Teuchos MPI wrappers know nothing of CUDA nor CUDA-aware MPI*/ \
951 /* so only do the communication on the host. This probably makes */ \
952 /* the deep copy unnecessary. */ \
953 const bool accessible = \
954 Kokkos::Impl::MemorySpaceAccess< \
955 Kokkos::HostSpace, \
956 typename Device::memory_space >::accessible; \
 /* If accessible, communicate on the device view and copy the result to */ \
 /* its host mirror; otherwise communicate on the host mirror directly. */ \
957 if (accessible) { \
958 Teuchos::broadcast(*comm, 0, n, x2); \
959 Kokkos::deep_copy(h_x2, x2); \
960 } \
961 else \
962 Teuchos::broadcast(*comm, 0, n, h_x2); \
963 bool success1 = checkFadArrays( \
964 h_x, h_x2, std::string(#FAD)+" Broadcast", out); \
965 success1 = checkResultOnAllProcs(*comm, out, success1); \
966 \
 /* Same broadcast again, this time through the serializer fts. */ \
967 if (accessible) { \
968 Teuchos::broadcast(*comm, fts, 0, n, x3); \
969 Kokkos::deep_copy(h_x3, x3); \
970 } \
971 else \
972 Teuchos::broadcast(*comm, fts, 0, n, h_x3); \
973 bool success2 = checkFadArrays( \
974 h_x, h_x3, std::string(#FAD)+" Broadcast FTS", out); \
975 success2 = checkResultOnAllProcs(*comm, out, success2); \
976 \
977 success = success1 && success2; \
978} \
979TEUCHOS_UNIT_TEST( FAD##_Comm_Kokkos_##Device, Fad_SumAll ) { \
 /* Every rank contributes the same x, so the expected REDUCE_SUM result */ \
 /* (h_sums) is x scaled by num_proc. */ \
980 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
981 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
982 \
983 \
984 int n = 7; \
985 int p = 5; \
986 int num_proc = comm->getSize(); \
987 ValueTypeSerializer<int,FadType> fts( \
988 rcp(new ValueTypeSerializer<int,double>), p); \
989 \
990 typedef Kokkos::View<FadType*,Device> ViewType; \
991 typedef ViewType::HostMirror HostViewType; \
992 ViewType x("x",n,p+1), sums("sums",n,p+1), \
993 sums2("sums2",n,p+1), sums3("sums3",n,p+1); \
994 HostViewType h_x = Kokkos::create_mirror_view(x); \
995 HostViewType h_sums = Kokkos::create_mirror_view(sums); \
996 HostViewType h_sums2 = Kokkos::create_mirror_view(sums2); \
997 HostViewType h_sums3 = Kokkos::create_mirror_view(sums3); \
998 for (int i=0; i<n; i++) { \
999 h_x[i] = FadType(p, 1.0*(i+1)); \
1000 for (int j=0; j<p; j++) \
1001 h_x[i].fastAccessDx(j) = 2.0*(i+1); \
1002 } \
1003 for (int i=0; i<n; i++) { \
1004 h_sums[i] = FadType(p, 1.0*(i+1)*num_proc); \
1005 for (int j=0; j<p; j++) \
1006 h_sums[i].fastAccessDx(j) = 2.0*(i+1)*num_proc; \
1007 } \
1008 for (int i=0; i<n; i++) { \
1009 h_sums2[i] = FadType(p, 0.0); \
1010 } \
1011 Kokkos::deep_copy(x, h_x); \
1012 Kokkos::deep_copy(sums, h_sums); \
1013 Kokkos::deep_copy(sums2, h_sums2); \
1014 \
1015 /* The Teuchos MPI wrappers know nothing of CUDA nor CUDA-aware MPI*/ \
1016 /* so only do the communication on the host. This probably makes */ \
1017 /* the deep copy unnecessary. */ \
1018 const bool accessible = \
1019 Kokkos::Impl::MemorySpaceAccess< \
1020 Kokkos::HostSpace, \
1021 typename Device::memory_space >::accessible; \
1022 if (accessible) { \
1023 Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, n, x, sums2); \
1024 Kokkos::deep_copy(h_sums2, sums2); \
1025 } \
1026 else \
1027 Teuchos::reduceAll(*comm, Teuchos::REDUCE_SUM, n, h_x, h_sums2); \
1028 bool success1 = checkFadArrays( \
1029 h_sums, h_sums2, std::string(#FAD)+" Sum All", out); \
1030 success1 = checkResultOnAllProcs(*comm, out, success1); \
1031 \
 /* Same reduction again, this time through the serializer fts. */ \
1032 if (accessible) { \
1033 Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_SUM, n, x, sums3); \
1034 Kokkos::deep_copy(h_sums3, sums3); \
1035 } \
1036 else \
1037 Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_SUM, n, h_x, h_sums3); \
1038 bool success2 = checkFadArrays( \
1039 h_sums, h_sums3, std::string(#FAD)+" Sum All FTS", out); \
1040 success2 = checkResultOnAllProcs(*comm, out, success2); \
1041 success = success1 && success2; \
1042 \
1043} \
1044TEUCHOS_UNIT_TEST( FAD##_Comm_Kokkos_##Device, Fad_MaxAll ) { \
 /* x is scaled by (rank+1), so the expected REDUCE_MAX result (h_maxs) */ \
 /* is the num_proc multiple of the base values. */ \
1045 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
1046 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
1047 \
1048 \
1049 int n = 7; \
1050 int p = 5; \
1051 int rank = comm->getRank(); \
1052 int num_proc = comm->getSize(); \
1053 ValueTypeSerializer<int,FadType> fts( \
1054 rcp(new ValueTypeSerializer<int,double>), p); \
1055 \
1056 typedef Kokkos::View<FadType*,Device> ViewType; \
1057 typedef ViewType::HostMirror HostViewType; \
1058 ViewType x("x",n,p+1), maxs("maxs",n,p+1), \
1059 maxs2("maxs2",n,p+1), maxs3("maxs3",n,p+1); \
1060 HostViewType h_x = Kokkos::create_mirror_view(x); \
1061 HostViewType h_maxs = Kokkos::create_mirror_view(maxs); \
1062 HostViewType h_maxs2 = Kokkos::create_mirror_view(maxs2); \
1063 HostViewType h_maxs3 = Kokkos::create_mirror_view(maxs3); \
1064 for (int i=0; i<n; i++) { \
1065 h_x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
1066 for (int j=0; j<p; j++) \
1067 h_x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
1068 } \
1069 for (int i=0; i<n; i++) { \
1070 h_maxs[i] = FadType(p, 1.0*(i+1)*num_proc); \
1071 for (int j=0; j<p; j++) \
1072 h_maxs[i].fastAccessDx(j) = 2.0*(i+1)*num_proc; \
1073 } \
1074 for (int i=0; i<n; i++) { \
1075 h_maxs2[i] = FadType(p, 0.0); \
1076 } \
1077 Kokkos::deep_copy(x, h_x); \
1078 Kokkos::deep_copy(maxs, h_maxs); \
1079 Kokkos::deep_copy(maxs2, h_maxs2); \
1080 \
1081 /* The Teuchos MPI wrappers know nothing of CUDA nor CUDA-aware MPI*/ \
1082 /* so only do the communication on the host. This probably makes */ \
1083 /* the deep copy unnecessary. */ \
1084 const bool accessible = \
1085 Kokkos::Impl::MemorySpaceAccess< \
1086 Kokkos::HostSpace, \
1087 typename Device::memory_space >::accessible; \
1088 if (accessible) { \
1089 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, n, x, maxs2); \
1090 Kokkos::deep_copy(h_maxs2, maxs2); \
1091 } \
1092 else \
1093 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MAX, n, h_x, h_maxs2); \
1094 bool success1 = checkFadArrays( \
1095 h_maxs, h_maxs2, std::string(#FAD)+" Max All", out); \
1096 success1 = checkResultOnAllProcs(*comm, out, success1); \
1097 \
 /* Same reduction again, this time through the serializer fts. */ \
1098 if (accessible) { \
1099 Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MAX, n, x, maxs3); \
1100 Kokkos::deep_copy(h_maxs3, maxs3); \
1101 } \
1102 else \
1103 Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MAX, n, h_x, h_maxs3); \
1104 bool success2 = checkFadArrays( \
1105 h_maxs, h_maxs3, std::string(#FAD)+" Max All FTS", out); \
1106 success2 = checkResultOnAllProcs(*comm, out, success2); \
1107 success = success1 && success2; \
1108 \
1109} \
1110TEUCHOS_UNIT_TEST( FAD##_Comm_Kokkos_##Device, Fad_MinAll ) { \
 /* x is scaled by (rank+1), so the expected REDUCE_MIN result (h_mins) */ \
 /* equals the unscaled base values, i.e. what rank 0 holds. */ \
1111 Teuchos::RCP<const Teuchos::Comm<Ordinal> > \
1112 comm = Teuchos::DefaultComm<Ordinal>::getComm(); \
1113 \
1114 \
1115 int n = 7; \
1116 int p = 5; \
1117 int rank = comm->getRank(); \
1118 ValueTypeSerializer<int,FadType> fts( \
1119 rcp(new ValueTypeSerializer<int,double>), p); \
1120 \
1121 typedef Kokkos::View<FadType*,Device> ViewType; \
1122 typedef ViewType::HostMirror HostViewType; \
1123 ViewType x("x",n,p+1), mins("mins",n,p+1), \
1124 mins2("mins2",n,p+1), mins3("mins3",n,p+1); \
1125 HostViewType h_x = Kokkos::create_mirror_view(x); \
1126 HostViewType h_mins = Kokkos::create_mirror_view(mins); \
1127 HostViewType h_mins2 = Kokkos::create_mirror_view(mins2); \
1128 HostViewType h_mins3 = Kokkos::create_mirror_view(mins3); \
1129 for (int i=0; i<n; i++) { \
1130 h_x[i] = FadType(p, 1.0*(i+1)*(rank+1)); \
1131 for (int j=0; j<p; j++) \
1132 h_x[i].fastAccessDx(j) = 2.0*(i+1)*(rank+1); \
1133 } \
1134 for (int i=0; i<n; i++) { \
1135 h_mins[i] = FadType(p, 1.0*(i+1)); \
1136 for (int j=0; j<p; j++) \
1137 h_mins[i].fastAccessDx(j) = 2.0*(i+1); \
1138 } \
1139 for (int i=0; i<n; i++) { \
1140 h_mins2[i] = FadType(p, 0.0); \
1141 } \
1142 Kokkos::deep_copy(x, h_x); \
1143 Kokkos::deep_copy(mins, h_mins); \
1144 Kokkos::deep_copy(mins2, h_mins2); \
1145 \
1146 /* The Teuchos MPI wrappers know nothing of CUDA nor CUDA-aware MPI*/ \
1147 /* so only do the communication on the host. This probably makes */ \
1148 /* the deep copy unnecessary. */ \
1149 const bool accessible = \
1150 Kokkos::Impl::MemorySpaceAccess< \
1151 Kokkos::HostSpace, \
1152 typename Device::memory_space >::accessible; \
1153 if (accessible) { \
1154 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, n, x, mins2); \
1155 Kokkos::deep_copy(h_mins2, mins2); \
1156 } \
1157 else \
1158 Teuchos::reduceAll(*comm, Teuchos::REDUCE_MIN, n, h_x, h_mins2); \
1159 bool success1 = checkFadArrays( \
1160 h_mins, h_mins2, std::string(#FAD)+" Min All", out); \
1161 success1 = checkResultOnAllProcs(*comm, out, success1); \
1162 \
 /* Same reduction again, this time through the serializer fts. */ \
1163 if (accessible) { \
1164 Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MIN, n, x, mins3); \
1165 Kokkos::deep_copy(h_mins3, mins3); \
1166 } \
1167 else \
1168 Teuchos::reduceAll(*comm, fts, Teuchos::REDUCE_MIN, n, h_x, h_mins3); \
1169 bool success2 = checkFadArrays( \
1170 h_mins, h_mins3, std::string(#FAD)+" Min All FTS", out); \
1171 success2 = checkResultOnAllProcs(*comm, out, success2); \
1172 success = success1 && success2; \
1173 \
1174}
1175
// FAD_KOKKOS_COMM_TESTS_OPENMP(FadType, FAD)
// With KOKKOS_ENABLE_OPENMP defined, instantiates FAD_KOKKOS_COMM_TESTS_DEV
// for the OpenMP device; otherwise expands to nothing. The using-declaration
// makes the bare name `OpenMP` visible, since the Device argument is
// token-pasted into the test-group name and cannot be a qualified name.
1176#ifdef KOKKOS_ENABLE_OPENMP
1177#define FAD_KOKKOS_COMM_TESTS_OPENMP(FadType, FAD) \
1178 using Kokkos::OpenMP; \
1179 FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, OpenMP)
1180#else
1181#define FAD_KOKKOS_COMM_TESTS_OPENMP(FadType, FAD)
1182#endif
1183
// FAD_KOKKOS_COMM_TESTS_THREADS(FadType, FAD)
// With KOKKOS_ENABLE_THREADS defined, instantiates FAD_KOKKOS_COMM_TESTS_DEV
// for the Threads device; otherwise expands to nothing. The using-declaration
// makes the bare name `Threads` visible, since the Device argument is
// token-pasted into the test-group name and cannot be a qualified name.
1184#ifdef KOKKOS_ENABLE_THREADS
1185#define FAD_KOKKOS_COMM_TESTS_THREADS(FadType, FAD) \
1186 using Kokkos::Threads; \
1187 FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Threads)
1188#else
1189#define FAD_KOKKOS_COMM_TESTS_THREADS(FadType, FAD)
1190#endif
1191
// FAD_KOKKOS_COMM_TESTS_CUDA(FadType, FAD)
// With KOKKOS_ENABLE_CUDA defined, instantiates FAD_KOKKOS_COMM_TESTS_DEV
// for the Cuda device; otherwise expands to nothing. The using-declaration
// makes the bare name `Cuda` visible, since the Device argument is
// token-pasted into the test-group name and cannot be a qualified name.
1192#ifdef KOKKOS_ENABLE_CUDA
1193#define FAD_KOKKOS_COMM_TESTS_CUDA(FadType, FAD) \
1194 using Kokkos::Cuda; \
1195 FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Cuda)
1196#else
1197#define FAD_KOKKOS_COMM_TESTS_CUDA(FadType, FAD)
1198#endif
1199
// FAD_KOKKOS_COMM_TESTS_HIP(FadType, FAD)
// With KOKKOS_ENABLE_HIP defined, instantiates FAD_KOKKOS_COMM_TESTS_DEV
// for the HIP device (taken from Kokkos::Experimental); otherwise expands to
// nothing. The using-declaration makes the bare name `HIP` visible, since the
// Device argument is token-pasted into the test-group name and cannot be a
// qualified name.
1200#ifdef KOKKOS_ENABLE_HIP
1201#define FAD_KOKKOS_COMM_TESTS_HIP(FadType, FAD) \
1202 using Kokkos::Experimental::HIP; \
1203 FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, HIP)
1204#else
1205#define FAD_KOKKOS_COMM_TESTS_HIP(FadType, FAD)
1206#endif
1207
// FAD_KOKKOS_COMM_TESTS_SERIAL(FadType, FAD)
// With KOKKOS_ENABLE_SERIAL defined, instantiates FAD_KOKKOS_COMM_TESTS_DEV
// for the Serial device; otherwise expands to nothing. The using-declaration
// makes the bare name `Serial` visible, since the Device argument is
// token-pasted into the test-group name and cannot be a qualified name.
1208#ifdef KOKKOS_ENABLE_SERIAL
1209#define FAD_KOKKOS_COMM_TESTS_SERIAL(FadType, FAD) \
1210 using Kokkos::Serial; \
1211 FAD_KOKKOS_COMM_TESTS_DEV(FadType, FAD, Serial)
1212#else
1213#define FAD_KOKKOS_COMM_TESTS_SERIAL(FadType, FAD)
1214#endif
1215
// FAD_KOKKOS_COMM_TESTS(FadType, FAD)
// Instantiates the Kokkos communication tests for every per-backend wrapper
// macro defined above (OpenMP, Threads, CUDA, HIP, Serial). Each wrapper
// expands to nothing when its KOKKOS_ENABLE_* macro is not defined, so
// listing all of them here is safe for any configuration.
// The HIP wrapper is listed explicitly: it is defined above for both the
// enabled and the disabled case but was previously never invoked, so the HIP
// tests were silently skipped.
1216#define FAD_KOKKOS_COMM_TESTS(FadType, FAD) \
1217 FAD_KOKKOS_COMM_TESTS_OPENMP(FadType, FAD) \
1218 FAD_KOKKOS_COMM_TESTS_THREADS(FadType, FAD) \
1219 FAD_KOKKOS_COMM_TESTS_CUDA(FadType, FAD) \
 FAD_KOKKOS_COMM_TESTS_HIP(FadType, FAD) \
1220 FAD_KOKKOS_COMM_TESTS_SERIAL(FadType, FAD)
1221
1222#else
1223
// Fallback for builds where HAVE_SACADO_KOKKOSCORE and
// HAVE_SACADO_TEUCHOSKOKKOSCOMM are not both defined (see the enclosing #if):
// the Kokkos test macro expands to nothing.
1224#define FAD_KOKKOS_COMM_TESTS(FadType, FAD)
1225
1226#endif
1227
// FAD_COMM_TESTS(FadType, FAD)
// Entry-point macro: expands to FAD_BASE_COMM_TESTS for the given Fad type
// and name.
// NOTE(review): FAD_KOKKOS_COMM_TESTS is not invoked from here; presumably
// callers that want the Kokkos variants invoke it separately -- confirm.
1228#define FAD_COMM_TESTS(FadType, FAD) \
1229 FAD_BASE_COMM_TESTS(FadType, FAD)
int Ordinal
bool checkResultOnAllProcs(const Teuchos::Comm< Ordinal > &comm, Teuchos::FancyOStream &out, const bool result)
bool checkFadArrays(const ArrayType &x, const ArrayType &x2, const std::string &tag, Teuchos::FancyOStream &out)
Sacado::Fad::DFad< double > FadType
static SACADO_INLINE_FUNCTION bool eval(const T &x, const T &y)