Stokhos Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
Sacado_Fad_Exp_MP_Vector.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41
42#ifndef SACADO_FAD_EXP_MP_VECTOR_HPP
43#define SACADO_FAD_EXP_MP_VECTOR_HPP
44
45#include "Sacado_MP_Vector.hpp"
46
47namespace Sacado {
48
49 namespace Fad {
50 namespace Exp {
51
54
56
61 template <typename T>
62 class Extender<
63 T,
64 typename std::enable_if<
65 Sacado::is_mp_vector<typename T::value_type>::value >::type
66 > : public T
67 {
68 public:
69
70 typedef typename T::value_type value_type;
71 typedef typename value_type::value_type val_type;
72
73 // Define expression template specialization
75
76 // Bring in constructors
77 using T::T;
78
79 // Bring in methods we are overloading
80 using T::val;
81 using T::dx;
82 using T::fastAccessDx;
83
85 KOKKOS_INLINE_FUNCTION
86 const val_type& val(int j) const { return T::val().fastAccessCoeff(j); }
87
89 KOKKOS_INLINE_FUNCTION
90 val_type& val(int j) { return T::val().fastAccessCoeff(j); }
91
93 KOKKOS_INLINE_FUNCTION
94 val_type dx(int i, int j) const {
95 return this->size() ? this->dx_[i].fastAccessCoeff(j) : val_type(0.0);
96 }
97
99 KOKKOS_INLINE_FUNCTION
100 val_type& fastAccessDx(int i, int j) {
101 return this->dx_[i].fastAccessCoeff(j);
102 }
103
105 KOKKOS_INLINE_FUNCTION
106 const val_type& fastAccessDx(int i, int j) const {
107 return this->dx_[i].fastAccessCoeff(j);
108 }
109
110 };
111
113 template <typename DstType>
114 class ExprAssign<
115 DstType,
116 typename std::enable_if<
117 std::is_same< typename DstType::expr_spec_type, ExprSpecMPVector >::value
118 >::type
119 > {
120 public:
121
123 typedef typename DstType::value_type value_type;
124
125 // MP::Vector size (assuming static, because that's all we care about)
126 static const int VecNum = Sacado::StaticSize<value_type>::value;
127
129 template <typename SrcType>
130 KOKKOS_INLINE_FUNCTION
131 static void assign_equal(DstType& dst, const SrcType& x)
132 {
133 const int xsz = x.size();
134
135 if (xsz != dst.size())
136 dst.resizeAndZero(xsz);
137
138 const int sz = dst.size();
139
140 // For ViewStorage, the resize above may not in fact resize the
141 // derivative array, so it is possible that sz != xsz at this point.
142 // The only valid use case here is sz > xsz == 0, so we use sz in the
143 // assignment below
144
145 if (sz) {
146 if (x.hasFastAccess()) {
147 SACADO_FAD_DERIV_LOOP(i,sz)
148 for (int j=0; j<VecNum; ++j)
149 dst.fastAccessDx(i,j) = x.fastAccessDx(i,j);
150 }
151 else
152 SACADO_FAD_DERIV_LOOP(i,sz)
153 for (int j=0; j<VecNum; ++j)
154 dst.fastAccessDx(i,j) = x.dx(i,j);
155 }
156
157 for (int j=0; j<VecNum; ++j)
158 dst.val(j) = x.val(j);
159 }
160
162 template <typename SrcType>
163 KOKKOS_INLINE_FUNCTION
164 static void assign_plus_equal(DstType& dst, const SrcType& x)
165 {
166 const int xsz = x.size(), sz = dst.size();
167
168#if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
169 if ((xsz != sz) && (xsz != 0) && (sz != 0))
170 throw "Fad Error: Attempt to assign with incompatible sizes";
171#endif
172
173 if (xsz) {
174 if (sz) {
175 if (x.hasFastAccess())
176 SACADO_FAD_DERIV_LOOP(i,sz)
177 for (int j=0; j<VecNum; ++j)
178 dst.fastAccessDx(i,j) += x.fastAccessDx(i,j);
179 else
180 for (int i=0; i<sz; ++i)
181 for (int j=0; j<VecNum; ++j)
182 dst.fastAccessDx(i,j) += x.dx(i,j);
183 }
184 else {
185 dst.resizeAndZero(xsz);
186 if (x.hasFastAccess())
187 SACADO_FAD_DERIV_LOOP(i,xsz)
188 for (int j=0; j<VecNum; ++j)
189 dst.fastAccessDx(i,j) = x.fastAccessDx(i,j);
190 else
191 SACADO_FAD_DERIV_LOOP(i,xsz)
192 for (int j=0; j<VecNum; ++j)
193 dst.fastAccessDx(i,j) = x.dx(i,j);
194 }
195 }
196
197 for (int j=0; j<VecNum; ++j)
198 dst.val(j) += x.val(j);
199 }
200
202 template <typename SrcType>
203 KOKKOS_INLINE_FUNCTION
204 static void assign_minus_equal(DstType& dst, const SrcType& x)
205 {
206 const int xsz = x.size(), sz = dst.size();
207
208#if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
209 if ((xsz != sz) && (xsz != 0) && (sz != 0))
210 throw "Fad Error: Attempt to assign with incompatible sizes";
211#endif
212
213 if (xsz) {
214 if (sz) {
215 if (x.hasFastAccess())
216 SACADO_FAD_DERIV_LOOP(i,sz)
217 for (int j=0; j<VecNum; ++j)
218 dst.fastAccessDx(i,j) -= x.fastAccessDx(i,j);
219 else
220 SACADO_FAD_DERIV_LOOP(i,sz)
221 for (int j=0; j<VecNum; ++j)
222 dst.fastAccessDx(i,j) -= x.dx(i,j);
223 }
224 else {
225 dst.resizeAndZero(xsz);
226 if (x.hasFastAccess())
227 SACADO_FAD_DERIV_LOOP(i,xsz)
228 for (int j=0; j<VecNum; ++j)
229 dst.fastAccessDx(i,j) = -x.fastAccessDx(i,j);
230 else
231 SACADO_FAD_DERIV_LOOP(i,xsz)
232 for (int j=0; j<VecNum; ++j)
233 dst.fastAccessDx(i,j) = -x.dx(i,j);
234 }
235 }
236
237 for (int j=0; j<VecNum; ++j)
238 dst.val(j) -= x.val(j);
239 }
240
242 template <typename SrcType>
243 KOKKOS_INLINE_FUNCTION
244 static void assign_times_equal(DstType& dst, const SrcType& x)
245 {
246 const int xsz = x.size(), sz = dst.size();
247 const value_type xval = x.val();
248 const value_type v = dst.val();
249
250#if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
251 if ((xsz != sz) && (xsz != 0) && (sz != 0))
252 throw "Fad Error: Attempt to assign with incompatible sizes";
253#endif
254
255 if (xsz) {
256 if (sz) {
257 if (x.hasFastAccess())
258 SACADO_FAD_DERIV_LOOP(i,sz)
259 for (int j=0; j<VecNum; ++j)
260 dst.fastAccessDx(i) = v.fastAccessCoeff(j)*x.fastAccessDx(i,j) + dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j);
261 else
262 SACADO_FAD_DERIV_LOOP(i,sz)
263 for (int j=0; j<VecNum; ++j)
264 dst.fastAccessDx(i) = v.fastAccessCoeff(j)*x.dx(i,j) + dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j);
265 }
266 else {
267 dst.resizeAndZero(xsz);
268 if (x.hasFastAccess())
269 SACADO_FAD_DERIV_LOOP(i,xsz)
270 for (int j=0; j<VecNum; ++j)
271 dst.fastAccessDx(i,j) = v.fastAccessCoeff(j)*x.fastAccessDx(i,j);
272 else
273 SACADO_FAD_DERIV_LOOP(i,xsz)
274 for (int j=0; j<VecNum; ++j)
275 dst.fastAccessDx(i,j) = v.fastAccessCoeff(j)*x.dx(i,j);
276 }
277 }
278 else {
279 if (sz) {
280 SACADO_FAD_DERIV_LOOP(i,sz)
281 for (int j=0; j<VecNum; ++j)
282 dst.fastAccessDx(i,j) *= xval.fastAccessCoeff(j);
283 }
284 }
285
286 for (int j=0; j<VecNum; ++j)
287 dst.val(j) *= xval.fastAccessCoeff(j);
288 }
289
291 template <typename SrcType>
292 KOKKOS_INLINE_FUNCTION
293 static void assign_divide_equal(DstType& dst, const SrcType& x)
294 {
295 const int xsz = x.size(), sz = dst.size();
296 const value_type xval = x.val();
297 const value_type v = dst.val();
298
299#if defined(SACADO_DEBUG) && !defined(__CUDA_ARCH__ )
300 if ((xsz != sz) && (xsz != 0) && (sz != 0))
301 throw "Fad Error: Attempt to assign with incompatible sizes";
302#endif
303
304 if (xsz) {
305 const value_type xval2 = xval*xval;
306 if (sz) {
307 if (x.hasFastAccess())
308 SACADO_FAD_DERIV_LOOP(i,sz)
309 for (int j=0; j<VecNum; ++j)
310 dst.fastAccessDx(i,j) =
311 ( dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j) - v.fastAccessCoeff(j)*x.fastAccessDx(i,j) ) / xval2.fastAccessCoeff(j);
312 else
313 SACADO_FAD_DERIV_LOOP(i,sz)
314 for (int j=0; j<VecNum; ++j)
315 dst.fastAccessDx(i,j) =
316 ( dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j) - v.fastAccessCoeff(j)*x.dx(i,j) ) / xval2.fastAccessCoeff(j);
317 }
318 else {
319 dst.resizeAndZero(xsz);
320 if (x.hasFastAccess())
321 SACADO_FAD_DERIV_LOOP(i,xsz)
322 for (int j=0; j<VecNum; ++j)
323 dst.fastAccessDx(i,j) = - v.fastAccessCoeff(j)*x.fastAccessDx(i,j) / xval2.fastAccessCoeff(j);
324 else
325 SACADO_FAD_DERIV_LOOP(i,xsz)
326 for (int j=0; j<VecNum; ++j)
327 dst.fastAccessDx(i,j) = -v.fastAccessCoeff(j)*x.dx(i,j) / xval2.fastAccessCoeff(j);
328 }
329 }
330 else {
331 if (sz) {
332 SACADO_FAD_DERIV_LOOP(i,sz)
333 for (int j=0; j<VecNum; ++j)
334 dst.fastAccessDx(i,j) /= xval.fastAccessCoeff(j);
335 }
336 }
337
338 for (int j=0; j<VecNum; ++j)
339 dst.val(j) /= xval.fastAccessCoeff(j);
340 }
341
342 };
343
348 template <typename DstType>
349 class ExprAssign<
350 DstType,
351 typename std::enable_if<
352 Sacado::IsStaticallySized<DstType>::value &&
353 std::is_same< typename DstType::expr_spec_type, ExprSpecMPVector >::value
354 >::type
355 > {
356 public:
357
359 typedef typename DstType::value_type value_type;
360
361 // MP::Vector size (assuming static, because that's all we care about)
362 static const int VecNum = Sacado::StaticSize<value_type>::value;
363
365 template <typename SrcType>
366 KOKKOS_INLINE_FUNCTION
367 static void assign_equal(DstType& dst, const SrcType& x)
368 {
369 const int sz = dst.size();
370 SACADO_FAD_DERIV_LOOP(i,sz)
371 for (int j=0; j<VecNum; ++j)
372 dst.fastAccessDx(i,j) = x.fastAccessDx(i,j);
373 for (int j=0; j<VecNum; ++j)
374 dst.val(j) = x.val(j);
375 }
376
378 template <typename SrcType>
379 KOKKOS_INLINE_FUNCTION
380 static void assign_plus_equal(DstType& dst, const SrcType& x)
381 {
382 const int sz = dst.size();
383 SACADO_FAD_DERIV_LOOP(i,sz)
384 for (int j=0; j<VecNum; ++j)
385 dst.fastAccessDx(i,j) += x.fastAccessDx(i,j);
386 for (int j=0; j<VecNum; ++j)
387 dst.val(j) += x.val(j);
388 }
389
391 template <typename SrcType>
392 KOKKOS_INLINE_FUNCTION
393 static void assign_minus_equal(DstType& dst, const SrcType& x)
394 {
395 const int sz = dst.size();
396 SACADO_FAD_DERIV_LOOP(i,sz)
397 for (int j=0; j<VecNum; ++j)
398 dst.fastAccessDx(i,j) -= x.fastAccessDx(i,j);
399 for (int j=0; j<VecNum; ++j)
400 dst.val(j) -= x.val(j);
401 }
402
404 template <typename SrcType>
405 KOKKOS_INLINE_FUNCTION
406 static void assign_times_equal(DstType& dst, const SrcType& x)
407 {
408 const int sz = dst.size();
409 const value_type xval = x.val();
410 const value_type v = dst.val();
411 SACADO_FAD_DERIV_LOOP(i,sz)
412 for (int j=0; j<VecNum; ++j)
413 dst.fastAccessDx(i,j) = v.fastAccessCoeff(j)*x.fastAccessDx(i,j) + dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j);
414 for (int j=0; j<VecNum; ++j)
415 dst.val(j) *= xval.fastAccessCoeff(j);
416 }
417
419 template <typename SrcType>
420 KOKKOS_INLINE_FUNCTION
421 static void assign_divide_equal(DstType& dst, const SrcType& x)
422 {
423 const int sz = dst.size();
424 const value_type xval = x.val();
425 const value_type xval2 = xval*xval;
426 const value_type v = dst.val();
427 SACADO_FAD_DERIV_LOOP(i,sz)
428 for (int j=0; j<VecNum; ++j)
429 dst.fastAccessDx(i,j) =
430 ( dst.fastAccessDx(i,j)*xval.fastAccessCoeff(j) - v.fastAccessCoeff(j)*x.fastAccessDx(i,j) )/ xval2.fastAccessCoeff(j);
431 for (int j=0; j<VecNum; ++j)
432 dst.val(j) /= xval.fastAccessCoeff(j);
433 }
434
435 };
436
437 } // namespace Exp
438 } // namespace Fad
439
440} // namespace Sacado
441
442// Specialize expression template operators to add similar extensions
443#include "Sacado_Fad_Exp_Ops.hpp"
444
445#define FAD_UNARYOP_MACRO(OPNAME,OP,USING,MPVALUE,VALUE,DX,FASTACCESSDX) \
446namespace Sacado { \
447 namespace Fad { \
448 namespace Exp { \
449 \
450 template <typename T> \
451 class OP< T,ExprSpecMPVector > : \
452 public Expr< OP< T,ExprSpecMPVector > > { \
453 public: \
454 \
455 typedef typename std::remove_cv<T>::type ExprT; \
456 typedef typename ExprT::value_type value_type; \
457 typedef typename ExprT::scalar_type scalar_type; \
458 \
459 typedef typename value_type::value_type val_type; \
460 \
461 typedef ExprSpecMPVector expr_spec_type; \
462 \
463 KOKKOS_INLINE_FUNCTION \
464 OP(const T& expr_) : expr(expr_) {} \
465 \
466 KOKKOS_INLINE_FUNCTION \
467 int size() const { return expr.size(); } \
468 \
469 KOKKOS_INLINE_FUNCTION \
470 bool hasFastAccess() const { \
471 return expr.hasFastAccess(); \
472 } \
473 \
474 KOKKOS_INLINE_FUNCTION \
475 value_type val() const { \
476 USING \
477 return MPVALUE; \
478 } \
479 \
480 KOKKOS_INLINE_FUNCTION \
481 val_type val(int j) const { \
482 USING \
483 return VALUE; \
484 } \
485 \
486 KOKKOS_INLINE_FUNCTION \
487 val_type dx(int i, int j) const { \
488 USING \
489 return DX; \
490 } \
491 \
492 KOKKOS_INLINE_FUNCTION \
493 val_type fastAccessDx(int i, int j) const { \
494 USING \
495 return FASTACCESSDX; \
496 } \
497 \
498 protected: \
499 \
500 const T& expr; \
501 }; \
502 \
503 } \
504 } \
505 \
506}
507
509 UnaryPlusOp,
510 ;,
511 expr.val(),
512 expr.val(j),
513 expr.dx(i,j),
514 expr.fastAccessDx(i,j))
515FAD_UNARYOP_MACRO(operator-,
517 ;,
518 -expr.val(),
519 -expr.val(j),
520 -expr.dx(i,j),
521 -expr.fastAccessDx(i,j))
524 using std::exp;,
525 exp(expr.val()),
526 exp(expr.val(j)),
527 exp(expr.val(j))*expr.dx(i,j),
528 exp(expr.val(j))*expr.fastAccessDx(i,j))
530 LogOp,
531 using std::log;,
532 log(expr.val()),
533 log(expr.val(j)),
534 expr.dx(i,j)/expr.val(j),
535 expr.fastAccessDx(i,j)/expr.val(j))
538 using std::log10; using std::log;,
539 log10(expr.val()),
540 log10(expr.val(j)),
541 expr.dx(i,j)/( log(value_type(10))*expr.val()),
542 expr.fastAccessDx(i,j) / ( log(value_type(10))*expr.val()))
545 using std::sqrt;,
546 sqrt(expr.val()),
547 sqrt(expr.val(j)),
548 expr.dx(i,j)/(value_type(2)* sqrt(expr.val())),
549 expr.fastAccessDx(i,j)/(value_type(2)* sqrt(expr.val())))
552 using std::cos; using std::sin;,
553 cos(expr.val()),
554 cos(expr.val(j)),
555 -expr.dx(i,j)* sin(expr.val()),
556 -expr.fastAccessDx(i,j)* sin(expr.val()))
559 using std::cos; using std::sin;,
560 sin(expr.val()),
561 sin(expr.val(j)),
562 expr.dx(i,j)* cos(expr.val()),
563 expr.fastAccessDx(i,j)* cos(expr.val()))
566 using std::tan;,
567 tan(expr.val()),
568 tan(expr.val(j)),
569 expr.dx(i,j)*
570 (value_type(1)+ tan(expr.val())* tan(expr.val())),
571 expr.fastAccessDx(i,j)*
572 (value_type(1)+ tan(expr.val())* tan(expr.val())))
575 using std::acos; using std::sqrt;,
576 acos(expr.val()),
577 acos(expr.val(j)),
578 -expr.dx(i,j)/ sqrt(value_type(1)-expr.val()*expr.val()),
579 -expr.fastAccessDx(i,j) /
580 sqrt(value_type(1)-expr.val()*expr.val()))
583 using std::asin; using std::sqrt;,
584 asin(expr.val()),
585 asin(expr.val(j)),
586 expr.dx(i,j)/ sqrt(value_type(1)-expr.val()*expr.val()),
587 expr.fastAccessDx(i,j) /
588 sqrt(value_type(1)-expr.val()*expr.val()))
591 using std::atan;,
592 atan(expr.val()),
593 atan(expr.val(j)),
594 expr.dx(i,j)/(value_type(1)+expr.val()*expr.val()),
595 expr.fastAccessDx(i,j)/(value_type(1)+expr.val()*expr.val()))
598 using std::cosh; using std::sinh;,
599 cosh(expr.val()),
600 cosh(expr.val(j)),
601 expr.dx(i,j)* sinh(expr.val()),
602 expr.fastAccessDx(i,j)* sinh(expr.val()))
605 using std::cosh; using std::sinh;,
606 sinh(expr.val()),
607 sinh(expr.val(j)),
608 expr.dx(i,j)* cosh(expr.val()),
609 expr.fastAccessDx(i,j)* cosh(expr.val()))
612 using std::tanh; using std::cosh;,
613 tanh(expr.val()),
614 tanh(expr.val(j)),
615 expr.dx(i,j)/( cosh(expr.val())* cosh(expr.val())),
616 expr.fastAccessDx(i,j) /
617 ( cosh(expr.val())* cosh(expr.val())))
620 using std::acosh; using std::sqrt;,
621 acosh(expr.val()),
622 acosh(expr.val(j)),
623 expr.dx(i,j)/ sqrt((expr.val()-value_type(1)) *
624 (expr.val()+value_type(1))),
625 expr.fastAccessDx(i,j)/ sqrt((expr.val()-value_type(1)) *
626 (expr.val()+value_type(1))))
629 using std::asinh; using std::sqrt;,
630 asinh(expr.val()),
631 asinh(expr.val(j)),
632 expr.dx(i,j)/ sqrt(value_type(1)+expr.val()*expr.val()),
633 expr.fastAccessDx(i,j)/ sqrt(value_type(1)+
634 expr.val()*expr.val()))
637 using std::atanh;,
638 atanh(expr.val()),
639 atanh(expr.val(j)),
640 expr.dx(i,j)/(value_type(1)-expr.val()*expr.val()),
641 expr.fastAccessDx(i,j)/(value_type(1)-
642 expr.val()*expr.val()))
645 using std::abs;,
646 abs(expr.val()),
647 abs(expr.val(j)),
648 if_then_else( expr.val() >= 0, expr.dx(i,j), value_type(-expr.dx(i,j)) ),
649 if_then_else( expr.val() >= 0, expr.fastAccessDx(i,j), value_type(-expr.fastAccessDx(i,j)) ) )
652 using std::fabs;,
653 fabs(expr.val()),
654 fabs(expr.val(j)),
655 if_then_else( expr.val() >= 0, expr.dx(i,j), value_type(-expr.dx(i,j)) ),
656 if_then_else( expr.val() >= 0, expr.fastAccessDx(i,j), value_type(-expr.fastAccessDx(i,j)) ) )
659 using std::cbrt;,
660 cbrt(expr.val()),
661 cbrt(expr.val(j)),
662 expr.dx(i,j)/(value_type(3)*cbrt(expr.val()*expr.val())),
663 expr.fastAccessDx(i,j)/(value_type(3)*cbrt(expr.val()*expr.val())))
664
665#undef FAD_UNARYOP_MACRO
666
667namespace Sacado {
668 namespace Fad {
669 namespace Exp {
670
671 // For MP::Vector scalar type, promote constants up to expression's value
672 // type. If the constant type is the same as the value type, we can store
673 // the constant as a reference. If it isn't, we must copy it into a new
674 // value type object. We do this so that we can always access the constant
675 // as a value type.
676 template <typename ConstType, typename ValueType>
677 struct ConstTypeRef {
678 typedef ValueType type;
679 };
680
681 template <typename ValueType>
682 struct ConstTypeRef<ValueType, ValueType> {
683 typedef ValueType& type;
684 };
685 }
686 }
687}
688
689#define FAD_BINARYOP_MACRO(OPNAME,OP,USING,MPVALUE,VALUE,DX,CDX1,CDX2,FASTACCESSDX,MPVAL_CONST_DX_1,MPVAL_CONST_DX_2,VAL_CONST_DX_1,VAL_CONST_DX_2,CONST_DX_1,CONST_DX_2,CONST_FASTACCESSDX_1,CONST_FASTACCESSDX_2) \
690namespace Sacado { \
691 namespace Fad { \
692 namespace Exp { \
693 \
694 template <typename T1, typename T2 > \
695 class OP< T1, T2, false, false, ExprSpecMPVector > : \
696 public Expr< OP< T1, T2, false, false, ExprSpecMPVector > > { \
697 public: \
698 \
699 typedef typename std::remove_cv<T1>::type ExprT1; \
700 typedef typename std::remove_cv<T2>::type ExprT2; \
701 typedef typename ExprT1::value_type value_type_1; \
702 typedef typename ExprT2::value_type value_type_2; \
703 typedef typename Sacado::Promote<value_type_1, \
704 value_type_2>::type value_type; \
705 \
706 typedef typename ExprT1::scalar_type scalar_type_1; \
707 typedef typename ExprT2::scalar_type scalar_type_2; \
708 typedef typename Sacado::Promote<scalar_type_1, \
709 scalar_type_2>::type scalar_type; \
710 \
711 typedef typename value_type::value_type val_type; \
712 \
713 typedef ExprSpecMPVector expr_spec_type; \
714 \
715 KOKKOS_INLINE_FUNCTION \
716 OP(const T1& expr1_, const T2& expr2_) : \
717 expr1(expr1_), expr2(expr2_) {} \
718 \
719 KOKKOS_INLINE_FUNCTION \
720 int size() const { \
721 const int sz1 = expr1.size(), sz2 = expr2.size(); \
722 return sz1 > sz2 ? sz1 : sz2; \
723 } \
724 \
725 KOKKOS_INLINE_FUNCTION \
726 bool hasFastAccess() const { \
727 return expr1.hasFastAccess() && expr2.hasFastAccess(); \
728 } \
729 \
730 KOKKOS_INLINE_FUNCTION \
731 value_type val() const { \
732 USING \
733 return MPVALUE; \
734 } \
735 \
736 KOKKOS_INLINE_FUNCTION \
737 val_type val(int j) const { \
738 USING \
739 return VALUE; \
740 } \
741 \
742 KOKKOS_INLINE_FUNCTION \
743 val_type dx(int i, int j) const { \
744 USING \
745 const int sz1 = expr1.size(), sz2 = expr2.size(); \
746 if (sz1 > 0 && sz2 > 0) \
747 return DX; \
748 else if (sz1 > 0) \
749 return CDX2; \
750 else \
751 return CDX1; \
752 } \
753 \
754 KOKKOS_INLINE_FUNCTION \
755 val_type fastAccessDx(int i, int j) const { \
756 USING \
757 return FASTACCESSDX; \
758 } \
759 \
760 protected: \
761 \
762 const T1& expr1; \
763 const T2& expr2; \
764 \
765 }; \
766 \
767 template <typename T1, typename T2> \
768 class OP< T1, T2, false, true, ExprSpecMPVector > : \
769 public Expr< OP< T1, T2, false, true, ExprSpecMPVector > > { \
770 public: \
771 \
772 typedef typename std::remove_cv<T1>::type ExprT1; \
773 typedef T2 ConstT; \
774 typedef typename ExprT1::value_type value_type; \
775 typedef typename ExprT1::scalar_type scalar_type; \
776 \
777 typedef typename value_type::value_type val_type; \
778 \
779 typedef ExprSpecMPVector expr_spec_type; \
780 \
781 KOKKOS_INLINE_FUNCTION \
782 OP(const T1& expr1_, const ConstT& c_) : \
783 expr1(expr1_), c(c_) {} \
784 \
785 KOKKOS_INLINE_FUNCTION \
786 int size() const { \
787 return expr1.size(); \
788 } \
789 \
790 KOKKOS_INLINE_FUNCTION \
791 bool hasFastAccess() const { \
792 return expr1.hasFastAccess(); \
793 } \
794 \
795 KOKKOS_INLINE_FUNCTION \
796 value_type val() const { \
797 USING \
798 return MPVAL_CONST_DX_2; \
799 } \
800 \
801 KOKKOS_INLINE_FUNCTION \
802 val_type val(int j) const { \
803 USING \
804 return VAL_CONST_DX_2; \
805 } \
806 \
807 KOKKOS_INLINE_FUNCTION \
808 val_type dx(int i, int j) const { \
809 USING \
810 return CONST_DX_2; \
811 } \
812 \
813 KOKKOS_INLINE_FUNCTION \
814 val_type fastAccessDx(int i, int j) const { \
815 USING \
816 return CONST_FASTACCESSDX_2; \
817 } \
818 \
819 protected: \
820 \
821 const T1& expr1; \
822 const typename ConstTypeRef<ConstT,value_type>::type c; \
823 }; \
824 \
825 template <typename T1, typename T2> \
826 class OP< T1, T2, true, false,ExprSpecMPVector > : \
827 public Expr< OP< T1, T2, true, false, ExprSpecMPVector > > { \
828 public: \
829 \
830 typedef typename std::remove_cv<T2>::type ExprT2; \
831 typedef T1 ConstT; \
832 typedef typename ExprT2::value_type value_type; \
833 typedef typename ExprT2::scalar_type scalar_type; \
834 \
835 typedef typename value_type::value_type val_type; \
836 \
837 typedef ExprSpecMPVector expr_spec_type; \
838 \
839 KOKKOS_INLINE_FUNCTION \
840 OP(const ConstT& c_, const T2& expr2_) : \
841 c(c_), expr2(expr2_) {} \
842 \
843 KOKKOS_INLINE_FUNCTION \
844 int size() const { \
845 return expr2.size(); \
846 } \
847 \
848 KOKKOS_INLINE_FUNCTION \
849 bool hasFastAccess() const { \
850 return expr2.hasFastAccess(); \
851 } \
852 \
853 KOKKOS_INLINE_FUNCTION \
854 value_type val() const { \
855 USING \
856 return MPVAL_CONST_DX_1; \
857 } \
858 \
859 KOKKOS_INLINE_FUNCTION \
860 val_type val(int j) const { \
861 USING \
862 return VAL_CONST_DX_1; \
863 } \
864 \
865 KOKKOS_INLINE_FUNCTION \
866 val_type dx(int i, int j) const { \
867 USING \
868 return CONST_DX_1; \
869 } \
870 \
871 KOKKOS_INLINE_FUNCTION \
872 val_type fastAccessDx(int i, int j) const { \
873 USING \
874 return CONST_FASTACCESSDX_1; \
875 } \
876 \
877 protected: \
878 \
879 const typename ConstTypeRef<ConstT,value_type>::type c; \
880 const T2& expr2; \
881 }; \
882 \
883 } \
884 } \
885 \
886}
887
888
890 AdditionOp,
891 ;,
892 expr1.val() + expr2.val(),
893 expr1.val(j) + expr2.val(j),
894 expr1.dx(i,j) + expr2.dx(i,j),
895 expr2.dx(i,j),
896 expr1.dx(i,j),
897 expr1.fastAccessDx(i,j) + expr2.fastAccessDx(i,j),
898 c + expr2.val(),
899 expr1.val() + c,
900 c.fastAccessCoeff(j) + expr2.val(j),
901 expr1.val(j) + c.fastAccessCoeff(j),
902 expr2.dx(i,j),
903 expr1.dx(i,j),
904 expr2.fastAccessDx(i,j),
905 expr1.fastAccessDx(i,j))
906FAD_BINARYOP_MACRO(operator-,
908 ;,
909 expr1.val() - expr2.val(),
910 expr1.val(j) - expr2.val(j),
911 expr1.dx(i,j) - expr2.dx(i,j),
912 -expr2.dx(i,j),
913 expr1.dx(i,j),
914 expr1.fastAccessDx(i,j) - expr2.fastAccessDx(i,j),
915 c - expr2.val(),
916 expr1.val() - c,
917 c.fastAccessCoeff(j) - expr2.val(j),
918 expr1.val(j) - c.fastAccessCoeff(j),
919 -expr2.dx(i,j),
920 expr1.dx(i,j),
921 -expr2.fastAccessDx(i,j),
922 expr1.fastAccessDx(i,j))
923FAD_BINARYOP_MACRO(operator*,
925 ;,
926 expr1.val() * expr2.val(),
927 expr1.val(j) * expr2.val(j),
928 expr1.val(j)*expr2.dx(i,j) + expr1.dx(i,j)*expr2.val(j),
929 expr1.val(j)*expr2.dx(i,j),
930 expr1.dx(i,j)*expr2.val(j),
931 expr1.val(j)*expr2.fastAccessDx(i,j) +
932 expr1.fastAccessDx(i,j)*expr2.val(j),
933 c * expr2.val(),
934 expr1.val() * c,
935 c.fastAccessCoeff(j) * expr2.val(j),
936 expr1.val(j) * c.fastAccessCoeff(j),
937 c.fastAccessCoeff(j)*expr2.dx(i,j),
938 expr1.dx(i,j)*c.fastAccessCoeff(j),
939 c.fastAccessCoeff(j)*expr2.fastAccessDx(i,j),
940 expr1.fastAccessDx(i,j)*c.fastAccessCoeff(j))
941FAD_BINARYOP_MACRO(operator/,
943 ;,
944 expr1.val() / expr2.val(),
945 expr1.val(j) / expr2.val(j),
946 (expr1.dx(i,j)*expr2.val(j) - expr2.dx(i,j)*expr1.val(j)) /
947 (expr2.val(j)*expr2.val(j)),
948 -expr2.dx(i,j)*expr1.val(j) / (expr2.val(j)*expr2.val(j)),
949 expr1.dx(i,j)/expr2.val(j),
950 (expr1.fastAccessDx(i,j)*expr2.val(j) -
951 expr2.fastAccessDx(i,j)*expr1.val(j)) /
952 (expr2.val(j)*expr2.val(j)),
953 c / expr2.val(),
954 expr1.val() / c,
955 c.fastAccessCoeff(j) / expr2.val(j),
956 expr1.val(j) / c.fastAccessCoeff(j),
957 -expr2.dx(i,j)*c.fastAccessCoeff(j) / (expr2.val(j)*expr2.val(j)),
958 expr1.dx(i,j)/c.fastAccessCoeff(j),
959 -expr2.fastAccessDx(i,j)*c.fastAccessCoeff(j) / (expr2.val(j)*expr2.val(j)),
960 expr1.fastAccessDx(i,j)/c.fastAccessCoeff(j))
963 using std::atan2;,
964 atan2(expr1.val(), expr2.val()),
965 atan2(expr1.val(j), expr2.val(j)),
966 (expr2.val(j)*expr1.dx(i,j) - expr1.val(j)*expr2.dx(i,j))/
967 (expr1.val(j)*expr1.val(j) + expr2.val(j)*expr2.val(j)),
968 -expr1.val(j)*expr2.dx(i,j)/
969 (expr1.val(j)*expr1.val(j) + expr2.val(j)*expr2.val(j)),
970 expr2.val(j)*expr1.dx(i,j)/
971 (expr1.val(j)*expr1.val(j) + expr2.val(j)*expr2.val(j)),
972 (expr2.val(j)*expr1.fastAccessDx(i,j) - expr1.val(j)*expr2.fastAccessDx(i,j))/
973 (expr1.val(j)*expr1.val(j) + expr2.val(j)*expr2.val(j)),
974 atan2(c, expr2.val()),
975 atan2(expr1.val(), c),
976 atan2(c.fastAccessCoeff(j), expr2.val(j)),
977 atan2(expr1.val(j), c.fastAccessCoeff(j)),
978 (-c.fastAccessCoeff(j)*expr2.dx(i,j)) / (c.fastAccessCoeff(j)*c.fastAccessCoeff(j) + expr2.val(j)*expr2.val(j)),
979 (c.fastAccessCoeff(j)*expr1.dx(i,j))/ (expr1.val(j)*expr1.val(j) + c.fastAccessCoeff(j)*c.fastAccessCoeff(j)),
980 (-c.fastAccessCoeff(j)*expr2.fastAccessDx(i,j))/ (c.fastAccessCoeff(j)*c.fastAccessCoeff(j) + expr2.val(j)*expr2.val(j)),
981 (c.fastAccessCoeff(j)*expr1.fastAccessDx(i,j))/ (expr1.val(j)*expr1.val(j) + c.fastAccessCoeff(j)*c.fastAccessCoeff(j)))
982// FAD_BINARYOP_MACRO(pow,
983// PowerOp,
984// using std::pow; using std::log;,
985// pow(expr1.val(), expr2.val()),
986// pow(expr1.val(j), expr2.val(j)),
987// if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type((expr2.dx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.dx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j))) ),
988// if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(expr2.dx(i,j)*log(expr1.val(j))*pow(expr1.val(j),expr2.val(j))) ),
989// if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(expr2.val(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),expr2.val(j))) ),
990// if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type((expr2.fastAccessDx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.fastAccessDx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j))) ),
991// pow(c, expr2.val()),
992// pow(expr1.val(), c),
993// pow(c.fastAccessCoeff(j), expr2.val(j)),
994// pow(expr1.val(j), c.fastAccessCoeff(j)),
995// if_then_else( c.fastAccessCoeff(j) == val_type(0.0), val_type(0.0), val_type(expr2.dx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j))) ),
996// if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(c.fastAccessCoeff(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j))) ),
997// if_then_else( c.fastAccessCoeff(j) == val_type(0.0), val_type(0.0), val_type(expr2.fastAccessDx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j))) ),
998// if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(c.fastAccessCoeff(j)*expr1.fastAccessDx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j)))) )
1001 ;,
1002 if_then_else( expr1.val() >= expr2.val(), expr1.val(), expr2.val() ),
1003 if_then_else( expr1.val(j) >= expr2.val(j), expr1.val(j), expr2.val(j) ),
1004 if_then_else( expr1.val(j) >= expr2.val(j), expr1.dx(i,j), expr2.dx(i,j) ),
1005 if_then_else( expr1.val(j) >= expr2.val(j), val_type(0.0), expr2.dx(i,j) ),
1006 if_then_else( expr1.val(j) >= expr2.val(j), expr1.dx(i,j), val_type(0.0) ),
1007 if_then_else( expr1.val(j) >= expr2.val(j), expr1.fastAccessDx(i,j), expr2.fastAccessDx(i,j) ),
1008 if_then_else( c >= expr2.val(), c, expr2.val() ),
1009 if_then_else( expr1.val() >= c, expr1.val(), c ),
1010 if_then_else( c.fastAccessCoeff(j) >= expr2.val(j), c.fastAccessCoeff(j), expr2.val(j) ),
1011 if_then_else( expr1.val(j) >= c.fastAccessCoeff(j), expr1.val(j), c.fastAccessCoeff(j) ),
1012 if_then_else( c.fastAccessCoeff(j) >= expr2.val(j), val_type(0.0), expr2.dx(i,j) ),
1013 if_then_else( expr1.val(j) >= c.fastAccessCoeff(j), expr1.dx(i,j), val_type(0.0) ),
1014 if_then_else( c.fastAccessCoeff(j) >= expr2.val(j), val_type(0.0), expr2.fastAccessDx(i,j) ),
1015 if_then_else( expr1.val(j) >= c.fastAccessCoeff(j), expr1.fastAccessDx(i,j), val_type(0.0) ) )
1018 ;,
1019 if_then_else( expr1.val() <= expr2.val(), expr1.val(), expr2.val() ),
1020 if_then_else( expr1.val(j) <= expr2.val(j), expr1.val(j), expr2.val(j) ),
1021 if_then_else( expr1.val(j) <= expr2.val(j), expr1.dx(i,j), expr2.dx(i,j) ),
1022 if_then_else( expr1.val(j) <= expr2.val(j), val_type(0.0), expr2.dx(i,j) ),
1023 if_then_else( expr1.val(j) <= expr2.val(j), expr1.dx(i,j), val_type(0.0) ),
1024 if_then_else( expr1.val(j) <= expr2.val(j), expr1.fastAccessDx(i,j), expr2.fastAccessDx(i,j) ),
1025 if_then_else( c <= expr2.val(), c, expr2.val() ),
1026 if_then_else( expr1.val() <= c, expr1.val(), c ),
1027 if_then_else( c.fastAccessCoeff(j) <= expr2.val(j), c.fastAccessCoeff(j), expr2.val(j) ),
1028 if_then_else( expr1.val(j) <= c.fastAccessCoeff(j), expr1.val(j), c.fastAccessCoeff(j) ),
1029 if_then_else( c.fastAccessCoeff(j) <= expr2.val(j), val_type(0), expr2.dx(i,j) ),
1030 if_then_else( expr1.val(j) <= c.fastAccessCoeff(j), expr1.dx(i,j), val_type(0) ),
1031 if_then_else( c.fastAccessCoeff(j) <= expr2.val(j), val_type(0), expr2.fastAccessDx(i,j) ),
1032 if_then_else( expr1.val(j) <= c.fastAccessCoeff(j), expr1.fastAccessDx(i,j), val_type(0) ) )
1033
1034// Special handling for std::pow() to provide specializations of PowerOp for
1035// "simd" value types that use if_then_else(). The only reason for not using
1036// if_then_else() always is to avoid evaluating the derivative if the value is
1037// zero to avoid throwing FPEs.
1038namespace Sacado {
1039 namespace Fad {
1040 namespace Exp {
1041
1042 //
1043 // Implementation for simd type using if_then_else()
1044 //
1045 template <typename T1, typename T2>
1046 class PowerOp< T1, T2, false, false, ExprSpecMPVector, PowerImpl::Simd > :
1047 public Expr< PowerOp< T1, T2, false, false, ExprSpecMPVector,
1048 PowerImpl::Simd > > {
1049 public:
1050
1051 typedef typename std::remove_cv<T1>::type ExprT1;
1052 typedef typename std::remove_cv<T2>::type ExprT2;
1053 typedef typename ExprT1::value_type value_type_1;
1054 typedef typename ExprT2::value_type value_type_2;
1055 typedef typename Sacado::Promote<value_type_1,
1056 value_type_2>::type value_type;
1057
1058 typedef typename ExprT1::scalar_type scalar_type_1;
1059 typedef typename ExprT2::scalar_type scalar_type_2;
1060 typedef typename Sacado::Promote<scalar_type_1,
1061 scalar_type_2>::type scalar_type;
1062
1063 typedef typename value_type::value_type val_type;
1064
1065 typedef ExprSpecMPVector expr_spec_type;
1066
1067 KOKKOS_INLINE_FUNCTION
1068 PowerOp(const T1& expr1_, const T2& expr2_) :
1069 expr1(expr1_), expr2(expr2_) {}
1070
1071 KOKKOS_INLINE_FUNCTION
1072 int size() const {
1073 const int sz1 = expr1.size(), sz2 = expr2.size();
1074 return sz1 > sz2 ? sz1 : sz2;
1075 }
1076
1077 KOKKOS_INLINE_FUNCTION
1078 bool hasFastAccess() const {
1079 return expr1.hasFastAccess() && expr2.hasFastAccess();
1080 }
1081
1082 KOKKOS_INLINE_FUNCTION
1083 value_type val() const {
1084 using std::pow;
1085 return pow(expr1.val(), expr2.val());
1086 }
1087
1088 KOKKOS_INLINE_FUNCTION
1089 val_type val(int j) const {
1090 using std::pow;
1091 return pow(expr1.val(j), expr2.val(j));
1092 }
1093
1094 KOKKOS_INLINE_FUNCTION
1095 val_type dx(int i, int j) const {
1096 using std::pow; using std::log;
1097 const int sz1 = expr1.size(), sz2 = expr2.size();
1098 if (sz1 > 0 && sz2 > 0)
1099 return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type((expr2.dx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.dx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j))) );
1100 else if (sz1 > 0)
1101 // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1102 // It seems less accurate and caused convergence problems in some codes
1103 return if_then_else( expr2.val(j) == scalar_type(1.0), expr1.dx(i,j), if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(expr2.val(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),expr2.val(j))) ));
1104 else
1105 return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(expr2.dx(i,j)*log(expr1.val(j))*pow(expr1.val(j),expr2.val(j))) );
1106 }
1107
1108 KOKKOS_INLINE_FUNCTION
1109 val_type fastAccessDx(int i, int j) const {
1110 using std::pow; using std::log;
1111 return if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type((expr2.fastAccessDx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.fastAccessDx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j))) );
1112 }
1113
1114 protected:
1115
1116 const T1& expr1;
1117 const T2& expr2;
1118
1119 };
1120
1121 template <typename T1, typename T2>
1122 class PowerOp< T1, T2, false, true, ExprSpecMPVector, PowerImpl::Simd >
1123 : public Expr< PowerOp< T1, T2, false, true, ExprSpecMPVector,
1124 PowerImpl::Simd > > {
1125 public:
1126
1127 typedef typename std::remove_cv<T1>::type ExprT1;
1128 typedef T2 ConstT;
1129 typedef typename ExprT1::value_type value_type;
1130 typedef typename ExprT1::scalar_type scalar_type;
1131
1132 typedef typename value_type::value_type val_type;
1133
1134 typedef ExprSpecMPVector expr_spec_type;
1135
1136 KOKKOS_INLINE_FUNCTION
1137 PowerOp(const T1& expr1_, const ConstT& c_) :
1138 expr1(expr1_), c(c_) {}
1139
1140 KOKKOS_INLINE_FUNCTION
1141 int size() const {
1142 return expr1.size();
1143 }
1144
1145 KOKKOS_INLINE_FUNCTION
1146 bool hasFastAccess() const {
1147 return expr1.hasFastAccess();
1148 }
1149
1150 KOKKOS_INLINE_FUNCTION
1151 value_type val() const {
1152 using std::pow;
1153 return pow(expr1.val(), c);
1154 }
1155
1156 KOKKOS_INLINE_FUNCTION
1157 val_type val(int j) const {
1158 using std::pow;
1159 return pow(expr1.val(j), c.fastAccessCoeff(j));
1160 }
1161
1162 KOKKOS_INLINE_FUNCTION
1163 val_type dx(int i, int j) const {
1164 using std::pow;
1165 // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1166 // It seems less accurate and caused convergence problems in some codes
1167 return if_then_else( c.fastAccessCoeff(j) == scalar_type(1.0), expr1.dx(i,j), if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(c.fastAccessCoeff(j)*expr1.dx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j))) ));
1168 }
1169
1170 KOKKOS_INLINE_FUNCTION
1171 val_type fastAccessDx(int i, int j) const {
1172 using std::pow;
1173 // Don't use formula (a(x)^b)' = b*a(x)^{b-1}*a'(x)
1174 // It seems less accurate and caused convergence problems in some codes
1175 return if_then_else( c.fastAccessCoeff(j) == scalar_type(1.0), expr1.fastAccessDx(i,j), if_then_else( expr1.val(j) == val_type(0.0), val_type(0.0), val_type(c.fastAccessCoeff(j)*expr1.fastAccessDx(i,j)/expr1.val(j)*pow(expr1.val(j),c.fastAccessCoeff(j))) ));
1176 }
1177
1178 protected:
1179
1180 const T1& expr1;
1181 const ConstT& c;
1182 };
1183
1184 template <typename T1, typename T2>
1185 class PowerOp< T1, T2, true, false, ExprSpecMPVector, PowerImpl::Simd >
1186 : public Expr< PowerOp< T1, T2, true, false, ExprSpecMPVector,
1187 PowerImpl::Simd> > {
1188 public:
1189
1190 typedef typename std::remove_cv<T2>::type ExprT2;
1191 typedef T1 ConstT;
1192 typedef typename ExprT2::value_type value_type;
1193 typedef typename ExprT2::scalar_type scalar_type;
1194
1195 typedef typename value_type::value_type val_type;
1196
1197 typedef ExprSpecMPVector expr_spec_type;
1198
1199 KOKKOS_INLINE_FUNCTION
1200 PowerOp(const ConstT& c_, const T2& expr2_) :
1201 c(c_), expr2(expr2_) {}
1202
1203 KOKKOS_INLINE_FUNCTION
1204 int size() const {
1205 return expr2.size();
1206 }
1207
1208 KOKKOS_INLINE_FUNCTION
1209 bool hasFastAccess() const {
1210 return expr2.hasFastAccess();
1211 }
1212
1213 KOKKOS_INLINE_FUNCTION
1214 value_type val() const {
1215 using std::pow;
1216 return pow(c, expr2.val());
1217 }
1218
1219 KOKKOS_INLINE_FUNCTION
1220 val_type val(int j) const {
1221 using std::pow;
1222 return pow(c.fastAccessCoeff(j), expr2.val(j));
1223 }
1224
1225 KOKKOS_INLINE_FUNCTION
1226 val_type dx(int i, int j) const {
1227 using std::pow; using std::log;
1228 return if_then_else( c.fastAccessCoeff(j) == val_type(0.0), val_type(0.0), val_type(expr2.dx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j))) );
1229 }
1230
1231 KOKKOS_INLINE_FUNCTION
1232 val_type fastAccessDx(int i, int j) const {
1233 using std::pow; using std::log;
1234 return if_then_else( c.fastAccessCoeff(j) == val_type(0.0), val_type(0.0), val_type(expr2.fastAccessDx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j))) );
1235 }
1236
1237 protected:
1238
1239 const ConstT& c;
1240 const T2& expr2;
1241 };
1242
1243 //
1244 // Specialization for nested derivatives. This version does not use
1245 // if_then_else/ternary-operator on the base so that nested derivatives
1246 // are correct.
1247 //
1248 template <typename T1, typename T2>
1249 class PowerOp< T1, T2, false, false, ExprSpecMPVector,
1250 PowerImpl::NestedSimd > :
1251 public Expr< PowerOp< T1, T2, false, false, ExprSpecMPVector,
1252 PowerImpl::NestedSimd > > {
1253 public:
1254
1255 typedef typename std::remove_cv<T1>::type ExprT1;
1256 typedef typename std::remove_cv<T2>::type ExprT2;
1257 typedef typename ExprT1::value_type value_type_1;
1258 typedef typename ExprT2::value_type value_type_2;
1259 typedef typename Sacado::Promote<value_type_1,
1260 value_type_2>::type value_type;
1261
1262 typedef typename ExprT1::scalar_type scalar_type_1;
1263 typedef typename ExprT2::scalar_type scalar_type_2;
1264 typedef typename Sacado::Promote<scalar_type_1,
1265 scalar_type_2>::type scalar_type;
1266
1267 typedef typename value_type::value_type val_type;
1268
1269 typedef ExprSpecMPVector expr_spec_type;
1270
1271 KOKKOS_INLINE_FUNCTION
1272 PowerOp(const T1& expr1_, const T2& expr2_) :
1273 expr1(expr1_), expr2(expr2_) {}
1274
1275 KOKKOS_INLINE_FUNCTION
1276 int size() const {
1277 const int sz1 = expr1.size(), sz2 = expr2.size();
1278 return sz1 > sz2 ? sz1 : sz2;
1279 }
1280
1281 KOKKOS_INLINE_FUNCTION
1282 bool hasFastAccess() const {
1283 return expr1.hasFastAccess() && expr2.hasFastAccess();
1284 }
1285
1286 KOKKOS_INLINE_FUNCTION
1287 value_type val() const {
1288 using std::pow;
1289 return pow(expr1.val(), expr2.val());
1290 }
1291
1292 KOKKOS_INLINE_FUNCTION
1293 val_type val(int j) const {
1294 using std::pow;
1295 return pow(expr1.val(j), expr2.val(j));
1296 }
1297
1298 KOKKOS_INLINE_FUNCTION
1299 value_type dx(int i, int j) const {
1300 using std::pow; using std::log;
1301 const int sz1 = expr1.size(), sz2 = expr2.size();
1302 if (sz1 > 0 && sz2 > 0)
1303 return (expr2.dx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.dx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j));
1304 else if (sz1 > 0)
1305 return if_then_else( expr2.val(j) == scalar_type(0.0), value_type(0.0), value_type((expr2.val(j)*expr1.dx(i,j))*pow(expr1.val(j),expr2.val(j)-scalar_type(1.0))));
1306 else
1307 return expr2.dx(i,j)*log(expr1.val(j))*pow(expr1.val(j),expr2.val(j));
1308 }
1309
1310 KOKKOS_INLINE_FUNCTION
1311 value_type fastAccessDx(int i, int j) const {
1312 using std::pow; using std::log;
1313 return (expr2.fastAccessDx(i,j)*log(expr1.val(j))+expr2.val(j)*expr1.fastAccessDx(i,j)/expr1.val(j))*pow(expr1.val(j),expr2.val(j));
1314 }
1315
1316 protected:
1317
1318 const T1& expr1;
1319 const T2& expr2;
1320
1321 };
1322
1323 template <typename T1, typename T2>
1324 class PowerOp< T1, T2, false, true, ExprSpecMPVector,
1325 PowerImpl::NestedSimd > :
1326 public Expr< PowerOp< T1, T2, false, true, ExprSpecMPVector,
1327 PowerImpl::NestedSimd > > {
1328 public:
1329
1330 typedef typename std::remove_cv<T1>::type ExprT1;
1331 typedef T2 ConstT;
1332 typedef typename ExprT1::value_type value_type;
1333 typedef typename ExprT1::scalar_type scalar_type;
1334
1335 typedef typename value_type::value_type val_type;
1336
1337 typedef ExprSpecMPVector expr_spec_type;
1338
1339 KOKKOS_INLINE_FUNCTION
1340 PowerOp(const T1& expr1_, const ConstT& c_) :
1341 expr1(expr1_), c(c_) {}
1342
1343 KOKKOS_INLINE_FUNCTION
1344 int size() const {
1345 return expr1.size();
1346 }
1347
1348 KOKKOS_INLINE_FUNCTION
1349 bool hasFastAccess() const {
1350 return expr1.hasFastAccess();
1351 }
1352
1353 KOKKOS_INLINE_FUNCTION
1354 value_type val() const {
1355 using std::pow;
1356 return pow(expr1.val(), c);
1357 }
1358
1359 KOKKOS_INLINE_FUNCTION
1360 val_type val(int j) const {
1361 using std::pow;
1362 return pow(expr1.val(j), c.fastAccessCoeff(j));
1363 }
1364
1365 KOKKOS_INLINE_FUNCTION
1366 value_type dx(int i, int j) const {
1367 using std::pow;
1368 return if_then_else( c.fastAccessCoeff(j) == scalar_type(0.0), value_type(0.0), value_type(c.fastAccessCoeff(j)*expr1.dx(i,j)*pow(expr1.val(j),c.fastAccessCoeff(j)-scalar_type(1.0))));
1369 }
1370
1371 KOKKOS_INLINE_FUNCTION
1372 value_type fastAccessDx(int i, int j) const {
1373 using std::pow;
1374 return if_then_else( c.fastAccessCoeff(j) == scalar_type(0.0), value_type(0.0), value_type(c.fastAccessCoeff(j)*expr1.fastAccessDx(i,j)*pow(expr1.val(j),c.fastAccessCoeff(j)-scalar_type(1.0))));
1375 }
1376
1377 protected:
1378
1379 const T1& expr1;
1380 const ConstT& c;
1381 };
1382
1383 template <typename T1, typename T2>
1384 class PowerOp<T1, T2, true, false, ExprSpecMPVector,
1385 PowerImpl::NestedSimd > :
1386 public Expr< PowerOp< T1, T2, true, false, ExprSpecMPVector,
1387 PowerImpl::NestedSimd > > {
1388 public:
1389
1390 typedef typename std::remove_cv<T2>::type ExprT2;
1391 typedef T1 ConstT;
1392 typedef typename ExprT2::value_type value_type;
1393 typedef typename ExprT2::scalar_type scalar_type;
1394
1395 typedef typename value_type::value_type val_type;
1396
1397 typedef ExprSpecMPVector expr_spec_type;
1398
1399 KOKKOS_INLINE_FUNCTION
1400 PowerOp(const ConstT& c_, const T2& expr2_) :
1401 c(c_), expr2(expr2_) {}
1402
1403 KOKKOS_INLINE_FUNCTION
1404 int size() const {
1405 return expr2.size();
1406 }
1407
1408 KOKKOS_INLINE_FUNCTION
1409 bool hasFastAccess() const {
1410 return expr2.hasFastAccess();
1411 }
1412
1413 KOKKOS_INLINE_FUNCTION
1414 value_type val() const {
1415 using std::pow;
1416 return pow(c, expr2.val());
1417 }
1418
1419 KOKKOS_INLINE_FUNCTION
1420 val_type val(int j) const {
1421 using std::pow;
1422 return pow(c.fastAccessCoeff(j), expr2.val(j));
1423 }
1424
1425 KOKKOS_INLINE_FUNCTION
1426 value_type dx(int i, int j) const {
1427 using std::pow; using std::log;
1428 return expr2.dx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j));
1429 }
1430
1431 KOKKOS_INLINE_FUNCTION
1432 value_type fastAccessDx(int i, int j) const {
1433 using std::pow; using std::log;
1434 return expr2.fastAccessDx(i,j)*log(c.fastAccessCoeff(j))*pow(c.fastAccessCoeff(j),expr2.val(j));
1435 }
1436
1437 protected:
1438
1439 const ConstT& c;
1440 const T2& expr2;
1441 };
1442
1443 }
1444 }
1445}
1446
1447//--------------------------if_then_else operator -----------------------
1448// Can't use the above macros because it is a ternary operator (sort of).
1449// Also, relies on C++11
1450
1451namespace Sacado {
1452 namespace Fad {
1453 namespace Exp {
1454
1455 template <typename CondT, typename T1, typename T2>
1456 class IfThenElseOp< CondT,T1,T2,false,false,ExprSpecMPVector > :
1457 public Expr< IfThenElseOp< CondT, T1, T2, false, false, ExprSpecMPVector > > {
1458
1459 public:
1460
1461 typedef typename std::remove_cv<T1>::type ExprT1;
1462 typedef typename std::remove_cv<T2>::type ExprT2;
1463 typedef typename ExprT1::value_type value_type_1;
1464 typedef typename ExprT2::value_type value_type_2;
1465 typedef typename Sacado::Promote<value_type_1,
1467
1468 typedef typename ExprT1::scalar_type scalar_type_1;
1469 typedef typename ExprT2::scalar_type scalar_type_2;
1470 typedef typename Sacado::Promote<scalar_type_1,
1472
1473 typedef typename value_type::value_type val_type;
1474
1476
1477 KOKKOS_INLINE_FUNCTION
1478 IfThenElseOp(const CondT& cond_, const T1& expr1_, const T2& expr2_) :
1479 cond(cond_), expr1(expr1_), expr2(expr2_) {}
1480
1481 KOKKOS_INLINE_FUNCTION
1482 int size() const {
1483 int sz1 = expr1.size(), sz2 = expr2.size();
1484 return sz1 > sz2 ? sz1 : sz2;
1485 }
1486
1487 KOKKOS_INLINE_FUNCTION
1488 bool hasFastAccess() const {
1489 return expr1.hasFastAccess() && expr2.hasFastAccess();
1490 }
1491
1492 KOKKOS_INLINE_FUNCTION
1493 value_type val() const {
1494 return if_then_else( cond, expr1.val(), expr2.val() );
1495 }
1496
1497 KOKKOS_INLINE_FUNCTION
1498 val_type val(int j) const {
1499 return if_then_else( cond, expr1.val(j), expr2.val(j) );
1500 }
1501
1502 KOKKOS_INLINE_FUNCTION
1503 val_type dx(int i, int j) const {
1504 return if_then_else( cond, expr1.dx(i,j), expr2.dx(i,j) );
1505 }
1506
1507 KOKKOS_INLINE_FUNCTION
1508 val_type fastAccessDx(int i, int j) const {
1509 return if_then_else( cond, expr1.fastAccessDx(i,j), expr2.fastAccessDx(i,j) );
1510 }
1511
1512 protected:
1513
1514 const CondT& cond;
1515 const T1& expr1;
1516 const T2& expr2;
1517
1518 };
1519
1520 template <typename CondT, typename T1, typename T2>
1521 class IfThenElseOp< CondT, T1, T2, false, true, ExprSpecMPVector> :
1522 public Expr< IfThenElseOp< CondT, T1, T2, false, true, ExprSpecMPVector > > {
1523
1524 public:
1525
1526 typedef typename std::remove_cv<T1>::type ExprT1;
1527 typedef T2 ConstT;
1528 typedef typename ExprT1::value_type value_type;
1529 typedef typename ExprT1::scalar_type scalar_type;
1530
1531 typedef typename value_type::value_type val_type;
1532
1533 KOKKOS_INLINE_FUNCTION
1534 IfThenElseOp(const CondT& cond_, const T1& expr1_, const ConstT& c_) :
1535 cond(cond_), expr1(expr1_), c(c_) {}
1536
1537 KOKKOS_INLINE_FUNCTION
1538 int size() const {
1539 return expr1.size();
1540 }
1541
1542 KOKKOS_INLINE_FUNCTION
1543 bool hasFastAccess() const {
1544 return expr1.hasFastAccess();
1545 }
1546
1547 KOKKOS_INLINE_FUNCTION
1548 value_type val() const {
1549 return if_then_else( cond, expr1.val(), c );
1550 }
1551
1552 KOKKOS_INLINE_FUNCTION
1553 val_type val(int j) const {
1554 return if_then_else( cond, expr1.val(j), c.fastAccessCoeff(j) );
1555 }
1556
1557 KOKKOS_INLINE_FUNCTION
1558 val_type dx(int i, int j) const {
1559 return if_then_else( cond, expr1.dx(i,j), val_type(0.0) );
1560 }
1561
1562 KOKKOS_INLINE_FUNCTION
1563 val_type fastAccessDx(int i, int j) const {
1564 return if_then_else( cond, expr1.fastAccessDx(i,j), val_type(0.0) );
1565 }
1566
1567 protected:
1568
1569 const CondT& cond;
1570 const T1& expr1;
1571 const typename ConstTypeRef<ConstT,value_type>::type c;
1572 };
1573
1574 template <typename CondT, typename T1, typename T2>
1575 class IfThenElseOp< CondT, T1, T2, true, false, ExprSpecMPVector> :
1576 public Expr< IfThenElseOp< CondT, T1, T2, true, false, ExprSpecMPVector > > {
1577
1578 public:
1579
1580 typedef typename std::remove_cv<T2>::type ExprT2;
1581 typedef T1 ConstT;
1582 typedef typename ExprT2::value_type value_type;
1583 typedef typename ExprT2::scalar_type scalar_type;
1584
1585 typedef typename value_type::value_type val_type;
1586
1588
1589 KOKKOS_INLINE_FUNCTION
1590 IfThenElseOp(const CondT& cond_, const ConstT& c_, const T2& expr2_) :
1591 cond(cond_), c(c_), expr2(expr2_) {}
1592
1593 KOKKOS_INLINE_FUNCTION
1594 int size() const {
1595 return expr2.size();
1596 }
1597
1598 KOKKOS_INLINE_FUNCTION
1599 bool hasFastAccess() const {
1600 return expr2.hasFastAccess();
1601 }
1602
1603 KOKKOS_INLINE_FUNCTION
1604 value_type val() const {
1605 return if_then_else( cond, c, expr2.val() );
1606 }
1607
1608 KOKKOS_INLINE_FUNCTION
1609 val_type val(int j) const {
1610 return if_then_else( cond, c.fastAccessCoeff(j), expr2.val(j) );
1611 }
1612
1613 KOKKOS_INLINE_FUNCTION
1614 val_type dx(int i, int j) const {
1615 return if_then_else( cond, val_type(0.0), expr2.dx(i,j) );
1616 }
1617
1618 KOKKOS_INLINE_FUNCTION
1619 val_type fastAccessDx(int i, int j) const {
1620 return if_then_else( cond, val_type(0.0), expr2.fastAccessDx(i,j) );
1621 }
1622
1623 protected:
1624
1625 const CondT& cond;
1626 const typename ConstTypeRef<ConstT,value_type>::type c;
1627 const T2& expr2;
1628 };
1629
1630 }
1631 }
1632}
1633
1634#undef FAD_BINARYOP_MACRO
1635
1636#endif // SACADO_FAD_EXP_MP_VECTOR_HPP
expr expr SinOp
expr expr SinhOp
expr expr SqrtOp
expr expr ACosOp
expr expr ATanOp
expr2 j expr1 expr1 expr2 expr2 j expr1 c c c c MinOp
expr expr ACoshOp
expr expr ASinOp
#define FAD_BINARYOP_MACRO(OPNAME, OP, USING, MPVALUE, VALUE, DX, CDX1, CDX2, FASTACCESSDX, MPVAL_CONST_DX_1, MPVAL_CONST_DX_2, VAL_CONST_DX_1, VAL_CONST_DX_2, CONST_DX_1, CONST_DX_2, CONST_FASTACCESSDX_1, CONST_FASTACCESSDX_2)
expr expr TanhOp
expr expr TanOp
expr expr CoshOp
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c expr1 expr2 expr1 expr2 expr1 MultiplicationOp
expr expr ASinhOp
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr1 j expr1 expr1 expr1 expr1 j expr1 c *expr2 expr1 c expr1 c expr1 c expr1 expr1 expr1 expr1 j *expr1 expr2 expr1 expr1 j *expr1 c expr2 expr1 c expr1 expr2 expr1 expr2 expr1 Atan2Op
expr expr AbsOp
expr1 expr1 expr1 expr2 expr1 expr1 c expr2 expr1 c expr1 expr2 expr1 expr2 expr1 expr1 expr1 expr1 j expr1 expr1 expr1 expr1 j expr1 c *expr2 expr1 c expr1 c expr1 c expr1 DivisionOp
atan2(expr1.val(), expr2.val())
expr expr ATanhOp
expr expr expr expr ExpOp
expr expr expr expr fastAccessDx(i, j)) FAD_UNARYOP_MACRO(exp
expr val()
if_then_else(expr.val() >=0, expr.dx(i, j), value_type(-expr.dx(i, j)))
expr expr expr dx(i, j)
expr expr CosOp
expr2 j expr1 expr1 expr2 expr2 j expr1 c c c c MaxOp
#define FAD_UNARYOP_MACRO(OPNAME, OP, USING, MPVALUE, VALUE, DX, FASTACCESSDX)
Expression template specialization tag for Fad< MP::Vector >
KOKKOS_INLINE_FUNCTION const val_type & fastAccessDx(int i, int j) const
Returns derivative component i without bounds checking.
KOKKOS_INLINE_FUNCTION val_type & fastAccessDx(int i, int j)
Returns derivative component i without bounds checking.
KOKKOS_INLINE_FUNCTION val_type dx(int i, int j) const
Returns derivative component i with bounds checking.
KOKKOS_INLINE_FUNCTION IfThenElseOp(const CondT &cond_, const T1 &expr1_, const T2 &expr2_)
KOKKOS_INLINE_FUNCTION IfThenElseOp(const CondT &cond_, const T1 &expr1_, const ConstT &c_)
KOKKOS_INLINE_FUNCTION IfThenElseOp(const CondT &cond_, const ConstT &c_, const T2 &expr2_)