30#ifndef FADBLASUNITTESTS_HPP
31#define FADBLASUNITTESTS_HPP
43#define COMPARE_FAD_VECTORS(X1, X2, n) \
44 ASSERT_TRUE(X1.size() == std::size_t(n)); \
45 ASSERT_TRUE(X2.size() == std::size_t(n)); \
46 for (unsigned int i=0; i<n; i++) { \
47 COMPARE_FADS(X1[i], X2[i]); \
52template <
class FadType>
91 typedef decltype(this->fad)
FadType;
95 auto ndot = this->ndot_;
97 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
98 for (
unsigned int i=0;
i<m;
i++) {
99 ScalarType
val = this->urand.number();
103 for (
unsigned int k=0; k<ndot; k++) {
104 val = this->urand.number();
105 x1[
i].fastAccessDx(k) =
val;
106 x2[
i].fastAccessDx(k) =
val;
107 x3[
i].fastAccessDx(k) =
val;
110 FadType alpha(ndot, this->urand.number());
111 for (
unsigned int k=0; k<ndot; k++) {
112 alpha.fastAccessDx(k) = this->urand.number();
115 Teuchos::BLAS<int,FadType> teuchos_blas;
116 teuchos_blas.SCAL(m, alpha, &x1[0], 1);
118 Teuchos::BLAS<int,FadType> sacado_blas(
false);
119 sacado_blas.SCAL(m, alpha, &x2[0], 1);
123 unsigned int sz = m*(1+ndot);
124 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
125 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
132 typedef decltype(this->fad)
FadType;
136 auto ndot = this->ndot_;
138 unsigned int incx = 2;
139 VectorType x1(m*incx,ndot), x2(m*incx,ndot), x3(m*incx,ndot);
140 for (
unsigned int i=0;
i<m*incx;
i++) {
141 ScalarType
val = this->urand.number();
145 for (
unsigned int k=0; k<ndot; k++) {
146 val = this->urand.number();
147 x1[
i].fastAccessDx(k) =
val;
148 x2[
i].fastAccessDx(k) =
val;
149 x3[
i].fastAccessDx(k) =
val;
152 FadType alpha(ndot, this->urand.number());
153 for (
unsigned int k=0; k<ndot; k++) {
154 alpha.fastAccessDx(k) = this->urand.number();
157 Teuchos::BLAS<int,FadType> teuchos_blas;
158 teuchos_blas.SCAL(m, alpha, &x1[0], incx);
160 Teuchos::BLAS<int,FadType> sacado_blas(
false);
161 sacado_blas.SCAL(m, alpha, &x2[0], incx);
165 unsigned int sz = m*(1+ndot);
166 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
167 sacado_blas2.SCAL(m, alpha, &x3[0], incx);
174 typedef decltype(this->fad)
FadType;
178 auto ndot = this->ndot_;
180 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
181 for (
unsigned int i=0;
i<m;
i++) {
182 ScalarType
val = this->urand.number();
186 for (
unsigned int k=0; k<ndot; k++) {
187 val = this->urand.number();
188 x1[
i].fastAccessDx(k) =
val;
189 x2[
i].fastAccessDx(k) =
val;
190 x3[
i].fastAccessDx(k) =
val;
193 ScalarType alpha = this->urand.number();
195 Teuchos::BLAS<int,FadType> teuchos_blas;
196 teuchos_blas.SCAL(m, alpha, &x1[0], 1);
198 Teuchos::BLAS<int,FadType> sacado_blas(
false);
199 sacado_blas.SCAL(m, alpha, &x2[0], 1);
203 unsigned int sz = m*(1+ndot);
204 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
205 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
212 typedef decltype(this->fad)
FadType;
216 auto ndot = this->ndot_;
218 VectorType x1(m,ndot), x2(m,ndot), x3(m,ndot);
219 for (
unsigned int i=0;
i<m;
i++) {
220 ScalarType
val = this->urand.number();
226 for (
unsigned int k=0; k<ndot; k++)
227 alpha.fastAccessDx(k) = this->urand.number();
229 Teuchos::BLAS<int,FadType> teuchos_blas;
230 teuchos_blas.SCAL(m, alpha, &x1[0], 1);
232 Teuchos::BLAS<int,FadType> sacado_blas(
false);
233 sacado_blas.SCAL(m, alpha, &x2[0], 1);
237 unsigned int sz = m*(1+ndot);
238 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
239 sacado_blas2.SCAL(m, alpha, &x3[0], 1);
246 typedef decltype(this->fad)
FadType;
250 auto ndot = this->ndot_;
252 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
253 for (
unsigned int i=0;
i<m;
i++) {
254 x[
i] =
FadType(ndot, this->urand.number());
255 ScalarType
val = this->urand.number();
259 for (
unsigned int k=0; k<ndot; k++) {
260 x[
i].fastAccessDx(k) = this->urand.number();
261 val = this->urand.number();
262 y1[
i].fastAccessDx(k) =
val;
263 y2[
i].fastAccessDx(k) =
val;
264 y3[
i].fastAccessDx(k) =
val;
268 Teuchos::BLAS<int,FadType> teuchos_blas;
269 teuchos_blas.COPY(m, &
x[0], 1, &y1[0], 1);
271 Teuchos::BLAS<int,FadType> sacado_blas(
false);
272 sacado_blas.COPY(m, &
x[0], 1, &y2[0], 1);
276 unsigned int sz = 2*m*(1+ndot);
277 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
278 sacado_blas2.COPY(m, &
x[0], 1, &y3[0], 1);
285 typedef decltype(this->fad)
FadType;
289 auto ndot = this->ndot_;
291 unsigned int incx = 2;
292 unsigned int incy = 3;
293 VectorType
x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
294 for (
unsigned int i=0;
i<m*incx;
i++) {
295 x[
i] =
FadType(ndot, this->urand.number());
296 for (
unsigned int k=0; k<ndot; k++) {
297 x[
i].fastAccessDx(k) = this->urand.number();
300 for (
unsigned int i=0;
i<m*incy;
i++) {
301 ScalarType
val = this->urand.number();
305 for (
unsigned int k=0; k<ndot; k++) {
306 val = this->urand.number();
307 y1[
i].fastAccessDx(k) =
val;
308 y2[
i].fastAccessDx(k) =
val;
309 y3[
i].fastAccessDx(k) =
val;
313 Teuchos::BLAS<int,FadType> teuchos_blas;
314 teuchos_blas.COPY(m, &
x[0], incx, &y1[0], incy);
316 Teuchos::BLAS<int,FadType> sacado_blas(
false);
317 sacado_blas.COPY(m, &
x[0], incx, &y2[0], incy);
321 unsigned int sz = 2*m*(1+ndot);
322 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
323 sacado_blas2.COPY(m, &
x[0], incx, &y3[0], incy);
330 typedef decltype(this->fad)
FadType;
334 auto ndot = this->ndot_;
336 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
337 for (
unsigned int i=0;
i<m;
i++) {
338 x[
i] = this->urand.number();
340 for (
unsigned int i=0;
i<m;
i++) {
341 ScalarType
val = this->urand.number();
345 for (
unsigned int k=0; k<ndot; k++) {
346 val = this->urand.number();
347 y1[
i].fastAccessDx(k) =
val;
348 y2[
i].fastAccessDx(k) =
val;
349 y3[
i].fastAccessDx(k) =
val;
353 Teuchos::BLAS<int,FadType> teuchos_blas;
354 teuchos_blas.COPY(m, &
x[0], 1, &y1[0], 1);
356 Teuchos::BLAS<int,FadType> sacado_blas(
false);
357 sacado_blas.COPY(m, &
x[0], 1, &y2[0], 1);
361 unsigned int sz = 2*m*(1+ndot);
362 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
363 sacado_blas2.COPY(m, &
x[0], 1, &y3[0], 1);
370 typedef decltype(this->fad)
FadType;
374 auto ndot = this->ndot_;
376 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
377 for (
unsigned int i=0;
i<m;
i++) {
378 x[
i] =
FadType(ndot, this->urand.number());
379 ScalarType
val = this->urand.number();
383 for (
unsigned int k=0; k<ndot; k++) {
384 x[
i].fastAccessDx(k) = this->urand.number();
388 Teuchos::BLAS<int,FadType> teuchos_blas;
389 teuchos_blas.COPY(m, &
x[0], 1, &y1[0], 1);
391 Teuchos::BLAS<int,FadType> sacado_blas(
false);
392 sacado_blas.COPY(m, &
x[0], 1, &y2[0], 1);
396 unsigned int sz = 2*m*(1+ndot);
397 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
398 sacado_blas2.COPY(m, &
x[0], 1, &y3[0], 1);
405 typedef decltype(this->fad)
FadType;
409 auto ndot = this->ndot_;
411 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
412 for (
unsigned int i=0;
i<m;
i++) {
413 x[
i] =
FadType(ndot, this->urand.number());
414 ScalarType
val = this->urand.number();
418 for (
unsigned int k=0; k<ndot; k++) {
419 x[
i].fastAccessDx(k) = this->urand.number();
420 val = this->urand.number();
421 y1[
i].fastAccessDx(k) =
val;
422 y2[
i].fastAccessDx(k) =
val;
423 y3[
i].fastAccessDx(k) =
val;
426 FadType alpha(ndot, this->urand.number());
427 for (
unsigned int k=0; k<ndot; k++)
428 alpha.fastAccessDx(k) = this->urand.number();
430 Teuchos::BLAS<int,FadType> teuchos_blas;
431 teuchos_blas.AXPY(m, alpha, &
x[0], 1, &y1[0], 1);
433 Teuchos::BLAS<int,FadType> sacado_blas(
false);
434 sacado_blas.AXPY(m, alpha, &
x[0], 1, &y2[0], 1);
438 unsigned int sz = 2*m*(1+ndot);
439 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
440 sacado_blas2.AXPY(m, alpha, &
x[0], 1, &y3[0], 1);
447 typedef decltype(this->fad)
FadType;
451 auto ndot = this->ndot_;
453 unsigned int incx = 2;
454 unsigned int incy = 3;
455 VectorType
x(m*incx,ndot), y1(m*incy,ndot), y2(m*incy,ndot), y3(m*incy,ndot);
456 for (
unsigned int i=0;
i<m*incx;
i++) {
457 x[
i] =
FadType(ndot, this->urand.number());
458 for (
unsigned int k=0; k<ndot; k++) {
459 x[
i].fastAccessDx(k) = this->urand.number();
462 for (
unsigned int i=0;
i<m*incy;
i++) {
463 ScalarType
val = this->urand.number();
467 for (
unsigned int k=0; k<ndot; k++) {
468 val = this->urand.number();
469 y1[
i].fastAccessDx(k) =
val;
470 y2[
i].fastAccessDx(k) =
val;
471 y3[
i].fastAccessDx(k) =
val;
474 FadType alpha(ndot, this->urand.number());
475 for (
unsigned int k=0; k<ndot; k++)
476 alpha.fastAccessDx(k) = this->urand.number();
478 Teuchos::BLAS<int,FadType> teuchos_blas;
479 teuchos_blas.AXPY(m, alpha, &
x[0], incx, &y1[0], incy);
481 Teuchos::BLAS<int,FadType> sacado_blas(
false);
482 sacado_blas.AXPY(m, alpha, &
x[0], incx, &y2[0], incy);
486 unsigned int sz = 2*m*(1+ndot);
487 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
488 sacado_blas2.AXPY(m, alpha, &
x[0], incx, &y3[0], incy);
495 typedef decltype(this->fad)
FadType;
499 auto ndot = this->ndot_;
501 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot), y4(m,ndot);
502 std::vector<ScalarType> xx(m);
503 for (
unsigned int i=0;
i<m;
i++) {
504 xx[
i] = this->urand.number();
506 ScalarType
val = this->urand.number();
511 for (
unsigned int k=0; k<ndot; k++) {
512 val = this->urand.number();
513 y1[
i].fastAccessDx(k) =
val;
514 y2[
i].fastAccessDx(k) =
val;
515 y3[
i].fastAccessDx(k) =
val;
516 y4[
i].fastAccessDx(k) =
val;
519 FadType alpha(ndot, this->urand.number());
520 for (
unsigned int k=0; k<ndot; k++)
521 alpha.fastAccessDx(k) = this->urand.number();
523 Teuchos::BLAS<int,FadType> teuchos_blas;
524 teuchos_blas.AXPY(m, alpha, &
x[0], 1, &y1[0], 1);
526 Teuchos::BLAS<int,FadType> sacado_blas(
false);
527 sacado_blas.AXPY(m, alpha, &
x[0], 1, &y2[0], 1);
531 unsigned int sz = m*(1+ndot)+m;
532 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
533 sacado_blas2.AXPY(m, alpha, &
x[0], 1, &y3[0], 1);
537 sacado_blas.AXPY(m, alpha, &xx[0], 1, &y4[0], 1);
544 typedef decltype(this->fad)
FadType;
548 auto ndot = this->ndot_;
550 VectorType
x(m,ndot), y1(m,ndot), y2(m,ndot), y3(m,ndot);
551 for (
unsigned int i=0;
i<m;
i++) {
552 x[
i] =
FadType(ndot, this->urand.number());
553 ScalarType
val = this->urand.number();
557 for (
unsigned int k=0; k<ndot; k++) {
558 x[
i].fastAccessDx(k) = this->urand.number();
561 FadType alpha(ndot, this->urand.number());
562 for (
unsigned int k=0; k<ndot; k++)
563 alpha.fastAccessDx(k) = this->urand.number();
565 Teuchos::BLAS<int,FadType> teuchos_blas;
566 teuchos_blas.AXPY(m, alpha, &
x[0], 1, &y1[0], 1);
568 Teuchos::BLAS<int,FadType> sacado_blas(
false);
569 sacado_blas.AXPY(m, alpha, &
x[0], 1, &y2[0], 1);
573 unsigned int sz = 2*m*(1+ndot);
574 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
575 sacado_blas2.AXPY(m, alpha, &
x[0], 1, &y3[0], 1);
582 typedef decltype(this->fad)
FadType;
585 auto ndot = this->ndot_;
587 VectorType X(m,ndot), Y(m,ndot);
588 for (
unsigned int i=0;
i<m;
i++) {
589 X[
i] =
FadType(ndot, this->real_urand.number());
590 Y[
i] =
FadType(ndot, this->real_urand.number());
591 for (
unsigned int k=0; k<ndot; k++) {
592 X[
i].fastAccessDx(k) = this->real_urand.number();
593 Y[
i].fastAccessDx(k) = this->real_urand.number();
597 Teuchos::BLAS<int,FadType> teuchos_blas;
598 FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
600 Teuchos::BLAS<int,FadType> sacado_blas(
false);
601 FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
605 unsigned int sz = 2*m*(1+ndot);
606 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
607 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
614 typedef decltype(this->fad)
FadType;
617 auto ndot = this->ndot_;
619 unsigned int incx = 2;
620 unsigned int incy = 3;
621 VectorType X(m*incx,ndot), Y(m*incy,ndot);
622 for (
unsigned int i=0;
i<m*incx;
i++) {
623 X[
i] =
FadType(ndot, this->real_urand.number());
624 for (
unsigned int k=0; k<ndot; k++) {
625 X[
i].fastAccessDx(k) = this->real_urand.number();
628 for (
unsigned int i=0;
i<m*incy;
i++) {
629 Y[
i] =
FadType(ndot, this->real_urand.number());
630 for (
unsigned int k=0; k<ndot; k++) {
631 Y[
i].fastAccessDx(k) = this->real_urand.number();
635 Teuchos::BLAS<int,FadType> teuchos_blas;
636 FadType z1 = teuchos_blas.DOT(m, &X[0], incx, &Y[0], incy);
638 Teuchos::BLAS<int,FadType> sacado_blas(
false);
639 FadType z2 = sacado_blas.DOT(m, &X[0], incx, &Y[0], incy);
643 unsigned int sz = 2*m*(1+ndot);
644 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
645 FadType z3 = sacado_blas2.DOT(m, &X[0], incx, &Y[0], incy);
652 typedef decltype(this->fad)
FadType;
656 auto ndot = this->ndot_;
658 VectorType X(m,0), Y(m,ndot);
659 std::vector<ScalarType>
x(m);
660 for (
unsigned int i=0;
i<m;
i++) {
661 x[
i] = this->urand.number();
663 Y[
i] =
FadType(ndot, this->real_urand.number());
664 for (
unsigned int k=0; k<ndot; k++) {
665 Y[
i].fastAccessDx(k) = this->real_urand.number();
669 Teuchos::BLAS<int,FadType> teuchos_blas;
670 FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
672 Teuchos::BLAS<int,FadType> sacado_blas(
false);
673 FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
677 unsigned int sz = 2*m*(1+ndot);
678 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
679 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
683 FadType z4 = sacado_blas.DOT(m, &
x[0], 1, &Y[0], 1);
690 typedef decltype(this->fad)
FadType;
694 auto ndot = this->ndot_;
696 VectorType X(m,ndot), Y(m,0);
697 std::vector<ScalarType>
y(m);
698 for (
unsigned int i=0;
i<m;
i++) {
699 X[
i] =
FadType(ndot, this->real_urand.number());
700 y[
i] = this->urand.number();
702 for (
unsigned int k=0; k<ndot; k++) {
703 X[
i].fastAccessDx(k) = this->real_urand.number();
707 Teuchos::BLAS<int,FadType> teuchos_blas;
708 FadType z1 = teuchos_blas.DOT(m, &X[0], 1, &Y[0], 1);
710 Teuchos::BLAS<int,FadType> sacado_blas(
false);
711 FadType z2 = sacado_blas.DOT(m, &X[0], 1, &Y[0], 1);
715 unsigned int sz = 2*m*(1+ndot);
716 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
717 FadType z3 = sacado_blas2.DOT(m, &X[0], 1, &Y[0], 1);
721 FadType z4 = sacado_blas.DOT(m, &X[0], 1, &
y[0], 1);
728 typedef decltype(this->fad)
FadType;
731 auto ndot = this->ndot_;
733 VectorType X(m,ndot);
734 for (
unsigned int i=0;
i<m;
i++) {
735 X[
i] =
FadType(ndot, this->real_urand.number());
736 for (
unsigned int k=0; k<ndot; k++) {
737 X[
i].fastAccessDx(k) = this->real_urand.number();
741 Teuchos::BLAS<int,FadType> teuchos_blas;
742 typename Teuchos::ScalarTraits<FadType>::magnitudeType z1 =
743 teuchos_blas.NRM2(m, &X[0], 1);
745 Teuchos::BLAS<int,FadType> sacado_blas(
false);
746 typename Teuchos::ScalarTraits<FadType>::magnitudeType z2 =
747 sacado_blas.NRM2(m, &X[0], 1);
751 unsigned int sz = m*(1+ndot);
752 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
753 typename Teuchos::ScalarTraits<FadType>::magnitudeType z3 =
754 sacado_blas2.NRM2(m, &X[0], 1);
761 typedef decltype(this->fad)
FadType;
764 auto ndot = this->ndot_;
766 unsigned int incx = 2;
767 VectorType X(m*incx,ndot);
768 for (
unsigned int i=0;
i<m*incx;
i++) {
769 X[
i] =
FadType(ndot, this->real_urand.number());
770 for (
unsigned int k=0; k<ndot; k++) {
771 X[
i].fastAccessDx(k) = this->real_urand.number();
775 Teuchos::BLAS<int,FadType> teuchos_blas;
776 typename Teuchos::ScalarTraits<FadType>::magnitudeType z1 =
777 teuchos_blas.NRM2(m, &X[0], incx);
779 Teuchos::BLAS<int,FadType> sacado_blas(
false);
780 typename Teuchos::ScalarTraits<FadType>::magnitudeType z2 =
781 sacado_blas.NRM2(m, &X[0], incx);
785 unsigned int sz = m*(1+ndot);
786 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
787 typename Teuchos::ScalarTraits<FadType>::magnitudeType z3 =
788 sacado_blas2.NRM2(m, &X[0], incx);
795 typedef decltype(this->fad)
FadType;
800 auto ndot = this->ndot_;
802 VectorType
A(m*n,ndot),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
803 for (
unsigned int j=0; j<n; j++) {
804 for (
unsigned int i=0;
i<m;
i++) {
805 A[
i+j*m] =
FadType(ndot, this->urand.number());
806 for (
unsigned int k=0; k<ndot; k++)
809 B[j] =
FadType(ndot, this->urand.number());
810 for (
unsigned int k=0; k<ndot; k++)
813 FadType alpha(ndot, this->urand.number());
814 FadType beta(ndot, this->urand.number());
815 for (
unsigned int k=0; k<ndot; k++) {
816 alpha.fastAccessDx(k) = this->urand.number();
817 beta.fastAccessDx(k) = this->urand.number();
820 for (
unsigned int i=0;
i<m;
i++) {
821 ScalarType
val = this->urand.number();
825 for (
unsigned int k=0; k<ndot; k++) {
826 val = this->urand.number();
827 C1[
i].fastAccessDx(k) =
val;
828 C2[
i].fastAccessDx(k) =
val;
829 C3[
i].fastAccessDx(k) =
val;
833 Teuchos::BLAS<int,FadType> teuchos_blas;
834 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
837 Teuchos::BLAS<int,FadType> sacado_blas(
false);
838 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
843 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
844 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
845 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
853 typedef decltype(this->fad)
FadType;
858 auto ndot = this->ndot_;
860 unsigned int lda = m+3;
861 unsigned int incb = 2;
862 unsigned int incc = 3;
863 VectorType
A(lda*n,ndot),
B(n*incb,ndot), C1(m*incc,ndot), C2(m*incc,ndot),
865 for (
unsigned int j=0; j<n; j++) {
866 for (
unsigned int i=0;
i<lda;
i++) {
867 A[
i+j*lda] =
FadType(ndot, this->urand.number());
868 for (
unsigned int k=0; k<ndot; k++)
872 for (
unsigned int j=0; j<n*incb; j++) {
873 B[j] =
FadType(ndot, this->urand.number());
874 for (
unsigned int k=0; k<ndot; k++)
877 FadType alpha(ndot, this->urand.number());
878 FadType beta(ndot, this->urand.number());
879 for (
unsigned int k=0; k<ndot; k++) {
880 alpha.fastAccessDx(k) = this->urand.number();
881 beta.fastAccessDx(k) = this->urand.number();
884 for (
unsigned int i=0;
i<m*incc;
i++) {
885 ScalarType
val = this->urand.number();
889 for (
unsigned int k=0; k<ndot; k++) {
890 val = this->urand.number();
891 C1[
i].fastAccessDx(k) =
val;
892 C2[
i].fastAccessDx(k) =
val;
893 C3[
i].fastAccessDx(k) =
val;
897 Teuchos::BLAS<int,FadType> teuchos_blas;
898 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], lda, &
B[0], incb,
901 Teuchos::BLAS<int,FadType> sacado_blas(
false);
902 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], lda, &
B[0], incb,
907 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
908 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
909 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], lda, &
B[0], incb,
917 typedef decltype(this->fad)
FadType;
922 auto ndot = this->ndot_;
924 VectorType
A(m*n,ndot),
B(m,ndot), C1(n,ndot), C2(n,ndot), C3(n,ndot);
925 for (
unsigned int j=0; j<n; j++) {
926 for (
unsigned int i=0;
i<m;
i++) {
927 A[
i+j*m] =
FadType(ndot, this->urand.number());
928 for (
unsigned int k=0; k<ndot; k++)
932 for (
unsigned int j=0; j<m; j++) {
933 B[j] =
FadType(ndot, this->urand.number());
934 for (
unsigned int k=0; k<ndot; k++)
937 FadType alpha(ndot, this->urand.number());
938 FadType beta(ndot, this->urand.number());
939 for (
unsigned int k=0; k<ndot; k++) {
940 alpha.fastAccessDx(k) = this->urand.number();
941 beta.fastAccessDx(k) = this->urand.number();
944 for (
unsigned int i=0;
i<n;
i++) {
945 ScalarType
val = this->urand.number();
949 for (
unsigned int k=0; k<ndot; k++) {
950 val = this->urand.number();
951 C1[
i].fastAccessDx(k) =
val;
952 C2[
i].fastAccessDx(k) =
val;
953 C3[
i].fastAccessDx(k) =
val;
957 Teuchos::BLAS<int,FadType> teuchos_blas;
958 teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
961 Teuchos::BLAS<int,FadType> sacado_blas(
false);
962 sacado_blas.GEMV(Teuchos::TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
967 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
968 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
969 sacado_blas2.GEMV(Teuchos::TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
977 typedef decltype(this->fad)
FadType;
982 auto ndot = this->ndot_;
984 unsigned int lda = m+3;
985 unsigned int incb = 2;
986 unsigned int incc = 3;
987 VectorType
A(lda*n,ndot),
B(m*incb,ndot), C1(n*incc,ndot), C2(n*incc,ndot),
989 for (
unsigned int j=0; j<n; j++) {
990 for (
unsigned int i=0;
i<lda;
i++) {
991 A[
i+j*lda] =
FadType(ndot, this->urand.number());
992 for (
unsigned int k=0; k<ndot; k++)
996 for (
unsigned int j=0; j<m*incb; j++) {
997 B[j] =
FadType(ndot, this->urand.number());
998 for (
unsigned int k=0; k<ndot; k++)
1001 FadType alpha(ndot, this->urand.number());
1002 FadType beta(ndot, this->urand.number());
1003 for (
unsigned int k=0; k<ndot; k++) {
1004 alpha.fastAccessDx(k) = this->urand.number();
1005 beta.fastAccessDx(k) = this->urand.number();
1008 for (
unsigned int i=0;
i<n*incc;
i++) {
1009 ScalarType
val = this->urand.number();
1013 for (
unsigned int k=0; k<ndot; k++) {
1014 val = this->urand.number();
1015 C1[
i].fastAccessDx(k) =
val;
1016 C2[
i].fastAccessDx(k) =
val;
1017 C3[
i].fastAccessDx(k) =
val;
1021 Teuchos::BLAS<int,FadType> teuchos_blas;
1022 teuchos_blas.GEMV(Teuchos::TRANS, m, n, alpha, &
A[0], lda, &
B[0], incb,
1023 beta, &C1[0], incc);
1025 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1026 sacado_blas.GEMV(Teuchos::TRANS, m, n, alpha, &
A[0], lda, &
B[0], incb,
1027 beta, &C2[0], incc);
1031 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1032 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1033 sacado_blas2.GEMV(Teuchos::TRANS, m, n, alpha, &
A[0], lda, &
B[0], incb,
1034 beta, &C3[0], incc);
1041 typedef decltype(this->fad)
FadType;
1046 auto ndot = this->ndot_;
1048 VectorType
A(m*n,ndot),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1049 for (
unsigned int j=0; j<n; j++) {
1050 for (
unsigned int i=0;
i<m;
i++) {
1051 A[
i+j*m] =
FadType(ndot, this->urand.number());
1052 for (
unsigned int k=0; k<ndot; k++)
1055 B[j] =
FadType(ndot, this->urand.number());
1056 for (
unsigned int k=0; k<ndot; k++)
1059 FadType alpha(ndot, this->urand.number());
1060 FadType beta(ndot, this->urand.number());
1061 for (
unsigned int k=0; k<ndot; k++) {
1062 alpha.fastAccessDx(k) = this->urand.number();
1063 beta.fastAccessDx(k) = this->urand.number();
1066 for (
unsigned int i=0;
i<m;
i++) {
1067 ScalarType
val = this->urand.number();
1073 Teuchos::BLAS<int,FadType> teuchos_blas;
1074 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1077 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1078 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1083 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1084 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1085 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1093 typedef decltype(this->fad)
FadType;
1098 auto ndot = this->ndot_;
1100 VectorType
A(m*n,ndot),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot);
1101 for (
unsigned int j=0; j<n; j++) {
1102 for (
unsigned int i=0;
i<m;
i++) {
1103 A[
i+j*m] =
FadType(ndot, this->urand.number());
1104 for (
unsigned int k=0; k<ndot; k++)
1107 B[j] =
FadType(ndot, this->urand.number());
1108 for (
unsigned int k=0; k<ndot; k++)
1111 ScalarType alpha = this->urand.number();
1112 ScalarType beta = this->urand.number();
1114 for (
unsigned int i=0;
i<m;
i++) {
1115 ScalarType
val = this->urand.number();
1119 for (
unsigned int k=0; k<ndot; k++) {
1120 val = this->urand.number();
1121 C1[
i].fastAccessDx(k) =
val;
1122 C2[
i].fastAccessDx(k) =
val;
1123 C3[
i].fastAccessDx(k) =
val;
1127 Teuchos::BLAS<int,FadType> teuchos_blas;
1128 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1131 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1132 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1137 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1138 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1139 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1147 typedef decltype(this->fad)
FadType;
1152 auto ndot = this->ndot_;
1154 VectorType
A(m*n,ndot),
B(n,0), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1156 std::vector<ScalarType> b(n);
1157 for (
unsigned int j=0; j<n; j++) {
1158 for (
unsigned int i=0;
i<m;
i++) {
1159 A[
i+j*m] =
FadType(ndot, this->urand.number());
1160 for (
unsigned int k=0; k<ndot; k++)
1163 b[j] = this->urand.number();
1166 FadType alpha(ndot, this->urand.number());
1167 FadType beta(ndot, this->urand.number());
1168 for (
unsigned int k=0; k<ndot; k++) {
1169 alpha.fastAccessDx(k) = this->urand.number();
1170 beta.fastAccessDx(k) = this->urand.number();
1173 for (
unsigned int i=0;
i<m;
i++) {
1174 ScalarType
val = this->urand.number();
1179 for (
unsigned int k=0; k<ndot; k++) {
1180 val = this->urand.number();
1181 C1[
i].fastAccessDx(k) =
val;
1182 C2[
i].fastAccessDx(k) =
val;
1183 C3[
i].fastAccessDx(k) =
val;
1184 C4[
i].fastAccessDx(k) =
val;
1188 Teuchos::BLAS<int,FadType> teuchos_blas;
1189 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1192 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1193 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1198 unsigned int sz = m*n*(1+ndot) + n + m*(1+ndot);
1199 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1200 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1205 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &b[0], 1,
1213 typedef decltype(this->fad)
FadType;
1218 auto ndot = this->ndot_;
1220 VectorType
A(m*n,0),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1222 std::vector<ScalarType>
a(m*n);
1223 for (
unsigned int j=0; j<n; j++) {
1224 for (
unsigned int i=0;
i<m;
i++) {
1225 a[
i+j*m] = this->urand.number();
1226 A[
i+j*m] =
a[
i+j*m];
1228 B[j] =
FadType(ndot, this->urand.number());
1229 for (
unsigned int k=0; k<ndot; k++)
1232 FadType alpha(ndot, this->urand.number());
1233 FadType beta(ndot, this->urand.number());
1234 for (
unsigned int k=0; k<ndot; k++) {
1235 alpha.fastAccessDx(k) = this->urand.number();
1236 beta.fastAccessDx(k) = this->urand.number();
1239 for (
unsigned int i=0;
i<m;
i++) {
1240 ScalarType
val = this->urand.number();
1245 for (
unsigned int k=0; k<ndot; k++) {
1246 val = this->urand.number();
1247 C1[
i].fastAccessDx(k) =
val;
1248 C2[
i].fastAccessDx(k) =
val;
1249 C3[
i].fastAccessDx(k) =
val;
1250 C4[
i].fastAccessDx(k) =
val;
1254 Teuchos::BLAS<int,FadType> teuchos_blas;
1255 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1258 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1259 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
// Workspace size passed as the third constructor argument of sacado_blas2
// below.  A is declared with 0 derivative components in this test, so it
// contributes m*n entries; B (n entries) and C (m entries) are declared with
// ndot derivative components each, hence the (1+ndot) factors.
// The previous expression `m*n* + n*(1+ndot)` parsed as m*n*(+n)*(1+ndot)
// (unary plus): it dropped the B term and inflated the A term.
// NOTE(review): assumes the workspace only has to hold A, B and C, as the
// sibling tests' formulas suggest -- confirm against the Sacado BLAS wrapper.
1264 unsigned int sz = m*n + n*(1+ndot) + m*(1+ndot);
1265 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1266 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1271 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
a[0], m, &
B[0], 1,
1279 typedef decltype(this->fad)
FadType;
1284 auto ndot = this->ndot_;
1286 VectorType
A(m*n,0),
B(n,ndot), C1(m,ndot), C2(m,ndot), C3(m,ndot),
1288 std::vector<ScalarType>
a(m*n), b(n);
1289 for (
unsigned int j=0; j<n; j++) {
1290 for (
unsigned int i=0;
i<m;
i++) {
1291 a[
i+j*m] = this->urand.number();
1292 A[
i+j*m] =
a[
i+j*m];
1294 b[j] = this->urand.number();
1297 FadType alpha(ndot, this->urand.number());
1298 FadType beta(ndot, this->urand.number());
1299 for (
unsigned int k=0; k<ndot; k++) {
1300 alpha.fastAccessDx(k) = this->urand.number();
1301 beta.fastAccessDx(k) = this->urand.number();
1304 for (
unsigned int i=0;
i<m;
i++) {
1305 ScalarType
val = this->urand.number();
1310 for (
unsigned int k=0; k<ndot; k++) {
1311 val = this->urand.number();
1312 C1[
i].fastAccessDx(k) =
val;
1313 C2[
i].fastAccessDx(k) =
val;
1314 C3[
i].fastAccessDx(k) =
val;
1315 C4[
i].fastAccessDx(k) =
val;
1319 Teuchos::BLAS<int,FadType> teuchos_blas;
1320 teuchos_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1323 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1324 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
// Workspace size passed as the third constructor argument of sacado_blas2
// below.  A is declared with 0 derivative components in this test, so it
// contributes m*n entries; B is declared as (n, ndot) and the C vectors carry
// ndot derivative components, hence the (1+ndot) factors on those terms.
// The previous expression `m*n* + n*(1+ndot)` parsed as m*n*(+n)*(1+ndot)
// (unary plus): it dropped the B term and inflated the A term.
// NOTE(review): if B is later overwritten with constants, n entries would be
// enough for it; n*(1+ndot) is kept as a safe upper bound -- confirm.
1329 unsigned int sz = m*n + n*(1+ndot) + m*(1+ndot);
1330 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1331 sacado_blas2.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
A[0], m, &
B[0], 1,
1336 sacado_blas.GEMV(Teuchos::NO_TRANS, m, n, alpha, &
a[0], m, &b[0], 1,
1344 typedef decltype(this->fad)
FadType;
1348 auto ndot = this->ndot_;
1350 VectorType
A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1351 for (
unsigned int j=0; j<n; j++) {
1352 for (
unsigned int i=0;
i<n;
i++) {
1353 A[
i+j*n] =
FadType(ndot, this->urand.number());
1354 for (
unsigned int k=0; k<ndot; k++)
1357 ScalarType
val = this->urand.number();
1361 for (
unsigned int k=0; k<ndot; k++) {
1362 val = this->urand.number();
1363 x1[j].fastAccessDx(k) =
val;
1364 x2[j].fastAccessDx(k) =
val;
1365 x3[j].fastAccessDx(k) =
val;
1369 Teuchos::BLAS<int,FadType> teuchos_blas;
1370 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1371 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1373 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1374 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1375 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1379 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1380 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1381 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1382 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
1386 teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1387 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1388 sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1389 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1390 sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1391 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
1395 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1396 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1397 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1398 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1399 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1400 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
1404 for (
unsigned int i=0;
i<n;
i++) {
1405 A[
i*n+
i].val() = 1.0;
1406 for (
unsigned int k=0; k<ndot; k++)
1409 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1410 Teuchos::UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1411 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1412 Teuchos::UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1413 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1414 Teuchos::UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
// TRMV scenario with a padded leading dimension (lda = n+3) and a non-unit
// vector stride (incx = 2).
// The same TRMV call is issued through three Teuchos::BLAS<int,FadType>
// objects: one default-constructed, one constructed with `false`, and one
// constructed with `false,false,sz` -- presumably so their results can be
// compared against each other.
// NOTE(review): this listing carries stray line-number prefixes and is missing
// lines (test header, closing braces, comparison statements); confirm against
// the complete file.
1421 typedef decltype(this->fad)
FadType;
1425 auto ndot = this->ndot_;
1427 unsigned int lda = n+3;
1428 unsigned int incx = 2;
1429 VectorType
A(lda*n,ndot), x1(n*incx,ndot), x2(n*incx,ndot), x3(n*incx,ndot);
// Every entry of A, including the rows between n and lda, gets a random value.
1430 for (
unsigned int j=0; j<n; j++) {
1431 for (
unsigned int i=0;
i<lda;
i++) {
1432 A[
i+j*lda] =
FadType(ndot, this->urand.number());
1433 for (
unsigned int k=0; k<ndot; k++)
// x1, x2 and x3 receive identical random derivative components at every
// one of the n*incx stored positions.
1437 for (
unsigned int j=0; j<n*incx; j++) {
1438 ScalarType
val = this->urand.number();
1442 for (
unsigned int k=0; k<ndot; k++) {
1443 val = this->urand.number();
1444 x1[j].fastAccessDx(k) =
val;
1445 x2[j].fastAccessDx(k) =
val;
1446 x3[j].fastAccessDx(k) =
val;
// Variant 1: UPPER_TRI, NO_TRANS, NON_UNIT_DIAG through each BLAS object.
1450 Teuchos::BLAS<int,FadType> teuchos_blas;
1451 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1452 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x1[0], incx);
1454 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1455 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1456 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x2[0], incx);
// sz counts n*n matrix entries plus n vector entries, each with 1+ndot
// components (it uses n, not lda or incx).
1460 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1461 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1462 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1463 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x3[0], incx);
// Variant 2: LOWER_TRI, NO_TRANS, NON_UNIT_DIAG.
1467 teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1468 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x1[0], incx);
1469 sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1470 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x2[0], incx);
1471 sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1472 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x3[0], incx);
// Variant 3: UPPER_TRI, TRANS, NON_UNIT_DIAG.
1476 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1477 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x1[0], incx);
1478 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1479 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x2[0], incx);
1480 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1481 Teuchos::NON_UNIT_DIAG, n, &
A[0], lda, &x3[0], incx);
// Variant 4: the value part of each diagonal entry of A is set to 1.0 before
// the UNIT_DIAG calls.
1485 for (
unsigned int i=0;
i<n;
i++) {
1486 A[
i*lda+
i].val() = 1.0;
1487 for (
unsigned int k=0; k<ndot; k++)
1490 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1491 Teuchos::UNIT_DIAG, n, &
A[0], lda, &x1[0], incx);
1492 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1493 Teuchos::UNIT_DIAG, n, &
A[0], lda, &x2[0], incx);
1494 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1495 Teuchos::UNIT_DIAG, n, &
A[0], lda, &x3[0], incx);
// TRMV scenario in which the matrix is also available as a plain scalar array:
// `a` holds ScalarType values and `A` is assigned entry-by-entry from `a`.
// x1/x2/x3 are multiplied by the Fad matrix A, x4/x5 by the scalar matrix a,
// through three Teuchos::BLAS<int,FadType> objects (default-constructed,
// constructed with `false`, constructed with `false,false,sz`).
// NOTE(review): this listing carries stray line-number prefixes and is missing
// lines (test header, closing braces, comparison statements); confirm against
// the complete file.
1502 typedef decltype(this->fad)
FadType;
1506 auto ndot = this->ndot_;
1508 VectorType
A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot), x4(n,ndot),
1510 std::vector<ScalarType>
a(n*n);
// a gets the random values; A is assigned from a, so both hold the same values.
1511 for (
unsigned int j=0; j<n; j++) {
1512 for (
unsigned int i=0;
i<n;
i++) {
1513 a[
i+j*n] = this->urand.number();
1514 A[
i+j*n] =
a[
i+j*n];
1516 ScalarType
val = this->urand.number();
// All five vectors receive identical random derivative components.
1522 for (
unsigned int k=0; k<ndot; k++) {
1523 val = this->urand.number();
1524 x1[j].fastAccessDx(k) =
val;
1525 x2[j].fastAccessDx(k) =
val;
1526 x3[j].fastAccessDx(k) =
val;
1527 x4[j].fastAccessDx(k) =
val;
1528 x5[j].fastAccessDx(k) =
val;
// Variant 1: UPPER_TRI, NO_TRANS, NON_UNIT_DIAG.
1532 Teuchos::BLAS<int,FadType> teuchos_blas;
1533 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1534 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1536 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1537 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1538 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
// sz counts n*n single-component matrix entries plus n vector entries with
// 1+ndot components each.
1542 unsigned int sz = n*n+n*(1+ndot);
1543 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1544 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1545 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
// Scalar-matrix calls: x4 via sacado_blas, x5 via sacado_blas2.
1549 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1550 Teuchos::NON_UNIT_DIAG, n, &
a[0], n, &x4[0], 1);
// Pass the scalar array `a` here, matching every later x5 call; passing the
// Fad matrix A would merely repeat the x3 call above and leave the
// scalar-matrix path of sacado_blas2 unexercised for this variant.
1554 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1555 Teuchos::NON_UNIT_DIAG, n, &
a[0], n, &x5[0], 1);
// Variant 2: LOWER_TRI, NO_TRANS, NON_UNIT_DIAG.
1559 teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1560 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1561 sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1562 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1563 sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1564 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
1565 sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1566 Teuchos::NON_UNIT_DIAG, n, &
a[0], n, &x4[0], 1);
1567 sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1568 Teuchos::NON_UNIT_DIAG, n, &
a[0], n, &x5[0], 1);
// Variant 3: UPPER_TRI, TRANS, NON_UNIT_DIAG.
1574 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1575 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1576 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1577 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1578 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1579 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
1580 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1581 Teuchos::NON_UNIT_DIAG, n, &
a[0], n, &x4[0], 1);
1582 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1583 Teuchos::NON_UNIT_DIAG, n, &
a[0], n, &x5[0], 1);
// Variant 4: the value part of each diagonal entry of A is set to 1.0 before
// the UNIT_DIAG calls.
1589 for (
unsigned int i=0;
i<n;
i++) {
1590 A[
i*n+
i].val() = 1.0;
1591 for (
unsigned int k=0; k<ndot; k++)
1594 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1595 Teuchos::UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1596 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1597 Teuchos::UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1598 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1599 Teuchos::UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
1600 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1601 Teuchos::UNIT_DIAG, n, &
a[0], n, &x4[0], 1);
1602 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1603 Teuchos::UNIT_DIAG, n, &
a[0], n, &x5[0], 1);
// TRMV scenario with unit stride and leading dimension n.
// A is filled with random Fad values; the same TRMV calls are then issued
// through three Teuchos::BLAS<int,FadType> objects (default-constructed,
// constructed with `false`, constructed with `false,false,sz`).
// NOTE(review): no loop filling derivative components of x1/x2/x3 is visible
// in this listing, which has stray line-number prefixes and missing lines
// (test header, closing braces, comparisons); confirm against the full file.
1612 typedef decltype(this->fad)
FadType;
1616 auto ndot = this->ndot_;
1618 VectorType
A(n*n,ndot), x1(n,ndot), x2(n,ndot), x3(n,ndot);
1619 for (
unsigned int j=0; j<n; j++) {
1620 for (
unsigned int i=0;
i<n;
i++) {
1621 A[
i+j*n] =
FadType(ndot, this->urand.number());
1622 for (
unsigned int k=0; k<ndot; k++)
1625 ScalarType
val = this->urand.number();
// Variant 1: UPPER_TRI, NO_TRANS, NON_UNIT_DIAG.
1631 Teuchos::BLAS<int,FadType> teuchos_blas;
1632 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1633 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1635 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1636 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1637 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
// sz counts n*n matrix entries plus n vector entries, each with 1+ndot
// components.
1641 unsigned int sz = n*n*(1+ndot) + n*(1+ndot);
1642 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1643 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1644 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
// Variant 2: LOWER_TRI, NO_TRANS, NON_UNIT_DIAG.
1648 teuchos_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1649 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1650 sacado_blas.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1651 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1652 sacado_blas2.TRMV(Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
1653 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
// Variant 3: UPPER_TRI, TRANS, NON_UNIT_DIAG.
1657 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1658 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1659 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1660 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1661 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::TRANS,
1662 Teuchos::NON_UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
// Variant 4: the value part of each diagonal entry of A is set to 1.0 before
// the UNIT_DIAG calls.
1666 for (
unsigned int i=0;
i<n;
i++) {
1667 A[
i*n+
i].val() = 1.0;
1668 for (
unsigned int k=0; k<ndot; k++)
1671 teuchos_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1672 Teuchos::UNIT_DIAG, n, &
A[0], n, &x1[0], 1);
1673 sacado_blas.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1674 Teuchos::UNIT_DIAG, n, &
A[0], n, &x2[0], 1);
1675 sacado_blas2.TRMV(Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
1676 Teuchos::UNIT_DIAG, n, &
A[0], n, &x3[0], 1);
// GER scenario with unit strides and leading dimension m.
// A1/A2/A3 start with identical derivative components, x, y and alpha are
// random Fad values, and the same GER call is issued through three
// Teuchos::BLAS<int,FadType> objects (default-constructed, constructed with
// `false`, constructed with `false,false,sz`).
// NOTE(review): the statement guarded by the isComplex check is not visible;
// this listing has stray line-number prefixes and missing lines (test header,
// closing braces, comparisons). Confirm against the complete file.
1683 typedef decltype(this->fad)
FadType;
1688 auto ndot = this->ndot_;
1692 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
1695 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot),
x(m,ndot),
y(n,ndot);
// The three target matrices receive the same random derivative components.
1696 for (
unsigned int j=0; j<n; j++) {
1697 for (
unsigned int i=0;
i<m;
i++) {
1698 ScalarType
val = this->urand.number();
1702 for (
unsigned int k=0; k<ndot; k++) {
1703 val = this->urand.number();
1704 A1[
i+j*m].fastAccessDx(k) =
val;
1705 A2[
i+j*m].fastAccessDx(k) =
val;
1706 A3[
i+j*m].fastAccessDx(k) =
val;
// x has m entries, y has n entries.
1710 for (
unsigned int i=0;
i<m;
i++) {
1711 x[
i] =
FadType(ndot, this->urand.number());
1712 for (
unsigned int k=0; k<ndot; k++)
1715 for (
unsigned int i=0;
i<n;
i++) {
1716 y[
i] =
FadType(ndot, this->urand.number());
1717 for (
unsigned int k=0; k<ndot; k++)
// alpha is a Fad value with random value and derivative components.
1720 FadType alpha(ndot, this->urand.number());
1721 for (
unsigned int k=0; k<ndot; k++) {
1722 alpha.fastAccessDx(k) = this->urand.number();
1725 Teuchos::BLAS<int,FadType> teuchos_blas;
1726 teuchos_blas.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A1[0], m);
1728 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1729 sacado_blas.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A2[0], m);
// sz counts the m*n matrix, the n-vector and the m-vector, each entry with
// 1+ndot components.
1733 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1734 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1735 sacado_blas2.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A3[0], m);
// GER scenario with a padded leading dimension (lda = m+3) and non-unit
// strides (incx = 2, incy = 3).
// The same GER call is issued through three Teuchos::BLAS<int,FadType>
// objects (default-constructed, constructed with `false`, constructed with
// `false,false,sz`).
// NOTE(review): the statement guarded by the isComplex check and the
// declaration of y are not visible; this listing has stray line-number
// prefixes and missing lines. Confirm against the complete file.
1742 typedef decltype(this->fad)
FadType;
1747 auto ndot = this->ndot_;
1751 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
1754 unsigned int lda = m+3;
1755 unsigned int incx = 2;
1756 unsigned int incy = 3;
1757 VectorType A1(lda*n,ndot), A2(lda*n,ndot), A3(lda*n,ndot),
x(m*incx,ndot),
// All lda rows of each column are initialised, not only the first m.
1759 for (
unsigned int j=0; j<n; j++) {
1760 for (
unsigned int i=0;
i<lda;
i++) {
1761 ScalarType
val = this->urand.number();
1765 for (
unsigned int k=0; k<ndot; k++) {
1766 val = this->urand.number();
1767 A1[
i+j*lda].fastAccessDx(k) =
val;
1768 A2[
i+j*lda].fastAccessDx(k) =
val;
1769 A3[
i+j*lda].fastAccessDx(k) =
val;
// Every stored slot of x (m*incx) and y (n*incy) is initialised.
1773 for (
unsigned int i=0;
i<m*incx;
i++) {
1774 x[
i] =
FadType(ndot, this->urand.number());
1775 for (
unsigned int k=0; k<ndot; k++)
1778 for (
unsigned int i=0;
i<n*incy;
i++) {
1779 y[
i] =
FadType(ndot, this->urand.number());
1780 for (
unsigned int k=0; k<ndot; k++)
1783 FadType alpha(ndot, this->urand.number());
1784 for (
unsigned int k=0; k<ndot; k++) {
1785 alpha.fastAccessDx(k) = this->urand.number();
1788 Teuchos::BLAS<int,FadType> teuchos_blas;
1789 teuchos_blas.GER(m, n, alpha, &
x[0], incx, &
y[0], incy, &A1[0], lda);
1791 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1792 sacado_blas.GER(m, n, alpha, &
x[0], incx, &
y[0], incy, &A2[0], lda);
// sz is computed from m and n only; lda, incx and incy do not enter it.
1796 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1797 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1798 sacado_blas2.GER(m, n, alpha, &
x[0], incx, &
y[0], incy, &A3[0], lda);
// GER scenario in which alpha is a plain ScalarType rather than a Fad value.
// x, y and the target matrices A1/A2/A3 are Fad-valued; the same GER call is
// issued through three Teuchos::BLAS<int,FadType> objects
// (default-constructed, constructed with `false`, constructed with
// `false,false,sz`).
// NOTE(review): the statement guarded by the isComplex check is not visible;
// this listing has stray line-number prefixes and missing lines. Confirm
// against the complete file.
1805 typedef decltype(this->fad)
FadType;
1810 auto ndot = this->ndot_;
1814 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
1817 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot),
x(m,ndot),
y(n,ndot);
// The three target matrices receive the same random derivative components.
1818 for (
unsigned int j=0; j<n; j++) {
1819 for (
unsigned int i=0;
i<m;
i++) {
1820 ScalarType
val = this->urand.number();
1824 for (
unsigned int k=0; k<ndot; k++) {
1825 val = this->urand.number();
1826 A1[
i+j*m].fastAccessDx(k) =
val;
1827 A2[
i+j*m].fastAccessDx(k) =
val;
1828 A3[
i+j*m].fastAccessDx(k) =
val;
1832 for (
unsigned int i=0;
i<m;
i++) {
1833 x[
i] =
FadType(ndot, this->urand.number());
1834 for (
unsigned int k=0; k<ndot; k++)
1837 for (
unsigned int i=0;
i<n;
i++) {
1838 y[
i] =
FadType(ndot, this->urand.number());
1839 for (
unsigned int k=0; k<ndot; k++)
// alpha carries a value only.
1842 ScalarType alpha = this->urand.number();
1844 Teuchos::BLAS<int,FadType> teuchos_blas;
1845 teuchos_blas.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A1[0], m);
1847 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1848 sacado_blas.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A2[0], m);
1852 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
1853 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1854 sacado_blas2.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A3[0], m);
// GER scenario in which the x vector is also available as a plain scalar
// array xx (length m).
// A1/A2/A3 are updated using the Fad vector x; A4/A5 are updated using xx,
// through sacado_blas and sacado_blas2 respectively.
// NOTE(review): the statement guarded by the isComplex check and the code
// that fills x are not visible; this listing has stray line-number prefixes
// and missing lines. Confirm against the complete file.
1861 typedef decltype(this->fad)
FadType;
1866 auto ndot = this->ndot_;
1870 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
1873 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1874 A5(m*n,ndot),
x(m,ndot),
y(n,ndot);
1875 std::vector<ScalarType> xx(m);
// All five target matrices receive the same random derivative components.
1876 for (
unsigned int j=0; j<n; j++) {
1877 for (
unsigned int i=0;
i<m;
i++) {
1878 ScalarType
val = this->urand.number();
1884 for (
unsigned int k=0; k<ndot; k++) {
1885 val = this->urand.number();
1886 A1[
i+j*m].fastAccessDx(k) =
val;
1887 A2[
i+j*m].fastAccessDx(k) =
val;
1888 A3[
i+j*m].fastAccessDx(k) =
val;
1889 A4[
i+j*m].fastAccessDx(k) =
val;
1890 A5[
i+j*m].fastAccessDx(k) =
val;
// xx holds m scalar values.
1894 for (
unsigned int i=0;
i<m;
i++) {
1895 xx[
i] = this->urand.number();
1898 for (
unsigned int i=0;
i<n;
i++) {
1899 y[
i] =
FadType(ndot, this->urand.number());
1900 for (
unsigned int k=0; k<ndot; k++)
1903 FadType alpha(ndot, this->urand.number());
1904 for (
unsigned int k=0; k<ndot; k++) {
1905 alpha.fastAccessDx(k) = this->urand.number();
1908 Teuchos::BLAS<int,FadType> teuchos_blas;
1909 teuchos_blas.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A1[0], m);
1911 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1912 sacado_blas.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A2[0], m);
// sz counts the x vector as m single-component entries.
1916 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m;
1917 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1918 sacado_blas2.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A3[0], m);
// Same update, passing the scalar array xx as the x argument.
1922 sacado_blas.GER(m, n, alpha, &xx[0], 1, &
y[0], 1, &A4[0], m);
1926 sacado_blas2.GER(m, n, alpha, &xx[0], 1, &
y[0], 1, &A5[0], m);
// GER scenario in which the y vector is also available as a plain scalar
// array yy (length n).
// A1/A2/A3 are updated using the Fad vector y; A4/A5 are updated using yy,
// through sacado_blas and sacado_blas2 respectively.
// NOTE(review): the statement guarded by the isComplex check and the code
// that fills y are not visible; this listing has stray line-number prefixes
// and missing lines. Confirm against the complete file.
1933 typedef decltype(this->fad)
FadType;
1938 auto ndot = this->ndot_;
1942 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
1945 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
1946 A5(m*n,ndot),
x(m,ndot),
y(n,ndot);
1947 std::vector<ScalarType> yy(n);
// All five target matrices receive the same random derivative components.
1948 for (
unsigned int j=0; j<n; j++) {
1949 for (
unsigned int i=0;
i<m;
i++) {
1950 ScalarType
val = this->urand.number();
1956 for (
unsigned int k=0; k<ndot; k++) {
1957 val = this->urand.number();
1958 A1[
i+j*m].fastAccessDx(k) =
val;
1959 A2[
i+j*m].fastAccessDx(k) =
val;
1960 A3[
i+j*m].fastAccessDx(k) =
val;
1961 A4[
i+j*m].fastAccessDx(k) =
val;
1962 A5[
i+j*m].fastAccessDx(k) =
val;
1966 for (
unsigned int i=0;
i<m;
i++) {
1967 x[
i] =
FadType(ndot, this->urand.number());
1968 for (
unsigned int k=0; k<ndot; k++)
// yy holds n scalar values.
1971 for (
unsigned int i=0;
i<n;
i++) {
1972 yy[
i] = this->urand.number();
1975 FadType alpha(ndot, this->urand.number());
1976 for (
unsigned int k=0; k<ndot; k++) {
1977 alpha.fastAccessDx(k) = this->urand.number();
1980 Teuchos::BLAS<int,FadType> teuchos_blas;
1981 teuchos_blas.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A1[0], m);
1983 Teuchos::BLAS<int,FadType> sacado_blas(
false);
1984 sacado_blas.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A2[0], m);
// sz counts the y vector as n single-component entries.
1988 unsigned int sz = m*n*(1+ndot) + m*(1+ndot) + n;
1989 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
1990 sacado_blas2.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A3[0], m);
// Same update, passing the scalar array yy as the y argument.
1994 sacado_blas.GER(m, n, alpha, &
x[0], 1, &yy[0], 1, &A4[0], m);
1998 sacado_blas2.GER(m, n, alpha, &
x[0], 1, &yy[0], 1, &A5[0], m);
// GER scenario in which both vectors are also available as plain scalar
// arrays: xx (length m) and yy (length n).
// A1/A2/A3 are updated using the Fad vectors x and y; A4/A5 are updated using
// xx and yy, through sacado_blas and sacado_blas2 respectively.
// NOTE(review): the statement guarded by the isComplex check and the code
// that fills x and y are not visible; this listing has stray line-number
// prefixes and missing lines. Confirm against the complete file.
2005 typedef decltype(this->fad)
FadType;
2010 auto ndot = this->ndot_;
2014 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
2017 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot), A4(m*n,ndot),
2018 A5(m*n,ndot),
x(m,ndot),
y(n,ndot);
// xx is the x operand of an m-by-n rank-1 update, so it needs m entries; it is
// filled by a loop over i<m below. Sizing it with n would overrun the buffer
// whenever m > n.
2019 std::vector<ScalarType> xx(m), yy(n);
// All five target matrices receive the same random derivative components.
2020 for (
unsigned int j=0; j<n; j++) {
2021 for (
unsigned int i=0;
i<m;
i++) {
2022 ScalarType
val = this->urand.number();
2028 for (
unsigned int k=0; k<ndot; k++) {
2029 val = this->urand.number();
2030 A1[
i+j*m].fastAccessDx(k) =
val;
2031 A2[
i+j*m].fastAccessDx(k) =
val;
2032 A3[
i+j*m].fastAccessDx(k) =
val;
2033 A4[
i+j*m].fastAccessDx(k) =
val;
2034 A5[
i+j*m].fastAccessDx(k) =
val;
2038 for (
unsigned int i=0;
i<m;
i++) {
2039 xx[
i] = this->urand.number();
2042 for (
unsigned int i=0;
i<n;
i++) {
2043 yy[
i] = this->urand.number();
2046 FadType alpha(ndot, this->urand.number());
2047 for (
unsigned int k=0; k<ndot; k++) {
2048 alpha.fastAccessDx(k) = this->urand.number();
2051 Teuchos::BLAS<int,FadType> teuchos_blas;
2052 teuchos_blas.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A1[0], m);
2054 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2055 sacado_blas.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A2[0], m);
// sz counts x as m and y as n single-component entries.
2059 unsigned int sz = m*n*(1+ndot) + m + n;
2060 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2061 sacado_blas2.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A3[0], m);
// Same update, passing the scalar arrays xx and yy.
2065 sacado_blas.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A4[0], m);
2069 sacado_blas2.GER(m, n, alpha, &xx[0], 1, &yy[0], 1, &A5[0], m);
// GER scenario with unit strides and leading dimension m, with Fad-valued x,
// y and alpha.
// The same GER call is issued through three Teuchos::BLAS<int,FadType>
// objects (default-constructed, constructed with `false`, constructed with
// `false,false,sz`).
// NOTE(review): unlike the sibling GER blocks, no loop assigning derivative
// components to A1/A2/A3 is visible here -- possibly the target matrix starts
// without derivatives; the statement guarded by the isComplex check is also
// not visible. This listing has stray line-number prefixes and missing lines;
// confirm against the complete file.
2076 typedef decltype(this->fad)
FadType;
2081 auto ndot = this->ndot_;
2085 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
2088 VectorType A1(m*n,ndot), A2(m*n,ndot), A3(m*n,ndot),
x(m,ndot),
y(n,ndot);
2089 for (
unsigned int j=0; j<n; j++) {
2090 for (
unsigned int i=0;
i<m;
i++) {
2091 ScalarType
val = this->urand.number();
2097 for (
unsigned int i=0;
i<m;
i++) {
2098 x[
i] =
FadType(ndot, this->urand.number());
2099 for (
unsigned int k=0; k<ndot; k++)
2102 for (
unsigned int i=0;
i<n;
i++) {
2103 y[
i] =
FadType(ndot, this->urand.number());
2104 for (
unsigned int k=0; k<ndot; k++)
2107 FadType alpha(ndot, this->urand.number());
2108 for (
unsigned int k=0; k<ndot; k++) {
2109 alpha.fastAccessDx(k) = this->urand.number();
2112 Teuchos::BLAS<int,FadType> teuchos_blas;
2113 teuchos_blas.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A1[0], m);
2115 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2116 sacado_blas.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A2[0], m);
// sz counts the m*n matrix, the n-vector and the m-vector, each entry with
// 1+ndot components.
2120 unsigned int sz = m*n*(1+ndot) + n*(1+ndot) + m*(1+ndot);
2121 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2122 sacado_blas2.GER(m, n, alpha, &
x[0], 1, &
y[0], 1, &A3[0], m);
// GEMM scenario with tightly packed operands, covering all four transpose
// combinations.
// A holds m*l entries, B holds l*n entries, and C1/C2/C3 (m*n each) start
// with identical derivative components. Each GEMM variant is issued through
// three Teuchos::BLAS<int,FadType> objects (default-constructed, constructed
// with `false`, constructed with `false,false,sz`).
// NOTE(review): this listing carries stray line-number prefixes and is missing
// lines (test header, closing braces, comparison statements); confirm against
// the complete file.
2129 typedef decltype(this->fad)
FadType;
2135 auto ndot = this->ndot_;
2137 VectorType
A(m*l,ndot),
B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2138 for (
unsigned int j=0; j<l; j++) {
2139 for (
unsigned int i=0;
i<m;
i++) {
2140 A[
i+j*m] =
FadType(ndot, this->urand.number());
2141 for (
unsigned int k=0; k<ndot; k++)
2145 for (
unsigned int j=0; j<n; j++) {
2146 for (
unsigned int i=0;
i<l;
i++) {
2147 B[
i+j*l] =
FadType(ndot, this->urand.number());
2148 for (
unsigned int k=0; k<ndot; k++)
// alpha and beta are Fad values with random value and derivative components.
2152 FadType alpha(ndot, this->urand.number());
2153 FadType beta(ndot, this->urand.number());
2154 for (
unsigned int k=0; k<ndot; k++) {
2155 alpha.fastAccessDx(k) = this->urand.number();
2156 beta.fastAccessDx(k) = this->urand.number();
// The three result matrices receive the same random derivative components.
2159 for (
unsigned int j=0; j<n; j++) {
2160 for (
unsigned int i=0;
i<m;
i++) {
2161 ScalarType
val = this->urand.number();
2165 for (
unsigned int k=0; k<ndot; k++) {
2166 val = this->urand.number();
2167 C1[
i+j*m].fastAccessDx(k) =
val;
2168 C2[
i+j*m].fastAccessDx(k) =
val;
2169 C3[
i+j*m].fastAccessDx(k) =
val;
// Variant 1: NO_TRANS, NO_TRANS (lda = m, ldb = l).
2174 Teuchos::BLAS<int,FadType> teuchos_blas;
2175 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2176 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2178 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2179 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2180 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// sz counts all three matrices, each entry with 1+ndot components.
2184 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2185 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2186 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2187 &
A[0], m, &
B[0], l, beta, &C3[0], m);
// Variant 2: TRANS, NO_TRANS -- the same A buffer is passed with leading
// dimension l.
2192 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2193 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2194 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2195 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2196 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2197 &
A[0], l, &
B[0], l, beta, &C3[0], m);
// Variant 3: NO_TRANS, TRANS -- the same B buffer is passed with leading
// dimension n.
2203 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2204 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2205 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2206 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2207 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2208 &
A[0], m, &
B[0], n, beta, &C3[0], m);
// Variant 4: TRANS, TRANS (lda = l, ldb = n).
2214 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2215 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2216 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2217 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2218 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2219 &
A[0], l, &
B[0], n, beta, &C3[0], m);
// GEMM scenario (NO_TRANS, NO_TRANS) with padded leading dimensions:
// lda = m+4, ldb = l+4, ldc = m+5.
// The same GEMM call is issued through three Teuchos::BLAS<int,FadType>
// objects (default-constructed, constructed with `false`, constructed with
// `false,false,sz`).
// NOTE(review): the declaration of C3 is not visible; this listing has stray
// line-number prefixes and missing lines. Confirm against the complete file.
2227 typedef decltype(this->fad)
FadType;
2233 auto ndot = this->ndot_;
2235 unsigned int lda = m+4;
2236 unsigned int ldb = l+4;
2237 unsigned int ldc = m+5;
2238 VectorType
A(lda*l,ndot),
B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
// A is stored as l columns of lda rows; all rows, padding included, are set.
2240 for (
unsigned int j=0; j<l; j++) {
2241 for (
unsigned int i=0;
i<lda;
i++) {
2242 A[
i+j*lda] =
FadType(ndot, this->urand.number());
2243 for (
unsigned int k=0; k<ndot; k++)
// B is stored as n columns of ldb rows.
2247 for (
unsigned int j=0; j<n; j++) {
2248 for (
unsigned int i=0;
i<ldb;
i++) {
2249 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
2250 for (
unsigned int k=0; k<ndot; k++)
2254 FadType alpha(ndot, this->urand.number());
2255 FadType beta(ndot, this->urand.number());
2256 for (
unsigned int k=0; k<ndot; k++) {
2257 alpha.fastAccessDx(k) = this->urand.number();
2258 beta.fastAccessDx(k) = this->urand.number();
// The result matrices (n columns of ldc rows) receive identical derivative
// components.
2261 for (
unsigned int j=0; j<n; j++) {
2262 for (
unsigned int i=0;
i<ldc;
i++) {
2263 ScalarType
val = this->urand.number();
2267 for (
unsigned int k=0; k<ndot; k++) {
2268 val = this->urand.number();
2269 C1[
i+j*ldc].fastAccessDx(k) =
val;
2270 C2[
i+j*ldc].fastAccessDx(k) =
val;
2271 C3[
i+j*ldc].fastAccessDx(k) =
val;
2276 Teuchos::BLAS<int,FadType> teuchos_blas;
2277 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2278 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2280 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2281 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2282 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// sz is computed from m, n and l only; the padded leading dimensions do not
// enter it.
2286 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2287 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2288 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2289 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// GEMM scenario (TRANS, NO_TRANS) with padded leading dimensions:
// lda = l+3, ldb = l+4, ldc = m+5.
// The same GEMM call is issued through three Teuchos::BLAS<int,FadType>
// objects (default-constructed, constructed with `false`, constructed with
// `false,false,sz`).
// NOTE(review): the declaration of C3 is not visible; this listing has stray
// line-number prefixes and missing lines. Confirm against the complete file.
2296 typedef decltype(this->fad)
FadType;
2302 auto ndot = this->ndot_;
2304 unsigned int lda = l+3;
2305 unsigned int ldb = l+4;
2306 unsigned int ldc = m+5;
2307 VectorType
A(lda*m,ndot),
B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
// A is stored as m columns of lda rows because it is passed with TRANS.
2309 for (
unsigned int j=0; j<m; j++) {
2310 for (
unsigned int i=0;
i<lda;
i++) {
2311 A[
i+j*lda] =
FadType(ndot, this->urand.number());
2312 for (
unsigned int k=0; k<ndot; k++)
// B is stored as n columns of ldb rows.
2316 for (
unsigned int j=0; j<n; j++) {
2317 for (
unsigned int i=0;
i<ldb;
i++) {
2318 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
2319 for (
unsigned int k=0; k<ndot; k++)
2323 FadType alpha(ndot, this->urand.number());
2324 FadType beta(ndot, this->urand.number());
2325 for (
unsigned int k=0; k<ndot; k++) {
2326 alpha.fastAccessDx(k) = this->urand.number();
2327 beta.fastAccessDx(k) = this->urand.number();
// The result matrices (n columns of ldc rows) receive identical derivative
// components.
2330 for (
unsigned int j=0; j<n; j++) {
2331 for (
unsigned int i=0;
i<ldc;
i++) {
2332 ScalarType
val = this->urand.number();
2336 for (
unsigned int k=0; k<ndot; k++) {
2337 val = this->urand.number();
2338 C1[
i+j*ldc].fastAccessDx(k) =
val;
2339 C2[
i+j*ldc].fastAccessDx(k) =
val;
2340 C3[
i+j*ldc].fastAccessDx(k) =
val;
2345 Teuchos::BLAS<int,FadType> teuchos_blas;
2346 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2347 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2349 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2350 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2351 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// sz is computed from m, n and l only; the padded leading dimensions do not
// enter it.
2355 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2356 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2357 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2358 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// GEMM scenario (NO_TRANS, TRANS) with padded leading dimensions:
// lda = m+4, ldb = n+4, ldc = m+5.
// The same GEMM call is issued through three Teuchos::BLAS<int,FadType>
// objects (default-constructed, constructed with `false`, constructed with
// `false,false,sz`).
// NOTE(review): the declaration of C3 is not visible; this listing has stray
// line-number prefixes and missing lines. Confirm against the complete file.
2365 typedef decltype(this->fad)
FadType;
2371 auto ndot = this->ndot_;
2373 unsigned int lda = m+4;
2374 unsigned int ldb = n+4;
2375 unsigned int ldc = m+5;
2376 VectorType
A(lda*l,ndot),
B(ldb*l,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
// A is stored as l columns of lda rows.
2378 for (
unsigned int j=0; j<l; j++) {
2379 for (
unsigned int i=0;
i<lda;
i++) {
2380 A[
i+j*lda] =
FadType(ndot, this->urand.number());
2381 for (
unsigned int k=0; k<ndot; k++)
// B is stored as l columns of ldb rows because it is passed with TRANS.
2385 for (
unsigned int j=0; j<l; j++) {
2386 for (
unsigned int i=0;
i<ldb;
i++) {
2387 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
2388 for (
unsigned int k=0; k<ndot; k++)
2392 FadType alpha(ndot, this->urand.number());
2393 FadType beta(ndot, this->urand.number());
2394 for (
unsigned int k=0; k<ndot; k++) {
2395 alpha.fastAccessDx(k) = this->urand.number();
2396 beta.fastAccessDx(k) = this->urand.number();
// The result matrices (n columns of ldc rows) receive identical derivative
// components.
2399 for (
unsigned int j=0; j<n; j++) {
2400 for (
unsigned int i=0;
i<ldc;
i++) {
2401 ScalarType
val = this->urand.number();
2405 for (
unsigned int k=0; k<ndot; k++) {
2406 val = this->urand.number();
2407 C1[
i+j*ldc].fastAccessDx(k) =
val;
2408 C2[
i+j*ldc].fastAccessDx(k) =
val;
2409 C3[
i+j*ldc].fastAccessDx(k) =
val;
2414 Teuchos::BLAS<int,FadType> teuchos_blas;
2415 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2416 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2418 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2419 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2420 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// sz is computed from m, n and l only; the padded leading dimensions do not
// enter it.
2424 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2425 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2426 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2427 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// GEMM scenario (TRANS, TRANS) with padded leading dimensions:
// lda = l+3, ldb = n+4, ldc = m+5.
// The same GEMM call is issued through three Teuchos::BLAS<int,FadType>
// objects (default-constructed, constructed with `false`, constructed with
// `false,false,sz`).
// NOTE(review): the declaration of C3 is not visible; this listing has stray
// line-number prefixes and missing lines. Confirm against the complete file.
2434 typedef decltype(this->fad)
FadType;
2440 auto ndot = this->ndot_;
2442 unsigned int lda = l+3;
2443 unsigned int ldb = n+4;
2444 unsigned int ldc = m+5;
2445 VectorType
A(lda*m,ndot),
B(ldb*l,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
// A is stored as m columns of lda rows because it is passed with TRANS.
2447 for (
unsigned int j=0; j<m; j++) {
2448 for (
unsigned int i=0;
i<lda;
i++) {
2449 A[
i+j*lda] =
FadType(ndot, this->urand.number());
2450 for (
unsigned int k=0; k<ndot; k++)
// B is stored as l columns of ldb rows because it is passed with TRANS.
2454 for (
unsigned int j=0; j<l; j++) {
2455 for (
unsigned int i=0;
i<ldb;
i++) {
2456 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
2457 for (
unsigned int k=0; k<ndot; k++)
2461 FadType alpha(ndot, this->urand.number());
2462 FadType beta(ndot, this->urand.number());
2463 for (
unsigned int k=0; k<ndot; k++) {
2464 alpha.fastAccessDx(k) = this->urand.number();
2465 beta.fastAccessDx(k) = this->urand.number();
// The result matrices (n columns of ldc rows) receive identical derivative
// components.
2468 for (
unsigned int j=0; j<n; j++) {
2469 for (
unsigned int i=0;
i<ldc;
i++) {
2470 ScalarType
val = this->urand.number();
2474 for (
unsigned int k=0; k<ndot; k++) {
2475 val = this->urand.number();
2476 C1[
i+j*ldc].fastAccessDx(k) =
val;
2477 C2[
i+j*ldc].fastAccessDx(k) =
val;
2478 C3[
i+j*ldc].fastAccessDx(k) =
val;
2483 Teuchos::BLAS<int,FadType> teuchos_blas;
2484 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2485 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
2487 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2488 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2489 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// sz is computed from m, n and l only; the padded leading dimensions do not
// enter it.
2493 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2494 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2495 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2496 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// GEMM scenario with tightly packed operands and Fad-valued A, B, alpha and
// beta, covering all four transpose combinations.
// Each variant is issued through three Teuchos::BLAS<int,FadType> objects
// (default-constructed, constructed with `false`, constructed with
// `false,false,sz`).
// NOTE(review): unlike the sibling GEMM blocks, no loop assigning derivative
// components to C1/C2/C3 is visible here -- possibly the result matrix starts
// without derivatives. This listing has stray line-number prefixes and
// missing lines; confirm against the complete file.
2503 typedef decltype(this->fad)
FadType;
2509 auto ndot = this->ndot_;
2511 VectorType
A(m*l,ndot),
B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2512 for (
unsigned int j=0; j<l; j++) {
2513 for (
unsigned int i=0;
i<m;
i++) {
2514 A[
i+j*m] =
FadType(ndot, this->urand.number());
2515 for (
unsigned int k=0; k<ndot; k++)
2519 for (
unsigned int j=0; j<n; j++) {
2520 for (
unsigned int i=0;
i<l;
i++) {
2521 B[
i+j*l] =
FadType(ndot, this->urand.number());
2522 for (
unsigned int k=0; k<ndot; k++)
2526 FadType alpha(ndot, this->urand.number());
2527 FadType beta(ndot, this->urand.number());
2528 for (
unsigned int k=0; k<ndot; k++) {
2529 alpha.fastAccessDx(k) = this->urand.number();
2530 beta.fastAccessDx(k) = this->urand.number();
2533 for (
unsigned int j=0; j<n; j++) {
2534 for (
unsigned int i=0;
i<m;
i++) {
2535 ScalarType
val = this->urand.number();
// Variant 1: NO_TRANS, NO_TRANS (lda = m, ldb = l).
2542 Teuchos::BLAS<int,FadType> teuchos_blas;
2543 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2544 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2546 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2547 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2548 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// sz counts all three matrices, each entry with 1+ndot components.
2552 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2553 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2554 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2555 &
A[0], m, &
B[0], l, beta, &C3[0], m);
// Variant 2: TRANS, NO_TRANS (lda = l, ldb = l).
2560 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2561 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2562 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2563 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2564 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2565 &
A[0], l, &
B[0], l, beta, &C3[0], m);
// Variant 3: NO_TRANS, TRANS (lda = m, ldb = n).
2571 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2572 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2573 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2574 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2575 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2576 &
A[0], m, &
B[0], n, beta, &C3[0], m);
// Variant 4: TRANS, TRANS (lda = l, ldb = n).
2582 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2583 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2584 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2585 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2586 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2587 &
A[0], l, &
B[0], n, beta, &C3[0], m);
// GEMM scenario in which alpha and beta are plain ScalarType values rather
// than Fad values, covering all four transpose combinations.
// A, B and the result matrices C1/C2/C3 are Fad-valued; each variant is
// issued through three Teuchos::BLAS<int,FadType> objects
// (default-constructed, constructed with `false`, constructed with
// `false,false,sz`).
// NOTE(review): this listing carries stray line-number prefixes and is missing
// lines (test header, closing braces, comparison statements); confirm against
// the complete file.
2595 typedef decltype(this->fad)
FadType;
2601 auto ndot = this->ndot_;
2603 VectorType
A(m*l,ndot),
B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
2604 for (
unsigned int j=0; j<l; j++) {
2605 for (
unsigned int i=0;
i<m;
i++) {
2606 A[
i+j*m] =
FadType(ndot, this->urand.number());
2607 for (
unsigned int k=0; k<ndot; k++)
2611 for (
unsigned int j=0; j<n; j++) {
2612 for (
unsigned int i=0;
i<l;
i++) {
2613 B[
i+j*l] =
FadType(ndot, this->urand.number());
2614 for (
unsigned int k=0; k<ndot; k++)
// alpha and beta carry values only.
2618 ScalarType alpha = this->urand.number();
2619 ScalarType beta = this->urand.number();
// The three result matrices receive the same random derivative components.
2621 for (
unsigned int j=0; j<n; j++) {
2622 for (
unsigned int i=0;
i<m;
i++) {
2623 ScalarType
val = this->urand.number();
2627 for (
unsigned int k=0; k<ndot; k++) {
2628 val = this->urand.number();
2629 C1[
i+j*m].fastAccessDx(k) =
val;
2630 C2[
i+j*m].fastAccessDx(k) =
val;
2631 C3[
i+j*m].fastAccessDx(k) =
val;
// Variant 1: NO_TRANS, NO_TRANS (lda = m, ldb = l).
2636 Teuchos::BLAS<int,FadType> teuchos_blas;
2637 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2638 &
A[0], m, &
B[0], l, beta, &C1[0], m);
2640 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2641 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2642 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// sz counts all three matrices, each entry with 1+ndot components.
2646 unsigned int sz = m*l*(1+ndot) + l*n*(1+ndot) + m*n*(1+ndot);
2647 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2648 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2649 &
A[0], m, &
B[0], l, beta, &C3[0], m);
// Variant 2: TRANS, NO_TRANS (lda = l, ldb = l).
2654 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2655 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2656 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2657 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2658 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2659 &
A[0], l, &
B[0], l, beta, &C3[0], m);
// Variant 3: NO_TRANS, TRANS (lda = m, ldb = n).
2665 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2666 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2667 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2668 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2669 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2670 &
A[0], m, &
B[0], n, beta, &C3[0], m);
// Variant 4: TRANS, TRANS (lda = l, ldb = n).
2676 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2677 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2678 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2679 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2680 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2681 &
A[0], l, &
B[0], n, beta, &C3[0], m);
// GEMM test fragment in which A is filled from a plain ScalarType array a,
// while B, alpha, beta and C1..C5 are FadType objects built with ndot.
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip (e.g. 2689 -> 2695), so the test header, the declarations of m, n, l,
// VectorType and ScalarType, several loop bodies, closing braces and any
// result checks are not visible here.
2689 typedef decltype(this->fad)
FadType;
2695 auto ndot = this->ndot_;
2697 VectorType
A(m*l,ndot),
B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2698 C4(m*n,ndot), C5(m*n,ndot);
2699 std::vector<ScalarType>
a(m*l);
// Fill a (indexed as i + j*m) with random scalars and copy each entry into A.
2700 for (
unsigned int j=0; j<l; j++) {
2701 for (
unsigned int i=0;
i<m;
i++) {
2702 a[
i+j*m] = this->urand.number();
2703 A[
i+j*m] =
a[
i+j*m];
// Each B entry (indexed as i + j*l) is constructed from (ndot, random number).
// The body of the k-loop at 2709 is not shown in this listing.
2706 for (
unsigned int j=0; j<n; j++) {
2707 for (
unsigned int i=0;
i<l;
i++) {
2708 B[
i+j*l] =
FadType(ndot, this->urand.number());
2709 for (
unsigned int k=0; k<ndot; k++)
// alpha and beta: random value plus a random entry in each of the ndot
// fastAccessDx slots.
2713 FadType alpha(ndot, this->urand.number());
2714 FadType beta(ndot, this->urand.number());
2715 for (
unsigned int k=0; k<ndot; k++) {
2716 alpha.fastAccessDx(k) = this->urand.number();
2717 beta.fastAccessDx(k) = this->urand.number();
// For every (i,j) and every k the same random val is written to slot k of
// C1..C5 -- presumably so the five GEMM results below can be compared.
// The lines between 2722 and 2728 are not shown.
2720 for (
unsigned int j=0; j<n; j++) {
2721 for (
unsigned int i=0;
i<m;
i++) {
2722 ScalarType
val = this->urand.number();
2728 for (
unsigned int k=0; k<ndot; k++) {
2729 val = this->urand.number();
2730 C1[
i+j*m].fastAccessDx(k) =
val;
2731 C2[
i+j*m].fastAccessDx(k) =
val;
2732 C3[
i+j*m].fastAccessDx(k) =
val;
2733 C4[
i+j*m].fastAccessDx(k) =
val;
2734 C5[
i+j*m].fastAccessDx(k) =
val;
// Variant 1: default-constructed Teuchos::BLAS<int,FadType>; result in C1.
2739 Teuchos::BLAS<int,FadType> teuchos_blas;
2740 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2741 &
A[0], m, &
B[0], l, beta, &C1[0], m);
// Variant 2: BLAS object constructed with (false); result in C2.
// NOTE(review): the meaning of the constructor flag is not visible in this
// listing -- confirm against the BLAS<int,FadType> specialization.
2743 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2744 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2745 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// Variant 3: BLAS object constructed with (false,false,sz); result in C3.
// sz counts m*l for A and (1+ndot) per element of B and C -- presumably a
// workspace size; TODO confirm.
2749 unsigned int sz = m*l + l*n*(1+ndot) + m*n*(1+ndot);
2750 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2751 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2752 &
A[0], m, &
B[0], l, beta, &C3[0], m);
// Same two non-default objects, now passing the raw scalar array a in place
// of A; results in C4 and C5.
2756 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2757 &
a[0], m, &
B[0], l, beta, &C4[0], m);
2761 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2762 &
a[0], m, &
B[0], l, beta, &C5[0], m);
// TRANS / NO_TRANS: the A operand is passed with leading dimension l.
2767 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2768 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2769 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2770 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2771 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2772 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2773 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2774 &
a[0], l, &
B[0], l, beta, &C4[0], m);
2775 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2776 &
a[0], l, &
B[0], l, beta, &C5[0], m);
// NO_TRANS / TRANS: the B operand is passed with leading dimension n.
2784 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2785 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2786 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2787 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2788 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2789 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2790 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2791 &
a[0], m, &
B[0], n, beta, &C4[0], m);
2792 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2793 &
a[0], m, &
B[0], n, beta, &C5[0], m);
// TRANS / TRANS: leading dimensions l for A and n for B.
2801 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2802 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2803 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2804 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2805 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2806 &
A[0], l, &
B[0], n, beta, &C3[0], m);
2807 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2808 &
a[0], l, &
B[0], n, beta, &C4[0], m);
2809 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2810 &
a[0], l, &
B[0], n, beta, &C5[0], m);
// GEMM test fragment in which B is filled from a plain ScalarType array b,
// while A, alpha, beta and C1..C5 are FadType objects built with ndot.
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip (e.g. 2820 -> 2826), so the test header, the declarations of m, n, l,
// VectorType and ScalarType, several loop bodies, closing braces and any
// result checks are not visible here.
2820 typedef decltype(this->fad)
FadType;
2826 auto ndot = this->ndot_;
2828 VectorType
A(m*l,ndot),
B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2829 C4(m*n,ndot), C5(m*n,ndot);
2830 std::vector<ScalarType> b(l*n);
// Each A entry (indexed as i + j*m) is constructed from (ndot, random number).
// The body of the k-loop at 2834 is not shown in this listing.
2831 for (
unsigned int j=0; j<l; j++) {
2832 for (
unsigned int i=0;
i<m;
i++) {
2833 A[
i+j*m] =
FadType(ndot, this->urand.number());
2834 for (
unsigned int k=0; k<ndot; k++)
// Fill b (indexed as i + j*l) with random scalars and copy each entry into B.
2838 for (
unsigned int j=0; j<n; j++) {
2839 for (
unsigned int i=0;
i<l;
i++) {
2840 b[
i+j*l] = this->urand.number();
2841 B[
i+j*l] = b[
i+j*l];
// alpha and beta: random value plus a random entry in each of the ndot
// fastAccessDx slots.
2844 FadType alpha(ndot, this->urand.number());
2845 FadType beta(ndot, this->urand.number());
2846 for (
unsigned int k=0; k<ndot; k++) {
2847 alpha.fastAccessDx(k) = this->urand.number();
2848 beta.fastAccessDx(k) = this->urand.number();
// For every (i,j) and every k the same random val is written to slot k of
// C1..C5 -- presumably so the five GEMM results below can be compared.
// The lines between 2853 and 2859 are not shown.
2851 for (
unsigned int j=0; j<n; j++) {
2852 for (
unsigned int i=0;
i<m;
i++) {
2853 ScalarType
val = this->urand.number();
2859 for (
unsigned int k=0; k<ndot; k++) {
2860 val = this->urand.number();
2861 C1[
i+j*m].fastAccessDx(k) =
val;
2862 C2[
i+j*m].fastAccessDx(k) =
val;
2863 C3[
i+j*m].fastAccessDx(k) =
val;
2864 C4[
i+j*m].fastAccessDx(k) =
val;
2865 C5[
i+j*m].fastAccessDx(k) =
val;
// Variant 1: default-constructed Teuchos::BLAS<int,FadType>; result in C1.
2870 Teuchos::BLAS<int,FadType> teuchos_blas;
2871 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2872 &
A[0], m, &
B[0], l, beta, &C1[0], m);
// Variant 2: BLAS object constructed with (false); result in C2.
// NOTE(review): the meaning of the constructor flag is not visible in this
// listing -- confirm against the BLAS<int,FadType> specialization.
2874 Teuchos::BLAS<int,FadType> sacado_blas(
false);
2875 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2876 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// Variant 3: BLAS object constructed with (false,false,sz); result in C3.
// sz counts (1+ndot) per element of A and C and l*n for B -- presumably a
// workspace size; TODO confirm.
2880 unsigned int sz = m*l*(1+ndot) + l*n + m*n*(1+ndot);
2881 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
2882 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2883 &
A[0], m, &
B[0], l, beta, &C3[0], m);
// Same two non-default objects, now passing the raw scalar array b in place
// of B; results in C4 and C5.
2887 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2888 &
A[0], m, &b[0], l, beta, &C4[0], m);
2892 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2893 &
A[0], m, &b[0], l, beta, &C5[0], m);
// TRANS / NO_TRANS: the A operand is passed with leading dimension l.
2898 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2899 &
A[0], l, &
B[0], l, beta, &C1[0], m);
2900 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2901 &
A[0], l, &
B[0], l, beta, &C2[0], m);
2902 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2903 &
A[0], l, &
B[0], l, beta, &C3[0], m);
2904 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2905 &
A[0], l, &b[0], l, beta, &C4[0], m);
2906 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
2907 &
A[0], l, &b[0], l, beta, &C5[0], m);
// NO_TRANS / TRANS: the B operand is passed with leading dimension n.
2915 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2916 &
A[0], m, &
B[0], n, beta, &C1[0], m);
2917 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2918 &
A[0], m, &
B[0], n, beta, &C2[0], m);
2919 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2920 &
A[0], m, &
B[0], n, beta, &C3[0], m);
2921 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2922 &
A[0], m, &b[0], n, beta, &C4[0], m);
2923 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
2924 &
A[0], m, &b[0], n, beta, &C5[0], m);
// TRANS / TRANS: leading dimensions l for A and n for B.
2932 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2933 &
A[0], l, &
B[0], n, beta, &C1[0], m);
2934 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2935 &
A[0], l, &
B[0], n, beta, &C2[0], m);
2936 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2937 &
A[0], l, &
B[0], n, beta, &C3[0], m);
2938 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2939 &
A[0], l, &b[0], n, beta, &C4[0], m);
2940 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
2941 &
A[0], l, &b[0], n, beta, &C5[0], m);
// GEMM test fragment in which both A and B are filled from plain ScalarType
// arrays (a and b); alpha, beta and C1..C5 are FadType objects built with ndot.
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip (e.g. 2951 -> 2957), so the test header, the declarations of m, n, l,
// VectorType and ScalarType, closing braces and any result checks are not
// visible here.
2951 typedef decltype(this->fad)
FadType;
2957 auto ndot = this->ndot_;
2959 VectorType
A(m*l,ndot),
B(l*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
2960 C4(m*n,ndot), C5(m*n,ndot);
2961 std::vector<ScalarType>
a(m*l), b(l*n);
// Fill a (indexed as i + j*m) with random scalars and copy each entry into A.
2962 for (
unsigned int j=0; j<l; j++) {
2963 for (
unsigned int i=0;
i<m;
i++) {
2964 a[
i+j*m] = this->urand.number();
2965 A[
i+j*m] =
a[
i+j*m];
// Fill b (indexed as i + j*l) with random scalars and copy each entry into B.
2968 for (
unsigned int j=0; j<n; j++) {
2969 for (
unsigned int i=0;
i<l;
i++) {
2970 b[
i+j*l] = this->urand.number();
2971 B[
i+j*l] = b[
i+j*l];
// alpha and beta: random value plus a random entry in each of the ndot
// fastAccessDx slots.
2974 FadType alpha(ndot, this->urand.number());
2975 FadType beta(ndot, this->urand.number());
2976 for (
unsigned int k=0; k<ndot; k++) {
2977 alpha.fastAccessDx(k) = this->urand.number();
2978 beta.fastAccessDx(k) = this->urand.number();
// For every (i,j) and every k the same random val is written to slot k of
// C1..C5 -- presumably so the five GEMM results below can be compared.
// The lines between 2983 and 2989 are not shown.
2981 for (
unsigned int j=0; j<n; j++) {
2982 for (
unsigned int i=0;
i<m;
i++) {
2983 ScalarType
val = this->urand.number();
2989 for (
unsigned int k=0; k<ndot; k++) {
2990 val = this->urand.number();
2991 C1[
i+j*m].fastAccessDx(k) =
val;
2992 C2[
i+j*m].fastAccessDx(k) =
val;
2993 C3[
i+j*m].fastAccessDx(k) =
val;
2994 C4[
i+j*m].fastAccessDx(k) =
val;
2995 C5[
i+j*m].fastAccessDx(k) =
val;
// Variant 1: default-constructed Teuchos::BLAS<int,FadType>; result in C1.
3000 Teuchos::BLAS<int,FadType> teuchos_blas;
3001 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3002 &
A[0], m, &
B[0], l, beta, &C1[0], m);
// Variant 2: BLAS object constructed with (false); result in C2.
// NOTE(review): the meaning of the constructor flag is not visible in this
// listing -- confirm against the BLAS<int,FadType> specialization.
3004 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3005 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3006 &
A[0], m, &
B[0], l, beta, &C2[0], m);
// Variant 3: BLAS object constructed with (false,false,sz); result in C3.
// sz counts m*l for A, l*n for B and (1+ndot) per element of C -- presumably
// a workspace size; TODO confirm.
3010 unsigned int sz = m*l + l*n + m*n*(1+ndot);
3011 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3012 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3013 &
A[0], m, &
B[0], l, beta, &C3[0], m);
// Same two non-default objects, now passing the raw scalar arrays a and b in
// place of A and B; results in C4 and C5.
3017 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3018 &
a[0], m, &b[0], l, beta, &C4[0], m);
3022 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3023 &
a[0], m, &b[0], l, beta, &C5[0], m);
// TRANS / NO_TRANS: the A operand is passed with leading dimension l.
3028 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3029 &
A[0], l, &
B[0], l, beta, &C1[0], m);
3030 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3031 &
A[0], l, &
B[0], l, beta, &C2[0], m);
3032 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3033 &
A[0], l, &
B[0], l, beta, &C3[0], m);
3034 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3035 &
a[0], l, &b[0], l, beta, &C4[0], m);
3036 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::NO_TRANS, m, n, l, alpha,
3037 &
a[0], l, &b[0], l, beta, &C5[0], m);
// NO_TRANS / TRANS: the B operand is passed with leading dimension n.
3045 teuchos_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3046 &
A[0], m, &
B[0], n, beta, &C1[0], m);
3047 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3048 &
A[0], m, &
B[0], n, beta, &C2[0], m);
3049 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3050 &
A[0], m, &
B[0], n, beta, &C3[0], m);
3051 sacado_blas.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3052 &
a[0], m, &b[0], n, beta, &C4[0], m);
3053 sacado_blas2.GEMM(Teuchos::NO_TRANS, Teuchos::TRANS, m, n, l, alpha,
3054 &
a[0], m, &b[0], n, beta, &C5[0], m);
// TRANS / TRANS: leading dimensions l for A and n for B.
3062 teuchos_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3063 &
A[0], l, &
B[0], n, beta, &C1[0], m);
3064 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3065 &
A[0], l, &
B[0], n, beta, &C2[0], m);
3066 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3067 &
A[0], l, &
B[0], n, beta, &C3[0], m);
3068 sacado_blas.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3069 &
a[0], l, &b[0], n, beta, &C4[0], m);
3070 sacado_blas2.GEMM(Teuchos::TRANS, Teuchos::TRANS, m, n, l, alpha,
3071 &
a[0], l, &b[0], n, beta, &C5[0], m);
// SYMM test fragment with LEFT_SIDE: A is m*m, B and C1..C3 are m*n, and all
// operands (A, B, alpha, beta, C) are FadType objects built with ndot.
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip (e.g. 3081 -> 3086), so the test header, the declarations of m, n,
// VectorType and ScalarType, several loop bodies, closing braces and any
// result checks are not visible here.
3081 typedef decltype(this->fad)
FadType;
3086 auto ndot = this->ndot_;
// The statement guarded by this isComplex check is not shown in this listing.
3090 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3093 VectorType
A(m*m,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
// Each A entry (indexed as i + j*m) is constructed from (ndot, random number).
// The body of the k-loop at 3097 is not shown.
3094 for (
unsigned int j=0; j<m; j++) {
3095 for (
unsigned int i=0;
i<m;
i++) {
3096 A[
i+j*m] =
FadType(ndot, this->urand.number());
3097 for (
unsigned int k=0; k<ndot; k++)
// Each B entry (indexed as i + j*m) is built the same way; the k-loop body at
// 3104 is likewise not shown.
3101 for (
unsigned int j=0; j<n; j++) {
3102 for (
unsigned int i=0;
i<m;
i++) {
3103 B[
i+j*m] =
FadType(ndot, this->urand.number());
3104 for (
unsigned int k=0; k<ndot; k++)
// alpha and beta: random value plus a random entry in each of the ndot
// fastAccessDx slots.
3108 FadType alpha(ndot, this->urand.number());
3109 FadType beta(ndot, this->urand.number());
3110 for (
unsigned int k=0; k<ndot; k++) {
3111 alpha.fastAccessDx(k) = this->urand.number();
3112 beta.fastAccessDx(k) = this->urand.number();
// For every (i,j) and every k the same random val is written to slot k of
// C1..C3 -- presumably so the three SYMM results below can be compared.
// The lines between 3117 and 3121 are not shown.
3115 for (
unsigned int j=0; j<n; j++) {
3116 for (
unsigned int i=0;
i<m;
i++) {
3117 ScalarType
val = this->urand.number();
3121 for (
unsigned int k=0; k<ndot; k++) {
3122 val = this->urand.number();
3123 C1[
i+j*m].fastAccessDx(k) =
val;
3124 C2[
i+j*m].fastAccessDx(k) =
val;
3125 C3[
i+j*m].fastAccessDx(k) =
val;
// Variant 1: default-constructed Teuchos::BLAS<int,FadType>; result in C1.
3130 Teuchos::BLAS<int,FadType> teuchos_blas;
3131 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3132 &
A[0], m, &
B[0], m, beta, &C1[0], m);
// Variant 2: BLAS object constructed with (false); result in C2.
// NOTE(review): the meaning of the constructor flag is not visible in this
// listing -- confirm against the BLAS<int,FadType> specialization.
3134 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3135 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3136 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// Variant 3: BLAS object constructed with (false,false,sz); result in C3.
// sz counts (1+ndot) per element of A (m*m) and twice m*n -- presumably a
// workspace size covering A, B and C; TODO confirm.
3140 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3141 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3142 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3143 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// Repeat all three variants with LOWER_TRI.
3148 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3149 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3150 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3151 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3152 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3153 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// SYMM test fragment with RIGHT_SIDE: A is n*n, B and C1..C3 are m*n, and all
// operands (A, B, alpha, beta, C) are FadType objects built with ndot.
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip (e.g. 3161 -> 3166), so the test header, the declarations of m, n,
// VectorType and ScalarType, several loop bodies, closing braces and any
// result checks are not visible here.
3161 typedef decltype(this->fad)
FadType;
3166 auto ndot = this->ndot_;
// The statement guarded by this isComplex check is not shown in this listing.
3170 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3173 VectorType
A(n*n,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
// Each A entry (indexed as i + j*n) is constructed from (ndot, random number).
// The body of the k-loop at 3177 is not shown.
3174 for (
unsigned int j=0; j<n; j++) {
3175 for (
unsigned int i=0;
i<n;
i++) {
3176 A[
i+j*n] =
FadType(ndot, this->urand.number());
3177 for (
unsigned int k=0; k<ndot; k++)
// Each B entry (indexed as i + j*m) is built the same way; the k-loop body at
// 3184 is likewise not shown.
3181 for (
unsigned int j=0; j<n; j++) {
3182 for (
unsigned int i=0;
i<m;
i++) {
3183 B[
i+j*m] =
FadType(ndot, this->urand.number());
3184 for (
unsigned int k=0; k<ndot; k++)
// alpha and beta: random value plus a random entry in each of the ndot
// fastAccessDx slots.
3188 FadType alpha(ndot, this->urand.number());
3189 FadType beta(ndot, this->urand.number());
3190 for (
unsigned int k=0; k<ndot; k++) {
3191 alpha.fastAccessDx(k) = this->urand.number();
3192 beta.fastAccessDx(k) = this->urand.number();
// For every (i,j) and every k the same random val is written to slot k of
// C1..C3 -- presumably so the three SYMM results below can be compared.
// The lines between 3197 and 3201 are not shown.
3195 for (
unsigned int j=0; j<n; j++) {
3196 for (
unsigned int i=0;
i<m;
i++) {
3197 ScalarType
val = this->urand.number();
3201 for (
unsigned int k=0; k<ndot; k++) {
3202 val = this->urand.number();
3203 C1[
i+j*m].fastAccessDx(k) =
val;
3204 C2[
i+j*m].fastAccessDx(k) =
val;
3205 C3[
i+j*m].fastAccessDx(k) =
val;
// Variant 1: default-constructed Teuchos::BLAS<int,FadType>; result in C1.
// A is passed with leading dimension n.
3210 Teuchos::BLAS<int,FadType> teuchos_blas;
3211 teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3212 &
A[0], n, &
B[0], m, beta, &C1[0], m);
// Variant 2: BLAS object constructed with (false); result in C2.
// NOTE(review): the meaning of the constructor flag is not visible in this
// listing -- confirm against the BLAS<int,FadType> specialization.
3214 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3215 sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3216 &
A[0], n, &
B[0], m, beta, &C2[0], m);
// Variant 3: BLAS object constructed with (false,false,sz); result in C3.
// sz counts (1+ndot) per element of A (n*n) and twice m*n -- presumably a
// workspace size covering A, B and C; TODO confirm.
3220 unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3221 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3222 sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3223 &
A[0], n, &
B[0], m, beta, &C3[0], m);
// Repeat all three variants with LOWER_TRI.
3228 teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3229 &
A[0], n, &
B[0], m, beta, &C1[0], m);
3230 sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3231 &
A[0], n, &
B[0], m, beta, &C2[0], m);
3232 sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3233 &
A[0], n, &
B[0], m, beta, &C3[0], m);
// SYMM test fragment with LEFT_SIDE and leading dimensions larger than the
// matrix row counts: lda = m+4, ldb = m+5, ldc = m+6.
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip (e.g. 3241 -> 3246), so the test header, the declarations of m, n,
// VectorType and ScalarType, several loop bodies, closing braces and any
// result checks are not visible here.
3241 typedef decltype(this->fad)
FadType;
3246 auto ndot = this->ndot_;
// The statement guarded by this isComplex check is not shown in this listing.
3250 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3253 unsigned int lda = m+4;
3254 unsigned int ldb = m+5;
3255 unsigned int ldc = m+6;
// The declaration below ends with a comma because its continuation (original
// line 3257) is missing from this listing; C3, used further down, is
// presumably declared there.
3256 VectorType
A(lda*m,ndot),
B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
// All lda rows of each of the m columns of A are filled, including the rows
// beyond m. The body of the k-loop at 3261 is not shown.
3258 for (
unsigned int j=0; j<m; j++) {
3259 for (
unsigned int i=0;
i<lda;
i++) {
3260 A[
i+j*lda] =
FadType(ndot, this->urand.number());
3261 for (
unsigned int k=0; k<ndot; k++)
// Likewise all ldb rows of each of the n columns of B are filled; the k-loop
// body at 3268 is not shown.
3265 for (
unsigned int j=0; j<n; j++) {
3266 for (
unsigned int i=0;
i<ldb;
i++) {
3267 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
3268 for (
unsigned int k=0; k<ndot; k++)
// alpha and beta: random value plus a random entry in each of the ndot
// fastAccessDx slots.
3272 FadType alpha(ndot, this->urand.number());
3273 FadType beta(ndot, this->urand.number());
3274 for (
unsigned int k=0; k<ndot; k++) {
3275 alpha.fastAccessDx(k) = this->urand.number();
3276 beta.fastAccessDx(k) = this->urand.number();
// For every (i,j) over all ldc rows and every k the same random val is
// written to slot k of C1..C3 -- presumably so the results can be compared.
// The lines between 3281 and 3285 are not shown.
3279 for (
unsigned int j=0; j<n; j++) {
3280 for (
unsigned int i=0;
i<ldc;
i++) {
3281 ScalarType
val = this->urand.number();
3285 for (
unsigned int k=0; k<ndot; k++) {
3286 val = this->urand.number();
3287 C1[
i+j*ldc].fastAccessDx(k) =
val;
3288 C2[
i+j*ldc].fastAccessDx(k) =
val;
3289 C3[
i+j*ldc].fastAccessDx(k) =
val;
// Variant 1: default-constructed Teuchos::BLAS<int,FadType>; result in C1.
3294 Teuchos::BLAS<int,FadType> teuchos_blas;
3295 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3296 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
// Variant 2: BLAS object constructed with (false); result in C2.
// NOTE(review): the meaning of the constructor flag is not visible in this
// listing -- confirm against the BLAS<int,FadType> specialization.
3298 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3299 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3300 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// Variant 3: BLAS object constructed with (false,false,sz); result in C3.
// NOTE(review): sz is computed from m and n, not from lda/ldb/ldc, although
// the arrays are sized with the padded leading dimensions -- presumably the
// size refers only to the m-by-m / m-by-n logical matrices; TODO confirm.
3304 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3305 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3306 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3307 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// Repeat all three variants with LOWER_TRI.
3312 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3313 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3314 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3315 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
3316 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3317 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// SYMM test fragment with RIGHT_SIDE and leading dimensions larger than the
// matrix row counts: lda = n+4, ldb = m+5, ldc = m+6.
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip (e.g. 3325 -> 3330), so the test header, the declarations of m, n,
// VectorType and ScalarType, several loop bodies, closing braces and any
// result checks are not visible here.
3325 typedef decltype(this->fad)
FadType;
3330 auto ndot = this->ndot_;
// The statement guarded by this isComplex check is not shown in this listing.
3334 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3337 unsigned int lda = n+4;
3338 unsigned int ldb = m+5;
3339 unsigned int ldc = m+6;
// The declaration below ends with a comma because its continuation (original
// line 3341) is missing from this listing; C3, used further down, is
// presumably declared there.
3340 VectorType
A(lda*n,ndot),
B(ldb*n,ndot), C1(ldc*n,ndot), C2(ldc*n,ndot),
// All lda rows of each of the n columns of A are filled, including the rows
// beyond n. The body of the k-loop at 3345 is not shown.
3342 for (
unsigned int j=0; j<n; j++) {
3343 for (
unsigned int i=0;
i<lda;
i++) {
3344 A[
i+j*lda] =
FadType(ndot, this->urand.number());
3345 for (
unsigned int k=0; k<ndot; k++)
// Likewise all ldb rows of each of the n columns of B are filled; the k-loop
// body at 3352 is not shown.
3349 for (
unsigned int j=0; j<n; j++) {
3350 for (
unsigned int i=0;
i<ldb;
i++) {
3351 B[
i+j*ldb] =
FadType(ndot, this->urand.number());
3352 for (
unsigned int k=0; k<ndot; k++)
// alpha and beta: random value plus a random entry in each of the ndot
// fastAccessDx slots.
3356 FadType alpha(ndot, this->urand.number());
3357 FadType beta(ndot, this->urand.number());
3358 for (
unsigned int k=0; k<ndot; k++) {
3359 alpha.fastAccessDx(k) = this->urand.number();
3360 beta.fastAccessDx(k) = this->urand.number();
// For every (i,j) over all ldc rows and every k the same random val is
// written to slot k of C1..C3 -- presumably so the results can be compared.
// The lines between 3365 and 3369 are not shown.
3363 for (
unsigned int j=0; j<n; j++) {
3364 for (
unsigned int i=0;
i<ldc;
i++) {
3365 ScalarType
val = this->urand.number();
3369 for (
unsigned int k=0; k<ndot; k++) {
3370 val = this->urand.number();
3371 C1[
i+j*ldc].fastAccessDx(k) =
val;
3372 C2[
i+j*ldc].fastAccessDx(k) =
val;
3373 C3[
i+j*ldc].fastAccessDx(k) =
val;
// Variant 1: default-constructed Teuchos::BLAS<int,FadType>; result in C1.
3378 Teuchos::BLAS<int,FadType> teuchos_blas;
3379 teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3380 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
// Variant 2: BLAS object constructed with (false); result in C2.
// NOTE(review): the meaning of the constructor flag is not visible in this
// listing -- confirm against the BLAS<int,FadType> specialization.
3382 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3383 sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3384 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
// Variant 3: BLAS object constructed with (false,false,sz); result in C3.
// NOTE(review): sz is computed from m and n, not from lda/ldb/ldc, although
// the arrays are sized with the padded leading dimensions -- presumably the
// size refers only to the n-by-n / m-by-n logical matrices; TODO confirm.
3388 unsigned int sz = n*n*(1+ndot) + 2*m*n*(1+ndot);
3389 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3390 sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3391 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// Repeat all three variants with LOWER_TRI.
3396 teuchos_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3397 &
A[0], lda, &
B[0], ldb, beta, &C1[0], ldc);
3398 sacado_blas.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3399 &
A[0], lda, &
B[0], ldb, beta, &C2[0], ldc);
3400 sacado_blas2.SYMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3401 &
A[0], lda, &
B[0], ldb, beta, &C3[0], ldc);
// SYMM test fragment with LEFT_SIDE: A is m*m, B and C1..C3 are m*n; A, B,
// alpha and beta are FadType objects built with ndot.
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip (e.g. 3409 -> 3414), so the test header, the declarations of m, n,
// VectorType and ScalarType, several loop bodies, closing braces and any
// result checks are not visible here.
3409 typedef decltype(this->fad)
FadType;
3414 auto ndot = this->ndot_;
// The statement guarded by this isComplex check is not shown in this listing.
3418 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3421 VectorType
A(m*m,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
// Each A entry (indexed as i + j*m) is constructed from (ndot, random number).
// The body of the k-loop at 3425 is not shown.
3422 for (
unsigned int j=0; j<m; j++) {
3423 for (
unsigned int i=0;
i<m;
i++) {
3424 A[
i+j*m] =
FadType(ndot, this->urand.number());
3425 for (
unsigned int k=0; k<ndot; k++)
// Each B entry (indexed as i + j*m) is built the same way; the k-loop body at
// 3432 is likewise not shown.
3429 for (
unsigned int j=0; j<n; j++) {
3430 for (
unsigned int i=0;
i<m;
i++) {
3431 B[
i+j*m] =
FadType(ndot, this->urand.number());
3432 for (
unsigned int k=0; k<ndot; k++)
// alpha and beta: random value plus a random entry in each of the ndot
// fastAccessDx slots.
3436 FadType alpha(ndot, this->urand.number());
3437 FadType beta(ndot, this->urand.number());
3438 for (
unsigned int k=0; k<ndot; k++) {
3439 alpha.fastAccessDx(k) = this->urand.number();
3440 beta.fastAccessDx(k) = this->urand.number();
// Only the draw of val is visible for the C loop; the listing jumps from 3445
// to 3452 and, unlike neighbouring fragments, shows no fastAccessDx writes to
// C1..C3. NOTE(review): possibly C carries values only in this test --
// confirm against the full source.
3443 for (
unsigned int j=0; j<n; j++) {
3444 for (
unsigned int i=0;
i<m;
i++) {
3445 ScalarType
val = this->urand.number();
// Variant 1: default-constructed Teuchos::BLAS<int,FadType>; result in C1.
3452 Teuchos::BLAS<int,FadType> teuchos_blas;
3453 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3454 &
A[0], m, &
B[0], m, beta, &C1[0], m);
// Variant 2: BLAS object constructed with (false); result in C2.
// NOTE(review): the meaning of the constructor flag is not visible in this
// listing -- confirm against the BLAS<int,FadType> specialization.
3456 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3457 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3458 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// Variant 3: BLAS object constructed with (false,false,sz); result in C3.
// sz counts (1+ndot) per element of A (m*m) and twice m*n -- presumably a
// workspace size; TODO confirm.
3462 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3463 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3464 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3465 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// Repeat all three variants with LOWER_TRI.
3470 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3471 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3472 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3473 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3474 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3475 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// SYMM test fragment with LEFT_SIDE in which alpha and beta are plain
// ScalarType values rather than FadType; A, B and C1..C3 are built with ndot.
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip (e.g. 3483 -> 3488), so the test header, the declarations of m, n,
// VectorType and ScalarType, several loop bodies, closing braces and any
// result checks are not visible here.
3483 typedef decltype(this->fad)
FadType;
3488 auto ndot = this->ndot_;
// The statement guarded by this isComplex check is not shown in this listing.
3492 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3495 VectorType
A(m*m,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot);
// Each A entry (indexed as i + j*m) is constructed from (ndot, random number).
// The body of the k-loop at 3499 is not shown.
3496 for (
unsigned int j=0; j<m; j++) {
3497 for (
unsigned int i=0;
i<m;
i++) {
3498 A[
i+j*m] =
FadType(ndot, this->urand.number());
3499 for (
unsigned int k=0; k<ndot; k++)
// Each B entry (indexed as i + j*m) is built the same way; the k-loop body at
// 3506 is likewise not shown.
3503 for (
unsigned int j=0; j<n; j++) {
3504 for (
unsigned int i=0;
i<m;
i++) {
3505 B[
i+j*m] =
FadType(ndot, this->urand.number());
3506 for (
unsigned int k=0; k<ndot; k++)
// Scalar multipliers: random ScalarType values with no derivative slots.
3510 ScalarType alpha = this->urand.number();
3511 ScalarType beta = this->urand.number();
// For every (i,j) and every k the same random val is written to slot k of
// C1..C3 -- presumably so the three SYMM results below can be compared.
// The lines between 3515 and 3519 are not shown.
3513 for (
unsigned int j=0; j<n; j++) {
3514 for (
unsigned int i=0;
i<m;
i++) {
3515 ScalarType
val = this->urand.number();
3519 for (
unsigned int k=0; k<ndot; k++) {
3520 val = this->urand.number();
3521 C1[
i+j*m].fastAccessDx(k) =
val;
3522 C2[
i+j*m].fastAccessDx(k) =
val;
3523 C3[
i+j*m].fastAccessDx(k) =
val;
// Variant 1: default-constructed Teuchos::BLAS<int,FadType>; result in C1.
3528 Teuchos::BLAS<int,FadType> teuchos_blas;
3529 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3530 &
A[0], m, &
B[0], m, beta, &C1[0], m);
// Variant 2: BLAS object constructed with (false); result in C2.
// NOTE(review): the meaning of the constructor flag is not visible in this
// listing -- confirm against the BLAS<int,FadType> specialization.
3532 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3533 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3534 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// Variant 3: BLAS object constructed with (false,false,sz); result in C3.
// sz counts (1+ndot) per element of A (m*m) and twice m*n -- presumably a
// workspace size; TODO confirm.
3538 unsigned int sz = m*m*(1+ndot) + 2*m*n*(1+ndot);
3539 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3540 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3541 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// Repeat all three variants with LOWER_TRI.
3546 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3547 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3548 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3549 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3550 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3551 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// SYMM test fragment with LEFT_SIDE in which A is filled from a plain
// ScalarType array a; B, alpha, beta and C1..C5 are FadType objects built
// with ndot.
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip (e.g. 3559 -> 3564), so the test header, the declarations of m, n,
// VectorType and ScalarType, several loop bodies, closing braces and any
// result checks are not visible here.
3559 typedef decltype(this->fad)
FadType;
3564 auto ndot = this->ndot_;
// The statement guarded by this isComplex check is not shown in this listing.
3568 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3571 VectorType
A(m*m,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3572 C4(m*n,ndot), C5(m*n,ndot);
3573 std::vector<ScalarType>
a(m*m);
// Fill a (indexed as i + j*m) with random scalars and copy each entry into A.
3574 for (
unsigned int j=0; j<m; j++) {
3575 for (
unsigned int i=0;
i<m;
i++) {
3576 a[
i+j*m] = this->urand.number();
3577 A[
i+j*m] =
a[
i+j*m];
// Each B entry (indexed as i + j*m) is constructed from (ndot, random number).
// The body of the k-loop at 3583 is not shown.
3580 for (
unsigned int j=0; j<n; j++) {
3581 for (
unsigned int i=0;
i<m;
i++) {
3582 B[
i+j*m] =
FadType(ndot, this->urand.number());
3583 for (
unsigned int k=0; k<ndot; k++)
// alpha and beta: random value plus a random entry in each of the ndot
// fastAccessDx slots.
3587 FadType alpha(ndot, this->urand.number());
3588 FadType beta(ndot, this->urand.number());
3589 for (
unsigned int k=0; k<ndot; k++) {
3590 alpha.fastAccessDx(k) = this->urand.number();
3591 beta.fastAccessDx(k) = this->urand.number();
// For every (i,j) and every k the same random val is written to slot k of
// C1..C5 -- presumably so the five SYMM results below can be compared.
// The lines between 3596 and 3602 are not shown.
3594 for (
unsigned int j=0; j<n; j++) {
3595 for (
unsigned int i=0;
i<m;
i++) {
3596 ScalarType
val = this->urand.number();
3602 for (
unsigned int k=0; k<ndot; k++) {
3603 val = this->urand.number();
3604 C1[
i+j*m].fastAccessDx(k) =
val;
3605 C2[
i+j*m].fastAccessDx(k) =
val;
3606 C3[
i+j*m].fastAccessDx(k) =
val;
3607 C4[
i+j*m].fastAccessDx(k) =
val;
3608 C5[
i+j*m].fastAccessDx(k) =
val;
// Variant 1: default-constructed Teuchos::BLAS<int,FadType>; result in C1.
3613 Teuchos::BLAS<int,FadType> teuchos_blas;
3614 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3615 &
A[0], m, &
B[0], m, beta, &C1[0], m);
// Variant 2: BLAS object constructed with (false); result in C2.
// NOTE(review): the meaning of the constructor flag is not visible in this
// listing -- confirm against the BLAS<int,FadType> specialization.
3617 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3618 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3619 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// Variant 3: BLAS object constructed with (false,false,sz); result in C3.
// sz counts m*m for A and (1+ndot) per element for twice m*n -- presumably a
// workspace size; TODO confirm.
3623 unsigned int sz = m*m + 2*m*n*(1+ndot);
3624 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3625 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3626 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// Same two non-default objects, now passing the raw scalar array a in place
// of A; results in C4 and C5.
3630 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3631 &
a[0], m, &
B[0], m, beta, &C4[0], m);
3635 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3636 &
a[0], m, &
B[0], m, beta, &C5[0], m);
// Repeat all five calls with LOWER_TRI.
3641 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3642 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3643 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3644 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3645 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3646 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3647 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3648 &
a[0], m, &
B[0], m, beta, &C4[0], m);
3649 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3650 &
a[0], m, &
B[0], m, beta, &C5[0], m);
// SYMM test fragment with LEFT_SIDE in which B is filled from a plain
// ScalarType array b; A, alpha, beta and C1..C5 are FadType objects built
// with ndot.
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip (e.g. 3660 -> 3665), so the test header, the declarations of m, n,
// VectorType and ScalarType, several loop bodies, closing braces and any
// result checks are not visible here.
3660 typedef decltype(this->fad)
FadType;
3665 auto ndot = this->ndot_;
// The statement guarded by this isComplex check is not shown in this listing.
3669 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3672 VectorType
A(m*m,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3673 C4(m*n,ndot), C5(m*n,ndot);
3674 std::vector<ScalarType> b(m*n);
// Each A entry (indexed as i + j*m) is constructed from (ndot, random number).
// The body of the k-loop at 3678 is not shown.
3675 for (
unsigned int j=0; j<m; j++) {
3676 for (
unsigned int i=0;
i<m;
i++) {
3677 A[
i+j*m] =
FadType(ndot, this->urand.number());
3678 for (
unsigned int k=0; k<ndot; k++)
// Fill b (indexed as i + j*m) with random scalars and copy each entry into B.
3682 for (
unsigned int j=0; j<n; j++) {
3683 for (
unsigned int i=0;
i<m;
i++) {
3684 b[
i+j*m] = this->urand.number();
3685 B[
i+j*m] = b[
i+j*m];
// alpha and beta: random value plus a random entry in each of the ndot
// fastAccessDx slots.
3688 FadType alpha(ndot, this->urand.number());
3689 FadType beta(ndot, this->urand.number());
3690 for (
unsigned int k=0; k<ndot; k++) {
3691 alpha.fastAccessDx(k) = this->urand.number();
3692 beta.fastAccessDx(k) = this->urand.number();
// For every (i,j) and every k the same random val is written to slot k of
// C1..C5 -- presumably so the five SYMM results below can be compared.
// The lines between 3697 and 3703 are not shown.
3695 for (
unsigned int j=0; j<n; j++) {
3696 for (
unsigned int i=0;
i<m;
i++) {
3697 ScalarType
val = this->urand.number();
3703 for (
unsigned int k=0; k<ndot; k++) {
3704 val = this->urand.number();
3705 C1[
i+j*m].fastAccessDx(k) =
val;
3706 C2[
i+j*m].fastAccessDx(k) =
val;
3707 C3[
i+j*m].fastAccessDx(k) =
val;
3708 C4[
i+j*m].fastAccessDx(k) =
val;
3709 C5[
i+j*m].fastAccessDx(k) =
val;
// Variant 1: default-constructed Teuchos::BLAS<int,FadType>; result in C1.
3714 Teuchos::BLAS<int,FadType> teuchos_blas;
3715 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3716 &
A[0], m, &
B[0], m, beta, &C1[0], m);
// Variant 2: BLAS object constructed with (false); result in C2.
// NOTE(review): the meaning of the constructor flag is not visible in this
// listing -- confirm against the BLAS<int,FadType> specialization.
3718 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3719 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3720 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// Variant 3: BLAS object constructed with (false,false,sz); result in C3.
// sz counts (1+ndot) per element of A (m*m) plus m*n*(2+ndot) -- presumably a
// workspace size covering scalar B and Fad C; TODO confirm.
3724 unsigned int sz = m*m*(1+ndot) + m*n*(2+ndot);
3725 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3726 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3727 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// Same two non-default objects, now passing the raw scalar array b in place
// of B; results in C4 and C5.
3731 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3732 &
A[0], m, &b[0], m, beta, &C4[0], m);
3736 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3737 &
A[0], m, &b[0], m, beta, &C5[0], m);
// Repeat all five calls with LOWER_TRI.
3742 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3743 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3744 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3745 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3746 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3747 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3748 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3749 &
A[0], m, &b[0], m, beta, &C4[0], m);
3750 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3751 &
A[0], m, &b[0], m, beta, &C5[0], m);
// SYMM test fragment with LEFT_SIDE in which both A and B are filled from
// plain ScalarType arrays (a and b); alpha, beta and C1..C5 are FadType
// objects built with ndot.
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip (e.g. 3761 -> 3766), so the test header, the declarations of m, n,
// VectorType and ScalarType, closing braces and any result checks are not
// visible here.
3761 typedef decltype(this->fad)
FadType;
3766 auto ndot = this->ndot_;
// The statement guarded by this isComplex check is not shown in this listing.
3770 if (Teuchos::ScalarTraits<ScalarType>::isComplex)
3773 VectorType
A(m*m,ndot),
B(m*n,ndot), C1(m*n,ndot), C2(m*n,ndot), C3(m*n,ndot),
3774 C4(m*n,ndot), C5(m*n,ndot);
3775 std::vector<ScalarType>
a(m*m), b(m*n);
// Fill a (indexed as i + j*m) with random scalars and copy each entry into A.
3776 for (
unsigned int j=0; j<m; j++) {
3777 for (
unsigned int i=0;
i<m;
i++) {
3778 a[
i+j*m] = this->urand.number();
3779 A[
i+j*m] =
a[
i+j*m];
// Fill b (indexed as i + j*m) with random scalars and copy each entry into B.
3782 for (
unsigned int j=0; j<n; j++) {
3783 for (
unsigned int i=0;
i<m;
i++) {
3784 b[
i+j*m] = this->urand.number();
3785 B[
i+j*m] = b[
i+j*m];
// alpha and beta: random value plus a random entry in each of the ndot
// fastAccessDx slots.
3788 FadType alpha(ndot, this->urand.number());
3789 FadType beta(ndot, this->urand.number());
3790 for (
unsigned int k=0; k<ndot; k++) {
3791 alpha.fastAccessDx(k) = this->urand.number();
3792 beta.fastAccessDx(k) = this->urand.number();
// For every (i,j) and every k the same random val is written to slot k of
// C1..C5 -- presumably so the five SYMM results below can be compared.
// The lines between 3797 and 3803 are not shown.
3795 for (
unsigned int j=0; j<n; j++) {
3796 for (
unsigned int i=0;
i<m;
i++) {
3797 ScalarType
val = this->urand.number();
3803 for (
unsigned int k=0; k<ndot; k++) {
3804 val = this->urand.number();
3805 C1[
i+j*m].fastAccessDx(k) =
val;
3806 C2[
i+j*m].fastAccessDx(k) =
val;
3807 C3[
i+j*m].fastAccessDx(k) =
val;
3808 C4[
i+j*m].fastAccessDx(k) =
val;
3809 C5[
i+j*m].fastAccessDx(k) =
val;
// Variant 1: default-constructed Teuchos::BLAS<int,FadType>; result in C1.
3814 Teuchos::BLAS<int,FadType> teuchos_blas;
3815 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3816 &
A[0], m, &
B[0], m, beta, &C1[0], m);
// Variant 2: BLAS object constructed with (false); result in C2.
// NOTE(review): the meaning of the constructor flag is not visible in this
// listing -- confirm against the BLAS<int,FadType> specialization.
3818 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3819 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3820 &
A[0], m, &
B[0], m, beta, &C2[0], m);
// Variant 3: BLAS object constructed with (false,false,sz); result in C3.
// sz counts m*m for A plus m*n*(2+ndot) -- presumably a workspace size
// covering scalar B and Fad C; TODO confirm.
3824 unsigned int sz = m*m + m*n*(2+ndot);
3825 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3826 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3827 &
A[0], m, &
B[0], m, beta, &C3[0], m);
// Same two non-default objects, now passing the raw scalar arrays a and b in
// place of A and B; results in C4 and C5.
3831 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3832 &
a[0], m, &b[0], m, beta, &C4[0], m);
3836 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, m, n, alpha,
3837 &
a[0], m, &b[0], m, beta, &C5[0], m);
// Repeat all five calls with LOWER_TRI.
3842 teuchos_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3843 &
A[0], m, &
B[0], m, beta, &C1[0], m);
3844 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3845 &
A[0], m, &
B[0], m, beta, &C2[0], m);
3846 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3847 &
A[0], m, &
B[0], m, beta, &C3[0], m);
3848 sacado_blas.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3849 &
a[0], m, &b[0], m, beta, &C4[0], m);
3850 sacado_blas2.SYMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, m, n, alpha,
3851 &
a[0], m, &b[0], m, beta, &C5[0], m);
// TRMM test fragment with LEFT_SIDE: A is m*m and B1..B3 are m*n, all built
// with ndot; TRMM writes its result into the B argument, so B1, B2 and B3
// receive the results of the three BLAS objects.
// NOTE(review): this listing is lossy -- the embedded original line numbers
// skip (e.g. 3861 -> 3866), so the test header, the declarations of m, n,
// VectorType and ScalarType, several loop bodies, closing braces and any
// result checks are not visible here.
3861 typedef decltype(this->fad)
FadType;
3866 auto ndot = this->ndot_;
3868 VectorType
A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
// Each A entry (indexed as i + j*m) is constructed from (ndot, random number).
// The body of the k-loop at 3872 is not shown.
3869 for (
unsigned int j=0; j<m; j++) {
3870 for (
unsigned int i=0;
i<m;
i++) {
3871 A[
i+j*m] =
FadType(ndot, this->urand.number());
3872 for (
unsigned int k=0; k<ndot; k++)
// alpha: random value plus a random entry in each of the ndot fastAccessDx
// slots.
3876 FadType alpha(ndot, this->urand.number());
3877 for (
unsigned int k=0; k<ndot; k++) {
3878 alpha.fastAccessDx(k) = this->urand.number();
// For every (i,j) and every k the same random val is written to slot k of
// B1..B3 -- presumably so the three TRMM results below can be compared.
// The lines between 3883 and 3887 are not shown.
3881 for (
unsigned int j=0; j<n; j++) {
3882 for (
unsigned int i=0;
i<m;
i++) {
3883 ScalarType
val = this->urand.number();
3887 for (
unsigned int k=0; k<ndot; k++) {
3888 val = this->urand.number();
3889 B1[
i+j*m].fastAccessDx(k) =
val;
3890 B2[
i+j*m].fastAccessDx(k) =
val;
3891 B3[
i+j*m].fastAccessDx(k) =
val;
// Variant 1: default-constructed Teuchos::BLAS<int,FadType>; operates on B1.
3896 Teuchos::BLAS<int,FadType> teuchos_blas;
3897 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3898 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
// Variant 2: BLAS object constructed with (false); operates on B2.
// NOTE(review): the meaning of the constructor flag is not visible in this
// listing -- confirm against the BLAS<int,FadType> specialization.
3900 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3901 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3902 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
// Variant 3: BLAS object constructed with (false,false,sz); operates on B3.
// sz counts (1+ndot) per element of A (m*m) and B (m*n) -- presumably a
// workspace size; TODO confirm.
3906 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
3907 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3908 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3909 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// LOWER_TRI, NO_TRANS, NON_UNIT_DIAG with the same three objects.
3913 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
3914 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
3915 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
3916 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
3917 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
3918 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// UPPER_TRI, TRANS, NON_UNIT_DIAG with the same three objects.
3922 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
3923 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
3924 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
3925 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
3926 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
3927 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Set the value of every diagonal entry of A to 1.0 before the UNIT_DIAG
// calls. The body of the k-loop at 3933 is not shown -- presumably it resets
// the diagonal's derivative slots; TODO confirm.
3931 for (
unsigned int i=0;
i<m;
i++) {
3932 A[
i*m+
i].val() = 1.0;
3933 for (
unsigned int k=0; k<ndot; k++)
// UPPER_TRI, NO_TRANS, UNIT_DIAG with the same three objects.
3936 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3937 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
3938 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3939 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
3940 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3941 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// TRMM comparison with A applied from the right (Teuchos::RIGHT_SIDE).
// A is sized n*n and passed with leading dimension n; B1, B2 and B3 are each
// sized m*n and passed with leading dimension m. The same sequence of TRMM
// calls is issued on three BLAS objects, each writing into its own B.
// NOTE(review): the embedded listing numbers jump (e.g. 3948 -> 3953,
// 3959 -> 3963), so the enclosing test header, the declarations of m, n,
// ScalarType and VectorType, some loop bodies, closing braces and any
// result-comparison steps are not visible here -- confirm against the real file.
3948 typedef decltype(this->fad)
FadType;
3953 auto ndot = this->ndot_;
3955 VectorType
A(n*n,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
// Fill every entry of A with a FadType built from a random value.
3956 for (
unsigned int j=0; j<n; j++) {
3957 for (
unsigned int i=0;
i<n;
i++) {
3958 A[
i+j*n] =
FadType(ndot, this->urand.number());
3959 for (
unsigned int k=0; k<ndot; k++)
// alpha is a FadType: random value plus ndot random derivative components.
3963 FadType alpha(ndot, this->urand.number());
3964 for (
unsigned int k=0; k<ndot; k++) {
3965 alpha.fastAccessDx(k) = this->urand.number();
// B1, B2 and B3 receive the same random derivative component for each (i,j,k).
3968 for (
unsigned int j=0; j<n; j++) {
3969 for (
unsigned int i=0;
i<m;
i++) {
3970 ScalarType
val = this->urand.number();
3974 for (
unsigned int k=0; k<ndot; k++) {
3975 val = this->urand.number();
3976 B1[
i+j*m].fastAccessDx(k) =
val;
3977 B2[
i+j*m].fastAccessDx(k) =
val;
3978 B3[
i+j*m].fastAccessDx(k) =
val;
// Case: UPPER_TRI, NO_TRANS, NON_UNIT_DIAG. First on a default-constructed BLAS.
3983 Teuchos::BLAS<int,FadType> teuchos_blas;
3984 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3985 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B1[0], m);
// Second BLAS object, constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Teuchos::BLAS<int,FadType> constructor.
3987 Teuchos::BLAS<int,FadType> sacado_blas(
false);
3988 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3989 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B2[0], m);
// Third BLAS object, constructed with (false, false, sz); sz counts
// (1+ndot) scalars per entry of A (n*n) and of B (m*n).
// NOTE(review): sz looks like a workspace size -- confirm.
3993 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
3994 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
3995 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
3996 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B3[0], m);
// Case: LOWER_TRI, NO_TRANS, NON_UNIT_DIAG (applied to the already-updated B's).
4000 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4001 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B1[0], m);
4002 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4003 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B2[0], m);
4004 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4005 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B3[0], m);
// Case: UPPER_TRI, TRANS, NON_UNIT_DIAG.
4009 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4010 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B1[0], m);
4011 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4012 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B2[0], m);
4013 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4014 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B3[0], m);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG case.
4018 for (
unsigned int i=0;
i<n;
i++) {
4019 A[
i*n+
i].val() = 1.0;
4020 for (
unsigned int k=0; k<ndot; k++)
// Case: UPPER_TRI, NO_TRANS, UNIT_DIAG.
4023 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4024 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], n, &B1[0], m);
4025 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4026 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], n, &B2[0], m);
4027 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4028 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], n, &B3[0], m);
// TRMM comparison with A applied from the left (Teuchos::LEFT_SIDE) using
// leading dimensions larger than the matrix extents: lda = m+4, ldb = m+5.
// A is sized lda*m and B1, B2, B3 are each sized ldb*n, so the padding rows
// are allocated and filled as well.
// NOTE(review): the embedded listing numbers jump (e.g. 4035 -> 4040,
// 4048 -> 4052), so the enclosing test header, the declarations of m, n,
// ScalarType and VectorType, some loop bodies, closing braces and any
// result-comparison steps are not visible here -- confirm against the real file.
4035 typedef decltype(this->fad)
FadType;
4040 auto ndot = this->ndot_;
4042 unsigned int lda = m+4;
4043 unsigned int ldb = m+5;
4044 VectorType
A(lda*m,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
// Fill all lda rows of each of the m columns of A (padding included).
4045 for (
unsigned int j=0; j<m; j++) {
4046 for (
unsigned int i=0;
i<lda;
i++) {
4047 A[
i+j*lda] =
FadType(ndot, this->urand.number());
4048 for (
unsigned int k=0; k<ndot; k++)
// alpha is a FadType: random value plus ndot random derivative components.
4052 FadType alpha(ndot, this->urand.number());
4053 for (
unsigned int k=0; k<ndot; k++) {
4054 alpha.fastAccessDx(k) = this->urand.number();
// B1, B2 and B3 receive the same random derivative component for each
// (i,j,k); i runs over all ldb rows.
4057 for (
unsigned int j=0; j<n; j++) {
4058 for (
unsigned int i=0;
i<ldb;
i++) {
4059 ScalarType
val = this->urand.number();
4063 for (
unsigned int k=0; k<ndot; k++) {
4064 val = this->urand.number();
4065 B1[
i+j*ldb].fastAccessDx(k) =
val;
4066 B2[
i+j*ldb].fastAccessDx(k) =
val;
4067 B3[
i+j*ldb].fastAccessDx(k) =
val;
// Case: UPPER_TRI, NO_TRANS, NON_UNIT_DIAG. First on a default-constructed BLAS.
4072 Teuchos::BLAS<int,FadType> teuchos_blas;
4073 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4074 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
// Second BLAS object, constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Teuchos::BLAS<int,FadType> constructor.
4076 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4077 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4078 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
// Third BLAS object, constructed with (false, false, sz). sz is computed
// from m and n only; lda/ldb padding is not included in it.
// NOTE(review): sz looks like a workspace size -- confirm.
4082 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4083 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4084 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4085 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Case: LOWER_TRI, NO_TRANS, NON_UNIT_DIAG (applied to the already-updated B's).
4089 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4090 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4091 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4092 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4093 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4094 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Case: UPPER_TRI, TRANS, NON_UNIT_DIAG.
4098 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4099 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4100 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4101 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4102 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4103 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Set the value of each diagonal entry of A (index i*lda+i) to 1.0 before
// the UNIT_DIAG case.
4107 for (
unsigned int i=0;
i<m;
i++) {
4108 A[
i*lda+
i].val() = 1.0;
4109 for (
unsigned int k=0; k<ndot; k++)
// Case: UPPER_TRI, NO_TRANS, UNIT_DIAG.
4112 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4113 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4114 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4115 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4116 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4117 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// TRMM comparison with A applied from the right (Teuchos::RIGHT_SIDE) using
// leading dimensions larger than the matrix extents: lda = n+4, ldb = m+5.
// A is sized lda*n and B1, B2, B3 are each sized ldb*n, so the padding rows
// are allocated and filled as well.
// NOTE(review): the embedded listing numbers jump (e.g. 4124 -> 4129,
// 4137 -> 4141), so the enclosing test header, the declarations of m, n,
// ScalarType and VectorType, some loop bodies, closing braces and any
// result-comparison steps are not visible here -- confirm against the real file.
4124 typedef decltype(this->fad)
FadType;
4129 auto ndot = this->ndot_;
4131 unsigned int lda = n+4;
4132 unsigned int ldb = m+5;
4133 VectorType
A(lda*n,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
// Fill all lda rows of each of the n columns of A (padding included).
4134 for (
unsigned int j=0; j<n; j++) {
4135 for (
unsigned int i=0;
i<lda;
i++) {
4136 A[
i+j*lda] =
FadType(ndot, this->urand.number());
4137 for (
unsigned int k=0; k<ndot; k++)
// alpha is a FadType: random value plus ndot random derivative components.
4141 FadType alpha(ndot, this->urand.number());
4142 for (
unsigned int k=0; k<ndot; k++) {
4143 alpha.fastAccessDx(k) = this->urand.number();
// B1, B2 and B3 receive the same random derivative component for each
// (i,j,k); i runs over all ldb rows.
4146 for (
unsigned int j=0; j<n; j++) {
4147 for (
unsigned int i=0;
i<ldb;
i++) {
4148 ScalarType
val = this->urand.number();
4152 for (
unsigned int k=0; k<ndot; k++) {
4153 val = this->urand.number();
4154 B1[
i+j*ldb].fastAccessDx(k) =
val;
4155 B2[
i+j*ldb].fastAccessDx(k) =
val;
4156 B3[
i+j*ldb].fastAccessDx(k) =
val;
// Case: UPPER_TRI, NO_TRANS, NON_UNIT_DIAG. First on a default-constructed BLAS.
4161 Teuchos::BLAS<int,FadType> teuchos_blas;
4162 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4163 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
// Second BLAS object, constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Teuchos::BLAS<int,FadType> constructor.
4165 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4166 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4167 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
// Third BLAS object, constructed with (false, false, sz). sz is computed
// from m and n only; lda/ldb padding is not included in it.
// NOTE(review): sz looks like a workspace size -- confirm.
4171 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4172 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4173 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4174 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Case: LOWER_TRI, NO_TRANS, NON_UNIT_DIAG (applied to the already-updated B's).
4178 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4179 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4180 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4181 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4182 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4183 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Case: UPPER_TRI, TRANS, NON_UNIT_DIAG.
4187 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4188 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4189 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4190 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4191 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4192 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Set the value of each diagonal entry of A (index i*lda+i) to 1.0 before
// the UNIT_DIAG case.
4196 for (
unsigned int i=0;
i<n;
i++) {
4197 A[
i*lda+
i].val() = 1.0;
4198 for (
unsigned int k=0; k<ndot; k++)
// Case: UPPER_TRI, NO_TRANS, UNIT_DIAG.
4201 teuchos_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4202 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4203 sacado_blas.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4204 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4205 sacado_blas2.TRMM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4206 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// TRMM comparison with A applied from the left (Teuchos::LEFT_SIDE) where
// alpha is declared as a plain ScalarType rather than a FadType, so it has
// no derivative components of its own. A is m*m (leading dimension m);
// B1, B2, B3 are m*n (leading dimension m).
// NOTE(review): the embedded listing numbers jump (e.g. 4213 -> 4218,
// 4224 -> 4228), so the enclosing test header, the declarations of m, n,
// ScalarType and VectorType, some loop bodies, closing braces and any
// result-comparison steps are not visible here -- confirm against the real file.
4213 typedef decltype(this->fad)
FadType;
4218 auto ndot = this->ndot_;
4220 VectorType
A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
// Fill every entry of A with a FadType built from a random value.
4221 for (
unsigned int j=0; j<m; j++) {
4222 for (
unsigned int i=0;
i<m;
i++) {
4223 A[
i+j*m] =
FadType(ndot, this->urand.number());
4224 for (
unsigned int k=0; k<ndot; k++)
// Scalar (non-Fad) alpha.
4228 ScalarType alpha = this->urand.number();
// B1, B2 and B3 receive the same random derivative component for each (i,j,k).
4230 for (
unsigned int j=0; j<n; j++) {
4231 for (
unsigned int i=0;
i<m;
i++) {
4232 ScalarType
val = this->urand.number();
4236 for (
unsigned int k=0; k<ndot; k++) {
4237 val = this->urand.number();
4238 B1[
i+j*m].fastAccessDx(k) =
val;
4239 B2[
i+j*m].fastAccessDx(k) =
val;
4240 B3[
i+j*m].fastAccessDx(k) =
val;
// Case: UPPER_TRI, NO_TRANS, NON_UNIT_DIAG. First on a default-constructed BLAS.
4245 Teuchos::BLAS<int,FadType> teuchos_blas;
4246 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4247 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
// Second BLAS object, constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Teuchos::BLAS<int,FadType> constructor.
4249 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4250 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4251 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
// Third BLAS object, constructed with (false, false, sz); sz counts
// (1+ndot) scalars per entry of A (m*m) and of B (m*n).
// NOTE(review): sz looks like a workspace size -- confirm.
4255 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4256 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4257 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4258 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Case: LOWER_TRI, NO_TRANS, NON_UNIT_DIAG (applied to the already-updated B's).
4262 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4263 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4264 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4265 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4266 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4267 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Case: UPPER_TRI, TRANS, NON_UNIT_DIAG.
4271 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4272 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4273 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4274 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4275 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4276 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG case.
4280 for (
unsigned int i=0;
i<m;
i++) {
4281 A[
i*m+
i].val() = 1.0;
4282 for (
unsigned int k=0; k<ndot; k++)
// Case: UPPER_TRI, NO_TRANS, UNIT_DIAG.
4285 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4286 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4287 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4288 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4289 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4290 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// TRMM comparison with A applied from the left (Teuchos::LEFT_SIDE).
// A is m*m (leading dimension m); B1, B2, B3 are m*n (leading dimension m).
// In the visible lines only a random `val` is drawn per (i,j) for B; no
// fastAccessDx fill of B1/B2/B3 appears in this block.
// NOTE(review): this suggests B is meant to carry no derivative components
// here, but the statements that assign `val` into B are not visible -- confirm.
// NOTE(review): the embedded listing numbers jump (e.g. 4297 -> 4302,
// 4308 -> 4312, 4319 -> 4326), so the enclosing test header, the declarations
// of m, n, ScalarType and VectorType, some loop bodies, closing braces and any
// result-comparison steps are not visible here -- confirm against the real file.
4297 typedef decltype(this->fad)
FadType;
4302 auto ndot = this->ndot_;
4304 VectorType
A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
// Fill every entry of A with a FadType built from a random value.
4305 for (
unsigned int j=0; j<m; j++) {
4306 for (
unsigned int i=0;
i<m;
i++) {
4307 A[
i+j*m] =
FadType(ndot, this->urand.number());
4308 for (
unsigned int k=0; k<ndot; k++)
// alpha is a FadType: random value plus ndot random derivative components.
4312 FadType alpha(ndot, this->urand.number());
4313 for (
unsigned int k=0; k<ndot; k++) {
4314 alpha.fastAccessDx(k) = this->urand.number();
// One random value is drawn per entry of B; its use is not visible here.
4317 for (
unsigned int j=0; j<n; j++) {
4318 for (
unsigned int i=0;
i<m;
i++) {
4319 ScalarType
val = this->urand.number();
// Case: UPPER_TRI, NO_TRANS, NON_UNIT_DIAG. First on a default-constructed BLAS.
4326 Teuchos::BLAS<int,FadType> teuchos_blas;
4327 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4328 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
// Second BLAS object, constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Teuchos::BLAS<int,FadType> constructor.
4330 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4331 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4332 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
// Third BLAS object, constructed with (false, false, sz); sz counts
// (1+ndot) scalars per entry of A (m*m) and of B (m*n).
// NOTE(review): sz looks like a workspace size -- confirm.
4336 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4337 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4338 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4339 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Case: LOWER_TRI, NO_TRANS, NON_UNIT_DIAG (applied to the already-updated B's).
4343 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4344 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4345 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4346 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4347 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4348 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Case: UPPER_TRI, TRANS, NON_UNIT_DIAG.
4352 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4353 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4354 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4355 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4356 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4357 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG case.
4361 for (
unsigned int i=0;
i<m;
i++) {
4362 A[
i*m+
i].val() = 1.0;
4363 for (
unsigned int k=0; k<ndot; k++)
// Case: UPPER_TRI, NO_TRANS, UNIT_DIAG.
4366 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4367 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4368 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4369 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4370 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4371 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// TRMM comparison with A applied from the left (Teuchos::LEFT_SIDE) where
// the matrix is held twice: as a Fad vector A and as a plain
// std::vector<ScalarType> a, with A[i+j*m] assigned from a[i+j*m].
// Besides B1/B2/B3 (computed from A), B4 and B5 are computed by passing the
// scalar array `a` to sacado_blas and sacado_blas2 respectively.
// NOTE(review): the embedded listing numbers jump (e.g. 4378 -> 4383,
// 4391 -> 4394, 4401 -> 4407), so the enclosing test header, the declarations
// of m, n, ScalarType and VectorType, some loop bodies, closing braces and any
// result-comparison steps are not visible here -- confirm against the real file.
4378 typedef decltype(this->fad)
FadType;
4383 auto ndot = this->ndot_;
4385 VectorType
A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot),
4386 B4(m*n,ndot), B5(m*n,ndot);
4387 std::vector<ScalarType>
a(m*m);
// Draw each matrix entry once into `a` and copy it into the Fad matrix A.
4388 for (
unsigned int j=0; j<m; j++) {
4389 for (
unsigned int i=0;
i<m;
i++) {
4390 a[
i+j*m] = this->urand.number();
4391 A[
i+j*m] =
a[
i+j*m];
// alpha is a FadType: random value plus ndot random derivative components.
4394 FadType alpha(ndot, this->urand.number());
4395 for (
unsigned int k=0; k<ndot; k++) {
4396 alpha.fastAccessDx(k) = this->urand.number();
// B1..B5 receive the same random derivative component for each (i,j,k).
4399 for (
unsigned int j=0; j<n; j++) {
4400 for (
unsigned int i=0;
i<m;
i++) {
4401 ScalarType
val = this->urand.number();
4407 for (
unsigned int k=0; k<ndot; k++) {
4408 val = this->urand.number();
4409 B1[
i+j*m].fastAccessDx(k) =
val;
4410 B2[
i+j*m].fastAccessDx(k) =
val;
4411 B3[
i+j*m].fastAccessDx(k) =
val;
4412 B4[
i+j*m].fastAccessDx(k) =
val;
4413 B5[
i+j*m].fastAccessDx(k) =
val;
// Case: UPPER_TRI, NO_TRANS, NON_UNIT_DIAG. First on a default-constructed BLAS.
4418 Teuchos::BLAS<int,FadType> teuchos_blas;
4419 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4420 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
// Second BLAS object, constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Teuchos::BLAS<int,FadType> constructor.
4422 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4423 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4424 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
// Third BLAS object, constructed with (false, false, sz). Here sz counts one
// scalar per entry of A (m*m) and (1+ndot) scalars per entry of B (m*n).
// NOTE(review): sz looks like a workspace size -- confirm.
4428 unsigned int sz = m*m + m*n*(1+ndot);
4429 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4430 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4431 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Same case again, but passing the scalar array `a` instead of Fad A.
4435 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4436 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B4[0], m);
4440 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4441 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B5[0], m);
// Case: LOWER_TRI, NO_TRANS, NON_UNIT_DIAG (applied to the already-updated B's).
4445 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4446 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4447 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4448 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4449 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4450 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
4451 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4452 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B4[0], m);
4453 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4454 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B5[0], m);
// Case: UPPER_TRI, TRANS, NON_UNIT_DIAG.
4460 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4461 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4462 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4463 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4464 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4465 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
4466 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4467 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B4[0], m);
4468 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4469 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B5[0], m);
// Set the value of each diagonal entry of Fad A to 1.0 before the UNIT_DIAG
// case. No matching update of `a` is visible in this listing.
4475 for (
unsigned int i=0;
i<m;
i++) {
4476 A[
i*m+
i].val() = 1.0;
4477 for (
unsigned int k=0; k<ndot; k++)
// Case: UPPER_TRI, NO_TRANS, UNIT_DIAG.
4480 teuchos_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4481 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4482 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4483 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4484 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4485 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
4486 sacado_blas.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4487 Teuchos::UNIT_DIAG, m, n, alpha, &
a[0], m, &B4[0], m);
4488 sacado_blas2.TRMM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4489 Teuchos::UNIT_DIAG, m, n, alpha, &
a[0], m, &B5[0], m);
// TRSM comparison with A applied from the left (Teuchos::LEFT_SIDE).
// A is m*m (leading dimension m); B1, B2, B3 are m*n (leading dimension m).
// The same sequence of TRSM calls is issued on three BLAS objects, each
// writing into its own B.
// NOTE(review): the embedded listing numbers jump (e.g. 4498 -> 4503,
// 4507 -> 4509, 4510 -> 4514), so the enclosing test header, the declarations
// of m, n, ScalarType and VectorType, some loop bodies, closing braces and any
// result-comparison steps are not visible here -- confirm against the real file.
4498 typedef decltype(this->fad)
FadType;
4503 auto ndot = this->ndot_;
4505 VectorType
A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
// Fill every entry of A with a FadType built from a random value.
4506 for (
unsigned int j=0; j<m; j++) {
4507 for (
unsigned int i=0;
i<m;
i++) {
4509 A[
i+j*m] =
FadType(ndot, this->urand.number());
4510 for (
unsigned int k=0; k<ndot; k++)
// alpha is a FadType: random value plus ndot random derivative components.
4514 FadType alpha(ndot, this->urand.number());
4515 for (
unsigned int k=0; k<ndot; k++) {
4516 alpha.fastAccessDx(k) = this->urand.number();
// B1, B2 and B3 receive the same random derivative component for each (i,j,k).
4520 for (
unsigned int j=0; j<n; j++) {
4521 for (
unsigned int i=0;
i<m;
i++) {
4522 ScalarType
val = this->urand.number();
4529 for (
unsigned int k=0; k<ndot; k++) {
4530 val = this->urand.number();
4531 B1[
i+j*m].fastAccessDx(k) =
val;
4532 B2[
i+j*m].fastAccessDx(k) =
val;
4533 B3[
i+j*m].fastAccessDx(k) =
val;
// Case: UPPER_TRI, NO_TRANS, NON_UNIT_DIAG. First on a default-constructed BLAS.
4538 Teuchos::BLAS<int,FadType> teuchos_blas;
4539 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4540 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
// Second BLAS object, constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Teuchos::BLAS<int,FadType> constructor.
4542 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4543 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4544 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
// Third BLAS object, constructed with (false, false, sz); sz counts
// (1+ndot) scalars per entry of A (m*m) and of B (m*n).
// NOTE(review): sz looks like a workspace size -- confirm.
4548 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4549 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4550 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4551 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Case: LOWER_TRI, NO_TRANS, NON_UNIT_DIAG (applied to the already-updated B's).
4555 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4556 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4557 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4558 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4559 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4560 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Case: UPPER_TRI, TRANS, NON_UNIT_DIAG.
4564 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4565 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4566 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4567 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4568 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4569 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG case.
4573 for (
unsigned int i=0;
i<m;
i++) {
4574 A[
i*m+
i].val() = 1.0;
4575 for (
unsigned int k=0; k<ndot; k++)
// Case: UPPER_TRI, NO_TRANS, UNIT_DIAG.
4578 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4579 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4580 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4581 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4582 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4583 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// TRSM comparison with A applied from the right (Teuchos::RIGHT_SIDE).
// A is sized n*n and passed with leading dimension n; B1, B2 and B3 are each
// sized m*n and passed with leading dimension m. The same sequence of TRSM
// calls is issued on three BLAS objects, each writing into its own B.
// NOTE(review): the embedded listing numbers jump (e.g. 4590 -> 4595,
// 4601 -> 4605), so the enclosing test header, the declarations of m, n,
// ScalarType and VectorType, some loop bodies, closing braces and any
// result-comparison steps are not visible here -- confirm against the real file.
4590 typedef decltype(this->fad)
FadType;
4595 auto ndot = this->ndot_;
4597 VectorType
A(n*n,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
// Fill every entry of A with a FadType built from a random value.
4598 for (
unsigned int j=0; j<n; j++) {
4599 for (
unsigned int i=0;
i<n;
i++) {
4600 A[
i+j*n] =
FadType(ndot, this->urand.number());
4601 for (
unsigned int k=0; k<ndot; k++)
// alpha is a FadType: random value plus ndot random derivative components.
4605 FadType alpha(ndot, this->urand.number());
4606 for (
unsigned int k=0; k<ndot; k++) {
4607 alpha.fastAccessDx(k) = this->urand.number();
// B1, B2 and B3 receive the same random derivative component for each (i,j,k).
4610 for (
unsigned int j=0; j<n; j++) {
4611 for (
unsigned int i=0;
i<m;
i++) {
4612 ScalarType
val = this->urand.number();
4616 for (
unsigned int k=0; k<ndot; k++) {
4617 val = this->urand.number();
4618 B1[
i+j*m].fastAccessDx(k) =
val;
4619 B2[
i+j*m].fastAccessDx(k) =
val;
4620 B3[
i+j*m].fastAccessDx(k) =
val;
// Case: UPPER_TRI, NO_TRANS, NON_UNIT_DIAG. First on a default-constructed BLAS.
4625 Teuchos::BLAS<int,FadType> teuchos_blas;
4626 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4627 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B1[0], m);
// Second BLAS object, constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Teuchos::BLAS<int,FadType> constructor.
4629 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4630 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4631 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B2[0], m);
// Third BLAS object, constructed with (false, false, sz); sz counts
// (1+ndot) scalars per entry of A (n*n) and of B (m*n).
// NOTE(review): sz looks like a workspace size -- confirm.
4635 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4636 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4637 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4638 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B3[0], m);
// Case: LOWER_TRI, NO_TRANS, NON_UNIT_DIAG (applied to the already-updated B's).
4642 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4643 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B1[0], m);
4644 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4645 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B2[0], m);
4646 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4647 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B3[0], m);
// Case: UPPER_TRI, TRANS, NON_UNIT_DIAG.
4651 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4652 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B1[0], m);
4653 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4654 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B2[0], m);
4655 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4656 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], n, &B3[0], m);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG case.
4660 for (
unsigned int i=0;
i<n;
i++) {
4661 A[
i*n+
i].val() = 1.0;
4662 for (
unsigned int k=0; k<ndot; k++)
// Case: UPPER_TRI, NO_TRANS, UNIT_DIAG.
4665 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4666 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], n, &B1[0], m);
4667 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4668 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], n, &B2[0], m);
4669 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4670 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], n, &B3[0], m);
// TRSM comparison with A applied from the left (Teuchos::LEFT_SIDE) using
// leading dimensions larger than the matrix extents: lda = m+4, ldb = m+5.
// A is sized lda*m and B1, B2, B3 are each sized ldb*n, so the padding rows
// are allocated and filled as well.
// NOTE(review): the embedded listing numbers jump (e.g. 4677 -> 4682,
// 4690 -> 4694), so the enclosing test header, the declarations of m, n,
// ScalarType and VectorType, some loop bodies, closing braces and any
// result-comparison steps are not visible here -- confirm against the real file.
4677 typedef decltype(this->fad)
FadType;
4682 auto ndot = this->ndot_;
4684 unsigned int lda = m+4;
4685 unsigned int ldb = m+5;
4686 VectorType
A(lda*m,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
// Fill all lda rows of each of the m columns of A (padding included).
4687 for (
unsigned int j=0; j<m; j++) {
4688 for (
unsigned int i=0;
i<lda;
i++) {
4689 A[
i+j*lda] =
FadType(ndot, this->urand.number());
4690 for (
unsigned int k=0; k<ndot; k++)
// alpha is a FadType: random value plus ndot random derivative components.
4694 FadType alpha(ndot, this->urand.number());
4695 for (
unsigned int k=0; k<ndot; k++) {
4696 alpha.fastAccessDx(k) = this->urand.number();
// B1, B2 and B3 receive the same random derivative component for each
// (i,j,k); i runs over all ldb rows.
4699 for (
unsigned int j=0; j<n; j++) {
4700 for (
unsigned int i=0;
i<ldb;
i++) {
4701 ScalarType
val = this->urand.number();
4705 for (
unsigned int k=0; k<ndot; k++) {
4706 val = this->urand.number();
4707 B1[
i+j*ldb].fastAccessDx(k) =
val;
4708 B2[
i+j*ldb].fastAccessDx(k) =
val;
4709 B3[
i+j*ldb].fastAccessDx(k) =
val;
// Case: UPPER_TRI, NO_TRANS, NON_UNIT_DIAG. First on a default-constructed BLAS.
4714 Teuchos::BLAS<int,FadType> teuchos_blas;
4715 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4716 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
// Second BLAS object, constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Teuchos::BLAS<int,FadType> constructor.
4718 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4719 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4720 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
// Third BLAS object, constructed with (false, false, sz). sz is computed
// from m and n only; lda/ldb padding is not included in it.
// NOTE(review): sz looks like a workspace size -- confirm.
4724 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4725 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4726 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4727 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Case: LOWER_TRI, NO_TRANS, NON_UNIT_DIAG (applied to the already-updated B's).
4731 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4732 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4733 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4734 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4735 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4736 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Case: UPPER_TRI, TRANS, NON_UNIT_DIAG.
4740 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4741 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4742 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4743 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4744 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4745 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Set the value of each diagonal entry of A (index i*lda+i) to 1.0 before
// the UNIT_DIAG case.
4749 for (
unsigned int i=0;
i<m;
i++) {
4750 A[
i*lda+
i].val() = 1.0;
4751 for (
unsigned int k=0; k<ndot; k++)
// Case: UPPER_TRI, NO_TRANS, UNIT_DIAG.
4754 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4755 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4756 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4757 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4758 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4759 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// TRSM comparison with A applied from the right (Teuchos::RIGHT_SIDE) using
// leading dimensions larger than the matrix extents: lda = n+4, ldb = m+5.
// A is sized lda*n and B1, B2, B3 are each sized ldb*n, so the padding rows
// are allocated and filled as well.
// NOTE(review): the embedded listing numbers jump (e.g. 4766 -> 4771,
// 4779 -> 4783), so the enclosing test header, the declarations of m, n,
// ScalarType and VectorType, some loop bodies, closing braces and any
// result-comparison steps are not visible here -- confirm against the real file.
4766 typedef decltype(this->fad)
FadType;
4771 auto ndot = this->ndot_;
4773 unsigned int lda = n+4;
4774 unsigned int ldb = m+5;
4775 VectorType
A(lda*n,ndot), B1(ldb*n,ndot), B2(ldb*n,ndot), B3(ldb*n,ndot);
// Fill all lda rows of each of the n columns of A (padding included).
4776 for (
unsigned int j=0; j<n; j++) {
4777 for (
unsigned int i=0;
i<lda;
i++) {
4778 A[
i+j*lda] =
FadType(ndot, this->urand.number());
4779 for (
unsigned int k=0; k<ndot; k++)
// alpha is a FadType: random value plus ndot random derivative components.
4783 FadType alpha(ndot, this->urand.number());
4784 for (
unsigned int k=0; k<ndot; k++) {
4785 alpha.fastAccessDx(k) = this->urand.number();
// B1, B2 and B3 receive the same random derivative component for each
// (i,j,k); i runs over all ldb rows.
4788 for (
unsigned int j=0; j<n; j++) {
4789 for (
unsigned int i=0;
i<ldb;
i++) {
4790 ScalarType
val = this->urand.number();
4794 for (
unsigned int k=0; k<ndot; k++) {
4795 val = this->urand.number();
4796 B1[
i+j*ldb].fastAccessDx(k) =
val;
4797 B2[
i+j*ldb].fastAccessDx(k) =
val;
4798 B3[
i+j*ldb].fastAccessDx(k) =
val;
// Case: UPPER_TRI, NO_TRANS, NON_UNIT_DIAG. First on a default-constructed BLAS.
4803 Teuchos::BLAS<int,FadType> teuchos_blas;
4804 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4805 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
// Second BLAS object, constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Teuchos::BLAS<int,FadType> constructor.
4807 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4808 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4809 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
// Third BLAS object, constructed with (false, false, sz). sz is computed
// from m and n only; lda/ldb padding is not included in it.
// NOTE(review): sz looks like a workspace size -- confirm.
4813 unsigned int sz = n*n*(1+ndot) + m*n*(1+ndot);
4814 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4815 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4816 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Case: LOWER_TRI, NO_TRANS, NON_UNIT_DIAG (applied to the already-updated B's).
4820 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4821 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4822 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4823 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4824 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4825 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Case: UPPER_TRI, TRANS, NON_UNIT_DIAG.
4829 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4830 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4831 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4832 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4833 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4834 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// Set the value of each diagonal entry of A (index i*lda+i) to 1.0 before
// the UNIT_DIAG case.
4838 for (
unsigned int i=0;
i<n;
i++) {
4839 A[
i*lda+
i].val() = 1.0;
4840 for (
unsigned int k=0; k<ndot; k++)
// Case: UPPER_TRI, NO_TRANS, UNIT_DIAG.
4843 teuchos_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4844 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B1[0], ldb);
4845 sacado_blas.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4846 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B2[0], ldb);
4847 sacado_blas2.TRSM(Teuchos::RIGHT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4848 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], lda, &B3[0], ldb);
// TRSM comparison with A applied from the left (Teuchos::LEFT_SIDE) where
// alpha is declared as a plain ScalarType rather than a FadType, so it has
// no derivative components of its own. A is m*m (leading dimension m);
// B1, B2, B3 are m*n (leading dimension m).
// NOTE(review): the embedded listing numbers jump (e.g. 4855 -> 4860,
// 4866 -> 4870), so the enclosing test header, the declarations of m, n,
// ScalarType and VectorType, some loop bodies, closing braces and any
// result-comparison steps are not visible here -- confirm against the real file.
4855 typedef decltype(this->fad)
FadType;
4860 auto ndot = this->ndot_;
4862 VectorType
A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
// Fill every entry of A with a FadType built from a random value.
4863 for (
unsigned int j=0; j<m; j++) {
4864 for (
unsigned int i=0;
i<m;
i++) {
4865 A[
i+j*m] =
FadType(ndot, this->urand.number());
4866 for (
unsigned int k=0; k<ndot; k++)
// Scalar (non-Fad) alpha.
4870 ScalarType alpha = this->urand.number();
// B1, B2 and B3 receive the same random derivative component for each (i,j,k).
4872 for (
unsigned int j=0; j<n; j++) {
4873 for (
unsigned int i=0;
i<m;
i++) {
4874 ScalarType
val = this->urand.number();
4878 for (
unsigned int k=0; k<ndot; k++) {
4879 val = this->urand.number();
4880 B1[
i+j*m].fastAccessDx(k) =
val;
4881 B2[
i+j*m].fastAccessDx(k) =
val;
4882 B3[
i+j*m].fastAccessDx(k) =
val;
// Case: UPPER_TRI, NO_TRANS, NON_UNIT_DIAG. First on a default-constructed BLAS.
4887 Teuchos::BLAS<int,FadType> teuchos_blas;
4888 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4889 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
// Second BLAS object, constructed with a single `false` argument.
// NOTE(review): presumably this disables the default implementation -- confirm
// against the Teuchos::BLAS<int,FadType> constructor.
4891 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4892 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4893 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
// Third BLAS object, constructed with (false, false, sz); sz counts
// (1+ndot) scalars per entry of A (m*m) and of B (m*n).
// NOTE(review): sz looks like a workspace size -- confirm.
4897 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4898 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4899 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4900 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Case: LOWER_TRI, NO_TRANS, NON_UNIT_DIAG (applied to the already-updated B's).
4904 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4905 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4906 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4907 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4908 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4909 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Case: UPPER_TRI, TRANS, NON_UNIT_DIAG.
4913 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4914 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4915 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4916 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4917 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4918 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Set the value of each diagonal entry of A to 1.0 before the UNIT_DIAG case.
4922 for (
unsigned int i=0;
i<m;
i++) {
4923 A[
i*m+
i].val() = 1.0;
4924 for (
unsigned int k=0; k<ndot; k++)
// Case: UPPER_TRI, NO_TRANS, UNIT_DIAG.
4927 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4928 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4929 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4930 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4931 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4932 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// TRSM test body (excerpt: the test header, the definitions of m and n, some
// loop bodies and the result checks are not visible here).
// A and alpha are Fad-valued. Each TRSM variant below is run through three
// Teuchos::BLAS<int,FadType> objects -- teuchos_blas on B1, sacado_blas on B2
// and sacado_blas2 on B3 -- always with the same A and alpha.
4939 typedef decltype(this->fad)
FadType;
4944 auto ndot = this->ndot_;
// A is indexed below as i + j*m with i,j < m; the B buffers are sized m*n.
4946 VectorType
A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot);
// Fill every entry of A with a Fad built from ndot and a random number.
// NOTE(review): the body of the k-loop is not visible in this excerpt --
// presumably it sets the derivative components of A; confirm in the full file.
4947 for (
unsigned int j=0; j<m; j++) {
4948 for (
unsigned int i=0;
i<m;
i++) {
4949 A[
i+j*m] =
FadType(ndot, this->urand.number());
4950 for (
unsigned int k=0; k<ndot; k++)
// alpha is a Fad with a random value and ndot random derivative components.
4954 FadType alpha(ndot, this->urand.number());
4955 for (
unsigned int k=0; k<ndot; k++) {
4956 alpha.fastAccessDx(k) = this->urand.number();
// Draw one random number per (i,j) position of the m-by-n right-hand side.
// NOTE(review): the statements that store val into B1/B2/B3 are not visible
// in this excerpt.
4959 for (
unsigned int j=0; j<n; j++) {
4960 for (
unsigned int i=0;
i<m;
i++) {
4961 ScalarType
val = this->urand.number();
// Case 1: LEFT_SIDE, UPPER_TRI, NO_TRANS, NON_UNIT_DIAG.
4968 Teuchos::BLAS<int,FadType> teuchos_blas;
4969 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4970 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
// NOTE(review): the meaning of the constructor arguments (`false` here and
// `false,false,sz` below) is not shown in this excerpt -- presumably they
// select the Sacado implementation and a fixed workspace; confirm against the
// Teuchos::BLAS<int,FadType> specialization.
4972 Teuchos::BLAS<int,FadType> sacado_blas(
false);
4973 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4974 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
// sz counts (1+ndot) entries for each of the m*m elements of A plus (1+ndot)
// entries for each of the m*n elements of B.
4978 unsigned int sz = m*m*(1+ndot) + m*n*(1+ndot);
4979 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
4980 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
4981 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Case 2: LEFT_SIDE, LOWER_TRI, NO_TRANS, NON_UNIT_DIAG.
// The same B1/B2/B3 buffers from the previous case are passed again.
4985 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4986 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4987 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4988 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4989 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
4990 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Case 3: LEFT_SIDE, UPPER_TRI, TRANS, NON_UNIT_DIAG.
4994 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4995 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
4996 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4997 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
4998 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
4999 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Set the value part of every diagonal entry of A to 1.0 ahead of the
// UNIT_DIAG case. NOTE(review): the body of the k-loop is not visible in this
// excerpt -- presumably it resets the diagonal derivative components; confirm.
5003 for (
unsigned int i=0;
i<m;
i++) {
5004 A[
i*m+
i].val() = 1.0;
5005 for (
unsigned int k=0; k<ndot; k++)
// Case 4: LEFT_SIDE, UPPER_TRI, NO_TRANS, UNIT_DIAG.
5008 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5009 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
5010 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5011 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
5012 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5013 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// TRSM test body (excerpt: the test header, the definitions of m and n, some
// statements and the result checks are not visible here).
// A is a Fad vector whose entries are assigned from the plain scalar array a;
// alpha and the B buffers carry derivative components. Each TRSM variant is
// run with the Fad matrix A (teuchos_blas on B1, sacado_blas on B2,
// sacado_blas2 on B3) and with the scalar matrix a (sacado_blas on B4,
// sacado_blas2 on B5).
5020 typedef decltype(this->fad)
FadType;
5025 auto ndot = this->ndot_;
// A is indexed below as i + j*m with i,j < m; the B buffers are sized m*n.
5027 VectorType
A(m*m,ndot), B1(m*n,ndot), B2(m*n,ndot), B3(m*n,ndot),
5028 B4(m*n,ndot), B5(m*n,ndot);
// Scalar copy of the matrix, passed directly to TRSM for the B4/B5 runs.
5029 std::vector<ScalarType>
a(m*m);
// Fill a with random numbers and assign each scalar into the matching entry
// of A (no derivative components are written to A in the visible lines).
5030 for (
unsigned int j=0; j<m; j++) {
5031 for (
unsigned int i=0;
i<m;
i++) {
5032 a[
i+j*m] = this->urand.number();
5033 A[
i+j*m] =
a[
i+j*m];
// alpha is a Fad with a random value and ndot random derivative components.
5036 FadType alpha(ndot, this->urand.number());
5037 for (
unsigned int k=0; k<ndot; k++) {
5038 alpha.fastAccessDx(k) = this->urand.number();
// For every (i,j) position, give B1..B5 identical random derivative
// components. NOTE(review): the statements that store the value part of each
// B entry (between original lines 5043 and 5049) are not visible here.
5041 for (
unsigned int j=0; j<n; j++) {
5042 for (
unsigned int i=0;
i<m;
i++) {
5043 ScalarType
val = this->urand.number();
5049 for (
unsigned int k=0; k<ndot; k++) {
5050 val = this->urand.number();
5051 B1[
i+j*m].fastAccessDx(k) =
val;
5052 B2[
i+j*m].fastAccessDx(k) =
val;
5053 B3[
i+j*m].fastAccessDx(k) =
val;
5054 B4[
i+j*m].fastAccessDx(k) =
val;
5055 B5[
i+j*m].fastAccessDx(k) =
val;
// Case 1: LEFT_SIDE, UPPER_TRI, NO_TRANS, NON_UNIT_DIAG.
5060 Teuchos::BLAS<int,FadType> teuchos_blas;
5061 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5062 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
// NOTE(review): the meaning of the constructor arguments (`false` here and
// `false,false,sz` below) is not shown in this excerpt -- presumably they
// select the Sacado implementation and a fixed workspace; confirm against the
// Teuchos::BLAS<int,FadType> specialization.
5064 Teuchos::BLAS<int,FadType> sacado_blas(
false);
5065 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5066 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
// sz counts one entry for each of the m*m elements of A (no (1+ndot) factor,
// unlike the B term) plus (1+ndot) entries for each of the m*n elements of B.
5070 unsigned int sz = m*m + m*n*(1+ndot);
5071 Teuchos::BLAS<int,FadType> sacado_blas2(
false,
false,sz);
5072 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5073 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
// Same case again, but passing the scalar array a instead of the Fad array A.
5077 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5078 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B4[0], m);
5082 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5083 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B5[0], m);
// Case 2: LEFT_SIDE, LOWER_TRI, NO_TRANS, NON_UNIT_DIAG.
// The same B1..B5 buffers from the previous case are passed again.
5087 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
5088 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
5089 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
5090 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
5091 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
5092 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
5093 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
5094 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B4[0], m);
5095 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::NO_TRANS,
5096 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B5[0], m);
// Case 3: LEFT_SIDE, UPPER_TRI, TRANS, NON_UNIT_DIAG.
5102 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
5103 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
5104 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
5105 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
5106 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
5107 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
5108 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
5109 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B4[0], m);
5110 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::TRANS,
5111 Teuchos::NON_UNIT_DIAG, m, n, alpha, &
a[0], m, &B5[0], m);
// Set the value part of every diagonal entry of A to 1.0 ahead of the
// UNIT_DIAG case. NOTE(review): the body of the k-loop is not visible in this
// excerpt, and no matching update of the scalar array a is visible either --
// confirm both in the full file.
5117 for (
unsigned int i=0;
i<m;
i++) {
5118 A[
i*m+
i].val() = 1.0;
5119 for (
unsigned int k=0; k<ndot; k++)
// Case 4: LEFT_SIDE, UPPER_TRI, NO_TRANS, UNIT_DIAG.
5122 teuchos_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5123 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B1[0], m);
5124 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5125 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B2[0], m);
5126 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5127 Teuchos::UNIT_DIAG, m, n, alpha, &
A[0], m, &B3[0], m);
5128 sacado_blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5129 Teuchos::UNIT_DIAG, m, n, alpha, &
a[0], m, &B4[0], m);
5130 sacado_blas2.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
5131 Teuchos::UNIT_DIAG, m, n, alpha, &
a[0], m, &B5[0], m);
#define COMPARE_FAD_VECTORS(X1, X2, n)
TYPED_TEST_P(FadBLASUnitTests, testSCAL1)
TYPED_TEST_SUITE_P(FadBLASUnitTests)
REGISTER_TYPED_TEST_SUITE_P(FadBLASUnitTests, testSCAL1, testSCAL2, testSCAL3, testSCAL4, testCOPY1, testCOPY2, testCOPY3, testCOPY4, testAXPY1, testAXPY2, testAXPY3, testAXPY4, testDOT1, testDOT2, testDOT3, testDOT4, testNRM21, testNRM22, testGEMV1, testGEMV2, testGEMV3, testGEMV4, testGEMV5, testGEMV6, testGEMV7, testGEMV8, testGEMV9, testTRMV1, testTRMV2, testTRMV3, testTRMV4, testGER1, testGER2, testGER3, testGER4, testGER5, testGER6, testGER7, testGEMM1, testGEMM2, testGEMM3, testGEMM4, testGEMM5, testGEMM6, testGEMM7, testGEMM8, testGEMM9, testGEMM10, testSYMM1, testSYMM2, testSYMM3, testSYMM4, testSYMM5, testSYMM6, testSYMM7, testSYMM8, testSYMM9, testTRMM1, testTRMM2, testTRMM3, testTRMM4, testTRMM5, testTRMM6, testTRMM7, testTRSM1, testTRSM2, testTRSM3, testTRSM4, testTRSM5, testTRSM6, testTRSM7)
#define COMPARE_FADS(a, b)
expr expr expr fastAccessDx(i)) FAD_UNARYOP_MACRO(exp
Sacado::Fad::DFad< double > FadType
Sacado::Fad::Vector< unsigned int, FadType > VectorType
Sacado::Random< double > real_urand
Sacado::Random< ScalarType > urand
Sacado::ScalarType< FadType >::type ScalarType
A class for storing a contiguously allocated array of Fad objects. This is a general definition that ...
A random number generator that generates random numbers uniformly distributed in the interval (a,...