Sacado Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
fenl_assembly_view/TestAssembly.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Stokhos Package
5// Copyright (2009) Sandia Corporation
6//
7// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8// license for use of this work by or on behalf of the U.S. Government.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38//
39// ***********************************************************************
40// @HEADER
41#include <iostream>
42
43// Utilities
44#include "Teuchos_TestingHelpers.hpp"
45#include "Teuchos_VerboseObject.hpp"
46
47// FENL
48#include <BoxElemFixture.hpp>
49#include <fenl_functors.hpp>
50
51struct Perf {
52 size_t global_elem_count ;
53 size_t global_node_count ;
54 double fill_time ;
55
58 fill_time(0) {}
59
60 void increment(const Perf& p) {
61 global_elem_count = p.global_elem_count;
62 global_node_count = p.global_node_count;
63 fill_time += p.fill_time;
64 }
65
66 void scale(double s) {
67 fill_time *= s;
68 }
69};
70
71template <typename Scalar, typename Device,
75 const int use_print ,
76 const int use_trials ,
77 const int use_nodes[] ,
78 Kokkos::View< Scalar* , Device >& residual,
80{
81 using Teuchos::RCP;
82 using Teuchos::rcp;
83 using Teuchos::rcpFromRef;
84 using Teuchos::arrayView;
85
87
89 typedef typename LocalMatrixType::StaticCrsGraphType LocalGraphType ;
90
92
94
95 typedef typename ElementComputationType::vector_type VectorType ;
96
97 //------------------------------------
98
99 // Decompose by node to avoid parallel communication in assembly
100
101 const double bubble_x = 1.0 ;
102 const double bubble_y = 1.0 ;
103 const double bubble_z = 1.0 ;
104
105 const FixtureType fixture( Kokkos::Example::BoxElemPart::DecomposeNode ,
106 1 , 0 ,
107 use_nodes[0] , use_nodes[1] , use_nodes[2] ,
108 bubble_x , bubble_y , bubble_z );
109
110 //------------------------------------
111
112 Kokkos::Timer wall_clock ;
113
114 Perf perf_stats = Perf() ;
115
116 for ( int itrial = 0 ; itrial < use_trials ; ++itrial ) {
117
118 Perf perf = Perf() ;
119
120 perf.global_elem_count = fixture.elem_count_global();
121 perf.global_node_count = fixture.node_count_global();
122
123 //----------------------------------
124 // Create the local sparse matrix graph and element-to-graph map
125 // from the element->to->node identifier array.
126 // The graph only has rows for the owned nodes.
127
128 typename NodeNodeGraphType::Times graph_times;
129 const NodeNodeGraphType
130 mesh_to_graph( fixture.elem_node() , fixture.node_count_owned(),
131 graph_times );
132
133 // Create the local sparse matrix from the graph:
134 jacobian = LocalMatrixType( mesh_to_graph.graph );
135
136 //----------------------------------
137
138 // Allocate solution vector for each node in the mesh and residual vector for each owned node
139 VectorType solution( "solution" , fixture.node_count() );
140 residual = VectorType( "residual" , fixture.node_count_owned() );
141
142 // Create element computation functor
143 const ElementComputationType elemcomp( fixture , solution ,
144 mesh_to_graph.elem_graph ,
145 jacobian , residual );
146
147 Kokkos::deep_copy( solution , Scalar(1.2345) );
148
149 //--------------------------------
150 // Element contributions to residual and jacobian
151
152 Kokkos::deep_copy( residual , Scalar(0) );
153 Kokkos::deep_copy( jacobian.coeff , Scalar(0) );
154
155 wall_clock.reset();
156
157 elemcomp.apply();
158
159 Device().fence();
160 perf.fill_time = wall_clock.seconds();
161
162 //--------------------------------
163
164 perf_stats.increment(perf);
165
166 }
167
168 return perf_stats ;
169}
170
171template<class ValueType>
172bool compareValues(const ValueType& a1,
173 const std::string& a1_name,
174 const ValueType&a2,
175 const std::string& a2_name,
176 const ValueType& rel_tol, const ValueType& abs_tol,
177 Teuchos::FancyOStream& out)
178{
179 bool success = true;
180
181 ValueType err = std::abs(a1 - a2);
182 ValueType tol = abs_tol + rel_tol*std::max(std::abs(a1),std::abs(a2));
183 if (err > tol) {
184 out << "\nError, relErr(" << a1_name <<","
185 << a2_name << ") = relErr(" << a1 <<"," << a2 <<") = "
186 << err << " <= tol = " << tol << ": failed!\n";
187 success = false;
188 }
189
190 return success;
191}
192
193template <typename VectorType, typename MatrixType>
194bool check_assembly(const VectorType& analytic_residual,
195 const MatrixType& analytic_jacobian,
196 const VectorType& fad_residual,
197 const MatrixType& fad_jacobian,
198 const std::string& test_name)
199{
200 const double tol = 1e-14;
201 bool success = true;
202 Teuchos::RCP<Teuchos::FancyOStream> out =
203 Teuchos::VerboseObjectBase::getDefaultOStream();
204 std::stringstream buf;
205 Teuchos::FancyOStream fbuf(Teuchos::rcp(&buf,false));
206
207 typename VectorType::HostMirror host_analytic_residual =
208 Kokkos::create_mirror_view(analytic_residual);
209 typename VectorType::HostMirror host_fad_residual =
210 Kokkos::create_mirror_view(fad_residual);
211 Kokkos::deep_copy( host_analytic_residual, analytic_residual );
212 Kokkos::deep_copy( host_fad_residual, fad_residual );
213
214 fbuf << test_name << ":" << std::endl;
215
216 if (host_analytic_residual.extent(0) != host_fad_residual.extent(0)) {
217 fbuf << "Analytic residual dimension "
218 << host_analytic_residual.extent(0)
219 << " does not match Fad residual dimension "
220 << host_fad_residual.extent(0) << std::endl;
221 success = false;
222 }
223 else {
224 const size_t num_node = host_analytic_residual.extent(0);
225 for (size_t i=0; i<num_node; ++i) {
226 success = success && compareValues(
227 host_analytic_residual(i), "analytic residual",
228 host_fad_residual(i), "Fad residual",
229 tol, tol, fbuf );
230 }
231 }
232
233 typename MatrixType::HostMirror host_analytic_jacobian =
234 Kokkos::create_mirror_view(analytic_jacobian);
235 typename MatrixType::HostMirror host_fad_jacobian =
236 Kokkos::create_mirror_view(fad_jacobian);
237 Kokkos::deep_copy( host_analytic_jacobian, analytic_jacobian );
238 Kokkos::deep_copy( host_fad_jacobian, fad_jacobian );
239
240 if (host_analytic_jacobian.extent(0) != host_fad_jacobian.extent(0)) {
241 fbuf << "Analytic Jacobian dimension "
242 << host_analytic_jacobian.extent(0)
243 << " does not match Fad Jacobian dimension "
244 << host_fad_jacobian.extent(0) << std::endl;
245 success = false;
246 }
247 else {
248 const size_t num_entry = host_analytic_jacobian.extent(0);
249 for (size_t i=0; i<num_entry; ++i) {
250 success = success && compareValues(
251 host_analytic_jacobian(i), "analytic Jacobian",
252 host_fad_jacobian(i), "Fad Jacobian",
253 tol, tol, fbuf );
254 }
255 }
256
257 if (!success)
258 *out << buf.str();
259
260 return success;
261}
262
263template <class Device>
265 const int use_print ,
266 const int use_trials ,
267 const int n_begin ,
268 const int n_end ,
269 const int n_step ,
270 const bool quadratic ,
271 const bool check )
272{
278
279 std::cout.precision(8);
280 std::cout << std::endl
281 << "\"Grid Size\" , "
282 << "\"FEM Size\" , "
283 << "\"Analytic Fill Time\" , "
284 << "\"Fad Element Fill Slowdown\" , "
285 << "\"Fad Optimized Element Fill Slowdown\" , "
286 << "\"Fad QP Fill Slowdown\" , "
287 << std::endl;
288
289 typedef Kokkos::View< double* , Device > vector_type ;
291 vector_type analytic_residual, fad_residual, fad_opt_residual,
292 fad_qp_residual;
293 matrix_type analytic_jacobian, fad_jacobian, fad_opt_jacobian,
294 fad_qp_jacobian;
295
296 for (int n=n_begin; n<=n_end; n+=n_step) {
297 const int use_nodes[] = { n, n, n };
298 Perf perf_analytic, perf_fad, perf_fad_opt, perf_fad_qp;
299
300 if (quadratic) {
301 perf_analytic =
302 fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,Analytic>(
303 use_print, use_trials, use_nodes,
304 analytic_residual, analytic_jacobian );
305
306 perf_fad =
307 fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadElement>(
308 use_print, use_trials, use_nodes,
309 fad_residual, fad_jacobian);
310
311 perf_fad_opt =
312 fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadElementOptimized>(
313 use_print, use_trials, use_nodes,
314 fad_opt_residual, fad_opt_jacobian);
315
316 perf_fad_qp =
317 fenl_assembly<double,Device,BoxElemPart::ElemQuadratic,FadQuadPoint>(
318 use_print, use_trials, use_nodes,
319 fad_qp_residual, fad_qp_jacobian);
320 }
321 else {
322 perf_analytic =
323 fenl_assembly<double,Device,BoxElemPart::ElemLinear,Analytic>(
324 use_print, use_trials, use_nodes,
325 analytic_residual, analytic_jacobian );
326
327 perf_fad =
328 fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadElement>(
329 use_print, use_trials, use_nodes,
330 fad_residual, fad_jacobian);
331
332 perf_fad_opt =
333 fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadElementOptimized>(
334 use_print, use_trials, use_nodes,
335 fad_opt_residual, fad_opt_jacobian);
336
337 perf_fad_qp =
338 fenl_assembly<double,Device,BoxElemPart::ElemLinear,FadQuadPoint>(
339 use_print, use_trials, use_nodes,
340 fad_qp_residual, fad_qp_jacobian);
341 }
342 if (check) {
343 check_assembly( analytic_residual, analytic_jacobian.coeff,
344 fad_residual, fad_jacobian.coeff,
345 "Fad" );
346 check_assembly( analytic_residual, analytic_jacobian.coeff,
347 fad_opt_residual, fad_opt_jacobian.coeff,
348 "Optimized Fad" );
349 check_assembly( analytic_residual, analytic_jacobian.coeff,
350 fad_qp_residual, fad_qp_jacobian.coeff,
351 "QP Fad" );
352 }
353
354 double s =
355 1000.0 / ( use_trials * perf_analytic.global_elem_count );
356 perf_analytic.scale(s);
357 perf_fad.scale(s);
358 perf_fad_opt.scale(s);
359 perf_fad_qp.scale(s);
360
361 std::cout.precision(3);
362 std::cout << n << " , "
363 << perf_analytic.global_node_count << " , "
364 << std::setw(2)
365 << std::scientific
366 << perf_analytic.fill_time << " , "
367 << std::fixed << std::setw(6)
368 << perf_fad.fill_time / perf_analytic.fill_time << " , "
369 << perf_fad_opt.fill_time / perf_analytic.fill_time << " , "
370 << perf_fad_qp.fill_time / perf_analytic.fill_time << " , "
371 << std::endl;
372 }
373}
Generate a distributed unstructured finite element mesh from a partitioned NX*NY*NZ box of elements.
Partition a box of hexahedral elements among subdomains.
int check(Epetra_CrsGraph &A, int NumMyRows1, int NumGlobalRows1, int NumMyNonzeros1, int NumGlobalNonzeros1, int *MyGlobalElements, bool verbose)
Perf fenl_assembly(const int use_print, const int use_trials, const int use_nodes[], Kokkos::View< Scalar *, Device > &residual, Kokkos::Example::FENL::CrsMatrix< Scalar, Device > &jacobian)
bool compareValues(const ValueType &a1, const std::string &a1_name, const ValueType &a2, const std::string &a2_name, const ValueType &rel_tol, const ValueType &abs_tol, Teuchos::FancyOStream &out)
void performance_test_driver(const int use_print, const int use_trials, const int n_begin, const int n_end, const int n_step, const bool quadratic, const bool check)
bool check_assembly(const VectorType &analytic_residual, const MatrixType &analytic_jacobian, const VectorType &fad_residual, const MatrixType &fad_jacobian, const std::string &test_name)
const char * p
#define Method
void increment(const Perf &p)
const double tol