Ifpack2 Templated Preconditioning Package Version 1.0
Ifpack2_Details_ScaledDampedResidual_def.hpp
/*
//@HEADER
// ***********************************************************************
//
// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
// Copyright (2009) Sandia Corporation
//
// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
// license for use of this work by or on behalf of the U.S. Government.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// ***********************************************************************
//@HEADER
*/

#ifndef IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP
#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP

#include "Tpetra_CrsMatrix.hpp"
#include "Tpetra_MultiVector.hpp"
#include "Tpetra_Operator.hpp"
#include "Tpetra_Vector.hpp"
#include "Tpetra_Export_decl.hpp"
#include "Tpetra_Import_decl.hpp"
#include "Kokkos_ArithTraits.hpp"
#include "Teuchos_Assert.hpp"
#include <type_traits>
#include "KokkosSparse_spmv_impl.hpp"

namespace Ifpack2 {
namespace Details {
namespace Impl {

// Functor for computing W := alpha * D * (B - A*X) + beta * W.
template<class WVector,
         class DVector,
         class BVector,
         class AMatrix,
         class XVector,
         class Scalar,
         bool use_beta>
struct ScaledDampedResidualVectorFunctor {
  static_assert (static_cast<int> (WVector::Rank) == 1,
                 "WVector must be a rank 1 View.");
  static_assert (static_cast<int> (DVector::Rank) == 1,
                 "DVector must be a rank 1 View.");
  static_assert (static_cast<int> (BVector::Rank) == 1,
                 "BVector must be a rank 1 View.");
  static_assert (static_cast<int> (XVector::Rank) == 1,
                 "XVector must be a rank 1 View.");

  using execution_space = typename AMatrix::execution_space;
  using LO = typename AMatrix::non_const_ordinal_type;
  using value_type = typename AMatrix::non_const_value_type;
  using team_policy = typename Kokkos::TeamPolicy<execution_space>;
  using team_member = typename team_policy::member_type;
  using ATV = Kokkos::ArithTraits<value_type>;

  const Scalar alpha;
  WVector m_w;
  DVector m_d;
  BVector m_b;
  AMatrix m_A;
  XVector m_x;
  const Scalar beta;

  const LO rows_per_team;

  ScaledDampedResidualVectorFunctor (const Scalar& alpha_,
                                     const WVector& m_w_,
                                     const DVector& m_d_,
                                     const BVector& m_b_,
                                     const AMatrix& m_A_,
                                     const XVector& m_x_,
                                     const Scalar& beta_,
                                     const int rows_per_team_) :
    alpha (alpha_),
    m_w (m_w_),
    m_d (m_d_),
    m_b (m_b_),
    m_A (m_A_),
    m_x (m_x_),
    beta (beta_),
    rows_per_team (rows_per_team_)
  {
    const size_t numRows = m_A.numRows ();
    const size_t numCols = m_A.numCols ();

    TEUCHOS_ASSERT( m_w.extent (0) == m_d.extent (0) );
    TEUCHOS_ASSERT( m_w.extent (0) == m_b.extent (0) );
    TEUCHOS_ASSERT( numRows == size_t (m_w.extent (0)) );
    TEUCHOS_ASSERT( numCols <= size_t (m_x.extent (0)) );
  }

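  // Each team processes a contiguous block of rows_per_team rows; the
  // team's threads split those rows, and each thread's vector lanes
  // jointly reduce the dot product of its row of A with x.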
  KOKKOS_INLINE_FUNCTION
  void operator() (const team_member& dev) const
  {
    using residual_value_type = typename BVector::non_const_value_type;
    using KAT = Kokkos::ArithTraits<residual_value_type>;

    Kokkos::parallel_for
      (Kokkos::TeamThreadRange (dev, 0, rows_per_team),
       [&] (const LO& loop) {
         const LO lclRow =
           static_cast<LO> (dev.league_rank ()) * rows_per_team + loop;
         if (lclRow >= m_A.numRows ()) {
           return;
         }
         const auto A_row = m_A.rowConst (lclRow);
         const LO row_length = static_cast<LO> (A_row.length);
         residual_value_type A_x = KAT::zero ();

         Kokkos::parallel_reduce
           (Kokkos::ThreadVectorRange (dev, row_length),
            [&] (const LO iEntry, residual_value_type& lsum) {
              const auto A_val = A_row.value (iEntry);
              lsum += A_val * m_x (A_row.colidx (iEntry));
            }, A_x);

         Kokkos::single
           (Kokkos::PerThread (dev),
            [&] () {
              const auto alpha_D_res =
                alpha * m_d (lclRow) * (m_b (lclRow) - A_x);
              if (use_beta) {
                m_w (lclRow) = beta * m_w (lclRow) + alpha_D_res;
              }
              else {
                m_w (lclRow) = alpha_D_res;
              }
            });
       });
  }
};

// W := alpha * D * (B - A*X) + beta * W.
template<class WVector,
         class DVector,
         class BVector,
         class AMatrix,
         class XVector,
         class Scalar>
static void
scaled_damped_residual_vector
(const Scalar& alpha,
 const WVector& w,
 const DVector& d,
 const BVector& b,
 const AMatrix& A,
 const XVector& x,
 const Scalar& beta)
{
  using execution_space = typename AMatrix::execution_space;

  if (A.numRows () == 0) {
    return;
  }

  int team_size = -1;
  int vector_length = -1;
  int64_t rows_per_thread = -1;

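  // Let the KokkosSparse SpMV heuristics choose rows per team (and,
  // where applicable, team size and vector length) for this matrix on
  // this execution space.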
  const int64_t rows_per_team =
    KokkosSparse::Impl::spmv_launch_parameters<execution_space>
      (A.numRows (), A.nnz (), rows_per_thread, team_size, vector_length);
  int64_t worksets = (b.extent (0) + rows_per_team - 1) / rows_per_team;

  using Kokkos::Dynamic;
  using Kokkos::Static;
  using Kokkos::Schedule;
  using Kokkos::TeamPolicy;
  using policy_type_dynamic = TeamPolicy<execution_space, Schedule<Dynamic> >;
  using policy_type_static = TeamPolicy<execution_space, Schedule<Static> >;
  const char kernel_label[] = "scaled_damped_residual_vector";
  policy_type_dynamic policyDynamic (1, 1);
  policy_type_static policyStatic (1, 1);
  if (team_size < 0) {
    policyDynamic = policy_type_dynamic (worksets, Kokkos::AUTO, vector_length);
    policyStatic = policy_type_static (worksets, Kokkos::AUTO, vector_length);
  }
  else {
    policyDynamic = policy_type_dynamic (worksets, team_size, vector_length);
    policyStatic = policy_type_static (worksets, team_size, vector_length);
  }

  // Canonicalize template arguments to avoid redundant instantiations.
  using w_vec_type = typename WVector::non_const_type;
  using d_vec_type = typename DVector::const_type;
  using b_vec_type = typename BVector::const_type;
  using matrix_type = AMatrix;
  using x_vec_type = typename XVector::const_type;
  using scalar_type = typename Kokkos::ArithTraits<Scalar>::val_type;

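  // Select use_beta at compile time so the functor skips reading W when
  // beta == 0.  The nnz threshold below favors the dynamically scheduled
  // policy for large matrices, where uneven row lengths can cause load
  // imbalance across teams; smaller matrices use static scheduling.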
  if (beta == Kokkos::ArithTraits<Scalar>::zero ()) {
    constexpr bool use_beta = false;
    using functor_type =
      ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                        b_vec_type, matrix_type,
                                        x_vec_type, scalar_type,
                                        use_beta>;
    functor_type func (alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz () > 10000000) {
      Kokkos::parallel_for (kernel_label, policyDynamic, func);
    }
    else {
      Kokkos::parallel_for (kernel_label, policyStatic, func);
    }
  }
  else {
    constexpr bool use_beta = true;
    using functor_type =
      ScaledDampedResidualVectorFunctor<w_vec_type, d_vec_type,
                                        b_vec_type, matrix_type,
                                        x_vec_type, scalar_type,
                                        use_beta>;
    functor_type func (alpha, w, d, b, A, x, beta, rows_per_team);
    if (A.nnz () > 10000000) {
      Kokkos::parallel_for (kernel_label, policyDynamic, func);
    }
    else {
      Kokkos::parallel_for (kernel_label, policyStatic, func);
    }
  }
}

} // namespace Impl

template<class TpetraOperatorType>
ScaledDampedResidual<TpetraOperatorType>::
ScaledDampedResidual (const Teuchos::RCP<const operator_type>& A)
{
  setMatrix (A);
}

template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
setMatrix (const Teuchos::RCP<const operator_type>& A)
{
  if (A_op_.get () != A.get ()) {
    A_op_ = A;

    // We'll (re)allocate these on demand.
    X_colMap_ = std::unique_ptr<vector_type> (nullptr);
    V1_ = std::unique_ptr<multivector_type> (nullptr);

    using Teuchos::rcp_dynamic_cast;
    Teuchos::RCP<const crs_matrix_type> A_crs =
      rcp_dynamic_cast<const crs_matrix_type> (A);
    if (A_crs.is_null ()) {
      A_crs_ = Teuchos::null;
      imp_ = Teuchos::null;
      exp_ = Teuchos::null;
    }
    else {
      TEUCHOS_ASSERT( A_crs->isFillComplete () );
      A_crs_ = A_crs;
      auto G = A_crs->getCrsGraph ();
      imp_ = G->getImporter ();
      exp_ = G->getExporter ();
    }
  }
}

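// Compute W := alpha * D_inv * (B - A*X) + beta * W, dispatching to the
// fused single-kernel path when possible and to the generic Tpetra
// fallback otherwise.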
template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
compute (multivector_type& W,
         const SC& alpha,
         vector_type& D_inv,
         multivector_type& B,
         multivector_type& X,
         const SC& beta)
{
  using Teuchos::RCP;
  using Teuchos::rcp;

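  // The fused kernel handles exactly one right-hand side (see canFuse),
  // so view the first (and only) column of each multivector as a vector.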
  if (canFuse (B)) {
    // "nonconst" here has no effect other than on the return type.
    W_vec_ = W.getVectorNonConst (0);
    B_vec_ = B.getVectorNonConst (0);
    X_vec_ = X.getVectorNonConst (0);
    TEUCHOS_ASSERT( ! A_crs_.is_null () );
    fusedCase (*W_vec_, alpha, D_inv, *B_vec_, *A_crs_, *X_vec_, beta);
  }
  else {
    TEUCHOS_ASSERT( ! A_op_.is_null () );
    unfusedCase (W, alpha, D_inv, B, *A_op_, X, beta);
  }
}

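// Bring X into the matrix's column Map, allocating the column Map
// vector on demand.  If the matrix has no Importer, X is already
// correctly distributed and is returned unchanged.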
template<class TpetraOperatorType>
typename ScaledDampedResidual<TpetraOperatorType>::vector_type&
ScaledDampedResidual<TpetraOperatorType>::
importVector (vector_type& X_domMap)
{
  if (imp_.is_null ()) {
    return X_domMap;
  }
  else {
    if (X_colMap_.get () == nullptr) {
      using V = vector_type;
      X_colMap_ = std::unique_ptr<V> (new V (imp_->getTargetMap ()));
    }
    X_colMap_->doImport (X_domMap, *imp_, Tpetra::REPLACE);
    return *X_colMap_;
  }
}

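// The fused kernel requires a single-column B, a matrix that is really
// a fill-complete Tpetra::CrsMatrix, and no nontrivial Exporter.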
template<class TpetraOperatorType>
bool
ScaledDampedResidual<TpetraOperatorType>::
canFuse (const multivector_type& B) const
{
  return B.getNumVectors () == size_t (1) &&
    ! A_crs_.is_null () &&
    exp_.is_null ();
}

template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
unfusedCase (multivector_type& W,
             const SC& alpha,
             vector_type& D_inv,
             multivector_type& B,
             const operator_type& A,
             multivector_type& X,
             const SC& beta)
{
  if (V1_.get () == nullptr) {
    using MV = multivector_type;
    const size_t numVecs = B.getNumVectors ();
    V1_ = std::unique_ptr<MV> (new MV (B.getMap (), numVecs));
  }
  const SC one = Teuchos::ScalarTraits<SC>::one ();

  // V1 = B - A*X
  Tpetra::deep_copy (*V1_, B);
  A.apply (X, *V1_, Teuchos::NO_TRANS, -one, one);

  // W := alpha * D_inv * V1 + beta * W
  W.elementWiseMultiply (alpha, D_inv, *V1_, beta);
}

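// Fused path: run the whole update as one kernel on the local views.
// When beta == 0, W is opened with Access::OverwriteAll, so its
// previous contents are never read.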
template<class TpetraOperatorType>
void
ScaledDampedResidual<TpetraOperatorType>::
fusedCase (vector_type& W,
           const SC& alpha,
           vector_type& D_inv,
           vector_type& B,
           const crs_matrix_type& A,
           vector_type& X,
           const SC& beta)
{
  vector_type& X_colMap = importVector (X);

  using Impl::scaled_damped_residual_vector;
  using STS = Teuchos::ScalarTraits<SC>;

  auto A_lcl = A.getLocalMatrixDevice ();
  auto Dinv_lcl = Kokkos::subview (D_inv.getLocalViewDevice (Tpetra::Access::ReadOnly), Kokkos::ALL (), 0);
  auto B_lcl = Kokkos::subview (B.getLocalViewDevice (Tpetra::Access::ReadOnly), Kokkos::ALL (), 0);
  auto X_lcl = Kokkos::subview (X_colMap.getLocalViewDevice (Tpetra::Access::ReadOnly), Kokkos::ALL (), 0);
  if (beta == STS::zero ()) {
    auto W_lcl = Kokkos::subview (W.getLocalViewDevice (Tpetra::Access::OverwriteAll), Kokkos::ALL (), 0);
    scaled_damped_residual_vector (alpha, W_lcl, Dinv_lcl,
                                   B_lcl, A_lcl, X_lcl, beta);
  }
  else { // need to read _and_ write W if beta != 0
    auto W_lcl = Kokkos::subview (W.getLocalViewDevice (Tpetra::Access::ReadWrite), Kokkos::ALL (), 0);
    scaled_damped_residual_vector (alpha, W_lcl, Dinv_lcl,
                                   B_lcl, A_lcl, X_lcl, beta);
  }
}

} // namespace Details
} // namespace Ifpack2

#define IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_INSTANT(SC,LO,GO,NT) \
  template class Ifpack2::Details::ScaledDampedResidual<Tpetra::Operator<SC, LO, GO, NT> >;

#endif // IFPACK2_DETAILS_SCALEDDAMPEDRESIDUAL_DEF_HPP