RTOp Package Browser (Single Doxygen Collection) Version of the Day
Loading...
Searching...
No Matches
RTOpPack_TOpLinearCombination_def.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// RTOp: Interfaces and Support Software for Vector Reduction Transformation
5// Operations
6// Copyright (2006) Sandia Corporation
7//
8// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
9// license for use of this work by or on behalf of the U.S. Government.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Roscoe A. Bartlett (rabartl@sandia.gov)
39//
40// ***********************************************************************
41// @HEADER
42
43#ifndef RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP
44#define RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP
45
46
47#include "Teuchos_Workspace.hpp"
48
49
50namespace RTOpPack {
51
52
53template<class Scalar>
55 const ArrayView<const Scalar> &alpha_in,
56 const Scalar &beta_in
57 )
58 :beta_(beta_in)
59{
60 if (alpha_in.size())
61 this->alpha(alpha_in);
62 this->setOpNameBase("TOpLinearCombination");
63}
64
65
66
67template<class Scalar>
69 const ArrayView<const Scalar> &alpha_in )
70{
71 TEUCHOS_TEST_FOR_EXCEPT( alpha_in.size() == 0 );
72 alpha_ = alpha_in;
73}
74
75
76template<class Scalar>
77const ArrayView<const Scalar>
79{ return alpha_; }
80
81
82template<class Scalar>
83void TOpLinearCombination<Scalar>::beta( const Scalar& beta_in ) { beta_ = beta_in; }
84
85
86template<class Scalar>
87Scalar TOpLinearCombination<Scalar>::beta() const { return beta_; }
88
89
90template<class Scalar>
91int TOpLinearCombination<Scalar>::num_vecs() const { return alpha_.size(); }
92
93
94// Overridden from RTOpT
95
96
97template<class Scalar>
99 const ArrayView<const ConstSubVectorView<Scalar> > &sub_vecs,
100 const ArrayView<const SubVectorView<Scalar> > &targ_sub_vecs,
101 const Ptr<ReductTarget> &reduct_obj_inout
102 ) const
103{
104
105 using Teuchos::as;
106 using Teuchos::Workspace;
107 typedef Teuchos::ScalarTraits<Scalar> ST;
108 typedef typename Teuchos::ArrayRCP<Scalar>::iterator iter_t;
109 typedef typename Teuchos::ArrayRCP<const Scalar>::iterator const_iter_t;
110 Teuchos::WorkspaceStore* wss = Teuchos::get_default_workspace_store().get();
111
112#ifdef TEUCHOS_DEBUG
113 validate_apply_op<Scalar>(*this, as<int>(alpha_.size()), 1, false,
114 sub_vecs, targ_sub_vecs, reduct_obj_inout.getConst());
115#else
116 (void)reduct_obj_inout;
117#endif
118
119 const int l_num_vecs = alpha_.size();
120
121 // Get iterators to local data
122 const RTOpPack::index_type subDim = targ_sub_vecs[0].subDim();
123 iter_t z0_val = targ_sub_vecs[0].values().begin();
124 const ptrdiff_t z0_s = targ_sub_vecs[0].stride();
125 Workspace<const_iter_t> v_val(wss,l_num_vecs);
126 Workspace<ptrdiff_t> v_s(wss,l_num_vecs,false);
127 for( int k = 0; k < l_num_vecs; ++k ) {
128#ifdef TEUCHOS_DEBUG
129 TEUCHOS_TEST_FOR_EXCEPT( sub_vecs[k].subDim() != subDim );
130 TEUCHOS_TEST_FOR_EXCEPT( sub_vecs[k].globalOffset() != targ_sub_vecs[0].globalOffset() );
131#endif
132 v_val[k] = sub_vecs[k].values().begin();
133 v_s[k] = sub_vecs[k].stride();
134 }
135
136 //
137 // Perform the operation and specialize the cases for l_num_vecs = 1 and 2
138 // in order to get good performance.
139 //
140 if( l_num_vecs == 1 ) {
141 //
142 // z0 = alpha*v0 + beta*z0
143 //
144 const Scalar l_alpha = alpha_[0], l_beta = beta_;
145 const_iter_t v0_val = v_val[0];
146 const ptrdiff_t v0_s = v_s[0];
147 if( l_beta==ST::zero() ) {
148 // z0 = alpha*v0
149 if( z0_s==1 && v0_s==1 ) {
150 for( int j = 0; j < subDim; ++j )
151 (*z0_val++) = l_alpha * (*v0_val++);
152 }
153 else {
154 for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
155 (*z0_val) = l_alpha * (*v0_val);
156 }
157 }
158 else if( l_beta==ST::one() ) {
159 //
160 // z0 = alpha*v0 + z0
161 //
162 if( z0_s==1 && v0_s==1 ) {
163 for( int j = 0; j < subDim; ++j )
164 (*z0_val++) += l_alpha * (*v0_val++);
165 }
166 else {
167 for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
168 (*z0_val) += l_alpha * (*v0_val);
169 }
170 }
171 else {
172 // z0 = alpha*v0 + beta*z0
173 if( z0_s==1 && v0_s==1 ) {
174 for( int j = 0; j < subDim; ++j, ++z0_val )
175 (*z0_val) = l_alpha * (*v0_val++) + l_beta*(*z0_val);
176 }
177 else {
178 for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s )
179 (*z0_val) = l_alpha * (*v0_val) + l_beta*(*z0_val);
180 }
181 }
182 }
183 else if( l_num_vecs == 2 ) {
184 //
185 // z0 = alpha0*v0 + alpha1*v1 + beta*z0
186 //
187 const Scalar alpha0 = alpha_[0], alpha1=alpha_[1], l_beta = beta_;
188 const_iter_t v0_val = v_val[0];
189 const ptrdiff_t v0_s = v_s[0];
190 const_iter_t v1_val = v_val[1];
191 const ptrdiff_t v1_s = v_s[1];
192 if( l_beta==ST::zero() ) {
193 if( alpha0 == ST::one() ) {
194 if( alpha1 == ST::one() ) {
195 // z0 = v0 + v1
196 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
197 for( int j = 0; j < subDim; ++j )
198 (*z0_val++) = (*v0_val++) + (*v1_val++);
199 }
200 else {
201 for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
202 (*z0_val) = (*v0_val) + (*v1_val);
203 }
204 }
205 else {
206 // z0 = v0 + alpha1*v1
207 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
208 for( int j = 0; j < subDim; ++j )
209 (*z0_val++) = (*v0_val++) + alpha1*(*v1_val++);
210 }
211 else {
212 for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
213 (*z0_val) = (*v0_val) + alpha1*(*v1_val);
214 }
215 }
216 }
217 else {
218 if( alpha1 == ST::one() ) {
219 // z0 = alpha0*v0 + v1
220 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
221 for( int j = 0; j < subDim; ++j )
222 (*z0_val++) = alpha0*(*v0_val++) + (*v1_val++);
223 }
224 else {
225 for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
226 (*z0_val) = alpha0*(*v0_val) + (*v1_val);
227 }
228 }
229 else {
230 // z0 = alpha0*v0 + alpha1*v1
231 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
232 for( int j = 0; j < subDim; ++j )
233 (*z0_val++) = alpha0*(*v0_val++) + alpha1*(*v1_val++);
234 }
235 else {
236 for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
237 (*z0_val) = alpha0*(*v0_val) + alpha1*(*v1_val);
238 }
239 }
240 }
241 }
242 else if( l_beta==ST::one() ) {
243 if( alpha0 == ST::one() ) {
244 if( alpha1 == ST::one() ) {
245 // z0 = v0 + v1 + z0
246 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
247 for( int j = 0; j < subDim; ++j, ++z0_val )
248 (*z0_val) += (*v0_val++) + (*v1_val++);
249 }
250 else {
251 for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
252 (*z0_val) += (*v0_val) + (*v1_val);
253 }
254 }
255 else {
256 // z0 = v0 + alpha1*v1 + z0
257 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
258 for( int j = 0; j < subDim; ++j, ++z0_val )
259 (*z0_val) += (*v0_val++) + alpha1*(*v1_val++);
260 }
261 else {
262 for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
263 (*z0_val) += (*v0_val) + alpha1*(*v1_val);
264 }
265 }
266 }
267 else {
268 if( alpha1 == ST::one() ) {
269 // z0 = alpha0*v0 + v1 + z0
270 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
271 for( int j = 0; j < subDim; ++j, ++z0_val )
272 (*z0_val) += alpha0*(*v0_val++) + (*v1_val++);
273 }
274 else {
275 for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
276 (*z0_val) += alpha0*(*v0_val) + (*v1_val);
277 }
278 }
279 else {
280 // z0 = alpha0*v0 + alpha1*v1 + z0
281 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
282 for( int j = 0; j < subDim; ++j, ++z0_val )
283 (*z0_val) += alpha0*(*v0_val++) + alpha1*(*v1_val++);
284 }
285 else {
286 for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
287 (*z0_val) += alpha0*(*v0_val) + alpha1*(*v1_val);
288 }
289 }
290 }
291 }
292 else {
293 if( alpha0 == ST::one() ) {
294 if( alpha1 == ST::one() ) {
295 // z0 = v0 + v1 + beta*z0
296 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
297 for( int j = 0; j < subDim; ++j, ++z0_val )
298 (*z0_val) = (*v0_val++) + (*v1_val++) + l_beta*(*z0_val);
299 }
300 else {
301 for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
302 (*z0_val) = (*v0_val) + (*v1_val) + l_beta*(*z0_val);
303 }
304 }
305 else {
306 // z0 = v0 + alpha1*v1 + beta*z0
307 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
308 for( int j = 0; j < subDim; ++j, ++z0_val )
309 (*z0_val) = (*v0_val++) + alpha1*(*v1_val++) + l_beta*(*z0_val);
310 }
311 else {
312 for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
313 (*z0_val) = (*v0_val) + alpha1*(*v1_val) + l_beta*(*z0_val);
314 }
315 }
316 }
317 else {
318 if( alpha1 == ST::one() ) {
319 // z0 = alpha0*v0 + v1 + beta*z0
320 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
321 for( int j = 0; j < subDim; ++j, ++z0_val )
322 (*z0_val) = alpha0*(*v0_val++) + (*v1_val++) + l_beta*(*z0_val);
323 }
324 else {
325 for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
326 (*z0_val) = alpha0*(*v0_val) + (*v1_val) + l_beta*(*z0_val);
327 }
328 }
329 else {
330 // z0 = alpha0*v0 + alpha1*v1 + beta*z0
331 if( z0_s==1 && v0_s==1 && v1_s==1 ) {
332 for( int j = 0; j < subDim; ++j, ++z0_val )
333 (*z0_val) = alpha0*(*v0_val++) + alpha1*(*v1_val++) + l_beta*(*z0_val);
334 }
335 else {
336 for( int j = 0; j < subDim; ++j, z0_val+=z0_s, v0_val+=v0_s, v1_val+=v1_s )
337 (*z0_val) = alpha0*(*v0_val) + alpha1*(*v1_val) + l_beta*(*z0_val);
338 }
339 }
340 }
341 }
342 }
343 else {
344 //
345 // Totally general implementation (but least efficient)
346 //
347 // z0 *= beta
348 if( beta_ == ST::zero() ) {
349 for( int j = 0; j < subDim; ++j, z0_val += z0_s )
350 (*z0_val) = ST::zero();
351 }
352 else if( beta_ != ST::one() ) {
353 for( int j = 0; j < subDim; ++j, z0_val += z0_s )
354 (*z0_val) *= beta_;
355 }
356 // z0 += sum( alpha[k]*v[k], k=0...l_num_vecs-1)
357 z0_val = targ_sub_vecs[0].values().begin();
358 for( int j = 0; j < subDim; ++j, z0_val += z0_s ) {
359 for( int k = 0; k < l_num_vecs; ++k ) {
360 const Scalar
361 &alpha_k = alpha_[k],
362 &v_k_val = *v_val[k];
363 (*z0_val) += alpha_k * v_k_val;
364 v_val[k] += v_s[k];
365 }
366 }
367 }
368}
369
370
371} // namespace RTOpPack
372
373
374#endif // RTOPPACK_TOP_LINEAR_COMBINATION_DEF_HPP
Class for a non-changeable sub-vector.
void setOpNameBase(const std::string &op_name_base)
Just set the operator name.
Class for a changeable sub-vector.
void apply_op_impl(const ArrayView< const ConstSubVectorView< Scalar > > &sub_vecs, const ArrayView< const SubVectorView< Scalar > > &targ_sub_vecs, const Ptr< ReductTarget > &reduct_obj_inout) const
const ArrayView< const Scalar > alpha() const
TOpLinearCombination(const ArrayView< const Scalar > &alpha_in=Teuchos::null, const Scalar &beta=Teuchos::ScalarTraits< Scalar >::zero())
Teuchos_Ordinal index_type