Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_copyOffsets.hpp
Go to the documentation of this file.
1/*
2// @HEADER
3// ***********************************************************************
4//
5// Tpetra: Templated Linear Algebra Services Package
6// Copyright (2008) Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// ************************************************************************
39// @HEADER
40*/
41
42#ifndef TPETRA_DETAILS_COPYOFFSETS_HPP
43#define TPETRA_DETAILS_COPYOFFSETS_HPP
44
49
#include "TpetraCore_config.h"
#include "Tpetra_Details_Behavior.hpp"
#include "Kokkos_Core.hpp"
#include <limits>
#include <sstream>
#include <stdexcept>
#include <type_traits>
55
56namespace Tpetra {
57namespace Details {
58
59//
60// Implementation details for copyOffsets (see below).
61// Users should skip over this anonymous namespace.
62//
63namespace { // (anonymous)
64
// Implementation detail of copyOffsets (see below).  Compile-time
// predicate: value is true exactly when every value representable by
// InputType is also representable by OutputType, so that assigning
// an InputType to an OutputType can never overflow or wrap around.
//
// Implicit here is the assumption that both input and output types
// are built-in integers.
//
// The four signedness combinations are handled as follows:
//
//   input      output     fits when
//   --------   --------   ---------------------------------------
//   signed     signed     sizeof (Output) >= sizeof (Input)
//   unsigned   unsigned   sizeof (Output) >= sizeof (Input)
//   unsigned   signed     sizeof (Output) >  sizeof (Input)
//   signed     unsigned   never (negative inputs cannot be stored)
//
// The last row matters: a signed-to-unsigned conversion must go
// through the overflow-checking code path, because OverflowChecker
// treats negative inputs as overflow.
template<class OutputType, class InputType>
struct OutputCanFitInput {
private:
  static constexpr bool output_signed = std::is_signed<OutputType>::value;
  static constexpr bool input_signed = std::is_signed<InputType>::value;

public:
  static constexpr bool value =
    (output_signed == input_signed) ?
    // Same signedness: the output only needs to be at least as wide.
    (sizeof (OutputType) >= sizeof (InputType)) :
    // Mixed signedness: only unsigned -> strictly wider signed fits.
    (output_signed && sizeof (OutputType) > sizeof (InputType));
};
82
83 // Avoid warnings for "unsigned integer < 0" comparisons.
84 template<class InputType,
85 bool input_signed = std::is_signed<InputType>::value>
86 struct Negative {};
87
88 template<class InputType>
89 struct Negative<InputType, true> {
90 static KOKKOS_INLINE_FUNCTION bool
91 negative (const InputType src) {
92 return src < InputType (0);
93 }
94 };
95
96 template<class InputType>
97 struct Negative<InputType, false> {
98 static KOKKOS_INLINE_FUNCTION bool
99 negative (const InputType /* src */) {
100 return false;
101 }
102 };
103
104 template<class InputType>
105 KOKKOS_INLINE_FUNCTION bool negative (const InputType src) {
106 return Negative<InputType>::negative (src);
107 }
108
109 template<class OutputType, class InputType>
110 struct OverflowChecker {
111 private:
112 static constexpr bool output_signed = std::is_signed<OutputType>::value;
113 static constexpr bool input_signed = std::is_signed<InputType>::value;
114
115 public:
116 // 1. Signed to unsigned could overflow due to negative numbers.
117 // 2. Larger to smaller could overflow.
118 // 3. Same size but unsigned to signed could overflow.
119 static constexpr bool could_overflow =
120 (! output_signed && input_signed) ||
121 (sizeof (OutputType) < sizeof (InputType)) ||
122 (sizeof (OutputType) == sizeof (InputType) &&
123 output_signed && ! input_signed);
124
125 KOKKOS_INLINE_FUNCTION bool
126 overflows (const InputType src) const
127 {
128 if (! could_overflow) {
129 return false;
130 }
131 else {
132 // Signed to unsigned could overflow due to negative numbers.
133 if (! output_signed && input_signed) {
134 return negative (src);
135 }
136 // We're only comparing InputType with InputType here, so this
137 // should not emit warnings.
138 return src < minDstVal_ || src > maxDstVal_;
139 }
140 }
141
142 private:
143 // If InputType is unsigned and OutputType is signed, casting max
144 // OutputType to InputType could overflow. See #5548.
145 InputType minDstVal_ = input_signed ?
146 std::numeric_limits<OutputType>::min () : OutputType (0);
147 InputType maxDstVal_ = std::numeric_limits<OutputType>::max ();
148 };
149
150
151 template<class OutputViewType, class InputViewType>
152 void
153 errorIfOverflow (const OutputViewType& dst,
154 const InputViewType& src,
155 const size_t overflowCount)
156 {
157 if (overflowCount == 0) {
158 return;
159 }
160
161 std::ostringstream os;
162 const bool plural = overflowCount != size_t (1);
163 os << "copyOffsets: " << overflowCount << " value" <<
164 (plural ? "s" : "") << " in src were too big (in the "
165 "sense of integer overflow) to fit in dst.";
166
167 const bool verbose = Details::Behavior::verbose ();
168 if (verbose) {
169 const size_t maxNumToPrint =
171 const size_t srcLen (src.extent (0));
172 if (srcLen <= maxNumToPrint) {
173 auto dst_h = Kokkos::create_mirror_view (dst);
174 auto src_h = Kokkos::create_mirror_view (src);
175 // DEEP_COPY REVIEW - NOT TESTED
176 Kokkos::deep_copy (src_h, src);
177 // DEEP_COPY REVIEW - NOT TESTED
178 Kokkos::deep_copy (dst_h, dst);
179
180 os << " src: [";
181 for (size_t k = 0; k < srcLen; ++k) {
182 os << src_h[k];
183 if (k + size_t (1) < srcLen) {
184 os << ", ";
185 }
186 }
187 os << "], ";
188
189 os << " dst: [";
190 for (size_t k = 0; k < srcLen; ++k) {
191 os << dst_h[k];
192 if (k + size_t (1) < srcLen) {
193 os << ", ";
194 }
195 }
196 os << "].";
197 }
198 else {
199 os << " src.extent(0) > " << maxNumToPrint << ", Tpetra's "
200 "verbose print count threshold. To increase this, set the "
201 "environment variable TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD "
202 "to the desired threshold and rerun. You do NOT need to "
203 "rebuild Trilinos.";
204 }
205 }
206 TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, os.str ());
207 }
208
209 // Implementation detail of copyOffsets (see below).
210 //
211 // Kokkos parallel_reduce functor for copying offset ("ptr") arrays.
212 // Tpetra::Details::FixedHashTable uses this in its "copy"
213 // constructor for converting between different Device types. All
214 // the action happens in the partial specializations for different
215 // values of outputCanFitInput. "Output can fit input" means that
216 // casting the input's value type to the output's value type will
217 // never result in integer overflow.
218 template<class OutputViewType,
219 class InputViewType,
220 const bool outputCanFitInput =
221 OutputCanFitInput<typename OutputViewType::non_const_value_type,
222 typename InputViewType::non_const_value_type>::value>
223 class CopyOffsetsFunctor {};
224
225 // Specialization for when overflow is possible.
226 template<class OutputViewType, class InputViewType>
227 class CopyOffsetsFunctor<OutputViewType, InputViewType, false> {
228 public:
229 using execution_space = typename OutputViewType::execution_space;
230 using size_type = typename OutputViewType::size_type;
231 using value_type = size_t;
232
233 using input_value_type = typename InputViewType::non_const_value_type;
234 using output_value_type = typename OutputViewType::non_const_value_type;
235
236 CopyOffsetsFunctor (const OutputViewType& dst, const InputViewType& src) :
237 dst_ (dst), src_ (src)
238 {
239 static_assert (Kokkos::SpaceAccessibility<
240 typename OutputViewType::memory_space,
241 typename InputViewType::memory_space>::accessible,
242 "CopyOffsetsFunctor (implements copyOffsets): Output "
243 "View's space must be able to access the input View's "
244 "memory space.");
245 }
246
247 KOKKOS_INLINE_FUNCTION void
248 operator () (const size_type i, value_type& overflowCount) const {
249 const input_value_type src_i = src_(i);
250 if (checker_.overflows (src_i)) {
251 ++overflowCount;
252 }
253 dst_(i) = static_cast<output_value_type> (src_i);
254 }
255
256 KOKKOS_INLINE_FUNCTION void
257 operator () (const size_type i) const {
258 const input_value_type src_i = src_(i);
259 dst_(i) = static_cast<output_value_type> (src_i);
260 }
261
262 KOKKOS_INLINE_FUNCTION void init (value_type& overflowCount) const {
263 overflowCount = 0;
264 }
265
266 KOKKOS_INLINE_FUNCTION void
267 join (value_type& result,
268 const value_type& current) const {
269 result += current;
270 }
271
272 private:
273 OutputViewType dst_;
274 InputViewType src_;
275 OverflowChecker<output_value_type, input_value_type> checker_;
276 };
277
278 // Specialization for when overflow is impossible.
279 template<class OutputViewType, class InputViewType>
280 class CopyOffsetsFunctor<OutputViewType, InputViewType, true> {
281 public:
282 using execution_space = typename OutputViewType::execution_space;
283 using size_type = typename OutputViewType::size_type;
284 using value_type = size_t;
285
286 CopyOffsetsFunctor (const OutputViewType& dst, const InputViewType& src) :
287 dst_ (dst),
288 src_ (src)
289 {
290 static_assert (Kokkos::SpaceAccessibility<
291 typename OutputViewType::memory_space,
292 typename InputViewType::memory_space>::accessible,
293 "CopyOffsetsFunctor (implements copyOffsets): Output "
294 "View's space must be able to access the input View's "
295 "memory space.");
296 }
297
298 KOKKOS_INLINE_FUNCTION void
299 operator () (const size_type i, value_type& /* overflowCount */) const {
300 // Overflow is impossible in this case, so there's no need to check.
301 dst_(i) = src_(i);
302 }
303
304 KOKKOS_INLINE_FUNCTION void
305 operator () (const size_type i) const {
306 dst_(i) = src_(i);
307 }
308
309 KOKKOS_INLINE_FUNCTION void init (value_type& overflowCount) const {
310 overflowCount = 0;
311 }
312
313 KOKKOS_INLINE_FUNCTION void
314 join (value_type& /* result */,
315 const value_type& /* current */) const
316 {}
317
318 private:
319 OutputViewType dst_;
320 InputViewType src_;
321 };
322
323 // Implementation detail of copyOffsets (see below).
324 //
325 // We specialize copyOffsets on two different conditions:
326 //
327 // 1. Are the two Views' layouts the same, and do the input and
328 // output Views have the same value type?
329 // 2. Can the output View's execution space access the input View's
330 // memory space?
331 //
332 // If (1) is true, that makes the implementation simple: just call
333 // Kokkos::deep_copy (FixedHashTable always uses the same layout, no
334 // matter the device type). Otherwise, we need a custom copy
335 // functor. If (2) is true, then we can use CopyOffsetsFunctor
336 // directly. Otherwise, we have to copy the input View into the
337 // output View's memory space, before we can use the functor.
338 //
339 template<class OutputViewType,
340 class InputViewType,
341 const bool sameLayoutsSameOffsetTypes =
342 std::is_same<typename OutputViewType::array_layout,
343 typename InputViewType::array_layout>::value &&
344 std::is_same<typename OutputViewType::non_const_value_type,
345 typename InputViewType::non_const_value_type>::value,
346 const bool outputExecSpaceCanAccessInputMemSpace =
347 Kokkos::SpaceAccessibility<
348 typename OutputViewType::memory_space,
349 typename InputViewType::memory_space>::accessible>
350 struct CopyOffsetsImpl {
351 static void run (const OutputViewType& dst, const InputViewType& src);
352 };
353
354 // Specialization for sameLayoutsSameOffsetTypes = true:
355 //
356 // If both input and output Views have the same layout, and both
357 // input and output use the same type for offsets, then we don't
358 // need to check for overflow, and we can use Kokkos::deep_copy
359 // directly. It doesn't matter whether the output execution space
360 // can access the input memory space: Kokkos::deep_copy takes care
361 // of the details.
362 template<class OutputViewType,
363 class InputViewType,
364 const bool outputExecSpaceCanAccessInputMemSpace>
365 struct CopyOffsetsImpl<OutputViewType, InputViewType,
366 true, outputExecSpaceCanAccessInputMemSpace> {
367 static void run (const OutputViewType& dst, const InputViewType& src) {
368 static_assert (std::is_same<typename OutputViewType::non_const_value_type,
369 typename InputViewType::non_const_value_type>::value,
370 "CopyOffsetsImpl (implementation of copyOffsets): In order"
371 " to call this specialization, the input and output must "
372 "use the same offset type.");
373 static_assert (static_cast<int> (OutputViewType::rank) ==
374 static_cast<int> (InputViewType::rank),
375 "CopyOffsetsImpl (implementation of copyOffsets): In order"
376 " to call this specialization, src and dst must have the "
377 "same rank.");
378 static_assert (std::is_same<typename OutputViewType::array_layout,
379 typename InputViewType::array_layout>::value,
380 "CopyOffsetsImpl (implementation of copyOffsets): In order"
381 " to call this specialization, src and dst must have the "
382 "the same array_layout.");
383 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
384 using execution_space = typename OutputViewType::execution_space;
385 Kokkos::deep_copy (execution_space(), dst, src);
386 }
387 };
388
389 // Specializations for sameLayoutsSameOffsetTypes = false:
390 //
391 // If input and output don't have the same layout, or use different
392 // types for offsets, then we can't use Kokkos::deep_copy directly,
393 // and we may have to check for overflow.
394
395 // Specialization for sameLayoutsSameOffsetTypes = false and
396 // outputExecSpaceCanAccessInputMemSpace = true:
397 //
398 // If the output execution space can access the input memory space,
399 // then we can use CopyOffsetsFunctor directly.
400 template<class OutputViewType,
401 class InputViewType>
402 struct CopyOffsetsImpl<OutputViewType, InputViewType,
403 false, true> {
404 static void run (const OutputViewType& dst, const InputViewType& src) {
405 static_assert (static_cast<int> (OutputViewType::rank) ==
406 static_cast<int> (InputViewType::rank),
407 "CopyOffsetsImpl (implementation of copyOffsets): "
408 "src and dst must have the same rank.");
409 constexpr bool sameLayoutsSameOffsetTypes =
410 std::is_same<typename OutputViewType::array_layout,
411 typename InputViewType::array_layout>::value &&
412 std::is_same<typename OutputViewType::non_const_value_type,
413 typename InputViewType::non_const_value_type>::value;
414 static_assert (! sameLayoutsSameOffsetTypes,
415 "CopyOffsetsImpl (implements copyOffsets): In order to "
416 "call this specialization, sameLayoutsSameOffsetTypes "
417 "must be false. That is, either the input and output "
418 "must have different array layouts, or their value types "
419 "must differ.");
420 static_assert (Kokkos::SpaceAccessibility<
421 typename OutputViewType::memory_space,
422 typename InputViewType::memory_space>::accessible,
423 "CopyOffsetsImpl (implements copyOffsets): In order to "
424 "call this specialization, the output View's space must "
425 "be able to access the input View's memory space.");
426 using functor_type = CopyOffsetsFunctor<OutputViewType, InputViewType>;
427 using execution_space = typename OutputViewType::execution_space;
428 using size_type = typename OutputViewType::size_type;
429 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
430
431 const bool debug = Details::Behavior::debug ();
432 if (debug) {
433 size_t overflowCount = 0; // output argument of the reduction
434 Kokkos::parallel_reduce ("Tpetra::Details::copyOffsets",
435 range_type (0, dst.extent (0)),
436 functor_type (dst, src),
437 overflowCount);
438 errorIfOverflow (dst, src, overflowCount);
439 }
440 else {
441 Kokkos::parallel_for ("Tpetra::Details::copyOffsets",
442 range_type (0, dst.extent (0)),
443 functor_type (dst, src));
444 }
445 }
446 };
447
448 // Specialization for sameLayoutsSameOffsetTypes = false and
449 // outputExecSpaceCanAccessInputMemSpace = false.
450 //
451 // If the output execution space canNOT access the input memory
452 // space, then we can't use CopyOffsetsFunctor directly. Instead,
453 // tell Kokkos to copy the input View's data into the output View's
454 // memory space _first_. Since the offset types are different for
455 // this specialization, we can't just call Kokkos::deep_copy
456 // directly between the input and output Views of offsets; that
457 // wouldn't compile.
458 //
459 // This case can and does come up in practice: If the output View's
460 // execution space is Cuda, it cannot currently access host memory
461 // (that's the opposite direction from what UVM allows).
462 // Furthermore, that case specifically requires overflow checking,
463 // since (as of 28 Jan 2016 at least) Kokkos::Cuda uses a smaller
464 // offset type than Kokkos' host spaces.
465 template<class OutputViewType, class InputViewType>
466 struct CopyOffsetsImpl<OutputViewType, InputViewType,
467 false, false> {
468 static void run (const OutputViewType& dst, const InputViewType& src) {
469 static_assert (static_cast<int> (OutputViewType::rank) ==
470 static_cast<int> (InputViewType::rank),
471 "CopyOffsetsImpl (implementation of copyOffsets): In order"
472 " to call this specialization, src and dst must have the "
473 "same rank.");
474 constexpr bool sameLayoutsSameOffsetTypes =
475 std::is_same<typename OutputViewType::array_layout,
476 typename InputViewType::array_layout>::value &&
477 std::is_same<typename OutputViewType::non_const_value_type,
478 typename InputViewType::non_const_value_type>::value;
479 static_assert (! sameLayoutsSameOffsetTypes,
480 "CopyOffsetsImpl (implements copyOffsets): In order to "
481 "call this specialization, sameLayoutsSameOffsetTypes "
482 "must be false. That is, either the input and output "
483 "must have different array layouts, or their value types "
484 "must differ.");
485 using output_space_copy_type =
486 Kokkos::View<typename InputViewType::non_const_value_type*,
487 Kokkos::LayoutLeft, typename OutputViewType::device_type>;
488 using Kokkos::view_alloc;
489 using Kokkos::WithoutInitializing;
490 using execution_space = typename OutputViewType::execution_space;
491 output_space_copy_type
492 outputSpaceCopy (view_alloc ("outputSpace", WithoutInitializing),
493 src.extent (0));
494 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
495 Kokkos::deep_copy (execution_space(), outputSpaceCopy, src);
496
497 // The output View's execution space can access
498 // outputSpaceCopy's data, so we can run the functor now.
499 using functor_type =
500 CopyOffsetsFunctor<OutputViewType, output_space_copy_type>;
501 using size_type = typename OutputViewType::size_type;
502 using range_type = Kokkos::RangePolicy<execution_space, size_type>;
503
504 const bool debug = Details::Behavior::debug ();
505 if (debug) {
506 size_t overflowCount = 0;
507 Kokkos::parallel_reduce ("Tpetra::Details::copyOffsets",
508 range_type (0, dst.extent (0)),
509 functor_type (dst, outputSpaceCopy),
510 overflowCount);
511 errorIfOverflow (dst, src, overflowCount);
512 }
513 else {
514 Kokkos::parallel_for ("Tpetra::Details::copyOffsets",
515 range_type (0, dst.extent (0)),
516 functor_type (dst, outputSpaceCopy));
517 }
518 }
519 };
520} // namespace (anonymous)
521
533template<class OutputViewType, class InputViewType>
534void
535copyOffsets (const OutputViewType& dst, const InputViewType& src)
536{
537 static_assert (Kokkos::is_view<OutputViewType>::value,
538 "OutputViewType (the type of dst) must be a Kokkos::View.");
539 static_assert (Kokkos::is_view<InputViewType>::value,
540 "InputViewType (the type of src) must be a Kokkos::View.");
541 static_assert (std::is_same<typename OutputViewType::value_type,
542 typename OutputViewType::non_const_value_type>::value,
543 "OutputViewType (the type of dst) must be a nonconst Kokkos::View.");
544 static_assert (static_cast<int> (OutputViewType::rank) == 1,
545 "OutputViewType (the type of dst) must be a rank-1 Kokkos::View.");
546 static_assert (static_cast<int> (InputViewType::rank) == 1,
547 "InputViewType (the type of src) must be a rank-1 Kokkos::View.");
548 static_assert (std::is_integral<typename std::decay<decltype (dst(0)) >::type>::value,
549 "The entries of dst must be built-in integers.");
550 static_assert (std::is_integral<typename std::decay<decltype (src(0)) >::type>::value,
551 "The entries of src must be built-in integers.");
552
553 TEUCHOS_TEST_FOR_EXCEPTION
554 (dst.extent (0) != src.extent (0), std::invalid_argument,
555 "copyOffsets: dst.extent(0) = " << dst.extent (0)
556 << " != src.extent(0) = " << src.extent (0) << ".");
557
558 CopyOffsetsImpl<OutputViewType, InputViewType>::run (dst, src);
559}
560
561} // namespace Details
562} // namespace Tpetra
563
564#endif // TPETRA_DETAILS_COPYOFFSETS_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
Implementation details of Tpetra.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types.
Namespace Tpetra contains the class and methods constituting the Tpetra library.