Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ************************************************************************
38// @HEADER
39
40#ifndef TPETRA_CRSGRAPH_DEF_HPP
41#define TPETRA_CRSGRAPH_DEF_HPP
42
45
50#include "Tpetra_Details_getGraphDiagOffsets.hpp"
51#include "Tpetra_Details_getGraphOffRankOffsets.hpp"
52#include "Tpetra_Details_makeColMap.hpp"
56#include "Tpetra_Distributor.hpp"
57#include "Teuchos_SerialDenseMatrix.hpp"
58#include "Tpetra_Vector.hpp"
61#include "Tpetra_Details_packCrsGraph.hpp"
62#include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
63#include "Tpetra_Details_CrsPadding.hpp"
64#include "Tpetra_Util.hpp"
65#include <algorithm>
66#include <limits>
67#include <map>
68#include <sstream>
69#include <string>
70#include <type_traits>
71#include <utility>
72#include <vector>
73
74namespace Tpetra {
75 namespace Details {
76 namespace Impl {
77
78 template<class MapIter>
79 void
80 verbosePrintMap(std::ostream& out,
81 MapIter beg,
82 MapIter end,
83 const size_t numEnt,
84 const char mapName[])
85 {
86 using ::Tpetra::Details::Behavior;
87 using ::Tpetra::Details::verbosePrintArray;
88
89 out << mapName << ": {";
90 const size_t maxNumToPrint =
92 if (maxNumToPrint == 0) {
93 if (numEnt != 0) {
94 out << "...";
95 }
96 }
97 else {
98 const size_t numToPrint = numEnt > maxNumToPrint ?
99 maxNumToPrint : numEnt;
100 size_t count = 0;
101 for (MapIter it = beg; it != end; ++it) {
102 out << "(" << (*it).first << ", ";
103 verbosePrintArray(out, (*it).second, "gblColInds",
104 maxNumToPrint);
105 out << ")";
106 if (count + size_t(1) < numToPrint) {
107 out << ", ";
108 }
109 ++count;
110 }
111 if (count < numEnt) {
112 out << ", ...";
113 }
114 }
115 out << "}";
116 }
117
118 template<class LO, class GO, class Node>
119 Teuchos::ArrayView<GO>
120 getRowGraphGlobalRow(
121 std::vector<GO>& gblColIndsStorage,
122 const RowGraph<LO, GO, Node>& graph,
123 const GO gblRowInd)
124 {
125 size_t origNumEnt = graph.getNumEntriesInGlobalRow(gblRowInd);
126 if (gblColIndsStorage.size() < origNumEnt) {
127 gblColIndsStorage.resize(origNumEnt);
128 }
129 typename CrsGraph<LO,GO,Node>::nonconst_global_inds_host_view_type gblColInds(gblColIndsStorage.data(),
130 origNumEnt);
131 graph.getGlobalRowCopy(gblRowInd, gblColInds, origNumEnt);
132 Teuchos::ArrayView<GO> retval(gblColIndsStorage.data(),origNumEnt);
133 return retval;
134 }
135
136 template<class LO, class GO, class DT, class OffsetType, class NumEntType>
137 class ConvertColumnIndicesFromGlobalToLocal {
138 public:
139 ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
140 const ::Kokkos::View<const GO*, DT>& gblColInds,
141 const ::Kokkos::View<const OffsetType*, DT>& ptr,
142 const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
143 const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
144 lclColInds_ (lclColInds),
145 gblColInds_ (gblColInds),
146 ptr_ (ptr),
147 lclColMap_ (lclColMap),
148 numRowEnt_ (numRowEnt)
149 {}
150
151 KOKKOS_FUNCTION void
152 operator () (const LO& lclRow, OffsetType& curNumBad) const
153 {
154 const OffsetType offset = ptr_(lclRow);
155 // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
156 // of entries in a row to LO, as long as the row doesn't have
157 // too many duplicate entries.
158 const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
159 for (LO j = 0; j < numEnt; ++j) {
160 const GO gid = gblColInds_(offset + j);
161 const LO lid = lclColMap_.getLocalElement (gid);
162 lclColInds_(offset + j) = lid;
163 if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
164 ++curNumBad;
165 }
166 }
167 }
168
169 static OffsetType
170 run (const ::Kokkos::View<LO*, DT>& lclColInds,
171 const ::Kokkos::View<const GO*, DT>& gblColInds,
172 const ::Kokkos::View<const OffsetType*, DT>& ptr,
173 const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
174 const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
175 {
176 typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
177 typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
178
179 const LO lclNumRows = ptr.extent (0) == 0 ?
180 static_cast<LO> (0) : static_cast<LO> (ptr.extent (0) - 1);
181 OffsetType numBad = 0;
182 // Count of "bad" column indices is a reduction over rows.
183 ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
184 functor_type (lclColInds, gblColInds, ptr,
185 lclColMap, numRowEnt),
186 numBad);
187 return numBad;
188 }
189
190 private:
191 ::Kokkos::View<LO*, DT> lclColInds_;
192 ::Kokkos::View<const GO*, DT> gblColInds_;
193 ::Kokkos::View<const OffsetType*, DT> ptr_;
195 ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
196 };
197
198 } // namespace Impl
199
214 template<class LO, class GO, class DT, class OffsetType, class NumEntType>
215 OffsetType
216 convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
217 const Kokkos::View<const GO*, DT>& gblColInds,
218 const Kokkos::View<const OffsetType*, DT>& ptr,
219 const LocalMap<LO, GO, DT>& lclColMap,
220 const Kokkos::View<const NumEntType*, DT>& numRowEnt)
221 {
222 using Impl::ConvertColumnIndicesFromGlobalToLocal;
223 typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
224 return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
225 }
226
227 template<class ViewType, class LO>
228 class MaxDifference {
229 public:
230 MaxDifference (const ViewType& ptr) : ptr_ (ptr) {}
231
232 KOKKOS_INLINE_FUNCTION void init (LO& dst) const {
233 dst = 0;
234 }
235
236 KOKKOS_INLINE_FUNCTION void
237 join (LO& dst, const LO& src) const
238 {
239 dst = (src > dst) ? src : dst;
240 }
241
242 KOKKOS_INLINE_FUNCTION void
243 operator () (const LO lclRow, LO& maxNumEnt) const
244 {
245 const LO numEnt = static_cast<LO> (ptr_(lclRow+1) - ptr_(lclRow));
246 maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
247 }
248 private:
249 typename ViewType::const_type ptr_;
250 };
251
252 template<class ViewType, class LO>
253 typename ViewType::non_const_value_type
254 maxDifference (const char kernelLabel[],
255 const ViewType& ptr,
256 const LO lclNumRows)
257 {
258 if (lclNumRows == 0) {
259 // mfh 07 May 2018: Weirdly, I need this special case,
260 // otherwise I get the wrong answer.
261 return static_cast<LO> (0);
262 }
263 else {
264 using execution_space = typename ViewType::execution_space;
265 using range_type = Kokkos::RangePolicy<execution_space, LO>;
266 LO theMaxNumEnt {0};
267 Kokkos::parallel_reduce (kernelLabel,
268 range_type (0, lclNumRows),
269 MaxDifference<ViewType, LO> (ptr),
270 theMaxNumEnt);
271 return theMaxNumEnt;
272 }
273 }
274
275 } // namespace Details
276
277 template <class LocalOrdinal, class GlobalOrdinal, class Node>
278 bool
279 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
280 getDebug() {
281 return Details::Behavior::debug("CrsGraph");
282 }
283
284 template <class LocalOrdinal, class GlobalOrdinal, class Node>
285 bool
286 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
287 getVerbose() {
288 return Details::Behavior::verbose("CrsGraph");
289 }
290
291 template <class LocalOrdinal, class GlobalOrdinal, class Node>
293 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
294 const size_t maxNumEntriesPerRow,
295 const Teuchos::RCP<Teuchos::ParameterList>& params) :
296 dist_object_type (rowMap)
297 , rowMap_ (rowMap)
298 , numAllocForAllRows_ (maxNumEntriesPerRow)
299 {
300 const char tfecfFuncName[] =
301 "CrsGraph(rowMap,maxNumEntriesPerRow,params): ";
302 staticAssertions ();
303 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
304 (maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
305 std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
306 "a valid size_t value, which in this case means it must not be "
307 "Teuchos::OrdinalTraits<size_t>::invalid().");
308 resumeFill (params);
310 }
311
312 template <class LocalOrdinal, class GlobalOrdinal, class Node>
314 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
315 const Teuchos::RCP<const map_type>& colMap,
316 const size_t maxNumEntriesPerRow,
317 const Teuchos::RCP<Teuchos::ParameterList>& params) :
318 dist_object_type (rowMap)
319 , rowMap_ (rowMap)
320 , colMap_ (colMap)
321 , numAllocForAllRows_ (maxNumEntriesPerRow)
322 {
323 const char tfecfFuncName[] =
324 "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,params): ";
325 staticAssertions ();
326 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
327 maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
328 std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
329 "a valid size_t value, which in this case means it must not be "
330 "Teuchos::OrdinalTraits<size_t>::invalid().");
331 resumeFill (params);
332 checkInternalState ();
333 }
334
335
336 template <class LocalOrdinal, class GlobalOrdinal, class Node>
338 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
339 const Teuchos::ArrayView<const size_t>& numEntPerRow,
340 const Teuchos::RCP<Teuchos::ParameterList>& params) :
341 dist_object_type (rowMap)
342 , rowMap_ (rowMap)
343 , numAllocForAllRows_ (0)
344 {
345 const char tfecfFuncName[] =
346 "CrsGraph(rowMap,numEntPerRow,params): ";
347 staticAssertions ();
348
349 const size_t lclNumRows = rowMap.is_null () ?
350 static_cast<size_t> (0) : rowMap->getLocalNumElements ();
351 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
352 static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
353 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
354 << " != the local number of rows " << lclNumRows << " as specified by "
355 "the input row Map.");
356
357 if (debug_) {
358 for (size_t r = 0; r < lclNumRows; ++r) {
359 const size_t curRowCount = numEntPerRow[r];
360 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
361 (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
362 std::invalid_argument, "numEntPerRow(" << r << ") "
363 "specifies an invalid number of entries "
364 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
365 }
366 }
367
368 // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
369 // The latter is a const View, so we have to copy into a nonconst
370 // View first, then assign.
371 typedef decltype (k_numAllocPerRow_) out_view_type;
372 typedef typename out_view_type::non_const_type nc_view_type;
373 typedef Kokkos::View<const size_t*,
374 typename nc_view_type::array_layout,
375 Kokkos::HostSpace,
376 Kokkos::MemoryUnmanaged> in_view_type;
377 in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
378 nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
379 lclNumRows);
380 // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
381 using exec_space = typename nc_view_type::execution_space;
382 Kokkos::deep_copy (exec_space(), numAllocPerRowOut, numAllocPerRowIn);
383 k_numAllocPerRow_ = numAllocPerRowOut;
384
385 resumeFill (params);
387 }
388
389
390
391 template <class LocalOrdinal, class GlobalOrdinal, class Node>
393 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
394 const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
395 const Teuchos::RCP<Teuchos::ParameterList>& params) :
396 dist_object_type (rowMap)
397 , rowMap_ (rowMap)
398 , k_numAllocPerRow_ (numEntPerRow.h_view)
399 , numAllocForAllRows_ (0)
400 {
401 const char tfecfFuncName[] =
402 "CrsGraph(rowMap,numEntPerRow,params): ";
403 staticAssertions ();
404
405 const size_t lclNumRows = rowMap.is_null () ?
406 static_cast<size_t> (0) : rowMap->getLocalNumElements ();
407 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
408 static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
409 std::invalid_argument, "numEntPerRow has length " <<
410 numEntPerRow.extent (0) << " != the local number of rows " <<
411 lclNumRows << " as specified by " "the input row Map.");
412
413 if (debug_) {
414 for (size_t r = 0; r < lclNumRows; ++r) {
415 const size_t curRowCount = numEntPerRow.h_view(r);
416 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
417 (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
418 std::invalid_argument, "numEntPerRow(" << r << ") "
419 "specifies an invalid number of entries "
420 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
421 }
422 }
423
424 resumeFill (params);
425 checkInternalState ();
426 }
427
428
429 template <class LocalOrdinal, class GlobalOrdinal, class Node>
431 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
432 const Teuchos::RCP<const map_type>& colMap,
433 const Kokkos::DualView<const size_t*, device_type>& numEntPerRow,
434 const Teuchos::RCP<Teuchos::ParameterList>& params) :
435 dist_object_type (rowMap)
436 , rowMap_ (rowMap)
437 , colMap_ (colMap)
438 , k_numAllocPerRow_ (numEntPerRow.h_view)
439 , numAllocForAllRows_ (0)
440 {
441 const char tfecfFuncName[] =
442 "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
443 staticAssertions ();
444
445 const size_t lclNumRows = rowMap.is_null () ?
446 static_cast<size_t> (0) : rowMap->getLocalNumElements ();
447 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
448 static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
449 std::invalid_argument, "numEntPerRow has length " <<
450 numEntPerRow.extent (0) << " != the local number of rows " <<
451 lclNumRows << " as specified by " "the input row Map.");
452
453 if (debug_) {
454 for (size_t r = 0; r < lclNumRows; ++r) {
455 const size_t curRowCount = numEntPerRow.h_view(r);
456 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
457 (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
458 std::invalid_argument, "numEntPerRow(" << r << ") "
459 "specifies an invalid number of entries "
460 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
461 }
462 }
463
464 resumeFill (params);
465 checkInternalState ();
466 }
467
468
469 template <class LocalOrdinal, class GlobalOrdinal, class Node>
471 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
472 const Teuchos::RCP<const map_type>& colMap,
473 const Teuchos::ArrayView<const size_t>& numEntPerRow,
474 const Teuchos::RCP<Teuchos::ParameterList>& params) :
475 dist_object_type (rowMap)
476 , rowMap_ (rowMap)
477 , colMap_ (colMap)
478 , numAllocForAllRows_ (0)
479 {
480 const char tfecfFuncName[] =
481 "CrsGraph(rowMap,colMap,numEntPerRow,params): ";
482 staticAssertions ();
483
484 const size_t lclNumRows = rowMap.is_null () ?
485 static_cast<size_t> (0) : rowMap->getLocalNumElements ();
486 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
487 static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
488 std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
489 << " != the local number of rows " << lclNumRows << " as specified by "
490 "the input row Map.");
491
492 if (debug_) {
493 for (size_t r = 0; r < lclNumRows; ++r) {
494 const size_t curRowCount = numEntPerRow[r];
495 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
496 (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
497 std::invalid_argument, "numEntPerRow(" << r << ") "
498 "specifies an invalid number of entries "
499 "(Teuchos::OrdinalTraits<size_t>::invalid()).");
500 }
501 }
502
503 // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
504 // The latter is a const View, so we have to copy into a nonconst
505 // View first, then assign.
506 typedef decltype (k_numAllocPerRow_) out_view_type;
507 typedef typename out_view_type::non_const_type nc_view_type;
508 typedef Kokkos::View<const size_t*,
509 typename nc_view_type::array_layout,
510 Kokkos::HostSpace,
511 Kokkos::MemoryUnmanaged> in_view_type;
512 in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
513 nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
514 lclNumRows);
515 // DEEP_COPY REVIEW - HOST-TO-HOSTMIRROR
516 using exec_space = typename nc_view_type::execution_space;
517 Kokkos::deep_copy (exec_space(), numAllocPerRowOut, numAllocPerRowIn);
518 k_numAllocPerRow_ = numAllocPerRowOut;
519
520 resumeFill (params);
522 }
523
524
525 template <class LocalOrdinal, class GlobalOrdinal, class Node>
528 const Teuchos::RCP<const map_type>& rowMap,
529 const Teuchos::RCP<Teuchos::ParameterList>& params) :
530 dist_object_type (rowMap)
531 , rowMap_(rowMap)
532 , colMap_(originalGraph.colMap_)
533 , numAllocForAllRows_(originalGraph.numAllocForAllRows_)
534 , storageStatus_(originalGraph.storageStatus_)
535 , indicesAreAllocated_(originalGraph.indicesAreAllocated_)
536 , indicesAreLocal_(originalGraph.indicesAreLocal_)
537 , indicesAreSorted_(originalGraph.indicesAreSorted_)
538 {
539 staticAssertions();
540
541 int numRows = rowMap->getLocalNumElements();
542 size_t numNonZeros = originalGraph.rowPtrsPacked_host_(numRows);
543 auto rowsToUse = Kokkos::pair<size_t, size_t>(0, numRows+1);
544
545 rowPtrsUnpacked_dev_ = Kokkos::subview(originalGraph.rowPtrsUnpacked_dev_, rowsToUse);
546 rowPtrsUnpacked_host_ = Kokkos::subview(originalGraph.rowPtrsUnpacked_host_, rowsToUse);
547
548 rowPtrsPacked_dev_ = Kokkos::subview(originalGraph.rowPtrsPacked_dev_, rowsToUse);
549 rowPtrsPacked_host_ = Kokkos::subview(originalGraph.rowPtrsPacked_host_, rowsToUse);
550
551 if (indicesAreLocal_) {
552 lclIndsUnpacked_wdv = local_inds_wdv_type(originalGraph.lclIndsUnpacked_wdv, 0, numNonZeros);
553 lclIndsPacked_wdv = local_inds_wdv_type(originalGraph.lclIndsPacked_wdv, 0, numNonZeros);
555 else {
556 gblInds_wdv = global_inds_wdv_type(originalGraph.gblInds_wdv, 0, numNonZeros);
557 }
558
560 }
561
562 template <class LocalOrdinal, class GlobalOrdinal, class Node>
564 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
565 const Teuchos::RCP<const map_type>& colMap,
566 const typename local_graph_device_type::row_map_type& rowPointers,
567 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
568 const Teuchos::RCP<Teuchos::ParameterList>& params) :
569 dist_object_type (rowMap)
570 , rowMap_(rowMap)
571 , colMap_(colMap)
572 , numAllocForAllRows_(0)
573 , storageStatus_(Details::STORAGE_1D_PACKED)
574 , indicesAreAllocated_(true)
575 , indicesAreLocal_(true)
576 {
577 staticAssertions ();
578 if (! params.is_null() && params->isParameter("sorted") &&
579 ! params->get<bool>("sorted")) {
580 indicesAreSorted_ = false;
581 }
582 else {
583 indicesAreSorted_ = true;
584 }
585 setAllIndices (rowPointers, columnIndices);
587 }
588
589 template <class LocalOrdinal, class GlobalOrdinal, class Node>
591 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
592 const Teuchos::RCP<const map_type>& colMap,
593 const Teuchos::ArrayRCP<size_t>& rowPointers,
594 const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
595 const Teuchos::RCP<Teuchos::ParameterList>& params) :
596 dist_object_type (rowMap)
597 , rowMap_ (rowMap)
598 , colMap_ (colMap)
599 , numAllocForAllRows_ (0)
600 , storageStatus_ (Details::STORAGE_1D_PACKED)
601 , indicesAreAllocated_ (true)
602 , indicesAreLocal_ (true)
603 {
604 staticAssertions ();
605 if (! params.is_null() && params->isParameter("sorted") &&
606 ! params->get<bool>("sorted")) {
607 indicesAreSorted_ = false;
608 }
609 else {
610 indicesAreSorted_ = true;
611 }
612 setAllIndices (rowPointers, columnIndices);
614 }
615
616 template <class LocalOrdinal, class GlobalOrdinal, class Node>
618 CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
619 const Teuchos::RCP<const map_type>& colMap,
620 const local_graph_device_type& k_local_graph_,
621 const Teuchos::RCP<Teuchos::ParameterList>& params)
622 : CrsGraph (k_local_graph_,
623 rowMap,
624 colMap,
625 Teuchos::null,
626 Teuchos::null,
627 params)
628 {}
629
630 template <class LocalOrdinal, class GlobalOrdinal, class Node>
632 CrsGraph (const local_graph_device_type& k_local_graph_,
633 const Teuchos::RCP<const map_type>& rowMap,
634 const Teuchos::RCP<const map_type>& colMap,
635 const Teuchos::RCP<const map_type>& domainMap,
636 const Teuchos::RCP<const map_type>& rangeMap,
637 const Teuchos::RCP<Teuchos::ParameterList>& params)
638 : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
639 , rowMap_ (rowMap)
640 , colMap_ (colMap)
641 , numAllocForAllRows_ (0)
642 , storageStatus_ (Details::STORAGE_1D_PACKED)
643 , indicesAreAllocated_ (true)
644 , indicesAreLocal_ (true)
645 {
646 staticAssertions();
647 const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
648
649 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
650 colMap.is_null (), std::runtime_error,
651 ": The input column Map must be nonnull.");
652 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
653 k_local_graph_.numRows () != rowMap->getLocalNumElements (),
654 std::runtime_error,
655 ": The input row Map and the input local graph need to have the same "
656 "number of rows. The row Map claims " << rowMap->getLocalNumElements ()
657 << " row(s), but the local graph claims " << k_local_graph_.numRows ()
658 << " row(s).");
659
660 // NOTE (mfh 17 Mar 2014) getLocalNumRows() returns
661 // rowMap_->getLocalNumElements(), but it doesn't have to.
662 // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
663 // k_local_graph_.numRows () != getLocalNumRows (), std::runtime_error,
664 // ": The input row Map and the input local graph need to have the same "
665 // "number of rows. The row Map claims " << getLocalNumRows () << " row(s), "
666 // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
667 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
668 lclIndsUnpacked_wdv.extent (0) != 0 || gblInds_wdv.extent (0) != 0, std::logic_error,
669 ": cannot have 1D data structures allocated.");
670
671 if(! params.is_null() && params->isParameter("sorted") &&
672 ! params->get<bool>("sorted")) {
673 indicesAreSorted_ = false;
674 }
675 else {
676 indicesAreSorted_ = true;
677 }
678
679 setDomainRangeMaps (domainMap.is_null() ? rowMap_ : domainMap,
680 rangeMap .is_null() ? rowMap_ : rangeMap);
681 Teuchos::Array<int> remotePIDs (0); // unused output argument
682 this->makeImportExport (remotePIDs, false);
683
684 lclIndsPacked_wdv = local_inds_wdv_type(k_local_graph_.entries);
685 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
686 this->setRowPtrs(k_local_graph_.row_map);
687
688 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
689
690 const bool callComputeGlobalConstants = params.get () == nullptr ||
691 params->get ("compute global constants", true);
692
693 if (callComputeGlobalConstants) {
694 this->computeGlobalConstants ();
695 }
696 this->fillComplete_ = true;
697 this->checkInternalState ();
698 }
699
700 template <class LocalOrdinal, class GlobalOrdinal, class Node>
702 CrsGraph (const local_graph_device_type& lclGraph,
703 const Teuchos::RCP<const map_type>& rowMap,
704 const Teuchos::RCP<const map_type>& colMap,
705 const Teuchos::RCP<const map_type>& domainMap,
706 const Teuchos::RCP<const map_type>& rangeMap,
707 const Teuchos::RCP<const import_type>& importer,
708 const Teuchos::RCP<const export_type>& exporter,
709 const Teuchos::RCP<Teuchos::ParameterList>& params) :
710 DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap),
711 rowMap_ (rowMap),
712 colMap_ (colMap),
713 rangeMap_ (rangeMap.is_null () ? rowMap : rangeMap),
714 domainMap_ (domainMap.is_null () ? rowMap : domainMap),
715 importer_ (importer),
716 exporter_ (exporter),
717 numAllocForAllRows_ (0),
718 storageStatus_ (Details::STORAGE_1D_PACKED),
719 indicesAreAllocated_ (true),
720 indicesAreLocal_ (true)
721 {
722 staticAssertions();
723 const char tfecfFuncName[] = "Tpetra::CrsGraph(local_graph_device_type,"
724 "Map,Map,Map,Map,Import,Export,params): ";
726 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
727 (colMap.is_null (), std::runtime_error,
728 "The input column Map must be nonnull.");
729
730 lclIndsPacked_wdv = local_inds_wdv_type(lclGraph.entries);
731 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
732 setRowPtrs(lclGraph.row_map);
733
734 set_need_sync_host_uvm_access(); // lclGraph_ potentially still in a kernel
735
736 if (! params.is_null() && params->isParameter("sorted") &&
737 ! params->get<bool>("sorted")) {
738 indicesAreSorted_ = false;
739 }
740 else {
741 indicesAreSorted_ = true;
742 }
743
744 const bool callComputeGlobalConstants =
745 params.get () == nullptr ||
746 params->get ("compute global constants", true);
747 if (callComputeGlobalConstants) {
748 this->computeGlobalConstants ();
749 }
750 fillComplete_ = true;
751 checkInternalState ();
752 }
753
754 template <class LocalOrdinal, class GlobalOrdinal, class Node>
755 Teuchos::RCP<const Teuchos::ParameterList>
757 getValidParameters () const
758 {
759 using Teuchos::RCP;
760 using Teuchos::ParameterList;
761 using Teuchos::parameterList;
762
763 RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
764
765 // Make a sublist for the Import.
766 RCP<ParameterList> importSublist = parameterList ("Import");
767
768 // FIXME (mfh 02 Apr 2012) We should really have the Import and
769 // Export objects fill in these lists. However, we don't want to
770 // create an Import or Export unless we need them. For now, we
771 // know that the Import and Export just pass the list directly to
772 // their Distributor, so we can create a Distributor here
773 // (Distributor's constructor is a lightweight operation) and have
774 // it fill in the list.
775
776 // Fill in Distributor default parameters by creating a
777 // Distributor and asking it to do the work.
778 Distributor distributor (rowMap_->getComm (), importSublist);
779 params->set ("Import", *importSublist, "How the Import performs communication.");
780
781 // Make a sublist for the Export. For now, it's a clone of the
782 // Import sublist. It's not a shallow copy, though, since we
783 // might like the Import to do communication differently than the
784 // Export.
785 params->set ("Export", *importSublist, "How the Export performs communication.");
786
787 return params;
788 }
789
790 template <class LocalOrdinal, class GlobalOrdinal, class Node>
791 void
793 setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
794 {
795 Teuchos::RCP<const Teuchos::ParameterList> validParams =
796 getValidParameters ();
797 params->validateParametersAndSetDefaults (*validParams);
798 this->setMyParamList (params);
799 }
801 template <class LocalOrdinal, class GlobalOrdinal, class Node>
804 getGlobalNumRows () const
805 {
806 return rowMap_->getGlobalNumElements ();
807 }
808
809 template <class LocalOrdinal, class GlobalOrdinal, class Node>
812 getGlobalNumCols () const
813 {
814 const char tfecfFuncName[] = "getGlobalNumCols: ";
815 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
816 ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
817 "The graph does not have a domain Map. You may not call this method in "
818 "that case.");
819 return getDomainMap ()->getGlobalNumElements ();
820 }
821
822
823 template <class LocalOrdinal, class GlobalOrdinal, class Node>
824 size_t
826 getLocalNumRows () const
827 {
828 return this->rowMap_.is_null () ?
829 static_cast<size_t> (0) :
830 this->rowMap_->getLocalNumElements ();
831 }
833
834 template <class LocalOrdinal, class GlobalOrdinal, class Node>
835 size_t
837 getLocalNumCols () const
838 {
839 const char tfecfFuncName[] = "getLocalNumCols: ";
840 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
841 ! hasColMap (), std::runtime_error,
842 "The graph does not have a column Map. You may not call this method "
843 "unless the graph has a column Map. This requires either that a custom "
844 "column Map was given to the constructor, or that fillComplete() has "
845 "been called.");
846 return colMap_.is_null () ? static_cast<size_t> (0) :
847 colMap_->getLocalNumElements ();
848 }
849
850
851
852 template <class LocalOrdinal, class GlobalOrdinal, class Node>
853 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
855 getRowMap () const
856 {
857 return rowMap_;
858 }
859
860 template <class LocalOrdinal, class GlobalOrdinal, class Node>
861 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
864 {
865 return colMap_;
866 }
867
868 template <class LocalOrdinal, class GlobalOrdinal, class Node>
869 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
871 getDomainMap () const
872 {
873 return domainMap_;
874 }
875
876 template <class LocalOrdinal, class GlobalOrdinal, class Node>
877 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
879 getRangeMap () const
881 return rangeMap_;
882 }
884 template <class LocalOrdinal, class GlobalOrdinal, class Node>
885 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
887 getImporter () const
888 {
889 return importer_;
890 }
891
892 template <class LocalOrdinal, class GlobalOrdinal, class Node>
893 Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
896 {
897 return exporter_;
898 }
899
900 template <class LocalOrdinal, class GlobalOrdinal, class Node>
901 bool
903 hasColMap () const
904 {
905 return ! colMap_.is_null ();
907
908 template <class LocalOrdinal, class GlobalOrdinal, class Node>
909 bool
911 isStorageOptimized () const
912 {
913 // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
914 // getLocalNumRows() is zero?
915
916 const bool isOpt = indicesAreAllocated_ &&
917 k_numRowEntries_.extent (0) == 0 &&
918 getLocalNumRows () > 0;
919
920 return isOpt;
921 }
923
924 template <class LocalOrdinal, class GlobalOrdinal, class Node>
928 {
929 const char tfecfFuncName[] = "getGlobalNumEntries: ";
930 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
931 (! this->haveGlobalConstants_, std::logic_error,
932 "The graph does not have global constants computed, "
933 "but the user has requested them.");
934
935 return globalNumEntries_;
936 }
937
939 template <class LocalOrdinal, class GlobalOrdinal, class Node>
940 size_t
942 getLocalNumEntries () const
943 {
944 typedef LocalOrdinal LO;
945
946 if (this->indicesAreAllocated_) {
947 const LO lclNumRows = this->getLocalNumRows ();
948 if (lclNumRows == 0) {
949 return static_cast<size_t> (0);
950 }
951 else {
952 // Avoid the "*this capture" issue by creating a local Kokkos::View.
953 auto numEntPerRow = this->k_numRowEntries_;
954 const LO numNumEntPerRow = numEntPerRow.extent (0);
955 if (numNumEntPerRow == 0) {
956 if (static_cast<LO> (this->rowPtrsPacked_dev_.extent (0)) <
957 static_cast<LO> (lclNumRows + 1)) {
958 return static_cast<size_t> (0);
959 }
960 else {
961 return this->rowPtrsPacked_host_(lclNumRows);
962 }
963 }
964 else { // k_numRowEntries_ is populated
965 // k_numRowEntries_ is actually be a host View, so we run
966 // the sum in its native execution space. This also means
967 // that we can use explicit capture (which could perhaps
968 // improve build time) instead of KOKKOS_LAMBDA, and avoid
969 // any CUDA build issues with trying to run a __device__ -
970 // only function on host.
971 typedef typename num_row_entries_type::execution_space
972 host_exec_space;
973 typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
974
975 const LO upperLoopBound = lclNumRows < numNumEntPerRow ?
976 lclNumRows :
977 numNumEntPerRow;
978 size_t nodeNumEnt = 0;
979 Kokkos::parallel_reduce ("Tpetra::CrsGraph::getNumNodeEntries",
980 range_type (0, upperLoopBound),
981 [=] (const LO& k, size_t& lclSum) {
982 lclSum += numEntPerRow(k);
983 }, nodeNumEnt);
984 return nodeNumEnt;
985 }
987 }
988 else { // nothing allocated on this process, so no entries
989 return static_cast<size_t> (0);
990 }
991 }
992
993 template <class LocalOrdinal, class GlobalOrdinal, class Node>
997 {
998 const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
999 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1000 (! this->haveGlobalConstants_, std::logic_error,
1001 "The graph does not have global constants computed, "
1002 "but the user has requested them.");
1003
1004 return globalMaxNumRowEntries_;
1005 }
1006
1007 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1008 size_t
1011 {
1012 return nodeMaxNumRowEntries_;
1013 }
1014
1015 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1016 bool
1018 isFillComplete () const
1019 {
1020 return fillComplete_;
1021 }
1022
1023 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1024 bool
1026 isFillActive () const
1027 {
1028 return ! fillComplete_;
1029 }
1030
1031
1032 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1033 bool
1035 isLocallyIndexed () const
1036 {
1037 return indicesAreLocal_;
1038 }
1039
1040 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1041 bool
1044 {
1045 return indicesAreGlobal_;
1047
1048 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1049 size_t
1052 {
1053 typedef LocalOrdinal LO;
1054
1055 if (this->indicesAreAllocated_) {
1056 const LO lclNumRows = this->getLocalNumRows ();
1057 if (lclNumRows == 0) {
1058 return static_cast<size_t> (0);
1059 }
1060 else if (storageStatus_ == Details::STORAGE_1D_PACKED) {
1061 if (static_cast<LO> (this->rowPtrsPacked_dev_.extent (0)) <
1062 static_cast<LO> (lclNumRows + 1)) {
1063 return static_cast<size_t> (0);
1064 }
1065 else {
1066 return this->rowPtrsPacked_host_(lclNumRows);
1068 }
1069 else if (storageStatus_ == Details::STORAGE_1D_UNPACKED) {
1070 if (rowPtrsUnpacked_host_.extent (0) == 0) {
1071 return static_cast<size_t> (0);
1072 }
1073 else {
1074 return rowPtrsUnpacked_host_(lclNumRows);
1075 }
1077 else {
1078 return static_cast<size_t> (0);
1079 }
1080 }
1081 else {
1082 return Tpetra::Details::OrdinalTraits<size_t>::invalid ();
1083 }
1084 }
1085
1086 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1087 Teuchos::RCP<const Teuchos::Comm<int> >
1089 getComm () const
1090 {
1091 return this->rowMap_.is_null () ? Teuchos::null : this->rowMap_->getComm ();
1092 }
1093
1094 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1095 GlobalOrdinal
1097 getIndexBase () const
1098 {
1099 return rowMap_->getIndexBase ();
1100 }
1101
1102 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1103 bool
1105 indicesAreAllocated () const
1106 {
1107 return indicesAreAllocated_;
1108 }
1109
1110 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1111 bool
1113 isSorted () const
1114 {
1115 return indicesAreSorted_;
1116 }
1117
1118 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1119 bool
1121 isMerged () const
1122 {
1123 return noRedundancies_;
1125
1126 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1127 void
1130 {
1131 // FIXME (mfh 07 May 2013) How do we know that the change
1132 // introduced a redundancy, or even that it invalidated the sorted
1133 // order of indices? CrsGraph has always made this conservative
1134 // guess. It could be a bit costly to check at insertion time,
1135 // though.
1136 indicesAreSorted_ = false;
1137 noRedundancies_ = false;
1138
1139 // We've modified the graph, so we'll have to recompute local
1140 // constants like the number of diagonal entries on this process.
1141 haveLocalConstants_ = false;
1142 }
1143
1144 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1145 void
1147 allocateIndices (const ELocalGlobal lg, const bool verbose)
1148 {
1150 using Teuchos::arcp;
1151 using Teuchos::Array;
1152 using Teuchos::ArrayRCP;
1153 using std::endl;
1154 typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1155 typedef typename local_graph_device_type::row_map_type::non_const_type
1156 non_const_row_map_type;
1157 const char tfecfFuncName[] = "allocateIndices: ";
1158 const char suffix[] =
1159 " Please report this bug to the Tpetra developers.";
1160 ProfilingRegion profRegion("Tpetra::CrsGraph::allocateIndices");
1161
1162 std::unique_ptr<std::string> prefix;
1163 if (verbose) {
1164 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
1165 std::ostringstream os;
1166 os << *prefix << "Start: lg="
1167 << (lg == GlobalIndices ? "GlobalIndices" : "LocalIndices")
1168 << ", numRows: " << this->getLocalNumRows() << endl;
1169 std::cerr << os.str();
1170 }
1171
1172 // This is a protected function, only callable by us. If it was
1173 // called incorrectly, it is our fault. That's why the tests
1174 // below throw std::logic_error instead of std::invalid_argument.
1175 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1176 (isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1177 ": The graph is locally indexed, but Tpetra code is calling "
1178 "this method with lg=GlobalIndices." << suffix);
1179 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1180 (isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1181 ": The graph is globally indexed, but Tpetra code is calling "
1182 "this method with lg=LocalIndices." << suffix);
1183 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1184 (indicesAreAllocated (), std::logic_error, ": The graph's "
1185 "indices are already allocated, but Tpetra is calling "
1186 "allocateIndices again." << suffix);
1187 const size_t numRows = this->getLocalNumRows ();
1188
1189 //
1190 // STATIC ALLOCATION PROFILE
1191 //
1192 {
1193 if (verbose) {
1194 std::ostringstream os;
1195 os << *prefix << "Allocate k_rowPtrs: " << (numRows+1) << endl;
1196 std::cerr << os.str();
1197 }
1198 non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1199
1200 if (this->k_numAllocPerRow_.extent (0) != 0) {
1201 // It's OK to throw std::invalid_argument here, because we
1202 // haven't incurred any side effects yet. Throwing that
1203 // exception (and not, say, std::logic_error) implies that the
1204 // instance can recover.
1205 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1206 (this->k_numAllocPerRow_.extent (0) != numRows,
1207 std::invalid_argument, "k_numAllocPerRow_ is allocated, that is, "
1208 "has nonzero length " << this->k_numAllocPerRow_.extent (0)
1209 << ", but its length != numRows = " << numRows << ".");
1210
1211 // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1212 // we want to compute here) lives on device. That's OK;
1213 // computeOffsetsFromCounts can handle this case.
1215
1216 // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1217 // doesn't attempt to check its input for "invalid" flag
1218 // values. For now, we omit that feature of the sequential
1219 // code disabled below.
1220 computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1221 }
1222 else {
1223 // It's OK to throw std::invalid_argument here, because we
1224 // haven't incurred any side effects yet. Throwing that
1225 // exception (and not, say, std::logic_error) implies that the
1226 // instance can recover.
1227 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1228 (this->numAllocForAllRows_ ==
1229 Tpetra::Details::OrdinalTraits<size_t>::invalid (),
1230 std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1231 "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = " <<
1232 Tpetra::Details::OrdinalTraits<size_t>::invalid () << ".");
1233
1235 computeOffsetsFromConstantCount (k_rowPtrs, this->numAllocForAllRows_);
1236 }
1237
1238 // "Commit" the resulting row offsets.
1239 setRowPtrsUnpacked(k_rowPtrs);
1240 }
1241
1242 const size_type numInds = rowPtrsUnpacked_host_(numRows);
1243 if (lg == LocalIndices) {
1244 if (verbose) {
1245 std::ostringstream os;
1246 os << *prefix << "Allocate local column indices "
1247 "lclIndsUnpacked_wdv: " << numInds << endl;
1248 std::cerr << os.str();
1249 }
1250 lclIndsUnpacked_wdv = local_inds_wdv_type (
1251 local_inds_dualv_type("Tpetra::CrsGraph::lclInd",numInds));
1252 }
1253 else {
1254 if (verbose) {
1255 std::ostringstream os;
1256 os << *prefix << "Allocate global column indices "
1257 "gblInds_wdv: " << numInds << endl;
1258 std::cerr << os.str();
1259 }
1260 gblInds_wdv = global_inds_wdv_type (
1261 global_inds_dualv_type("Tpetra::CrsGraph::gblInd",numInds));
1262 }
1263 storageStatus_ = Details::STORAGE_1D_UNPACKED;
1264
1265 this->indicesAreLocal_ = (lg == LocalIndices);
1266 this->indicesAreGlobal_ = (lg == GlobalIndices);
1267
1268 if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1269 using Kokkos::ViewAllocateWithoutInitializing;
1270 typedef decltype (k_numRowEntries_) row_ent_type;
1271 const char label[] = "Tpetra::CrsGraph::numRowEntries";
1272 if (verbose) {
1273 std::ostringstream os;
1274 os << *prefix << "Allocate k_numRowEntries_: " << numRows
1275 << endl;
1276 std::cerr << os.str();
1277 }
1278 row_ent_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1279 // DEEP_COPY REVIEW - VALUE-TO-HOSTMIRROR
1280 Kokkos::deep_copy (execution_space(), numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1281 Kokkos::fence(); // TODO: Need to understand downstream failure points and move this fence.
1282 this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1283 }
1284
1285 // Once indices are allocated, CrsGraph needs to free this information.
1286 this->numAllocForAllRows_ = 0;
1287 this->k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1288 this->indicesAreAllocated_ = true;
1289
1290 try {
1291 this->checkInternalState ();
1292 }
1293 catch (std::logic_error& e) {
1294 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1295 (true, std::logic_error, "At end of allocateIndices, "
1296 "checkInternalState threw std::logic_error: "
1297 << e.what ());
1298 }
1299 catch (std::exception& e) {
1300 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1301 (true, std::runtime_error, "At end of allocateIndices, "
1302 "checkInternalState threw std::exception: "
1303 << e.what ());
1304 }
1305 catch (...) {
1306 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1307 (true, std::runtime_error, "At end of allocateIndices, "
1308 "checkInternalState threw an exception "
1309 "not a subclass of std::exception.");
1310 }
1312 if (verbose) {
1313 std::ostringstream os;
1314 os << *prefix << "Done" << endl;
1315 std::cerr << os.str();
1316 }
1317 }
1318
1319 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1320 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1321 local_inds_dualv_type::t_host::const_type
1323 getLocalIndsViewHost (const RowInfo& rowinfo) const
1324 {
1325 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1326 return typename local_inds_dualv_type::t_host::const_type ();
1327 else
1328 return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1329 rowinfo.allocSize,
1330 Access::ReadOnly);
1331 }
1332
1333 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1335 local_inds_dualv_type::t_host
1337 getLocalIndsViewHostNonConst (const RowInfo& rowinfo)
1338 {
1339 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1340 return typename local_inds_dualv_type::t_host ();
1341 else
1342 return lclIndsUnpacked_wdv.getHostSubview(rowinfo.offset1D,
1343 rowinfo.allocSize,
1344 Access::ReadWrite);
1345 }
1346
1347 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1349 global_inds_dualv_type::t_host::const_type
1351 getGlobalIndsViewHost (const RowInfo& rowinfo) const
1352 {
1353 if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1354 return typename global_inds_dualv_type::t_host::const_type ();
1355 else
1356 return gblInds_wdv.getHostSubview(rowinfo.offset1D,
1357 rowinfo.allocSize,
1358 Access::ReadOnly);
1359 }
1361 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1363 local_inds_dualv_type::t_dev::const_type
1365 getLocalIndsViewDevice (const RowInfo& rowinfo) const
1366 {
1367 if (rowinfo.allocSize == 0 || lclIndsUnpacked_wdv.extent(0) == 0)
1368 return typename local_inds_dualv_type::t_dev::const_type ();
1369 else
1370 return lclIndsUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
1371 rowinfo.allocSize,
1372 Access::ReadOnly);
1373 }
1374
1375 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1377 global_inds_dualv_type::t_dev::const_type
1379 getGlobalIndsViewDevice (const RowInfo& rowinfo) const
1380 {
1381 if (rowinfo.allocSize == 0 || gblInds_wdv.extent(0) == 0)
1382 return typename global_inds_dualv_type::t_dev::const_type ();
1383 else
1384 return gblInds_wdv.getDeviceSubview(rowinfo.offset1D,
1385 rowinfo.allocSize,
1386 Access::ReadOnly);
1387 }
1388
1389
1390 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1391 RowInfo
1393 getRowInfo (const LocalOrdinal myRow) const
1394 {
1395 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1396 RowInfo ret;
1397 if (this->rowMap_.is_null () || ! this->rowMap_->isNodeLocalElement (myRow)) {
1398 ret.localRow = STINV;
1399 ret.allocSize = 0;
1400 ret.numEntries = 0;
1401 ret.offset1D = STINV;
1402 return ret;
1403 }
1404
1405 ret.localRow = static_cast<size_t> (myRow);
1406 if (this->indicesAreAllocated ()) {
1407 // Offsets tell us the allocation size in this case.
1408 if (rowPtrsUnpacked_host_.extent (0) == 0) {
1409 ret.offset1D = 0;
1410 ret.allocSize = 0;
1411 }
1412 else {
1413 ret.offset1D = rowPtrsUnpacked_host_(myRow);
1414 ret.allocSize = rowPtrsUnpacked_host_(myRow+1) - rowPtrsUnpacked_host_(myRow);
1415 }
1417 ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1418 ret.allocSize :
1419 this->k_numRowEntries_(myRow);
1420 }
1421 else { // haven't performed allocation yet; probably won't hit this code
1422 // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1423 // allocate, rather than doing lazy allocation at first insert.
1424 // This will make k_numAllocPerRow_ obsolete.
1425 ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1426 this->k_numAllocPerRow_(myRow) : // this is a host View
1427 this->numAllocForAllRows_;
1428 ret.numEntries = 0;
1429 ret.offset1D = STINV;
1430 }
1431
1432 return ret;
1433 }
1434
1435
1436 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1437 RowInfo
1439 getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
1440 {
1441 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1443 if (this->rowMap_.is_null ()) {
1444 ret.localRow = STINV;
1445 ret.allocSize = 0;
1446 ret.numEntries = 0;
1447 ret.offset1D = STINV;
1448 return ret;
1449 }
1450 const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
1451 if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
1452 ret.localRow = STINV;
1453 ret.allocSize = 0;
1454 ret.numEntries = 0;
1455 ret.offset1D = STINV;
1456 return ret;
1457 }
1459 ret.localRow = static_cast<size_t> (myRow);
1460 if (this->indicesAreAllocated ()) {
1461 // graph data structures have the info that we need
1462 //
1463 // if static graph, offsets tell us the allocation size
1464 if (rowPtrsUnpacked_host_.extent (0) == 0) {
1465 ret.offset1D = 0;
1466 ret.allocSize = 0;
1467 }
1468 else {
1469 ret.offset1D = rowPtrsUnpacked_host_(myRow);
1470 ret.allocSize = rowPtrsUnpacked_host_(myRow+1) - rowPtrsUnpacked_host_(myRow);
1471 }
1472
1473 ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1474 ret.allocSize :
1475 this->k_numRowEntries_(myRow);
1476 }
1477 else { // haven't performed allocation yet; probably won't hit this code
1478 // FIXME (mfh 07 Aug 2014) We want graph's constructors to
1479 // allocate, rather than doing lazy allocation at first insert.
1480 // This will make k_numAllocPerRow_ obsolete.
1481 ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
1482 this->k_numAllocPerRow_(myRow) : // this is a host View
1483 this->numAllocForAllRows_;
1484 ret.numEntries = 0;
1485 ret.offset1D = STINV;
1486 }
1487
1488 return ret;
1489 }
1491
1492 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1493 void
1495 staticAssertions () const
1496 {
1497 using Teuchos::OrdinalTraits;
1498 typedef LocalOrdinal LO;
1499 typedef GlobalOrdinal GO;
1500 typedef global_size_t GST;
1501
1502 // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
1503 // This is so that we can store local indices in the memory
1504 // formerly occupied by global indices.
1505 static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
1506 "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
1507 // Assumption: max(size_t) >= max(LocalOrdinal)
1508 // This is so that we can represent any LocalOrdinal as a size_t.
1509 static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
1510 "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
1511 static_assert (sizeof(GST) >= sizeof(size_t),
1512 "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
1513
1514 // FIXME (mfh 30 Sep 2015) We're not using
1515 // Teuchos::CompileTimeAssert any more. Can we do these checks
1516 // with static_assert?
1517
1518 // can't call max() with CompileTimeAssert, because it isn't a
1519 // constant expression; will need to make this a runtime check
1520 const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
1521 "given template arguments: size assumptions are not valid.";
1522 TEUCHOS_TEST_FOR_EXCEPTION(
1523 static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
1524 std::runtime_error, msg);
1525 TEUCHOS_TEST_FOR_EXCEPTION(
1526 static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
1527 std::runtime_error, msg);
1528 TEUCHOS_TEST_FOR_EXCEPTION(
1529 static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
1530 std::runtime_error, msg);
1531 TEUCHOS_TEST_FOR_EXCEPTION(
1532 Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
1533 std::runtime_error, msg);
1534 }
1535
1536
1537 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1538 size_t
1540 insertIndices (RowInfo& rowinfo,
1541 const SLocalGlobalViews &newInds,
1542 const ELocalGlobal lg,
1543 const ELocalGlobal I)
1544 {
1545 using Teuchos::ArrayView;
1546 typedef LocalOrdinal LO;
1547 typedef GlobalOrdinal GO;
1548 const char tfecfFuncName[] = "insertIndices: ";
1549
1550 size_t oldNumEnt = 0;
1551 if (debug_) {
1552 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1553 (lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
1554 "lg must be either GlobalIndices or LocalIndices.");
1555 oldNumEnt = this->getNumEntriesInLocalRow (rowinfo.localRow);
1556 }
1557
1558 size_t numNewInds = 0;
1559 if (lg == GlobalIndices) { // input indices are global
1560 ArrayView<const GO> new_ginds = newInds.ginds;
1561 numNewInds = new_ginds.size();
1562 if (I == GlobalIndices) { // store global indices
1563 auto gind_view = gblInds_wdv.getHostView(Access::ReadWrite);
1564 if (debug_) {
1565 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1566 (static_cast<size_t> (gind_view.size ()) <
1567 rowinfo.numEntries + numNewInds, std::logic_error,
1568 "gind_view.size() = " << gind_view.size ()
1569 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1570 << ") + numNewInds (= " << numNewInds << ").");
1571 }
1572 GO* const gblColInds_out = gind_view.data () + rowinfo.offset1D
1573 + rowinfo.numEntries;
1574 for (size_t k = 0; k < numNewInds; ++k) {
1575 gblColInds_out[k] = new_ginds[k];
1576 }
1577 }
1578 else if (I == LocalIndices) { // store local indices
1579 auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1580 if (debug_) {
1581 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1582 (static_cast<size_t> (lind_view.size ()) <
1583 rowinfo.numEntries + numNewInds, std::logic_error,
1584 "lind_view.size() = " << lind_view.size ()
1585 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1586 << ") + numNewInds (= " << numNewInds << ").");
1587 }
1588 LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1589 + rowinfo.numEntries;
1590 for (size_t k = 0; k < numNewInds; ++k) {
1591 lclColInds_out[k] = colMap_->getLocalElement (new_ginds[k]);
1592 }
1593 }
1594 }
1595 else if (lg == LocalIndices) { // input indices are local
1596 ArrayView<const LO> new_linds = newInds.linds;
1597 numNewInds = new_linds.size();
1598 if (I == LocalIndices) { // store local indices
1599 auto lind_view = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1600 if (debug_) {
1601 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1602 (static_cast<size_t> (lind_view.size ()) <
1603 rowinfo.numEntries + numNewInds, std::logic_error,
1604 "lind_view.size() = " << lind_view.size ()
1605 << " < rowinfo.numEntries (= " << rowinfo.numEntries
1606 << ") + numNewInds (= " << numNewInds << ").");
1607 }
1608 LO* const lclColInds_out = lind_view.data () + rowinfo.offset1D
1609 + rowinfo.numEntries;
1610 for (size_t k = 0; k < numNewInds; ++k) {
1611 lclColInds_out[k] = new_linds[k];
1612 }
1613 }
1614 else if (I == GlobalIndices) {
1615 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1616 (true, std::logic_error, "The case where the input indices are local "
1617 "and the indices to write are global (lg=LocalIndices, I="
1618 "GlobalIndices) is not implemented, because it does not make sense."
1619 << std::endl << "If you have correct local column indices, that "
1620 "means the graph has a column Map. In that case, you should be "
1621 "storing local indices.");
1622 }
1623 }
1624
1625 rowinfo.numEntries += numNewInds;
1626 this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
1627 this->setLocallyModified ();
1629 if (debug_) {
1630 const size_t chkNewNumEnt =
1631 this->getNumEntriesInLocalRow (rowinfo.localRow);
1632 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1633 (chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
1634 "chkNewNumEnt = " << chkNewNumEnt
1635 << " != oldNumEnt (= " << oldNumEnt
1636 << ") + numNewInds (= " << numNewInds << ").");
1637 }
1638
1639 return numNewInds;
1640 }
1641
1642 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1643 size_t
1645 insertGlobalIndicesImpl (const LocalOrdinal lclRow,
1646 const GlobalOrdinal inputGblColInds[],
1647 const size_t numInputInds)
1648 {
1649 return this->insertGlobalIndicesImpl (this->getRowInfo (lclRow),
1650 inputGblColInds, numInputInds);
1651 }
1653 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1654 size_t
1656 insertGlobalIndicesImpl (const RowInfo& rowInfo,
1657 const GlobalOrdinal inputGblColInds[],
1658 const size_t numInputInds,
1659 std::function<void(const size_t, const size_t, const size_t)> fun)
1660 {
1662 using Kokkos::View;
1663 using Kokkos::subview;
1664 using Kokkos::MemoryUnmanaged;
1665 using Teuchos::ArrayView;
1666 using LO = LocalOrdinal;
1667 using GO = GlobalOrdinal;
1668 const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
1669 const LO lclRow = static_cast<LO> (rowInfo.localRow);
1670
1671 auto numEntries = rowInfo.numEntries;
1672 using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1673 inp_view_type inputInds(inputGblColInds, numInputInds);
1674 size_t numInserted;
1675 {
1676 auto gblIndsHostView = this->gblInds_wdv.getHostView(Access::ReadWrite);
1677 numInserted = Details::insertCrsIndices(lclRow, this->rowPtrsUnpacked_host_,
1678 gblIndsHostView,
1679 numEntries, inputInds, fun);
1680 }
1681
1682 const bool insertFailed =
1683 numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1684 if(insertFailed) {
1685 constexpr size_t ONE (1);
1686 const int myRank = this->getComm()->getRank();
1687 std::ostringstream os;
1688
1689 os << "Proc " << myRank << ": Not enough capacity to insert "
1690 << numInputInds
1691 << " ind" << (numInputInds != ONE ? "ices" : "ex")
1692 << " into local row " << lclRow << ", which currently has "
1693 << rowInfo.numEntries
1694 << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1695 << " and total allocation size " << rowInfo.allocSize
1696 << ". ";
1697 const size_t maxNumToPrint =
1699 ArrayView<const GO> inputGblColIndsView(inputGblColInds,
1700 numInputInds);
1701 verbosePrintArray(os, inputGblColIndsView, "Input global "
1702 "column indices", maxNumToPrint);
1703 os << ", ";
1704 auto curGblColInds = getGlobalIndsViewHost(rowInfo);
1705 ArrayView<const GO> curGblColIndsView(curGblColInds.data(),
1706 rowInfo.numEntries);
1707 verbosePrintArray(os, curGblColIndsView, "Current global "
1708 "column indices", maxNumToPrint);
1709 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1710 (true, std::runtime_error, os.str());
1711 }
1712
1713 this->k_numRowEntries_(lclRow) += numInserted;
1714
1715 this->setLocallyModified();
1716 return numInserted;
1717 }
1718
1719
1720 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1721 void
1722 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1723 insertLocalIndicesImpl (const LocalOrdinal myRow,
1724 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1725 std::function<void(const size_t, const size_t, const size_t)> fun)
1726 {
1727 using Kokkos::MemoryUnmanaged;
1728 using Kokkos::subview;
1729 using Kokkos::View;
1730 using LO = LocalOrdinal;
1731 const char tfecfFuncName[] = "insertLocallIndicesImpl: ";
1732
1733 const RowInfo rowInfo = this->getRowInfo(myRow);
1734
1735 size_t numNewInds = 0;
1736 size_t newNumEntries = 0;
1737
1738 auto numEntries = rowInfo.numEntries;
1739 // Note: Teuchos::ArrayViews are in HostSpace
1740 using inp_view_type = View<const LO*, Kokkos::HostSpace, MemoryUnmanaged>;
1741 inp_view_type inputInds(indices.getRawPtr(), indices.size());
1742 size_t numInserted = 0;
1743 {
1744 auto lclInds = lclIndsUnpacked_wdv.getHostView(Access::ReadWrite);
1745 numInserted = Details::insertCrsIndices(myRow, rowPtrsUnpacked_host_, lclInds,
1746 numEntries, inputInds, fun);
1747 }
1748
1749 const bool insertFailed =
1750 numInserted == Teuchos::OrdinalTraits<size_t>::invalid();
1751 if(insertFailed) {
1752 constexpr size_t ONE (1);
1753 const size_t numInputInds(indices.size());
1754 const int myRank = this->getComm()->getRank();
1755 std::ostringstream os;
1756 os << "On MPI Process " << myRank << ": Not enough capacity to "
1757 "insert " << numInputInds
1758 << " ind" << (numInputInds != ONE ? "ices" : "ex")
1759 << " into local row " << myRow << ", which currently has "
1760 << rowInfo.numEntries
1761 << " entr" << (rowInfo.numEntries != ONE ? "ies" : "y")
1762 << " and total allocation size " << rowInfo.allocSize << ".";
1763 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1764 (true, std::runtime_error, os.str());
1765 }
1766 numNewInds = numInserted;
1767 newNumEntries = rowInfo.numEntries + numNewInds;
1768
1769 this->k_numRowEntries_(myRow) += numNewInds;
1770 this->setLocallyModified ();
1771
1772 if (debug_) {
1773 const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (myRow);
1774 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1775 (chkNewNumEntries != newNumEntries, std::logic_error,
1776 "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
1777 << " != newNumEntries = " << newNumEntries
1778 << ". Please report this bug to the Tpetra developers.");
1779 }
1780 }
1781
1782 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1783 size_t
1785 findGlobalIndices(const RowInfo& rowInfo,
1786 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1787 std::function<void(const size_t, const size_t, const size_t)> fun) const
1788 {
1789 using GO = GlobalOrdinal;
1790 using Kokkos::View;
1791 using Kokkos::MemoryUnmanaged;
1792 auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
1793
1794 using inp_view_type = View<const GO*, Kokkos::HostSpace, MemoryUnmanaged>;
1795 inp_view_type inputInds(indices.getRawPtr(), indices.size());
1796
1797 size_t numFound = 0;
1798 LocalOrdinal lclRow = rowInfo.localRow;
1799 if (this->isLocallyIndexed())
1800 {
1801 if (this->colMap_.is_null())
1802 return invalidCount;
1803 const auto& colMap = *(this->colMap_);
1804 auto map = [&](GO const gblInd){return colMap.getLocalElement(gblInd);};
1805 numFound = Details::findCrsIndices(lclRow, rowPtrsUnpacked_host_,
1806 rowInfo.numEntries,
1807 lclIndsUnpacked_wdv.getHostView(Access::ReadOnly), inputInds, map, fun);
1808 }
1809 else if (this->isGloballyIndexed())
1810 {
1811 numFound = Details::findCrsIndices(lclRow, rowPtrsUnpacked_host_,
1812 rowInfo.numEntries,
1813 gblInds_wdv.getHostView(Access::ReadOnly), inputInds, fun);
1814 }
1815 return numFound;
1817
1818
1819 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1820 size_t
1822 sortAndMergeRowIndices (const RowInfo& rowInfo,
1823 const bool sorted,
1824 const bool merged)
1825 {
1826 const size_t origNumEnt = rowInfo.numEntries;
1827 if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid () &&
1828 origNumEnt != 0) {
1829 auto lclColInds = this->getLocalIndsViewHostNonConst (rowInfo);
1830
1831 LocalOrdinal* const lclColIndsRaw = lclColInds.data ();
1832 if (! sorted) {
1833 std::sort (lclColIndsRaw, lclColIndsRaw + origNumEnt);
1834 }
1835
1836 if (! merged) {
1837 LocalOrdinal* const beg = lclColIndsRaw;
1838 LocalOrdinal* const end = beg + rowInfo.numEntries;
1839 LocalOrdinal* const newend = std::unique (beg, end);
1840 const size_t newNumEnt = newend - beg;
1841
1842 // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
1843 this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
1844 return origNumEnt - newNumEnt; // the number of duplicates in the row
1846 else {
1847 return static_cast<size_t> (0); // assume no duplicates
1848 }
1849 }
1850 else {
1851 return static_cast<size_t> (0); // no entries in the row
1852 }
1853 }
1854
1855
1856 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1857 void
1859 setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
1860 const Teuchos::RCP<const map_type>& rangeMap)
1861 {
1862 // simple pointer comparison for equality
1863 if (domainMap_ != domainMap) {
1864 domainMap_ = domainMap;
1865 importer_ = Teuchos::null;
1866 }
1867 if (rangeMap_ != rangeMap) {
1868 rangeMap_ = rangeMap;
1869 exporter_ = Teuchos::null;
1870 }
1871 }
1872
1874 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1875 void
1878 {
1879 const auto INV = Teuchos::OrdinalTraits<global_size_t>::invalid();
1880
1881 globalNumEntries_ = INV;
1882 globalMaxNumRowEntries_ = INV;
1883 haveGlobalConstants_ = false;
1884 }
1885
1886
1887 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1888 void
1890 checkInternalState () const
1891 {
1892 if (debug_) {
1893 using std::endl;
1894 const char tfecfFuncName[] = "checkInternalState: ";
1895 const char suffix[] = " Please report this bug to the Tpetra developers.";
1896
1897 std::unique_ptr<std::string> prefix;
1898 if (verbose_) {
1899 prefix = this->createPrefix("CrsGraph", "checkInternalState");
1900 std::ostringstream os;
1901 os << *prefix << "Start" << endl;
1902 std::cerr << os.str();
1903 }
1904
1905 const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
1906 //const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
1907 // check the internal state of this data structure
1908 // this is called by numerous state-changing methods, in a debug build, to ensure that the object
1909 // always remains in a valid state
1910
1911 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1912 (this->rowMap_.is_null (), std::logic_error,
1913 "Row Map is null." << suffix);
1914 // This may access the row Map, so we need to check first (above)
1915 // whether the row Map is null.
1916 const LocalOrdinal lclNumRows =
1917 static_cast<LocalOrdinal> (this->getLocalNumRows ());
1918
1919 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1920 (this->isFillActive () == this->isFillComplete (), std::logic_error,
1921 "Graph cannot be both fill active and fill complete." << suffix);
1922 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1923 (this->isFillComplete () &&
1924 (this->colMap_.is_null () ||
1925 this->rangeMap_.is_null () ||
1926 this->domainMap_.is_null ()),
1927 std::logic_error,
1928 "Graph is full complete, but at least one of {column, range, domain} "
1929 "Map is null." << suffix);
1930 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1931 (this->isStorageOptimized () && ! this->indicesAreAllocated (),
1932 std::logic_error, "Storage is optimized, but indices are not "
1933 "allocated, not even trivially." << suffix);
1934
1935 size_t nodeAllocSize = 0;
1936 try {
1937 nodeAllocSize = this->getLocalAllocationSize ();
1938 }
1939 catch (std::logic_error& e) {
1940 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1941 (true, std::runtime_error, "getLocalAllocationSize threw "
1942 "std::logic_error: " << e.what ());
1943 }
1944 catch (std::exception& e) {
1945 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1946 (true, std::runtime_error, "getLocalAllocationSize threw an "
1947 "std::exception: " << e.what ());
1948 }
1949 catch (...) {
1950 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1951 (true, std::runtime_error, "getLocalAllocationSize threw an exception "
1952 "not a subclass of std::exception.");
1954
1955 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1956 (this->isStorageOptimized () &&
1957 nodeAllocSize != this->getLocalNumEntries (),
1958 std::logic_error, "Storage is optimized, but "
1959 "this->getLocalAllocationSize() = " << nodeAllocSize
1960 << " != this->getLocalNumEntries() = " << this->getLocalNumEntries ()
1961 << "." << suffix);
1962 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1963 (! this->haveGlobalConstants_ &&
1964 (this->globalNumEntries_ != GSTI ||
1965 this->globalMaxNumRowEntries_ != GSTI),
1966 std::logic_error, "Graph claims not to have global constants, but "
1967 "some of the global constants are not marked as invalid." << suffix);
1968 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1969 (this->haveGlobalConstants_ &&
1970 (this->globalNumEntries_ == GSTI ||
1971 this->globalMaxNumRowEntries_ == GSTI),
1972 std::logic_error, "Graph claims to have global constants, but "
1973 "some of them are marked as invalid." << suffix);
1974 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1975 (this->haveGlobalConstants_ &&
1976 (this->globalNumEntries_ < this->getLocalNumEntries () ||
1977 this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
1978 std::logic_error, "Graph claims to have global constants, and "
1979 "all of the values of the global constants are valid, but "
1980 "some of the local constants are greater than "
1981 "their corresponding global constants." << suffix);
1982 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1983 (this->indicesAreAllocated () &&
1984 (this->numAllocForAllRows_ != 0 ||
1985 this->k_numAllocPerRow_.extent (0) != 0),
1986 std::logic_error, "The graph claims that its indices are allocated, but "
1987 "either numAllocForAllRows_ (= " << this->numAllocForAllRows_ << ") is "
1988 "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
1989 "the graph is supposed to release its \"allocation specifications\" "
1990 "when it allocates its indices." << suffix);
1991 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1992 (rowPtrsUnpacked_host_.extent(0) != rowPtrsUnpacked_dev_.extent(0),
1993 std::logic_error, "The host and device views of k_rowPtrs_ have "
1994 "different sizes; rowPtrsUnpacked_host_ has size "
1995 << rowPtrsUnpacked_host_.extent(0)
1996 << ", but rowPtrsUnpacked_dev_ has size "
1997 << rowPtrsUnpacked_dev_.extent(0)
1998 << "." << suffix);
1999 if (isGloballyIndexed() && rowPtrsUnpacked_host_.extent(0) != 0) {
2000 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2001 (size_t(rowPtrsUnpacked_host_.extent(0)) != size_t(lclNumRows + 1),
2002 std::logic_error, "The graph is globally indexed and "
2003 "k_rowPtrs has nonzero size " << rowPtrsUnpacked_host_.extent(0)
2004 << ", but that size does not equal lclNumRows+1 = "
2005 << (lclNumRows+1) << "." << suffix);
2006 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2007 (rowPtrsUnpacked_host_(lclNumRows) != size_t(gblInds_wdv.extent(0)),
2008 std::logic_error, "The graph is globally indexed and "
2009 "k_rowPtrs_ has nonzero size " << rowPtrsUnpacked_host_.extent(0)
2010 << ", but k_rowPtrs_(lclNumRows=" << lclNumRows << ")="
2011 << rowPtrsUnpacked_host_(lclNumRows)
2012 << " != gblInds_wdv.extent(0)="
2013 << gblInds_wdv.extent(0) << "." << suffix);
2014 }
2015 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2016 (this->isLocallyIndexed () &&
2017 this->rowPtrsUnpacked_host_.extent (0) != 0 &&
2018 (static_cast<size_t> (rowPtrsUnpacked_host_.extent (0)) !=
2019 static_cast<size_t> (lclNumRows + 1) ||
2020 this->rowPtrsUnpacked_host_(lclNumRows) !=
2021 static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0))),
2022 std::logic_error, "If k_rowPtrs_ has nonzero size and "
2023 "the graph is locally indexed, then "
2024 "k_rowPtrs_ must have N+1 rows, and "
2025 "k_rowPtrs_(N) must equal lclIndsUnpacked_wdv.extent(0)." << suffix);
2026
2027 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2028 (this->indicesAreAllocated () &&
2029 nodeAllocSize > 0 &&
2030 this->lclIndsUnpacked_wdv.extent (0) == 0 &&
2031 this->gblInds_wdv.extent (0) == 0,
2032 std::logic_error, "Graph is allocated nontrivially, but "
2033 "but 1-D allocations are not present." << suffix);
2034
2035 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2036 (! this->indicesAreAllocated () &&
2037 ((this->rowPtrsUnpacked_host_.extent (0) != 0 ||
2038 this->k_numRowEntries_.extent (0) != 0) ||
2039 this->lclIndsUnpacked_wdv.extent (0) != 0 ||
2040 this->gblInds_wdv.extent (0) != 0),
2041 std::logic_error, "If indices are not allocated, "
2042 "then none of the buffers should be." << suffix);
2043 // indices may be local or global only if they are allocated
2044 // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2045 // indicesAreGlobal_)
2046 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2047 ((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
2048 ! this->indicesAreAllocated_,
2049 std::logic_error, "Indices may be local or global only if they are "
2050 "allocated." << suffix);
2051 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2052 (this->indicesAreLocal_ && this->indicesAreGlobal_,
2053 std::logic_error, "Indices may not be both local and global." << suffix);
2054 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2055 (indicesAreLocal_ && gblInds_wdv.extent (0) != 0,
2056 std::logic_error, "Indices are local, but "
2057 "gblInds_wdv.extent(0) (= " << gblInds_wdv.extent (0)
2058 << ") != 0. In other words, if indices are local, then "
2059 "allocations of global indices should not be present."
2060 << suffix);
2061 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2062 (indicesAreGlobal_ && lclIndsUnpacked_wdv.extent (0) != 0,
2063 std::logic_error, "Indices are global, but "
2064 "lclIndsUnpacked_wdv.extent(0) (= " << lclIndsUnpacked_wdv.extent(0)
2065 << ") != 0. In other words, if indices are global, "
2066 "then allocations for local indices should not be present."
2067 << suffix);
2068 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2069 (indicesAreLocal_ && nodeAllocSize > 0 &&
2070 lclIndsUnpacked_wdv.extent (0) == 0 && getLocalNumRows () > 0,
2071 std::logic_error, "Indices are local and "
2072 "getLocalAllocationSize() = " << nodeAllocSize << " > 0, but "
2073 "lclIndsUnpacked_wdv.extent(0) = 0 and getLocalNumRows() = "
2074 << getLocalNumRows () << " > 0." << suffix);
2075 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2076 (indicesAreGlobal_ && nodeAllocSize > 0 &&
2077 gblInds_wdv.extent (0) == 0 && getLocalNumRows () > 0,
2078 std::logic_error, "Indices are global and "
2079 "getLocalAllocationSize() = " << nodeAllocSize << " > 0, but "
2080 "gblInds_wdv.extent(0) = 0 and getLocalNumRows() = "
2081 << getLocalNumRows () << " > 0." << suffix);
2082 // check the actual allocations
2083 if (this->indicesAreAllocated () &&
2084 this->rowPtrsUnpacked_host_.extent (0) != 0) {
2085 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2086 (static_cast<size_t> (this->rowPtrsUnpacked_host_.extent (0)) !=
2087 this->getLocalNumRows () + 1,
2088 std::logic_error, "Indices are allocated and "
2089 "k_rowPtrs_ has nonzero length, but rowPtrsUnpacked_host_.extent(0) = "
2090 << this->rowPtrsUnpacked_host_.extent (0) << " != getLocalNumRows()+1 = "
2091 << (this->getLocalNumRows () + 1) << "." << suffix);
2092 const size_t actualNumAllocated =
2093 this->rowPtrsUnpacked_host_(this->getLocalNumRows());
2094 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2095 (this->isLocallyIndexed () &&
2096 static_cast<size_t> (this->lclIndsUnpacked_wdv.extent (0)) != actualNumAllocated,
2097 std::logic_error, "Graph is locally indexed, indices are "
2098 "are allocated, and k_rowPtrs_ has nonzero length, but "
2099 "lclIndsUnpacked_wdv.extent(0) = " << this->lclIndsUnpacked_wdv.extent (0)
2100 << " != actualNumAllocated = " << actualNumAllocated << suffix);
2101 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2102 (this->isGloballyIndexed () &&
2103 static_cast<size_t> (this->gblInds_wdv.extent (0)) != actualNumAllocated,
2104 std::logic_error, "Graph is globally indexed, indices "
2105 "are allocated, and k_rowPtrs_ has nonzero length, but "
2106 "gblInds_wdv.extent(0) = " << this->gblInds_wdv.extent (0)
2107 << " != actualNumAllocated = " << actualNumAllocated << suffix);
2108 }
2109
2110 if (verbose_) {
2111 std::ostringstream os;
2112 os << *prefix << "Done" << endl;
2113 std::cerr << os.str();
2114 }
2115 }
2116 }
2117
2118
2119 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2120 size_t
2122 getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2123 {
2124 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2125 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2126 return Teuchos::OrdinalTraits<size_t>::invalid ();
2127 }
2128 else {
2129 return rowInfo.numEntries;
2130 }
2131 }
2132
2133
2134 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2135 size_t
2137 getNumEntriesInLocalRow (LocalOrdinal localRow) const
2138 {
2139 const RowInfo rowInfo = this->getRowInfo (localRow);
2140 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2141 return Teuchos::OrdinalTraits<size_t>::invalid ();
2142 }
2143 else {
2144 return rowInfo.numEntries;
2145 }
2146 }
2147
2148
2149 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2150 size_t
2152 getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2153 {
2154 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2155 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2156 return Teuchos::OrdinalTraits<size_t>::invalid ();
2157 }
2158 else {
2159 return rowInfo.allocSize;
2160 }
2161 }
2162
2163
2164 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2165 size_t
2167 getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2168 {
2169 const RowInfo rowInfo = this->getRowInfo (localRow);
2170 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2171 return Teuchos::OrdinalTraits<size_t>::invalid ();
2172 }
2173 else {
2174 return rowInfo.allocSize;
2175 }
2176 }
2177
2178
2179 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2180 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_host_view_type
2182 getLocalRowPtrsHost () const
2183 {
2184 return rowPtrsPacked_host_;
2185 }
2186
2187 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2188 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::row_ptrs_device_view_type
2191 {
2192 return rowPtrsPacked_dev_;
2193 }
2194
2195
2196 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2197 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::local_inds_host_view_type
2199 getLocalIndicesHost () const
2200 {
2201 return lclIndsPacked_wdv.getHostView(Access::ReadOnly);
2202 }
2203
2204 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2209 return lclIndsPacked_wdv.getDeviceView(Access::ReadOnly);
2210 }
2211
2212 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2213 void
2215 getLocalRowCopy (LocalOrdinal localRow,
2216 nonconst_local_inds_host_view_type & indices,
2217 size_t& numEntries) const
2218 {
2219 using Teuchos::ArrayView;
2220 const char tfecfFuncName[] = "getLocalRowCopy: ";
2221
2222 TEUCHOS_TEST_FOR_EXCEPTION(
2223 isGloballyIndexed () && ! hasColMap (), std::runtime_error,
2224 "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
2225 "does not have a column Map yet. That means we don't have local indices "
2226 "for columns yet, so it doesn't make sense to call this method. If the "
2227 "graph doesn't have a column Map yet, you should call fillComplete on "
2228 "it first.");
2229
2230 // This does the right thing (reports an empty row) if the input
2231 // row is invalid.
2232 const RowInfo rowinfo = this->getRowInfo (localRow);
2233 // No side effects on error.
2234 const size_t theNumEntries = rowinfo.numEntries;
2235 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2236 (static_cast<size_t> (indices.size ()) < theNumEntries,std::runtime_error,
2237 "Specified storage (size==" << indices.size () << ") does not suffice "
2238 "to hold all " << theNumEntries << " entry/ies for this row.");
2239 numEntries = theNumEntries;
2240
2241 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2242 if (isLocallyIndexed ()) {
2243 auto lclInds = getLocalIndsViewHost(rowinfo);
2244 for (size_t j = 0; j < theNumEntries; ++j) {
2245 indices[j] = lclInds(j);
2246 }
2247 }
2248 else if (isGloballyIndexed ()) {
2249 auto gblInds = getGlobalIndsViewHost(rowinfo);
2250 for (size_t j = 0; j < theNumEntries; ++j) {
2251 indices[j] = colMap_->getLocalElement (gblInds(j));
2252 }
2253 }
2254 }
2255 }
2256
2257
2258 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2259 void
2261 getGlobalRowCopy (GlobalOrdinal globalRow,
2262 nonconst_global_inds_host_view_type &indices,
2263 size_t& numEntries) const
2264 {
2265 using Teuchos::ArrayView;
2266 const char tfecfFuncName[] = "getGlobalRowCopy: ";
2267
2268 // This does the right thing (reports an empty row) if the input
2269 // row is invalid.
2270 const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
2271 const size_t theNumEntries = rowinfo.numEntries;
2272 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2273 static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
2274 "Specified storage (size==" << indices.size () << ") does not suffice "
2275 "to hold all " << theNumEntries << " entry/ies for this row.");
2276 numEntries = theNumEntries; // first side effect
2277
2278 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
2279 if (isLocallyIndexed ()) {
2280 auto lclInds = getLocalIndsViewHost(rowinfo);
2281 for (size_t j = 0; j < theNumEntries; ++j) {
2282 indices[j] = colMap_->getGlobalElement (lclInds(j));
2283 }
2284 }
2285 else if (isGloballyIndexed ()) {
2286 auto gblInds = getGlobalIndsViewHost(rowinfo);
2287 for (size_t j = 0; j < theNumEntries; ++j) {
2288 indices[j] = gblInds(j);
2289 }
2290 }
2291 }
2292 }
2293
2294
2295 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2296 void
2299 const LocalOrdinal localRow,
2300 local_inds_host_view_type &indices) const
2301 {
2302 const char tfecfFuncName[] = "getLocalRowView: ";
2303
2304 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2305 (isGloballyIndexed (), std::runtime_error, "The graph's indices are "
2306 "currently stored as global indices, so we cannot return a view with "
2307 "local column indices, whether or not the graph has a column Map. If "
2308 "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
2309
2310 const RowInfo rowInfo = getRowInfo (localRow);
2311 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2312 rowInfo.numEntries > 0) {
2313 indices = lclIndsUnpacked_wdv.getHostSubview(rowInfo.offset1D,
2314 rowInfo.numEntries,
2315 Access::ReadOnly);
2316 }
2317 else {
2318 // This does the right thing (reports an empty row) if the input
2319 // row is invalid.
2320 indices = local_inds_host_view_type();
2321 }
2322
2323 if (debug_) {
2324 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2325 (static_cast<size_t> (indices.size ()) !=
2326 getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
2327 "= " << indices.extent(0) << " != getNumEntriesInLocalRow(localRow=" <<
2328 localRow << ") = " << getNumEntriesInLocalRow(localRow) <<
2329 ". Please report this bug to the Tpetra developers.");
2330 }
2331 }
2332
2333
2334 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2335 void
2338 const GlobalOrdinal globalRow,
2339 global_inds_host_view_type &indices) const
2340 {
2341 const char tfecfFuncName[] = "getGlobalRowView: ";
2342
2343 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2344 (isLocallyIndexed (), std::runtime_error, "The graph's indices are "
2345 "currently stored as local indices, so we cannot return a view with "
2346 "global column indices. Use getGlobalRowCopy() instead.");
2347
2348 // This does the right thing (reports an empty row) if the input
2349 // row is invalid.
2350 const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
2351 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
2352 rowInfo.numEntries > 0) {
2353 indices = gblInds_wdv.getHostSubview(rowInfo.offset1D,
2354 rowInfo.numEntries,
2355 Access::ReadOnly);
2356 }
2357 else {
2358 indices = typename global_inds_dualv_type::t_host::const_type();
2359 }
2360 if (debug_) {
2361 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2362 (static_cast<size_t> (indices.size ()) !=
2363 getNumEntriesInGlobalRow (globalRow),
2364 std::logic_error, "indices.size() = " << indices.extent(0)
2365 << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
2366 << getNumEntriesInGlobalRow (globalRow)
2367 << ". Please report this bug to the Tpetra developers.");
2368 }
2369 }
2370
2371
2372 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2373 void
2375 insertLocalIndices (const LocalOrdinal localRow,
2376 const Teuchos::ArrayView<const LocalOrdinal>& indices)
2377 {
2378 const char tfecfFuncName[] = "insertLocalIndices: ";
2379
2380 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2381 (! isFillActive (), std::runtime_error, "Fill must be active.");
2382 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2383 (isGloballyIndexed (), std::runtime_error,
2384 "Graph indices are global; use insertGlobalIndices().");
2385 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2386 (! hasColMap (), std::runtime_error,
2387 "Cannot insert local indices without a column Map.");
2388 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2389 (! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
2390 "Local row index " << localRow << " is not in the row Map "
2391 "on the calling process.");
2392 if (! indicesAreAllocated ()) {
2393 allocateIndices (LocalIndices, verbose_);
2394 }
2395
2396 if (debug_) {
2397 // In debug mode, if the graph has a column Map, test whether any
2398 // of the given column indices are not in the column Map. Keep
2399 // track of the invalid column indices so we can tell the user
2400 // about them.
2401 if (hasColMap ()) {
2402 using Teuchos::Array;
2403 using Teuchos::toString;
2404 using std::endl;
2405 typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
2406
2407 const map_type& colMap = *colMap_;
2408 Array<LocalOrdinal> badColInds;
2409 bool allInColMap = true;
2410 for (size_type k = 0; k < indices.size (); ++k) {
2411 if (! colMap.isNodeLocalElement (indices[k])) {
2412 allInColMap = false;
2413 badColInds.push_back (indices[k]);
2414 }
2415 }
2416 if (! allInColMap) {
2417 std::ostringstream os;
2418 os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
2419 "entries in owned row " << localRow << ", at the following column "
2420 "indices: " << toString (indices) << "." << endl;
2421 os << "Of those, the following indices are not in the column Map on "
2422 "this process: " << toString (badColInds) << "." << endl << "Since "
2423 "the graph has a column Map already, it is invalid to insert entries "
2424 "at those locations.";
2425 TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
2426 }
2427 }
2428 }
2429
2430 insertLocalIndicesImpl (localRow, indices);
2431
2432 if (debug_) {
2433 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2434 (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error,
2435 "At the end of insertLocalIndices, ! indicesAreAllocated() || "
2436 "! isLocallyIndexed() is true. Please report this bug to the "
2437 "Tpetra developers.");
2438 }
2439 }
2440
2441 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2442 void
2444 insertLocalIndices (const LocalOrdinal localRow,
2445 const LocalOrdinal numEnt,
2446 const LocalOrdinal inds[])
2447 {
2448 Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
2449 this->insertLocalIndices (localRow, indsT);
2450 }
2451
2452
2453 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2454 void
2456 insertGlobalIndices (const GlobalOrdinal gblRow,
2457 const LocalOrdinal numInputInds,
2458 const GlobalOrdinal inputGblColInds[])
2459 {
2460 typedef LocalOrdinal LO;
2461 const char tfecfFuncName[] = "insertGlobalIndices: ";
2462
2463 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2464 (this->isLocallyIndexed (), std::runtime_error,
2465 "graph indices are local; use insertLocalIndices().");
2466 // This can't really be satisfied for now, because if we are
2467 // fillComplete(), then we are local. In the future, this may
2468 // change. However, the rule that modification require active
2469 // fill will not change.
2470 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2471 (! this->isFillActive (), std::runtime_error,
2472 "You are not allowed to call this method if fill is not active. "
2473 "If fillComplete has been called, you must first call resumeFill "
2474 "before you may insert indices.");
2475 if (! indicesAreAllocated ()) {
2476 allocateIndices (GlobalIndices, verbose_);
2477 }
2478 const LO lclRow = this->rowMap_->getLocalElement (gblRow);
2479 if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2480 if (debug_) {
2481 if (this->hasColMap ()) {
2482 using std::endl;
2483 const map_type& colMap = * (this->colMap_);
2484 // In a debug build, keep track of the nonowned ("bad") column
2485 // indices, so that we can display them in the exception
2486 // message. In a release build, just ditch the loop early if
2487 // we encounter a nonowned column index.
2488 std::vector<GlobalOrdinal> badColInds;
2489 bool allInColMap = true;
2490 for (LO k = 0; k < numInputInds; ++k) {
2491 if (! colMap.isNodeGlobalElement (inputGblColInds[k])) {
2492 allInColMap = false;
2493 badColInds.push_back (inputGblColInds[k]);
2494 }
2495 }
2496 if (! allInColMap) {
2497 std::ostringstream os;
2498 os << "You attempted to insert entries in owned row " << gblRow
2499 << ", at the following column indices: [";
2500 for (LO k = 0; k < numInputInds; ++k) {
2501 os << inputGblColInds[k];
2502 if (k + static_cast<LO> (1) < numInputInds) {
2503 os << ",";
2504 }
2505 }
2506 os << "]." << endl << "Of those, the following indices are not in "
2507 "the column Map on this process: [";
2508 for (size_t k = 0; k < badColInds.size (); ++k) {
2509 os << badColInds[k];
2510 if (k + size_t (1) < badColInds.size ()) {
2511 os << ",";
2512 }
2513 }
2514 os << "]." << endl << "Since the matrix has a column Map already, "
2515 "it is invalid to insert entries at those locations.";
2516 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2517 (true, std::invalid_argument, os.str ());
2518 }
2519 }
2520 } // debug_
2521 this->insertGlobalIndicesImpl (lclRow, inputGblColInds, numInputInds);
2522 }
2523 else { // a nonlocal row
2524 this->insertGlobalIndicesIntoNonownedRows (gblRow, inputGblColInds,
2525 numInputInds);
2526 }
2527 }
2528
2529
2530 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2531 void
2533 insertGlobalIndices (const GlobalOrdinal gblRow,
2534 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds)
2535 {
2536 this->insertGlobalIndices (gblRow, inputGblColInds.size (),
2537 inputGblColInds.getRawPtr ());
2538 }
2539
2540
2541 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2542 void
2544 insertGlobalIndicesFiltered (const LocalOrdinal lclRow,
2545 const GlobalOrdinal gblColInds[],
2546 const LocalOrdinal numGblColInds)
2547 {
2548 typedef LocalOrdinal LO;
2549 typedef GlobalOrdinal GO;
2550 const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
2551
2552 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2553 (this->isLocallyIndexed (), std::runtime_error,
2554 "Graph indices are local; use insertLocalIndices().");
2555 // This can't really be satisfied for now, because if we are
2556 // fillComplete(), then we are local. In the future, this may
2557 // change. However, the rule that modification require active
2558 // fill will not change.
2559 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2560 (! this->isFillActive (), std::runtime_error,
2561 "You are not allowed to call this method if fill is not active. "
2562 "If fillComplete has been called, you must first call resumeFill "
2563 "before you may insert indices.");
2564 if (! indicesAreAllocated ()) {
2565 allocateIndices (GlobalIndices, verbose_);
2566 }
2567
2568 Teuchos::ArrayView<const GO> gblColInds_av (gblColInds, numGblColInds);
2569 // If we have a column Map, use it to filter the entries.
2570 if (! colMap_.is_null ()) {
2571 const map_type& colMap = * (this->colMap_);
2572
2573 LO curOffset = 0;
2574 while (curOffset < numGblColInds) {
2575 // Find a sequence of input indices that are in the column Map
2576 // on the calling process. Doing a sequence at a time,
2577 // instead of one at a time, amortizes some overhead.
2578 LO endOffset = curOffset;
2579 for ( ; endOffset < numGblColInds; ++endOffset) {
2580 const LO lclCol = colMap.getLocalElement (gblColInds[endOffset]);
2581 if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
2582 break; // first entry, in current sequence, not in the column Map
2583 }
2584 }
2585 // curOffset, endOffset: half-exclusive range of indices in
2586 // the column Map on the calling process. If endOffset ==
2587 // curOffset, the range is empty.
2588 const LO numIndInSeq = (endOffset - curOffset);
2589 if (numIndInSeq != 0) {
2590 this->insertGlobalIndicesImpl (lclRow, gblColInds + curOffset,
2591 numIndInSeq);
2592 }
2593 // Invariant before this line: Either endOffset ==
2594 // numGblColInds, or gblColInds[endOffset] is not in the
2595 // column Map on the calling process.
2596 curOffset = endOffset + 1;
2597 }
2598 }
2599 else {
2600 this->insertGlobalIndicesImpl (lclRow, gblColInds_av.getRawPtr (),
2601 gblColInds_av.size ());
2602 }
2603 }
2604
2605 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2606 void
2608 insertGlobalIndicesIntoNonownedRows (const GlobalOrdinal gblRow,
2609 const GlobalOrdinal gblColInds[],
2610 const LocalOrdinal numGblColInds)
2611 {
2612 // This creates the std::vector if it doesn't exist yet.
2613 // std::map's operator[] does a lookup each time, so it's better
2614 // to pull nonlocals_[grow] out of the loop.
2615 std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
2616 for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
2617 // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
2618 // order to avoid duplicates. globalAssemble() sorts these
2619 // anyway.
2620 nonlocalRow.push_back (gblColInds[k]);
2621 }
2622 }
2623
2624 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2625 void
2627 removeLocalIndices (LocalOrdinal lrow)
2628 {
2629 const char tfecfFuncName[] = "removeLocalIndices: ";
2630 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2631 ! isFillActive (), std::runtime_error, "requires that fill is active.");
2632 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2633 isStorageOptimized (), std::runtime_error,
2634 "cannot remove indices after optimizeStorage() has been called.");
2635 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2636 isGloballyIndexed (), std::runtime_error, "graph indices are global.");
2637 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2638 ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
2639 "Local row " << lrow << " is not in the row Map on the calling process.");
2640 if (! indicesAreAllocated ()) {
2641 allocateIndices (LocalIndices, verbose_);
2642 }
2643
2644 if (k_numRowEntries_.extent (0) != 0) {
2645 this->k_numRowEntries_(lrow) = 0;
2646 }
2647
2648 if (debug_) {
2649 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2650 (getNumEntriesInLocalRow (lrow) != 0 ||
2651 ! indicesAreAllocated () ||
2652 ! isLocallyIndexed (), std::logic_error,
2653 "Violated stated post-conditions. Please contact Tpetra team.");
2654 }
2655 }
2656
2657
2658 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2659 void
2661 setAllIndices (const typename local_graph_device_type::row_map_type& rowPointers,
2662 const typename local_graph_device_type::entries_type::non_const_type& columnIndices)
2663 {
2664 using ProfilingRegion=Details::ProfilingRegion;
2665 ProfilingRegion region ("Tpetra::CrsGraph::setAllIndices");
2666 const char tfecfFuncName[] = "setAllIndices: ";
2667 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2668 ! hasColMap () || getColMap ().is_null (), std::runtime_error,
2669 "The graph must have a column Map before you may call this method.");
2670 LocalOrdinal numLocalRows = this->getLocalNumRows ();
2671 {
2672 LocalOrdinal rowPtrLen = rowPointers.size();
2673 if(numLocalRows == 0) {
2674 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2675 rowPtrLen != 0 && rowPtrLen != 1,
2676 std::runtime_error, "Have 0 local rows, but rowPointers.size() is neither 0 nor 1.");
2677 }
2678 else {
2679 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2680 rowPtrLen != numLocalRows + 1,
2681 std::runtime_error, "rowPointers.size() = " << rowPtrLen <<
2682 " != this->getLocalNumRows()+1 = " << (numLocalRows + 1) << ".");
2683 }
2684 }
2685
2686 if(debug_) {
2687 using exec_space = typename local_graph_device_type::execution_space;
2688 int columnsOutOfBounds = 0;
2689 local_ordinal_type numLocalCols = this->getLocalNumCols();
2690 Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, columnIndices.extent(0)),
2691 KOKKOS_LAMBDA (const LocalOrdinal i, int& lOutOfBounds)
2692 {
2693 if(columnIndices(i) < 0 || columnIndices(i) >= numLocalCols)
2694 lOutOfBounds++;
2695 }, columnsOutOfBounds);
2696 int globalColsOutOfBounds= 0;
2697 auto comm = this->getComm();
2698 Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, columnsOutOfBounds,
2699 Teuchos::outArg (globalColsOutOfBounds));
2700 if (globalColsOutOfBounds)
2701 {
2702 std::string message;
2703 if (columnsOutOfBounds)
2704 {
2705 //Only print message from ranks with the problem
2706 message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not all within range [0, getLocalNumCols())!\n";
2707 }
2708 Details::gathervPrint(std::cout, message, *comm);
2709 throw std::invalid_argument("CrsGraph::setAllIndices(): columnIndices are out of the valid range on at least one process.");
2710 }
2711 }
2712
2713 if (debug_ && this->isSorted()) {
2714 // Verify that the local indices are actually sorted
2715 int notSorted = 0;
2716 using exec_space = typename local_graph_device_type::execution_space;
2717 using size_type = typename local_graph_device_type::size_type;
2718 Kokkos::parallel_reduce(Kokkos::RangePolicy<exec_space>(0, numLocalRows),
2719 KOKKOS_LAMBDA (const LocalOrdinal i, int& lNotSorted)
2720 {
2721 size_type rowBegin = rowPointers(i);
2722 size_type rowEnd = rowPointers(i + 1);
2723 for(size_type j = rowBegin + 1; j < rowEnd; j++)
2724 {
2725 if(columnIndices(j - 1) > columnIndices(j))
2726 {
2727 lNotSorted = 1;
2728 }
2729 }
2730 }, notSorted);
2731 //All-reduce notSorted to avoid rank divergence
2732 int globalNotSorted = 0;
2733 auto comm = this->getComm();
2734 Teuchos::reduceAll<int, int> (*comm, Teuchos::REDUCE_MAX, notSorted,
2735 Teuchos::outArg (globalNotSorted));
2736 if (globalNotSorted)
2737 {
2738 std::string message;
2739 if (notSorted)
2740 {
2741 //Only print message from ranks with the problem
2742 message = std::string("ERROR, rank ") + std::to_string(comm->getRank()) + ", CrsGraph::setAllIndices(): provided columnIndices are not sorted!\n";
2743 }
2744 Details::gathervPrint(std::cout, message, *comm);
2745 throw std::invalid_argument("CrsGraph::setAllIndices(): provided columnIndices are not sorted within rows on at least one process.");
2746 }
2747 }
2748
2749 indicesAreAllocated_ = true;
2750 indicesAreLocal_ = true;
2751 indicesAreSorted_ = true;
2752 noRedundancies_ = true;
2753 lclIndsPacked_wdv= local_inds_wdv_type(columnIndices);
2754 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
2755 setRowPtrs(rowPointers);
2756
2757 set_need_sync_host_uvm_access(); // columnIndices and rowPointers potentially still in a kernel
2758
2759 // Storage MUST be packed, since the interface doesn't give any
2760 // way to indicate any extra space at the end of each row.
2761 storageStatus_ = Details::STORAGE_1D_PACKED;
2762
2763 // These normally get cleared out at the end of allocateIndices.
2764 // It makes sense to clear them out here, because at the end of
2765 // this method, the graph is allocated on the calling process.
2766 numAllocForAllRows_ = 0;
2767 k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
2768
2769 checkInternalState ();
2770 }
2771
2772
2773 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2774 void
2776 setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
2777 const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
2778 {
2779 using Kokkos::View;
2780 typedef typename local_graph_device_type::row_map_type row_map_type;
2781 typedef typename row_map_type::array_layout layout_type;
2782 typedef typename row_map_type::non_const_value_type row_offset_type;
2783 typedef View<size_t*, layout_type , Kokkos::HostSpace,
2784 Kokkos::MemoryUnmanaged> input_view_type;
2785 typedef typename row_map_type::non_const_type nc_row_map_type;
2786
2787 const size_t size = static_cast<size_t> (rowPointers.size ());
2788 constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2789 input_view_type ptr_in (rowPointers.getRawPtr (), size);
2790
2791 nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
2792
2793 // FIXME get rid of the else-clause when the minimum CXX standard required is bumped to C++17
2794#ifdef KOKKOS_ENABLE_CXX17
2795 if constexpr (same) { // size_t == row_offset_type
2796 using lexecution_space = typename device_type::execution_space;
2797 Kokkos::deep_copy (lexecution_space(),
2798 ptr_rot,
2799 ptr_in);
2800 }
2801#else
2802 if (same) { // size_t == row_offset_type
2803 // This compile-time logic ensures that the compiler never sees
2804 // an assignment of View<row_offset_type*, ...> to View<size_t*,
2805 // ...> unless size_t == row_offset_type.
2806 input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
2807 // DEEP_COPY REVIEW - HOST-TO-DEVICE
2808 Kokkos::deep_copy (execution_space(),
2809 Kokkos::Impl::if_c<same,
2810 nc_row_map_type,
2811 input_view_type>::select (ptr_rot, ptr_decoy),
2812 ptr_in);
2813 }
2814#endif
2815 else { // size_t != row_offset_type
2816 // CudaUvmSpace != HostSpace, so this will be false in that case.
2817 constexpr bool inHostMemory =
2818 std::is_same<typename row_map_type::memory_space,
2819 Kokkos::HostSpace>::value;
2820 if (inHostMemory) {
2821 // Copy (with cast from size_t to row_offset_type, with bounds
2822 // checking if necessary) to ptr_rot.
2823 ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
2824 }
2825 else { // Copy input row offsets to device first.
2826 //
2827 // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
2828 // execution space would avoid the double copy.
2829 //
2830 View<size_t*, layout_type, device_type> ptr_st ("Tpetra::CrsGraph::ptr", size);
2831
2832 // DEEP_COPY REVIEW - NOT TESTED
2833 Kokkos::deep_copy (ptr_st, ptr_in);
2834 // Copy on device (casting from size_t to row_offset_type,
2835 // with bounds checking if necessary) to ptr_rot. This
2836 // executes in the output View's execution space, which is the
2837 // same as execution_space.
2838 ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
2839 }
2840 }
2841
2842 Kokkos::View<LocalOrdinal*, layout_type, device_type> k_ind =
2843 Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
2844 setAllIndices (ptr_rot, k_ind);
2845 }
2846
2847
2848 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2849 void
2852 {
2853 using Teuchos::Comm;
2854 using Teuchos::outArg;
2855 using Teuchos::RCP;
2856 using Teuchos::rcp;
2857 using Teuchos::REDUCE_MAX;
2858 using Teuchos::REDUCE_MIN;
2859 using Teuchos::reduceAll;
2860 using std::endl;
2861 using crs_graph_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
2862 using LO = local_ordinal_type;
2863 using GO = global_ordinal_type;
2864 using size_type = typename Teuchos::Array<GO>::size_type;
2865 const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
2866
2867 std::unique_ptr<std::string> prefix;
2868 if (verbose_) {
2869 prefix = this->createPrefix("CrsGraph", "globalAssemble");
2870 std::ostringstream os;
2871 os << *prefix << "Start" << endl;
2872 std::cerr << os.str();
2873 }
2874 RCP<const Comm<int> > comm = getComm ();
2875
2876 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2877 (! isFillActive (), std::runtime_error, "Fill must be active before "
2878 "you may call this method.");
2879
2880 const size_t myNumNonlocalRows = this->nonlocals_.size ();
2881
2882 // If no processes have nonlocal rows, then we don't have to do
2883 // anything. Checking this is probably cheaper than constructing
2884 // the Map of nonlocal rows (see below) and noticing that it has
2885 // zero global entries.
2886 {
2887 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
2888 int someoneHasNonlocalRows = 0;
2889 reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
2890 outArg (someoneHasNonlocalRows));
2891 if (someoneHasNonlocalRows == 0) {
2892 if (verbose_) {
2893 std::ostringstream os;
2894 os << *prefix << "Done: No nonlocal rows" << endl;
2895 std::cerr << os.str();
2896 }
2897 return;
2898 }
2899 else if (verbose_) {
2900 std::ostringstream os;
2901 os << *prefix << "At least 1 process has nonlocal rows"
2902 << endl;
2903 std::cerr << os.str();
2904 }
2905 }
2906
2907 // 1. Create a list of the "nonlocal" rows on each process. this
2908 // requires iterating over nonlocals_, so while we do this,
2909 // deduplicate the entries and get a count for each nonlocal
2910 // row on this process.
2911 // 2. Construct a new row Map corresponding to those rows. This
2912 // Map is likely overlapping. We know that the Map is not
2913 // empty on all processes, because the above all-reduce and
2914 // return exclude that case.
2915
2916 RCP<const map_type> nonlocalRowMap;
2917 // Keep this for CrsGraph's constructor.
2918 Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
2919 {
2920 Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
2921 size_type curPos = 0;
2922 for (auto mapIter = this->nonlocals_.begin ();
2923 mapIter != this->nonlocals_.end ();
2924 ++mapIter, ++curPos) {
2925 myNonlocalGblRows[curPos] = mapIter->first;
2926 std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
2927 std::sort (gblCols.begin (), gblCols.end ());
2928 auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
2929 gblCols.erase (vecLast, gblCols.end ());
2930 numEntPerNonlocalRow[curPos] = gblCols.size ();
2931 }
2932
2933 // Currently, Map requires that its indexBase be the global min
2934 // of all its global indices. Map won't compute this for us, so
2935 // we must do it. If our process has no nonlocal rows, set the
2936 // "min" to the max possible GO value. This ensures that if
2937 // some process has at least one nonlocal row, then it will pick
2938 // that up as the min. We know that at least one process has a
2939 // nonlocal row, since the all-reduce and return at the top of
2940 // this method excluded that case.
2941 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
2942 {
2943 auto iter = std::min_element (myNonlocalGblRows.begin (),
2944 myNonlocalGblRows.end ());
2945 if (iter != myNonlocalGblRows.end ()) {
2946 myMinNonlocalGblRow = *iter;
2947 }
2948 }
2949 GO gblMinNonlocalGblRow = 0;
2950 reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
2951 outArg (gblMinNonlocalGblRow));
2952 const GO indexBase = gblMinNonlocalGblRow;
2953 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2954 nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
2955 }
2956
2957 if (verbose_) {
2958 std::ostringstream os;
2959 os << *prefix << "nonlocalRowMap->getIndexBase()="
2960 << nonlocalRowMap->getIndexBase() << endl;
2961 std::cerr << os.str();
2962 }
2963
2964 // 3. Use the column indices for each nonlocal row, as stored in
2965 // nonlocals_, to construct a CrsGraph corresponding to
2966 // nonlocal rows. We need, but we have, exact counts of the
2967 // number of entries in each nonlocal row.
2968
2969 RCP<crs_graph_type> nonlocalGraph =
2970 rcp(new crs_graph_type(nonlocalRowMap, numEntPerNonlocalRow()));
2971 {
2972 size_type curPos = 0;
2973 for (auto mapIter = this->nonlocals_.begin ();
2974 mapIter != this->nonlocals_.end ();
2975 ++mapIter, ++curPos) {
2976 const GO gblRow = mapIter->first;
2977 std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
2978 const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
2979 nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
2980 }
2981 }
2982 if (verbose_) {
2983 std::ostringstream os;
2984 os << *prefix << "Built nonlocal graph" << endl;
2985 std::cerr << os.str();
2986 }
2987 // There's no need to fill-complete the nonlocals graph.
2988 // We just use it as a temporary container for the Export.
2989
2990 // 4. If the original row Map is one to one, then we can Export
2991 // directly from nonlocalGraph into this. Otherwise, we have
2992 // to create a temporary graph with a one-to-one row Map,
2993 // Export into that, then Import from the temporary graph into
2994 // *this.
2995
2996 auto origRowMap = this->getRowMap ();
2997 const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
2998
2999 if (origRowMapIsOneToOne) {
3000 if (verbose_) {
3001 std::ostringstream os;
3002 os << *prefix << "Original row Map is 1-to-1" << endl;
3003 std::cerr << os.str();
3004 }
3005 export_type exportToOrig (nonlocalRowMap, origRowMap);
3006 this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3007 // We're done at this point!
3008 }
3009 else {
3010 if (verbose_) {
3011 std::ostringstream os;
3012 os << *prefix << "Original row Map is NOT 1-to-1" << endl;
3013 std::cerr << os.str();
3014 }
3015 // If you ask a Map whether it is one to one, it does some
3016 // communication and stashes intermediate results for later use
3017 // by createOneToOne. Thus, calling createOneToOne doesn't cost
3018 // much more then the original cost of calling isOneToOne.
3019 auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3020 export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3021
3022 // Create a temporary graph with the one-to-one row Map.
3023 //
3024 // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3025 // row, to avoid reallocation during the Export operation.
3026 crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3027
3028 // Export from graph of nonlocals into the temp one-to-one graph.
3029 if (verbose_) {
3030 std::ostringstream os;
3031 os << *prefix << "Export nonlocal graph" << endl;
3032 std::cerr << os.str();
3033 }
3034 oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3035
3036 // We don't need the graph of nonlocals anymore, so get rid of
3037 // it, to keep the memory high-water mark down.
3038 nonlocalGraph = Teuchos::null;
3039
3040 // Import from the one-to-one graph to the original graph.
3041 import_type importToOrig (oneToOneRowMap, origRowMap);
3042 if (verbose_) {
3043 std::ostringstream os;
3044 os << *prefix << "Import nonlocal graph" << endl;
3045 std::cerr << os.str();
3046 }
3047 this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3048 }
3049
3050 // It's safe now to clear out nonlocals_, since we've already
3051 // committed side effects to *this. The standard idiom for
3052 // clearing a Container like std::map, is to swap it with an empty
3053 // Container and let the swapped Container fall out of scope.
3054 decltype (this->nonlocals_) newNonlocals;
3055 std::swap (this->nonlocals_, newNonlocals);
3056
3057 checkInternalState ();
3058 if (verbose_) {
3059 std::ostringstream os;
3060 os << *prefix << "Done" << endl;
3061 std::cerr << os.str();
3062 }
3063 }
3064
3065
3066 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3067 void
3069 resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3070 {
3071 clearGlobalConstants();
3072 if (params != Teuchos::null) this->setParameterList (params);
3073 // either still sorted/merged or initially sorted/merged
3074 indicesAreSorted_ = true;
3075 noRedundancies_ = true;
3076 fillComplete_ = false;
3077 }
3078
3079
3080 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3081 void
3083 fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3084 {
3085 // If the graph already has domain and range Maps, don't clobber
3086 // them. If it doesn't, use the current row Map for both the
3087 // domain and range Maps.
3088 //
3089 // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3090 // column Map, and column indices are inserted which are not in
3091 // the row Map on any process, this will cause troubles. However,
3092 // that is not a common case for most applications that we
3093 // encounter, and checking for it might require more
3094 // communication.
3095 Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3096 if (domMap.is_null ()) {
3097 domMap = this->getRowMap ();
3098 }
3099 Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3100 if (ranMap.is_null ()) {
3101 ranMap = this->getRowMap ();
3102 }
3103 this->fillComplete (domMap, ranMap, params);
3104 }
3105
3106
3107 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3108 void
3110 fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3111 const Teuchos::RCP<const map_type>& rangeMap,
3112 const Teuchos::RCP<Teuchos::ParameterList>& params)
3113 {
3114 using std::endl;
3115 const char tfecfFuncName[] = "fillComplete: ";
3116 const bool verbose = verbose_;
3117
3118 std::unique_ptr<std::string> prefix;
3119 if (verbose) {
3120 prefix = this->createPrefix("CrsGraph", "fillComplete");
3121 std::ostringstream os;
3122 os << *prefix << "Start" << endl;
3123 std::cerr << os.str();
3124 }
3125
3126 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3127 (! isFillActive () || isFillComplete (), std::runtime_error,
3128 "Graph fill state must be active (isFillActive() "
3129 "must be true) before calling fillComplete().");
3130
3131 const int numProcs = getComm ()->getSize ();
3132
3133 //
3134 // Read and set parameters
3135 //
3136
3137 // Does the caller want to sort remote GIDs (within those owned by
3138 // the same process) in makeColMap()?
3139 if (! params.is_null ()) {
3140 if (params->isParameter ("sort column map ghost gids")) {
3141 sortGhostsAssociatedWithEachProcessor_ =
3142 params->get<bool> ("sort column map ghost gids",
3143 sortGhostsAssociatedWithEachProcessor_);
3144 }
3145 else if (params->isParameter ("Sort column Map ghost GIDs")) {
3146 sortGhostsAssociatedWithEachProcessor_ =
3147 params->get<bool> ("Sort column Map ghost GIDs",
3148 sortGhostsAssociatedWithEachProcessor_);
3149 }
3150 }
3151
3152 // If true, the caller promises that no process did nonlocal
3153 // changes since the last call to fillComplete.
3154 bool assertNoNonlocalInserts = false;
3155 if (! params.is_null ()) {
3156 assertNoNonlocalInserts =
3157 params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3158 }
3159
3160 //
3161 // Allocate indices, if they haven't already been allocated
3162 //
3163 if (! indicesAreAllocated ()) {
3164 if (hasColMap ()) {
3165 // We have a column Map, so use local indices.
3166 allocateIndices (LocalIndices, verbose);
3167 } else {
3168 // We don't have a column Map, so use global indices.
3169 allocateIndices (GlobalIndices, verbose);
3170 }
3171 }
3172
3173 //
3174 // Do global assembly, if requested and if the communicator
3175 // contains more than one process.
3176 //
3177 const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3178 if (mayNeedGlobalAssemble) {
3179 // This first checks if we need to do global assembly.
3180 // The check costs a single all-reduce.
3181 globalAssemble ();
3182 }
3183 else {
3184 const size_t numNonlocals = nonlocals_.size();
3185 if (verbose) {
3186 std::ostringstream os;
3187 os << *prefix << "Do not need to call globalAssemble; "
3188 "assertNoNonlocalInserts="
3189 << (assertNoNonlocalInserts ? "true" : "false")
3190 << "numProcs=" << numProcs
3191 << ", nonlocals_.size()=" << numNonlocals << endl;
3192 std::cerr << os.str();
3193 }
3194 const int lclNeededGlobalAssemble =
3195 (numProcs > 1 && numNonlocals != 0) ? 1 : 0;
3196 if (lclNeededGlobalAssemble != 0 && verbose) {
3197 std::ostringstream os;
3198 os << *prefix;
3199 Details::Impl::verbosePrintMap(
3200 os, nonlocals_.begin(), nonlocals_.end(),
3201 nonlocals_.size(), "nonlocals_");
3202 std::cerr << os.str() << endl;
3203 }
3204
3205 if (debug_) {
3206 auto map = this->getMap();
3207 auto comm = map.is_null() ? Teuchos::null : map->getComm();
3208 int gblNeededGlobalAssemble = lclNeededGlobalAssemble;
3209 if (! comm.is_null()) {
3210 using Teuchos::REDUCE_MAX;
3211 using Teuchos::reduceAll;
3212 reduceAll(*comm, REDUCE_MAX, lclNeededGlobalAssemble,
3213 Teuchos::outArg(gblNeededGlobalAssemble));
3214 }
3215 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3216 (gblNeededGlobalAssemble != 0, std::runtime_error,
3217 "nonlocals_.size()=" << numNonlocals << " != 0 on at "
3218 "least one process in the CrsGraph's communicator. This "
3219 "means either that you incorrectly set the "
3220 "\"No Nonlocal Changes\" fillComplete parameter to true, "
3221 "or that you inserted invalid entries. "
3222 "Rerun with the environment variable TPETRA_VERBOSE="
3223 "CrsGraph set to see the entries of nonlocals_ on every "
3224 "MPI process (WARNING: lots of output).");
3225 }
3226 else {
3227 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3228 (lclNeededGlobalAssemble != 0, std::runtime_error,
3229 "nonlocals_.size()=" << numNonlocals << " != 0 on the "
3230 "calling process. This means either that you incorrectly "
3231 "set the \"No Nonlocal Changes\" fillComplete parameter "
3232 "to true, or that you inserted invalid entries. "
3233 "Rerun with the environment "
3234 "variable TPETRA_VERBOSE=CrsGraph set to see the entries "
3235 "of nonlocals_ on every MPI process (WARNING: lots of "
3236 "output).");
3237 }
3238 }
3239
3240 // Set domain and range Map. This may clear the Import / Export
3241 // objects if the new Maps differ from any old ones.
3242 setDomainRangeMaps (domainMap, rangeMap);
3243
3244 // If the graph does not already have a column Map (either from
3245 // the user constructor calling the version of the constructor
3246 // that takes a column Map, or from a previous fillComplete call),
3247 // then create it.
3248 Teuchos::Array<int> remotePIDs (0);
3249 const bool mustBuildColMap = ! this->hasColMap ();
3250 if (mustBuildColMap) {
3251 this->makeColMap (remotePIDs); // resized on output
3252 }
3253
3254 // Make indices local, if they aren't already.
3255 // The method doesn't do any work if the indices are already local.
3256 const std::pair<size_t, std::string> makeIndicesLocalResult =
3257 this->makeIndicesLocal(verbose);
3258
3259 if (debug_) {
3261 using Teuchos::RCP;
3262 using Teuchos::REDUCE_MIN;
3263 using Teuchos::reduceAll;
3264 using Teuchos::outArg;
3265
3266 RCP<const map_type> map = this->getMap ();
3267 RCP<const Teuchos::Comm<int> > comm;
3268 if (! map.is_null ()) {
3269 comm = map->getComm ();
3270 }
3271 if (comm.is_null ()) {
3272 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3273 (makeIndicesLocalResult.first != 0, std::runtime_error,
3274 makeIndicesLocalResult.second);
3275 }
3276 else {
3277 const int lclSuccess = (makeIndicesLocalResult.first == 0);
3278 int gblSuccess = 0; // output argument
3279 reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3280 if (gblSuccess != 1) {
3281 std::ostringstream os;
3282 gathervPrint (os, makeIndicesLocalResult.second, *comm);
3283 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3284 (true, std::runtime_error, os.str ());
3285 }
3286 }
3287 }
3288 else {
3289 // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
3290 // the error state to makeImportExport or
3291 // computeGlobalConstants, which may do all-reduces and thus may
3292 // have the opportunity to communicate that error state.
3293 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3294 (makeIndicesLocalResult.first != 0, std::runtime_error,
3295 makeIndicesLocalResult.second);
3296 }
3297
3298 // If this process has no indices, then CrsGraph considers it
3299 // already trivially sorted and merged. Thus, this method need
3300 // not be called on all processes in the row Map's communicator.
3301 this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
3302
3303 // Make Import and Export objects, if they haven't been made
3304 // already. If we made a column Map above, reuse information from
3305 // that process to avoid communiation in the Import setup.
3306 this->makeImportExport (remotePIDs, mustBuildColMap);
3307
3308 // Create the Kokkos::StaticCrsGraph, if it doesn't already exist.
3309 this->fillLocalGraph (params);
3310
3311 const bool callComputeGlobalConstants = params.get () == nullptr ||
3312 params->get ("compute global constants", true);
3313 if (callComputeGlobalConstants) {
3314 this->computeGlobalConstants ();
3315 }
3316 else {
3317 this->computeLocalConstants ();
3318 }
3319 this->fillComplete_ = true;
3320 this->checkInternalState ();
3321
3322 if (verbose) {
3323 std::ostringstream os;
3324 os << *prefix << "Done" << endl;
3325 std::cerr << os.str();
3326 }
3327 }
3328
3329
3330 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3331 void
3333 expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
3334 const Teuchos::RCP<const map_type>& rangeMap,
3335 const Teuchos::RCP<const import_type>& importer,
3336 const Teuchos::RCP<const export_type>& exporter,
3337 const Teuchos::RCP<Teuchos::ParameterList>& params)
3338 {
3339 const char tfecfFuncName[] = "expertStaticFillComplete: ";
3340#ifdef HAVE_TPETRA_MMM_TIMINGS
3341 std::string label;
3342 if(!params.is_null())
3343 label = params->get("Timer Label",label);
3344 std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
3345 using Teuchos::TimeMonitor;
3346 Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
3347#endif
3348
3349
3350 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3351 domainMap.is_null () || rangeMap.is_null (),
3352 std::runtime_error, "The input domain Map and range Map must be nonnull.");
3353 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3354 isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
3355 "call this method unless the graph has a column Map.");
3356 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3357 getLocalNumRows () > 0 && rowPtrsUnpacked_host_.extent (0) == 0,
3358 std::runtime_error, "The calling process has getLocalNumRows() = "
3359 << getLocalNumRows () << " > 0 rows, but the row offsets array has not "
3360 "been set.");
3361 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3362 static_cast<size_t> (rowPtrsUnpacked_host_.extent (0)) != getLocalNumRows () + 1,
3363 std::runtime_error, "The row offsets array has length " <<
3364 rowPtrsUnpacked_host_.extent (0) << " != getLocalNumRows()+1 = " <<
3365 (getLocalNumRows () + 1) << ".");
3366
3367 // Note: We don't need to do the following things which are normally done in fillComplete:
3368 // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
3369
3370 // Constants from allocateIndices
3371 //
3372 // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
3373 // away once the graph is allocated. expertStaticFillComplete
3374 // either presumes that the graph is allocated, or "allocates" it.
3375 //
3376 // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
3377 // version of CrsGraph is to allocate in the constructor, not
3378 // lazily on first insert. That will make both
3379 // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
3380 numAllocForAllRows_ = 0;
3381 k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3382 indicesAreAllocated_ = true;
3383
3384 // Constants from makeIndicesLocal
3385 //
3386 // The graph has a column Map, so its indices had better be local.
3387 indicesAreLocal_ = true;
3388 indicesAreGlobal_ = false;
3389
3390 // set domain/range map: may clear the import/export objects
3391#ifdef HAVE_TPETRA_MMM_TIMINGS
3392 MM = Teuchos::null;
3393 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
3394#endif
3395 setDomainRangeMaps (domainMap, rangeMap);
3396
3397 // Presume the user sorted and merged the arrays first
3398 indicesAreSorted_ = true;
3399 noRedundancies_ = true;
3400
3401 // makeImportExport won't create a new importer/exporter if I set one here first.
3402#ifdef HAVE_TPETRA_MMM_TIMINGS
3403 MM = Teuchos::null;
3404 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
3405#endif
3406
3407 importer_ = Teuchos::null;
3408 exporter_ = Teuchos::null;
3409 if (importer != Teuchos::null) {
3410 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3411 ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
3412 ! importer->getTargetMap ()->isSameAs (*getColMap ()),
3413 std::invalid_argument,": importer does not match matrix maps.");
3414 importer_ = importer;
3415
3416 }
3417
3418#ifdef HAVE_TPETRA_MMM_TIMINGS
3419 MM = Teuchos::null;
3420 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
3421#endif
3422
3423 if (exporter != Teuchos::null) {
3424 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3425 ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
3426 ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
3427 std::invalid_argument,": exporter does not match matrix maps.");
3428 exporter_ = exporter;
3429 }
3430
3431#ifdef HAVE_TPETRA_MMM_TIMINGS
3432 MM = Teuchos::null;
3433 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
3434#endif
3435 Teuchos::Array<int> remotePIDs (0); // unused output argument
3436 this->makeImportExport (remotePIDs, false);
3437
3438#ifdef HAVE_TPETRA_MMM_TIMINGS
3439 MM = Teuchos::null;
3440 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
3441#endif
3442 this->fillLocalGraph (params);
3443
3444 const bool callComputeGlobalConstants = params.get () == nullptr ||
3445 params->get ("compute global constants", true);
3446
3447 if (callComputeGlobalConstants) {
3448#ifdef HAVE_TPETRA_MMM_TIMINGS
3449 MM = Teuchos::null;
3450 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
3451#endif // HAVE_TPETRA_MMM_TIMINGS
3452 this->computeGlobalConstants ();
3453 }
3454 else {
3455#ifdef HAVE_TPETRA_MMM_TIMINGS
3456 MM = Teuchos::null;
3457 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
3458#endif // HAVE_TPETRA_MMM_TIMINGS
3459 this->computeLocalConstants ();
3460 }
3461
3462 fillComplete_ = true;
3463
3464#ifdef HAVE_TPETRA_MMM_TIMINGS
3465 MM = Teuchos::null;
3466 MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
3467#endif
3468 checkInternalState ();
3469 }
3470
3471
3472 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3473 void
3475 fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
3476 {
3477 using ::Tpetra::Details::computeOffsetsFromCounts;
3478 typedef decltype (k_numRowEntries_) row_entries_type;
3479 typedef typename local_graph_device_type::row_map_type row_map_type;
3480 typedef typename row_map_type::non_const_type non_const_row_map_type;
3481 typedef typename local_graph_device_type::entries_type::non_const_type lclinds_1d_type;
3482 const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
3483 "expertStaticFillComplete): ";
3484 const size_t lclNumRows = this->getLocalNumRows ();
3485
3486 // This method's goal is to fill in the two arrays (compressed
3487 // sparse row format) that define the sparse graph's structure.
3488
3489 bool requestOptimizedStorage = true;
3490 if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
3491 requestOptimizedStorage = false;
3492 }
3493
3494 // The graph's column indices are currently stored in a 1-D
3495 // format, with row offsets in rowPtrsUnpacked_host_ and local column indices
3496 // in k_lclInds1D_.
3497
3498 if (debug_) {
3499 // The graph's array of row offsets must already be allocated.
3500 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3501 (rowPtrsUnpacked_host_.extent (0) == 0, std::logic_error,
3502 "k_rowPtrs_ has size zero, but shouldn't");
3503 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3504 (rowPtrsUnpacked_host_.extent (0) != lclNumRows + 1, std::logic_error,
3505 "rowPtrsUnpacked_host_.extent(0) = "
3506 << rowPtrsUnpacked_host_.extent (0) << " != (lclNumRows + 1) = "
3507 << (lclNumRows + 1) << ".");
3508 const size_t numOffsets = rowPtrsUnpacked_host_.extent (0);
3509 const auto valToCheck = rowPtrsUnpacked_host_(numOffsets-1);
3510 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3511 (numOffsets != 0 &&
3512 lclIndsUnpacked_wdv.extent (0) != valToCheck,
3513 std::logic_error, "numOffsets=" << numOffsets << " != 0 "
3514 " and lclIndsUnpacked_wdv.extent(0)=" << lclIndsUnpacked_wdv.extent(0)
3515 << " != k_rowPtrs_(" << numOffsets << ")=" << valToCheck
3516 << ".");
3517 }
3518
3519 size_t allocSize = 0;
3520 try {
3521 allocSize = this->getLocalAllocationSize ();
3522 }
3523 catch (std::logic_error& e) {
3524 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3525 (true, std::logic_error, "getLocalAllocationSize threw "
3526 "std::logic_error: " << e.what ());
3527 }
3528 catch (std::runtime_error& e) {
3529 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3530 (true, std::runtime_error, "getLocalAllocationSize threw "
3531 "std::runtime_error: " << e.what ());
3532 }
3533 catch (std::exception& e) {
3534 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3535 (true, std::runtime_error, "getLocalAllocationSize threw "
3536 "std::exception: " << e.what ());
3537 }
3538 catch (...) {
3539 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3540 (true, std::runtime_error, "getLocalAllocationSize threw "
3541 "an exception not a subclass of std::exception.");
3542 }
3543
3544 if (this->getLocalNumEntries () != allocSize) {
3545 // Use the nonconst version of row_map_type for ptr_d, because
3546 // the latter is const and we need to modify ptr_d here.
3547 non_const_row_map_type ptr_d;
3548 row_map_type ptr_d_const;
3549
3550 // The graph's current 1-D storage is "unpacked." This means
3551 // the row offsets may differ from what the final row offsets
3552 // should be. This could happen, for example, if the user set
3553 // an upper bound on the number of entries in each row, but
3554 // didn't fill all those entries.
3555
3556 if (debug_) {
3557 if (rowPtrsUnpacked_host_.extent (0) != 0) {
3558 const size_t numOffsets =
3559 static_cast<size_t> (rowPtrsUnpacked_host_.extent (0));
3560 const auto valToCheck = rowPtrsUnpacked_host_(numOffsets - 1);
3561 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3562 (valToCheck != size_t(lclIndsUnpacked_wdv.extent(0)),
3563 std::logic_error, "(Unpacked branch) Before allocating "
3564 "or packing, k_rowPtrs_(" << (numOffsets-1) << ")="
3565 << valToCheck << " != lclIndsUnpacked_wdv.extent(0)="
3566 << lclIndsUnpacked_wdv.extent (0) << ".");
3567 }
3568 }
3569
3570 // Pack the row offsets into ptr_d, by doing a sum-scan of the
3571 // array of valid entry counts per row (k_numRowEntries_).
3572
3573 // Total number of entries in the matrix on the calling
3574 // process. We will compute this in the loop below. It's
3575 // cheap to compute and useful as a sanity check.
3576 size_t lclTotalNumEntries = 0;
3577 {
3578 // Allocate the packed row offsets array.
3579 ptr_d =
3580 non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
3581 ptr_d_const = ptr_d;
3582
3583 // It's ok that k_numRowEntries_ is a host View; the
3584 // function can handle this.
3585 typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
3586 if (debug_) {
3587 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3588 (size_t(numRowEnt_h.extent (0)) != lclNumRows,
3589 std::logic_error, "(Unpacked branch) "
3590 "numRowEnt_h.extent(0)=" << numRowEnt_h.extent(0)
3591 << " != getLocalNumRows()=" << lclNumRows << "");
3592 }
3593
3594 lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
3595
3596 if (debug_) {
3597 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3598 (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
3599 std::logic_error, "(Unpacked branch) After allocating "
3600 "ptr_d, ptr_d.extent(0) = " << ptr_d.extent(0)
3601 << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
3602 const auto valToCheck =
3603 ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
3604 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3605 (valToCheck != lclTotalNumEntries, std::logic_error,
3606 "Tpetra::CrsGraph::fillLocalGraph: In unpacked branch, "
3607 "after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
3608 << ") = " << valToCheck << " != total number of entries "
3609 "on the calling process = " << lclTotalNumEntries
3610 << ".");
3611 }
3612 }
3613
3614 // Allocate the array of packed column indices.
3615 lclinds_1d_type ind_d =
3616 lclinds_1d_type ("Tpetra::CrsGraph::lclInd", lclTotalNumEntries);
3617
3618 // k_rowPtrs_ and lclIndsUnpacked_wdv are currently unpacked. Pack
3619 // them, using the packed row offsets array ptr_d that we
3620 // created above.
3621 //
3622 // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
3623 // CrsMatrix?), we need to keep around the unpacked row
3624 // offsets and column indices.
3625
3626 // Pack the column indices from unpacked lclIndsUnpacked_wdv into
3627 // packed ind_d. We will replace lclIndsUnpacked_wdv below.
3628 typedef pack_functor<
3629 typename local_graph_device_type::entries_type::non_const_type,
3630 typename local_inds_dualv_type::t_dev::const_type,
3631 row_map_type,
3632 typename local_graph_device_type::row_map_type> inds_packer_type;
3633 inds_packer_type f (ind_d,
3634 lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
3635 ptr_d, rowPtrsUnpacked_dev_);
3636 {
3637 typedef typename decltype (ind_d)::execution_space exec_space;
3638 typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
3639 Kokkos::parallel_for (range_type (0, lclNumRows), f);
3640 }
3641
3642 if (debug_) {
3643 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3644 (ptr_d.extent (0) == 0, std::logic_error,
3645 "(\"Optimize Storage\"=true branch) After packing, "
3646 "ptr_d.extent(0)=0. This probably means k_rowPtrs_ was "
3647 "never allocated.");
3648 if (ptr_d.extent (0) != 0) {
3649 const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
3650 const auto valToCheck =
3651 ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
3652 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3653 (static_cast<size_t> (valToCheck) != ind_d.extent (0),
3654 std::logic_error, "(\"Optimize Storage\"=true branch) "
3655 "After packing, ptr_d(" << (numOffsets-1) << ")="
3656 << valToCheck << " != ind_d.extent(0)="
3657 << ind_d.extent(0) << ".");
3658 }
3659 }
3660 // Build the local graph.
3661 if (requestOptimizedStorage)
3662 setRowPtrs(ptr_d_const);
3663 else
3664 setRowPtrsPacked(ptr_d_const);
3665 lclIndsPacked_wdv = local_inds_wdv_type(ind_d);
3666 }
3667 else { // We don't have to pack, so just set the pointers.
3668 // Note: The setRowPtrsPacked call has an aditional memcpy to host that we want to avoid
3669 rowPtrsPacked_dev_ = rowPtrsUnpacked_dev_;
3670 rowPtrsPacked_host_ = rowPtrsUnpacked_host_;
3671 lclIndsPacked_wdv = lclIndsUnpacked_wdv;
3672
3673 if (debug_) {
3674 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3675 (rowPtrsPacked_dev_.extent (0) == 0, std::logic_error,
3676 "(\"Optimize Storage\"=false branch) "
3677 "rowPtrsPacked_dev_.extent(0) = 0. "
3678 "This probably means that "
3679 "k_rowPtrs_ was never allocated.");
3680 if (rowPtrsPacked_dev_.extent (0) != 0) {
3681 const size_t numOffsets =
3682 static_cast<size_t> (rowPtrsPacked_dev_.extent (0));
3683 const size_t valToCheck =
3684 rowPtrsPacked_host_(numOffsets - 1);
3685 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3686 (valToCheck != size_t(lclIndsPacked_wdv.extent (0)),
3687 std::logic_error, "(\"Optimize Storage\"=false branch) "
3688 "rowPtrsPacked_dev_(" << (numOffsets-1) << ")="
3689 << valToCheck
3690 << " != lclIndsPacked_wdv.extent(0)="
3691 << lclIndsPacked_wdv.extent (0) << ".");
3692 }
3693 }
3694 }
3695
3696 if (debug_) {
3697 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3698 (static_cast<size_t> (rowPtrsPacked_dev_.extent (0)) != lclNumRows + 1,
3699 std::logic_error, "After packing, rowPtrsPacked_dev_.extent(0) = " <<
3700 rowPtrsPacked_dev_.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
3701 << ".");
3702 if (rowPtrsPacked_dev_.extent (0) != 0) {
3703 const size_t numOffsets = static_cast<size_t> (rowPtrsPacked_dev_.extent (0));
3704 const auto valToCheck = rowPtrsPacked_host_(numOffsets - 1);
3705 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3706 (static_cast<size_t> (valToCheck) != lclIndsPacked_wdv.extent (0),
3707 std::logic_error, "After packing, rowPtrsPacked_dev_(" << (numOffsets-1)
3708 << ") = " << valToCheck << " != lclIndsPacked_wdv.extent(0) = "
3709 << lclIndsPacked_wdv.extent (0) << ".");
3710 }
3711 }
3712
3713 if (requestOptimizedStorage) {
3714 // With optimized storage, we don't need to store
3715 // the array of row entry counts.
3716
3717 // Free graph data structures that are only needed for
3718 // unpacked 1-D storage.
3719 k_numRowEntries_ = row_entries_type ();
3720
3721 // Keep the new 1-D packed allocations.
3722 lclIndsUnpacked_wdv = lclIndsPacked_wdv;
3723
3724 storageStatus_ = Details::STORAGE_1D_PACKED;
3725 }
3726
3727 set_need_sync_host_uvm_access(); // make sure kernel setup of indices is fenced before a host access
3728 }
3729
3730 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3731 void
3733 replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
3734 {
3735 // NOTE: This safety check matches the code, but not the documentation of Crsgraph
3736 //
3737 // FIXME (mfh 18 Aug 2014) This will break if the calling process
3738 // has no entries, because in that case, currently it is neither
3739 // locally nor globally indexed. This will change once we get rid
3740 // of lazy allocation (so that the constructor allocates indices
3741 // and therefore commits to local vs. global).
3742 const char tfecfFuncName[] = "replaceColMap: ";
3743 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3744 isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
3745 "Requires matching maps and non-static graph.");
3746 colMap_ = newColMap;
3747 }
3748
3749 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3750 void
3752 reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
3753 const Teuchos::RCP<const import_type>& newImport,
3754 const bool sortIndicesInEachRow)
3755 {
3756 using Teuchos::REDUCE_MIN;
3757 using Teuchos::reduceAll;
3758 using Teuchos::RCP;
3759 typedef GlobalOrdinal GO;
3760 typedef LocalOrdinal LO;
3761 typedef typename local_inds_dualv_type::t_host col_inds_type;
3762 const char tfecfFuncName[] = "reindexColumns: ";
3763
3764 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3765 isFillComplete (), std::runtime_error, "The graph is fill complete "
3766 "(isFillComplete() returns true). You must call resumeFill() before "
3767 "you may call this method.");
3768
3769 // mfh 19 Aug 2014: This method does NOT redistribute data; it
3770 // doesn't claim to do the work of an Import or Export. This
3771 // means that for all processes, the calling process MUST own all
3772 // column indices, in both the old column Map (if it exists) and
3773 // the new column Map. We check this via an all-reduce.
3774 //
3775 // Some processes may be globally indexed, others may be locally
3776 // indexed, and others (that have no graph entries) may be
3777 // neither. This method will NOT change the graph's current
3778 // state. If it's locally indexed, it will stay that way, and
3779 // vice versa. It would easy to add an option to convert indices
3780 // from global to local, so as to save a global-to-local
3781 // conversion pass. However, we don't do this here. The intended
3782 // typical use case is that the graph already has a column Map and
3783 // is locally indexed, and this is the case for which we optimize.
3784
3785 const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
3786
3787 // Attempt to convert indices to the new column Map's version of
3788 // local. This will fail if on the calling process, the graph has
3789 // indices that are not on that process in the new column Map.
3790 // After the local conversion attempt, we will do an all-reduce to
3791 // see if any processes failed.
3792
3793 // If this is false, then either the graph contains a column index
3794 // which is invalid in the CURRENT column Map, or the graph is
3795 // locally indexed but currently has no column Map. In either
3796 // case, there is no way to convert the current local indices into
3797 // global indices, so that we can convert them into the new column
3798 // Map's local indices. It's possible for this to be true on some
3799 // processes but not others, due to replaceColMap.
3800 bool allCurColIndsValid = true;
3801 // On the calling process, are all valid current column indices
3802 // also in the new column Map on the calling process? In other
3803 // words, does local reindexing suffice, or should the user have
3804 // done an Import or Export instead?
3805 bool localSuffices = true;
3806
3807 // Final arrays for the local indices. We will allocate exactly
3808 // one of these ONLY if the graph is locally indexed on the
3809 // calling process, and ONLY if the graph has one or more entries
3810 // (is not empty) on the calling process. In that case, we
3811 // allocate the first (1-D storage) if the graph has a static
3812 // profile, else we allocate the second (2-D storage).
3813 col_inds_type newLclInds1D;
3814 auto oldLclInds1D = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
3815
3816 // If indices aren't allocated, that means the calling process
3817 // owns no entries in the graph. Thus, there is nothing to
3818 // convert, and it trivially succeeds locally.
3819 if (indicesAreAllocated ()) {
3820 if (isLocallyIndexed ()) {
3821 if (hasColMap ()) { // locally indexed, and currently has a column Map
3822 const map_type& oldColMap = * (getColMap ());
3823 // Allocate storage for the new local indices.
3824 const size_t allocSize = this->getLocalAllocationSize ();
3825 newLclInds1D = col_inds_type("Tpetra::CrsGraph::lclIndsReindexedHost",
3826 allocSize);
3827 // Attempt to convert the new indices locally.
3828 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3829 const RowInfo rowInfo = this->getRowInfo (lclRow);
3830 const size_t beg = rowInfo.offset1D;
3831 const size_t end = beg + rowInfo.numEntries;
3832 for (size_t k = beg; k < end; ++k) {
3833 const LO oldLclCol = oldLclInds1D(k);
3834 if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
3835 allCurColIndsValid = false;
3836 break; // Stop at the first invalid index
3837 }
3838 const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
3839
3840 // The above conversion MUST succeed. Otherwise, the
3841 // current local index is invalid, which means that
3842 // the graph was constructed incorrectly.
3843 if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
3844 allCurColIndsValid = false;
3845 break; // Stop at the first invalid index
3846 }
3847 else {
3848 const LO newLclCol = newColMap->getLocalElement (gblCol);
3849 if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
3850 localSuffices = false;
3851 break; // Stop at the first invalid index
3852 }
3853 newLclInds1D(k) = newLclCol;
3854 }
3855 } // for each entry in the current row
3856 } // for each locally owned row
3857 }
3858 else { // locally indexed, but no column Map
3859 // This case is only possible if replaceColMap() was called
3860 // with a null argument on the calling process. It's
3861 // possible, but it means that this method can't possibly
3862 // succeed, since we have no way of knowing how to convert
3863 // the current local indices to global indices.
3864 allCurColIndsValid = false;
3865 }
3866 }
3867 else { // globally indexed
3868 // If the graph is globally indexed, we don't need to save
3869 // local indices, but we _do_ need to know whether the current
3870 // global indices are valid in the new column Map. We may
3871 // need to do a getRemoteIndexList call to find this out.
3872 //
3873 // In this case, it doesn't matter whether the graph currently
3874 // has a column Map. We don't need the old column Map to
3875 // convert from global indices to the _new_ column Map's local
3876 // indices. Furthermore, we can use the same code, whether
3877 // the graph is static or dynamic profile.
3878
3879 // Test whether the current global indices are in the new
3880 // column Map on the calling process.
3881 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
3882 const RowInfo rowInfo = this->getRowInfo (lclRow);
3883 auto oldGblRowView = this->getGlobalIndsViewHost (rowInfo);
3884 for (size_t k = 0; k < rowInfo.numEntries; ++k) {
3885 const GO gblCol = oldGblRowView(k);
3886 if (! newColMap->isNodeGlobalElement (gblCol)) {
3887 localSuffices = false;
3888 break; // Stop at the first invalid index
3889 }
3890 } // for each entry in the current row
3891 } // for each locally owned row
3892 } // locally or globally indexed
3893 } // whether indices are allocated
3894
3895 // Do an all-reduce to check both possible error conditions.
3896 int lclSuccess[2];
3897 lclSuccess[0] = allCurColIndsValid ? 1 : 0;
3898 lclSuccess[1] = localSuffices ? 1 : 0;
3899 int gblSuccess[2];
3900 gblSuccess[0] = 0;
3901 gblSuccess[1] = 0;
3902 RCP<const Teuchos::Comm<int> > comm =
3903 getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
3904 if (! comm.is_null ()) {
3905 reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
3906 }
3907
3908 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3909 gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
3910 " The most likely reason is that the graph is locally indexed, but the "
3911 "column Map is missing (null) on some processes, due to a previous call "
3912 "to replaceColMap().");
3913
3914 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3915 gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
3916 "contains column indices that are in the old column Map, but not in the "
3917 "new column Map (on that process). This method does NOT redistribute "
3918 "data; it does not claim to do the work of an Import or Export operation."
3919 " This means that for all processess, the calling process MUST own all "
3920 "column indices, in both the old column Map and the new column Map. In "
3921 "this case, you will need to do an Import or Export operation to "
3922 "redistribute data.");
3923
3924 // Commit the results.
3925 if (isLocallyIndexed ()) {
3926 { // scope the device view; sortAndMergeAllIndices needs host
3927 typename local_inds_dualv_type::t_dev newLclInds1D_dev(
3928 Kokkos::view_alloc("Tpetra::CrsGraph::lclIndReindexed",
3929 Kokkos::WithoutInitializing),
3930 newLclInds1D.extent(0));
3931 Kokkos::deep_copy(newLclInds1D_dev, newLclInds1D);
3932 lclIndsUnpacked_wdv = local_inds_wdv_type(newLclInds1D_dev);
3933 }
3934
3935 // We've reindexed, so we don't know if the indices are sorted.
3936 //
3937 // FIXME (mfh 17 Sep 2014) It could make sense to check this,
3938 // since we're already going through all the indices above. We
3939 // could also sort each row in place; that way, we would only
3940 // have to make one pass over the rows.
3941 indicesAreSorted_ = false;
3942 if (sortIndicesInEachRow) {
3943 // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
3944 // order to call this method.
3945 //
3946 // FIXME (mfh 17 Sep 2014) This violates the strong exception
3947 // guarantee. It would be better to sort the new index arrays
3948 // before committing them.
3949 const bool sorted = false; // need to resort
3950 const bool merged = true; // no need to merge, since no dups
3951 this->sortAndMergeAllIndices (sorted, merged);
3952 }
3953 }
3954 colMap_ = newColMap;
3955
3956 if (newImport.is_null ()) {
3957 // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
3958 // check whether the input Import is null on any process.
3959 //
3960 // If the domain Map hasn't been set yet, we can't compute a new
3961 // Import object. Leave it what it is; it should be null, but
3962 // it doesn't matter. If the domain Map _has_ been set, then
3963 // compute a new Import object if necessary.
3964 if (! domainMap_.is_null ()) {
3965 if (! domainMap_->isSameAs (* newColMap)) {
3966 importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
3967 } else {
3968 importer_ = Teuchos::null; // don't need an Import
3969 }
3970 }
3971 } else {
3972 // The caller gave us an Import object. Assume that it's valid.
3973 importer_ = newImport;
3974 }
3975 }
3976
3977 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3978 void
3980 replaceDomainMap (const Teuchos::RCP<const map_type>& newDomainMap)
3981 {
3982 const char prefix[] = "Tpetra::CrsGraph::replaceDomainMap: ";
3983 TEUCHOS_TEST_FOR_EXCEPTION(
3984 colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
3985 "this method unless the graph already has a column Map.");
3986 TEUCHOS_TEST_FOR_EXCEPTION(
3987 newDomainMap.is_null (), std::invalid_argument,
3988 prefix << "The new domain Map must be nonnull.");
3989
3990 // Create a new importer, if needed
3991 Teuchos::RCP<const import_type> newImporter = Teuchos::null;
3992 if (newDomainMap != colMap_ && (! newDomainMap->isSameAs (*colMap_))) {
3993 newImporter = rcp(new import_type(newDomainMap, colMap_));
3994 }
3995 this->replaceDomainMapAndImporter(newDomainMap, newImporter);
3996 }
3997
3998 template <class LocalOrdinal, class GlobalOrdinal, class Node>
3999 void
4001 replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4002 const Teuchos::RCP<const import_type>& newImporter)
4003 {
4004 const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4005 TEUCHOS_TEST_FOR_EXCEPTION(
4006 colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4007 "this method unless the graph already has a column Map.");
4008 TEUCHOS_TEST_FOR_EXCEPTION(
4009 newDomainMap.is_null (), std::invalid_argument,
4010 prefix << "The new domain Map must be nonnull.");
4011
4012 if (debug_) {
4013 if (newImporter.is_null ()) {
4014 // It's not a good idea to put expensive operations in a macro
4015 // clause, even if they are side effect - free, because macros
4016 // don't promise that they won't evaluate their arguments more
4017 // than once. It's polite for them to do so, but not required.
4018 const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4019 TEUCHOS_TEST_FOR_EXCEPTION
4020 (!colSameAsDom, std::invalid_argument, "If the new Import is null, "
4021 "then the new domain Map must be the same as the current column Map.");
4022 }
4023 else {
4024 const bool colSameAsTgt =
4025 colMap_->isSameAs (* (newImporter->getTargetMap ()));
4026 const bool newDomSameAsSrc =
4027 newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4028 TEUCHOS_TEST_FOR_EXCEPTION
4029 (! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4030 "new Import is nonnull, then the current column Map must be the same "
4031 "as the new Import's target Map, and the new domain Map must be the "
4032 "same as the new Import's source Map.");
4033 }
4034 }
4035
4036 domainMap_ = newDomainMap;
4037 importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4038 }
4039
4040 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4041 void
4043 replaceRangeMap (const Teuchos::RCP<const map_type>& newRangeMap)
4044 {
4045 const char prefix[] = "Tpetra::CrsGraph::replaceRangeMap: ";
4046 TEUCHOS_TEST_FOR_EXCEPTION(
4047 rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4048 "this method unless the graph already has a row Map.");
4049 TEUCHOS_TEST_FOR_EXCEPTION(
4050 newRangeMap.is_null (), std::invalid_argument,
4051 prefix << "The new range Map must be nonnull.");
4052
4053 // Create a new exporter, if needed
4054 Teuchos::RCP<const export_type> newExporter = Teuchos::null;
4055 if (newRangeMap != rowMap_ && (! newRangeMap->isSameAs (*rowMap_))) {
4056 newExporter = rcp(new export_type(rowMap_, newRangeMap));
4057 }
4058 this->replaceRangeMapAndExporter(newRangeMap, newExporter);
4059 }
4060
4061 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4062 void
4064 replaceRangeMapAndExporter (const Teuchos::RCP<const map_type>& newRangeMap,
4065 const Teuchos::RCP<const export_type>& newExporter)
4066 {
4067 const char prefix[] = "Tpetra::CrsGraph::replaceRangeMapAndExporter: ";
4068 TEUCHOS_TEST_FOR_EXCEPTION(
4069 rowMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4070 "this method unless the graph already has a column Map.");
4071 TEUCHOS_TEST_FOR_EXCEPTION(
4072 newRangeMap.is_null (), std::invalid_argument,
4073 prefix << "The new domain Map must be nonnull.");
4074
4075 if (debug_) {
4076 if (newExporter.is_null ()) {
4077 // It's not a good idea to put expensive operations in a macro
4078 // clause, even if they are side effect - free, because macros
4079 // don't promise that they won't evaluate their arguments more
4080 // than once. It's polite for them to do so, but not required.
4081 const bool rowSameAsRange = rowMap_->isSameAs (*newRangeMap);
4082 TEUCHOS_TEST_FOR_EXCEPTION
4083 (!rowSameAsRange, std::invalid_argument, "If the new Export is null, "
4084 "then the new range Map must be the same as the current row Map.");
4085 }
4086 else {
4087 const bool newRangeSameAsTgt =
4088 newRangeMap->isSameAs (* (newExporter->getTargetMap ()));
4089 const bool rowSameAsSrc =
4090 rowMap_->isSameAs (* (newExporter->getSourceMap ()));
4091 TEUCHOS_TEST_FOR_EXCEPTION
4092 (! rowSameAsSrc || ! newRangeSameAsTgt, std::invalid_argument, "If the "
4093 "new Export is nonnull, then the current row Map must be the same "
4094 "as the new Export's source Map, and the new range Map must be the "
4095 "same as the new Export's target Map.");
4096 }
4097 }
4098
4099 rangeMap_ = newRangeMap;
4100 exporter_ = Teuchos::rcp_const_cast<export_type> (newExporter);
4101 }
4102
4103
4104 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4107 getLocalGraphDevice () const
4108 {
4110 lclIndsPacked_wdv.getDeviceView(Access::ReadWrite),
4111 rowPtrsPacked_dev_);
4112 }
4113
4114 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4117 getLocalGraphHost () const
4118 {
4119 return local_graph_host_type(
4120 lclIndsPacked_wdv.getHostView(Access::ReadWrite),
4121 rowPtrsPacked_host_);
4122 }
4123
4124 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4125 void
4128 {
4129 using ::Tpetra::Details::ProfilingRegion;
4130 using Teuchos::ArrayView;
4131 using Teuchos::outArg;
4132 using Teuchos::reduceAll;
4133 typedef global_size_t GST;
4134
4135 ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeGlobalConstants");
4136
4137 this->computeLocalConstants ();
4138
4139 // Compute global constants from local constants. Processes that
4140 // already have local constants still participate in the
4141 // all-reduces, using their previously computed values.
4142 if (! this->haveGlobalConstants_) {
4143 const Teuchos::Comm<int>& comm = * (this->getComm ());
4144 // Promote all the nodeNum* and nodeMaxNum* quantities from
4145 // size_t to global_size_t, when doing the all-reduces for
4146 // globalNum* / globalMaxNum* results.
4147 //
4148 // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4149 // this in two all-reduces (one for the sum and the other for
4150 // the max), or use a custom MPI_Op that combines the sum and
4151 // the max. The latter might even be slower than two
4152 // all-reduces on modern network hardware. It would also be a
4153 // good idea to use nonblocking all-reduces (MPI 3), so that we
4154 // don't have to wait around for the first one to finish before
4155 // starting the second one.
4156 GST lcl, gbl;
4157 lcl = static_cast<GST> (this->getLocalNumEntries ());
4158
4159 reduceAll<int,GST> (comm, Teuchos::REDUCE_SUM, 1, &lcl, &gbl);
4160 this->globalNumEntries_ = gbl;
4161
4162 const GST lclMaxNumRowEnt = static_cast<GST> (this->nodeMaxNumRowEntries_);
4163 reduceAll<int, GST> (comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
4164 outArg (this->globalMaxNumRowEntries_));
4165 this->haveGlobalConstants_ = true;
4166 }
4167 }
4168
4169
4170 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4171 void
4174 {
4175 using ::Tpetra::Details::ProfilingRegion;
4176
4177 ProfilingRegion regionCLC ("Tpetra::CrsGraph::computeLocalConstants");
4178 if (this->haveLocalConstants_) {
4179 return;
4180 }
4181
4182 // Reset local properties
4183 this->nodeMaxNumRowEntries_ =
4184 Teuchos::OrdinalTraits<size_t>::invalid();
4185
4186 using LO = local_ordinal_type;
4187
4188 auto ptr = this->rowPtrsPacked_dev_;
4189 const LO lclNumRows = ptr.extent(0) == 0 ?
4190 static_cast<LO> (0) :
4191 (static_cast<LO> (ptr.extent(0)) - static_cast<LO> (1));
4192
4193 const LO lclMaxNumRowEnt =
4194 ::Tpetra::Details::maxDifference ("Tpetra::CrsGraph: nodeMaxNumRowEntries",
4195 ptr, lclNumRows);
4196 this->nodeMaxNumRowEntries_ = static_cast<size_t> (lclMaxNumRowEnt);
4197 this->haveLocalConstants_ = true;
4198 }
4199
4200
4201 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4202 std::pair<size_t, std::string>
4204 makeIndicesLocal (const bool verbose)
4205 {
4207 using Teuchos::arcp;
4208 using Teuchos::Array;
4209 using std::endl;
4210 typedef LocalOrdinal LO;
4211 typedef GlobalOrdinal GO;
4212 typedef device_type DT;
4213 typedef typename local_graph_device_type::row_map_type::non_const_value_type offset_type;
4214 typedef decltype (k_numRowEntries_) row_entries_type;
4215 typedef typename row_entries_type::non_const_value_type num_ent_type;
4216 const char tfecfFuncName[] = "makeIndicesLocal: ";
4217 ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal");
4218
4219 std::unique_ptr<std::string> prefix;
4220 if (verbose) {
4221 prefix = this->createPrefix("CrsGraph", "makeIndicesLocal");
4222 std::ostringstream os;
4223 os << *prefix << "lclNumRows: " << getLocalNumRows() << endl;
4224 std::cerr << os.str();
4225 }
4226
4227 // These are somewhat global properties, so it's safe to have
4228 // exception checks for them, rather than returning an error code.
4229 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4230 (! this->hasColMap (), std::logic_error, "The graph does not have a "
4231 "column Map yet. This method should never be called in that case. "
4232 "Please report this bug to the Tpetra developers.");
4233 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4234 (this->getColMap ().is_null (), std::logic_error, "The graph claims "
4235 "that it has a column Map, because hasColMap() returns true. However, "
4236 "the result of getColMap() is null. This should never happen. Please "
4237 "report this bug to the Tpetra developers.");
4238
4239 // Return value 1: The number of column indices (counting
4240 // duplicates) that could not be converted to local indices,
4241 // because they were not in the column Map on the calling process.
4242 size_t lclNumErrs = 0;
4243 std::ostringstream errStrm; // for return value 2 (error string)
4244
4245 const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
4246 const map_type& colMap = * (this->getColMap ());
4247
4248 if (this->isGloballyIndexed () && lclNumRows != 0) {
4249 // This is a host-accessible View.
4250 typename row_entries_type::const_type h_numRowEnt =
4251 this->k_numRowEntries_;
4252
4253 // Allocate space for local indices.
4254 if (rowPtrsUnpacked_host_.extent (0) == 0) {
4255 errStrm << "k_rowPtrs_.extent(0) == 0. This should never "
4256 "happen here. Please report this bug to the Tpetra developers."
4257 << endl;
4258 // Need to return early.
4259 return std::make_pair(Tpetra::Details::OrdinalTraits<size_t>::invalid (),
4260 errStrm.str ());
4261 }
4262 const auto numEnt = rowPtrsUnpacked_host_(lclNumRows);
4263
4264 // mfh 17 Dec 2016: We don't need initial zero-fill of
4265 // lclIndsUnpacked_wdv, because we will fill it below anyway.
4266 // AllowPadding would only help for aligned access (e.g.,
4267 // for vectorization) if we also were to pad each row to the
4268 // same alignment, so we'll skip AllowPadding for now.
4269
4270 // using Kokkos::AllowPadding;
4271 using Kokkos::view_alloc;
4272 using Kokkos::WithoutInitializing;
4273
4274 // When giving the label as an argument to
4275 // Kokkos::view_alloc, the label must be a string and not a
4276 // char*, else the code won't compile. This is because
4277 // view_alloc also allows a raw pointer as its first
4278 // argument. See
4279 // https://github.com/kokkos/kokkos/issues/434. This is a
4280 // large allocation typically, so the overhead of creating
4281 // an std::string is minor.
4282 const std::string label ("Tpetra::CrsGraph::lclInd");
4283 if (verbose) {
4284 std::ostringstream os;
4285 os << *prefix << "(Re)allocate lclInd_wdv: old="
4286 << lclIndsUnpacked_wdv.extent(0) << ", new=" << numEnt << endl;
4287 std::cerr << os.str();
4288 }
4289
4290 local_inds_dualv_type lclInds_dualv =
4291 local_inds_dualv_type(view_alloc(label, WithoutInitializing),
4292 numEnt);
4293 lclIndsUnpacked_wdv = local_inds_wdv_type(lclInds_dualv);
4294
4295 auto lclColMap = colMap.getLocalMap ();
4296 // This is a "device mirror" of the host View h_numRowEnt.
4297 //
4298 // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
4299 // Device instance is to use its default constructor. See the
4300 // following Kokkos issue:
4301 //
4302 // https://github.com/kokkos/kokkos/issues/442
4303 if (verbose) {
4304 std::ostringstream os;
4305 os << *prefix << "Allocate device mirror k_numRowEnt: "
4306 << h_numRowEnt.extent(0) << endl;
4307 std::cerr << os.str();
4308 }
4309 auto k_numRowEnt =
4310 Kokkos::create_mirror_view_and_copy (device_type (), h_numRowEnt);
4311
4312 using ::Tpetra::Details::convertColumnIndicesFromGlobalToLocal;
4313 lclNumErrs =
4314 convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (
4315 lclIndsUnpacked_wdv.getDeviceView(Access::OverwriteAll),
4316 gblInds_wdv.getDeviceView(Access::ReadOnly),
4317 rowPtrsUnpacked_dev_,
4318 lclColMap,
4319 k_numRowEnt);
4320 if (lclNumErrs != 0) {
4321 const int myRank = [this] () {
4322 auto map = this->getMap ();
4323 if (map.is_null ()) {
4324 return 0;
4325 }
4326 else {
4327 auto comm = map->getComm ();
4328 return comm.is_null () ? 0 : comm->getRank ();
4329 }
4330 } ();
4331 const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
4332 errStrm << "(Process " << myRank << ") When converting column "
4333 "indices from global to local, we encountered " << lclNumErrs
4334 << " ind" << (pluralNumErrs ? "ices" : "ex")
4335 << " that do" << (pluralNumErrs ? "es" : "")
4336 << " not live in the column Map on this process." << endl;
4337 }
4338
4339 // We've converted column indices from global to local, so we
4340 // can deallocate the global column indices (which we know are
4341 // in 1-D storage, because the graph has static profile).
4342 if (verbose) {
4343 std::ostringstream os;
4344 os << *prefix << "Free gblInds_wdv: "
4345 << gblInds_wdv.extent(0) << endl;
4346 std::cerr << os.str();
4347 }
4348 gblInds_wdv = global_inds_wdv_type ();
4349 } // globallyIndexed() && lclNumRows > 0
4350
4351 this->indicesAreLocal_ = true;
4352 this->indicesAreGlobal_ = false;
4353 this->checkInternalState ();
4354
4355 return std::make_pair (lclNumErrs, errStrm.str ());
4356 }
4357
4358 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4359 void
4361 makeColMap (Teuchos::Array<int>& remotePIDs)
4362 {
4364 using std::endl;
4365 const char tfecfFuncName[] = "makeColMap";
4366
4367 ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap");
4368 std::unique_ptr<std::string> prefix;
4369 if (verbose_) {
4370 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4371 std::ostringstream os;
4372 os << *prefix << "Start" << endl;
4373 std::cerr << os.str();
4374 }
4375
4376 // this->colMap_ should be null at this point, but we accept the
4377 // future possibility that it might not be (esp. if we decide
4378 // later to support graph structure changes after first
4379 // fillComplete, which CrsGraph does not currently (as of 12 Feb
4380 // 2017) support).
4381 Teuchos::RCP<const map_type> colMap = this->colMap_;
4382 const bool sortEachProcsGids =
4383 this->sortGhostsAssociatedWithEachProcessor_;
4384
4385 // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
4386 // per-process error code. If an error does occur on a process,
4387 // ::Tpetra::Details::makeColMap does NOT promise that all processes will
4388 // notice that error. This is the caller's responsibility. For
4389 // now, we only propagate (to all processes) and report the error
4390 // in debug mode. In the future, we need to add the local/global
4391 // error handling scheme used in BlockCrsMatrix to this class.
4392 if (debug_) {
4393 using Teuchos::outArg;
4394 using Teuchos::REDUCE_MIN;
4395 using Teuchos::reduceAll;
4396
4397 std::ostringstream errStrm;
4398 const int lclErrCode =
4399 Details::makeColMap (colMap, remotePIDs,
4400 getDomainMap (), *this, sortEachProcsGids, &errStrm);
4401 auto comm = this->getComm ();
4402 if (! comm.is_null ()) {
4403 const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
4404 int gblSuccess = 0; // output argument
4405 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
4406 outArg (gblSuccess));
4407 if (gblSuccess != 1) {
4408 std::ostringstream os;
4409 Details::gathervPrint (os, errStrm.str (), *comm);
4410 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4411 (true, std::runtime_error, ": An error happened on at "
4412 "least one process in the CrsGraph's communicator. "
4413 "Here are all processes' error messages:" << std::endl
4414 << os.str ());
4415 }
4416 }
4417 }
4418 else {
4419 (void) Details::makeColMap (colMap, remotePIDs,
4420 getDomainMap (), *this, sortEachProcsGids, nullptr);
4421 }
4422 // See above. We want to admit the possibility of makeColMap
4423 // actually revising an existing column Map, even though that
4424 // doesn't currently (as of 10 May 2017) happen.
4425 this->colMap_ = colMap;
4426
4427 checkInternalState ();
4428 if (verbose_) {
4429 std::ostringstream os;
4430 os << *prefix << "Done" << endl;
4431 std::cerr << os.str();
4432 }
4433 }
4434
4435
4436 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4437 void
4439 sortAndMergeAllIndices (const bool sorted, const bool merged)
4440 {
4441 using std::endl;
4442 using LO = LocalOrdinal;
4443 using host_execution_space =
4444 typename Kokkos::View<LO*, device_type>::HostMirror::
4445 execution_space;
4446 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
4447 const char tfecfFuncName[] = "sortAndMergeAllIndices";
4448 Details::ProfilingRegion regionSortAndMerge
4449 ("Tpetra::CrsGraph::sortAndMergeAllIndices");
4450
4451 std::unique_ptr<std::string> prefix;
4452 if (verbose_) {
4453 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4454 std::ostringstream os;
4455 os << *prefix << "Start: "
4456 << "sorted=" << (sorted ? "true" : "false")
4457 << ", merged=" << (merged ? "true" : "false") << endl;
4458 std::cerr << os.str();
4459 }
4460 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4461 (this->isGloballyIndexed(), std::logic_error,
4462 "This method may only be called after makeIndicesLocal." );
4463 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4464 (! merged && this->isStorageOptimized(), std::logic_error,
4465 "The graph is already storage optimized, so we shouldn't be "
4466 "merging any indices. "
4467 "Please report this bug to the Tpetra developers.");
4468
4469 if (! sorted || ! merged) {
4470 const LO lclNumRows(this->getLocalNumRows());
4471 auto range = range_type(0, lclNumRows);
4472
4473 if (verbose_) {
4474 size_t totalNumDups = 0;
4475 Kokkos::parallel_reduce(range,
4476 [this, sorted, merged] (const LO lclRow, size_t& numDups)
4477 {
4478 const RowInfo rowInfo = this->getRowInfo(lclRow);
4479 numDups += this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4480 },
4481 totalNumDups);
4482 std::ostringstream os;
4483 os << *prefix << "totalNumDups=" << totalNumDups << endl;
4484 std::cerr << os.str();
4485 }
4486 else {
4487 Kokkos::parallel_for(range,
4488 [this, sorted, merged] (const LO lclRow)
4489 {
4490 const RowInfo rowInfo = this->getRowInfo(lclRow);
4491 this->sortAndMergeRowIndices(rowInfo, sorted, merged);
4492 });
4493 }
4494 this->indicesAreSorted_ = true; // we just sorted every row
4495 this->noRedundancies_ = true; // we just merged every row
4496 }
4497
4498 if (verbose_) {
4499 std::ostringstream os;
4500 os << *prefix << "Done" << endl;
4501 std::cerr << os.str();
4502 }
4503 }
4504
4505 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4506 void
4508 makeImportExport (Teuchos::Array<int>& remotePIDs,
4509 const bool useRemotePIDs)
4510 {
4511 using ::Tpetra::Details::ProfilingRegion;
4512 using Teuchos::ParameterList;
4513 using Teuchos::RCP;
4514 using Teuchos::rcp;
4515 const char tfecfFuncName[] = "makeImportExport: ";
4516 ProfilingRegion regionMIE ("Tpetra::CrsGraph::makeImportExport");
4517
4518 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4519 (! this->hasColMap (), std::logic_error,
4520 "This method may not be called unless the graph has a column Map.");
4521 RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
4522
4523 // Don't do any checks to see if we need to create the Import, if
4524 // it exists already.
4525 //
4526 // FIXME (mfh 25 Mar 2013) This will become incorrect if we
4527 // change CrsGraph in the future to allow changing the column
4528 // Map after fillComplete. For now, the column Map is fixed
4529 // after the first fillComplete call.
4530 if (importer_.is_null ()) {
4531 // Create the Import instance if necessary.
4532 if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
4533 if (params.is_null () || ! params->isSublist ("Import")) {
4534 if (useRemotePIDs) {
4535 importer_ = rcp (new import_type (domainMap_, colMap_, remotePIDs));
4536 }
4537 else {
4538 importer_ = rcp (new import_type (domainMap_, colMap_));
4539 }
4540 }
4541 else {
4542 RCP<ParameterList> importSublist = sublist (params, "Import", true);
4543 if (useRemotePIDs) {
4544 RCP<import_type> newImp =
4545 rcp (new import_type (domainMap_, colMap_, remotePIDs,
4546 importSublist));
4547 importer_ = newImp;
4548 }
4549 else {
4550 importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
4551 }
4552 }
4553 }
4554 }
4555
4556 // Don't do any checks to see if we need to create the Export, if
4557 // it exists already.
4558 if (exporter_.is_null ()) {
4559 // Create the Export instance if necessary.
4560 if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
4561 if (params.is_null () || ! params->isSublist ("Export")) {
4562 exporter_ = rcp (new export_type (rowMap_, rangeMap_));
4563 }
4564 else {
4565 RCP<ParameterList> exportSublist = sublist (params, "Export", true);
4566 exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
4567 }
4568 }
4569 }
4570 }
4571
4572
4573 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4574 std::string
4576 description () const
4577 {
4578 std::ostringstream oss;
4579 oss << dist_object_type::description ();
4580 if (isFillComplete ()) {
4581 oss << "{status = fill complete"
4582 << ", global rows = " << getGlobalNumRows()
4583 << ", global cols = " << getGlobalNumCols()
4584 << ", global num entries = " << getGlobalNumEntries()
4585 << "}";
4586 }
4587 else {
4588 oss << "{status = fill not complete"
4589 << ", global rows = " << getGlobalNumRows()
4590 << "}";
4591 }
4592 return oss.str();
4593 }
4594
4595
4596 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4597 void
4599 describe (Teuchos::FancyOStream &out,
4600 const Teuchos::EVerbosityLevel verbLevel) const
4601 {
4602 using Teuchos::ArrayView;
4603 using Teuchos::Comm;
4604 using Teuchos::RCP;
4605 using Teuchos::VERB_DEFAULT;
4606 using Teuchos::VERB_NONE;
4607 using Teuchos::VERB_LOW;
4608 using Teuchos::VERB_MEDIUM;
4609 using Teuchos::VERB_HIGH;
4610 using Teuchos::VERB_EXTREME;
4611 using std::endl;
4612 using std::setw;
4613
4614 Teuchos::EVerbosityLevel vl = verbLevel;
4615 if (vl == VERB_DEFAULT) vl = VERB_LOW;
4616 RCP<const Comm<int> > comm = this->getComm();
4617 const int myImageID = comm->getRank(),
4618 numImages = comm->getSize();
4619 size_t width = 1;
4620 for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
4621 ++width;
4622 }
4623 width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
4624 Teuchos::OSTab tab (out);
4625 // none: print nothing
4626 // low: print O(1) info from node 0
4627 // medium: print O(P) info, num entries per node
4628 // high: print O(N) info, num entries per row
4629 // extreme: print O(NNZ) info: print graph indices
4630 //
4631 // for medium and higher, print constituent objects at specified verbLevel
4632 if (vl != VERB_NONE) {
4633 if (myImageID == 0) out << this->description() << std::endl;
4634 // O(1) globals, minus what was already printed by description()
4635 if (isFillComplete() && myImageID == 0) {
4636 out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
4637 }
4638 // constituent objects
4639 if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4640 if (myImageID == 0) out << "\nRow map: " << std::endl;
4641 rowMap_->describe(out,vl);
4642 if (colMap_ != Teuchos::null) {
4643 if (myImageID == 0) out << "\nColumn map: " << std::endl;
4644 colMap_->describe(out,vl);
4645 }
4646 if (domainMap_ != Teuchos::null) {
4647 if (myImageID == 0) out << "\nDomain map: " << std::endl;
4648 domainMap_->describe(out,vl);
4649 }
4650 if (rangeMap_ != Teuchos::null) {
4651 if (myImageID == 0) out << "\nRange map: " << std::endl;
4652 rangeMap_->describe(out,vl);
4653 }
4654 }
4655 // O(P) data
4656 if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
4657 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4658 if (myImageID == imageCtr) {
4659 out << "Node ID = " << imageCtr << std::endl
4660 << "Node number of entries = " << this->getLocalNumEntries () << std::endl
4661 << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
4662 if (! indicesAreAllocated ()) {
4663 out << "Indices are not allocated." << std::endl;
4664 }
4665 }
4666 comm->barrier();
4667 comm->barrier();
4668 comm->barrier();
4669 }
4670 }
4671 // O(N) and O(NNZ) data
4672 if (vl == VERB_HIGH || vl == VERB_EXTREME) {
4673 for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
4674 if (myImageID == imageCtr) {
4675 out << std::setw(width) << "Node ID"
4676 << std::setw(width) << "Global Row"
4677 << std::setw(width) << "Num Entries";
4678 if (vl == VERB_EXTREME) {
4679 out << " Entries";
4680 }
4681 out << std::endl;
4682 const LocalOrdinal lclNumRows =
4683 static_cast<LocalOrdinal> (this->getLocalNumRows ());
4684 for (LocalOrdinal r=0; r < lclNumRows; ++r) {
4685 const RowInfo rowinfo = this->getRowInfo (r);
4686 GlobalOrdinal gid = rowMap_->getGlobalElement(r);
4687 out << std::setw(width) << myImageID
4688 << std::setw(width) << gid
4689 << std::setw(width) << rowinfo.numEntries;
4690 if (vl == VERB_EXTREME) {
4691 out << " ";
4692 if (isGloballyIndexed()) {
4693 auto rowview = gblInds_wdv.getHostView(Access::ReadOnly);
4694 for (size_t j=0; j < rowinfo.numEntries; ++j){
4695 GlobalOrdinal colgid = rowview[j + rowinfo.offset1D];
4696 out << colgid << " ";
4697 }
4698 }
4699 else if (isLocallyIndexed()) {
4700 auto rowview = lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
4701 for (size_t j=0; j < rowinfo.numEntries; ++j) {
4702 LocalOrdinal collid = rowview[j + rowinfo.offset1D];
4703 out << colMap_->getGlobalElement(collid) << " ";
4704 }
4705 }
4706 }
4707 out << std::endl;
4708 }
4709 }
4710 comm->barrier();
4711 comm->barrier();
4712 comm->barrier();
4713 }
4714 }
4715 }
4716 }
4717
4718
4719 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4720 bool
4722 checkSizes (const SrcDistObject& /* source */)
4723 {
4724 // It's not clear what kind of compatibility checks on sizes can
4725 // be performed here. Epetra_CrsGraph doesn't check any sizes for
4726 // compatibility.
4727 return true;
4728 }
4729
4730 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4731 void
4734 (const SrcDistObject& source,
4735 const size_t numSameIDs,
4736 const Kokkos::DualView<const local_ordinal_type*,
4737 buffer_device_type>& permuteToLIDs,
4738 const Kokkos::DualView<const local_ordinal_type*,
4739 buffer_device_type>& permuteFromLIDs,
4740 const CombineMode /*CM*/)
4741 {
4742 using std::endl;
4743 using LO = local_ordinal_type;
4744 using GO = global_ordinal_type;
4745 using this_CRS_type = CrsGraph<LO, GO, node_type>;
4746 const char tfecfFuncName[] = "copyAndPermute: ";
4747 const bool verbose = verbose_;
4748
4749 std::unique_ptr<std::string> prefix;
4750 if (verbose) {
4751 prefix = this->createPrefix("CrsGraph", "copyAndPermute");
4752 std::ostringstream os;
4753 os << *prefix << endl;
4754 std::cerr << os.str ();
4755 }
4756
4757 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4758 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
4759 std::runtime_error, "permuteToLIDs.extent(0) = "
4760 << permuteToLIDs.extent (0) << " != permuteFromLIDs.extent(0) = "
4761 << permuteFromLIDs.extent (0) << ".");
4762
4763 // We know from checkSizes that the source object is a
4764 // row_graph_type, so we don't need to check again.
4765 const row_graph_type& srcRowGraph =
4766 dynamic_cast<const row_graph_type&> (source);
4767
4768 if (verbose) {
4769 std::ostringstream os;
4770 os << *prefix << "Compute padding" << endl;
4771 std::cerr << os.str ();
4772 }
4773 auto padding = computeCrsPadding(srcRowGraph, numSameIDs,
4774 permuteToLIDs, permuteFromLIDs, verbose);
4775 applyCrsPadding(*padding, verbose);
4776
4777 // If the source object is actually a CrsGraph, we can use view
4778 // mode instead of copy mode to access the entries in each row,
4779 // if the graph is not fill complete.
4780 const this_CRS_type* srcCrsGraph =
4781 dynamic_cast<const this_CRS_type*> (&source);
4782
4783 const map_type& srcRowMap = *(srcRowGraph.getRowMap());
4784 const map_type& tgtRowMap = *(getRowMap());
4785 const bool src_filled = srcRowGraph.isFillComplete();
4786 nonconst_global_inds_host_view_type row_copy;
4787 LO myid = 0;
4788
4789 //
4790 // "Copy" part of "copy and permute."
4791 //
4792 if (src_filled || srcCrsGraph == nullptr) {
4793 if (verbose) {
4794 std::ostringstream os;
4795 os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
4796 std::cerr << os.str ();
4797 }
4798 // If the source graph is fill complete, we can't use view mode,
4799 // because the data might be stored in a different format not
4800 // compatible with the expectations of view mode. Also, if the
4801 // source graph is not a CrsGraph, we can't use view mode,
4802 // because RowGraph only provides copy mode access to the data.
4803 for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4804 const GO gid = srcRowMap.getGlobalElement (myid);
4805 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (gid);
4806 Kokkos::resize(row_copy,row_length);
4807 size_t check_row_length = 0;
4808 srcRowGraph.getGlobalRowCopy (gid, row_copy, check_row_length);
4809 this->insertGlobalIndices (gid, row_length, row_copy.data());
4810 }
4811 } else {
4812 if (verbose) {
4813 std::ostringstream os;
4814 os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
4815 std::cerr << os.str ();
4816 }
4817 for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
4818 const GO gid = srcRowMap.getGlobalElement (myid);
4819 global_inds_host_view_type row;
4820 srcCrsGraph->getGlobalRowView (gid, row);
4821 this->insertGlobalIndices (gid, row.extent(0), row.data());
4822 }
4823 }
4824
4825 //
4826 // "Permute" part of "copy and permute."
4827 //
4828 auto permuteToLIDs_h = permuteToLIDs.view_host ();
4829 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
4830
4831 if (src_filled || srcCrsGraph == nullptr) {
4832 for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
4833 const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
4834 const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
4835 size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (srcgid);
4836 Kokkos::resize(row_copy,row_length);
4837 size_t check_row_length = 0;
4838 srcRowGraph.getGlobalRowCopy (srcgid, row_copy, check_row_length);
4839 this->insertGlobalIndices (mygid, row_length, row_copy.data());
4840 }
4841 } else {
4842 for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
4843 const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
4844 const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
4845 global_inds_host_view_type row;
4846 srcCrsGraph->getGlobalRowView (srcgid, row);
4847 this->insertGlobalIndices (mygid, row.extent(0), row.data());
4848 }
4849 }
4850
4851 if (verbose) {
4852 std::ostringstream os;
4853 os << *prefix << "Done" << endl;
4854 std::cerr << os.str ();
4855 }
4856 }
4857
4858 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4859 void
4860 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
4861 applyCrsPadding(const padding_type& padding,
4862 const bool verbose)
4863 {
4864 using Details::ProfilingRegion;
4866 using std::endl;
4867 using LO = local_ordinal_type;
4868 using row_ptrs_type =
4869 typename local_graph_device_type::row_map_type::non_const_type;
4870 using range_policy =
4871 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
4872 const char tfecfFuncName[] = "applyCrsPadding";
4873 ProfilingRegion regionCAP("Tpetra::CrsGraph::applyCrsPadding");
4874
4875 std::unique_ptr<std::string> prefix;
4876 if (verbose) {
4877 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
4878 std::ostringstream os;
4879 os << *prefix << "padding: ";
4880 padding.print(os);
4881 os << endl;
4882 std::cerr << os.str();
4883 }
4884 const int myRank = ! verbose ? -1 : [&] () {
4885 auto map = this->getMap();
4886 if (map.is_null()) {
4887 return -1;
4888 }
4889 auto comm = map->getComm();
4890 if (comm.is_null()) {
4891 return -1;
4892 }
4893 return comm->getRank();
4894 } ();
4895
4896 // FIXME (mfh 10 Feb 2020) We shouldn't actually reallocate
4897 // row_ptrs_beg or allocate row_ptrs_end unless the allocation
4898 // size needs to increase. That should be the job of
4899 // padCrsArrays.
4900
4901 // Assume global indexing we don't have any indices yet
4902 if (! indicesAreAllocated()) {
4903 if (verbose) {
4904 std::ostringstream os;
4905 os << *prefix << "Call allocateIndices" << endl;
4906 std::cerr << os.str();
4907 }
4908 allocateIndices(GlobalIndices, verbose);
4909 }
4910 TEUCHOS_ASSERT( indicesAreAllocated() );
4911
4912 // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
4913 // would use it directly.
4914
4915 if (verbose) {
4916 std::ostringstream os;
4917 os << *prefix << "Allocate row_ptrs_beg: "
4918 << rowPtrsUnpacked_dev_.extent(0) << endl;
4919 std::cerr << os.str();
4920 }
4921 using Kokkos::view_alloc;
4922 using Kokkos::WithoutInitializing;
4923 row_ptrs_type row_ptrs_beg(
4924 view_alloc("row_ptrs_beg", WithoutInitializing),
4925 rowPtrsUnpacked_dev_.extent(0));
4926 // DEEP_COPY REVIEW - DEVICE-TO-DEVICE
4927 Kokkos::deep_copy(execution_space(),row_ptrs_beg, rowPtrsUnpacked_dev_);
4928
4929 const size_t N = row_ptrs_beg.extent(0) == 0 ? size_t(0) :
4930 size_t(row_ptrs_beg.extent(0) - 1);
4931 if (verbose) {
4932 std::ostringstream os;
4933 os << *prefix << "Allocate row_ptrs_end: " << N << endl;
4934 std::cerr << os.str();
4935 }
4936 row_ptrs_type row_ptrs_end(
4937 view_alloc("row_ptrs_end", WithoutInitializing), N);
4938 row_ptrs_type num_row_entries;
4939
4940 const bool refill_num_row_entries = k_numRowEntries_.extent(0) != 0;
4941
4942 execution_space().fence(); // we need above deep_copy to be done
4943
4944 if (refill_num_row_entries) { // Case 1: Unpacked storage
4945 // We can't assume correct *this capture until C++17, and it's
4946 // likely more efficient just to capture what we need anyway.
4947 num_row_entries =
4948 row_ptrs_type(view_alloc("num_row_entries", WithoutInitializing), N);
4949 Kokkos::deep_copy(num_row_entries, this->k_numRowEntries_);
4950 Kokkos::parallel_for
4951 ("Fill end row pointers", range_policy(0, N),
4952 KOKKOS_LAMBDA (const size_t i) {
4953 row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
4954 });
4955 }
4956 else {
4957 // FIXME (mfh 10 Feb 2020) Fix padCrsArrays so that if packed
4958 // storage, we don't need row_ptr_end to be separate allocation;
4959 // could just have it alias row_ptr_beg+1.
4960 Kokkos::parallel_for
4961 ("Fill end row pointers", range_policy(0, N),
4962 KOKKOS_LAMBDA (const size_t i) {
4963 row_ptrs_end(i) = row_ptrs_beg(i+1);
4964 });
4965 }
4966
4967 if (isGloballyIndexed()) {
4968 padCrsArrays(row_ptrs_beg, row_ptrs_end, gblInds_wdv,
4969 padding, myRank, verbose);
4970 }
4971 else {
4972 padCrsArrays(row_ptrs_beg, row_ptrs_end, lclIndsUnpacked_wdv,
4973 padding, myRank, verbose);
4974 }
4975
4976 if (refill_num_row_entries) {
4977 Kokkos::parallel_for
4978 ("Fill num entries", range_policy(0, N),
4979 KOKKOS_LAMBDA (const size_t i) {
4980 num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
4981 });
4982 Kokkos::deep_copy(this->k_numRowEntries_, num_row_entries);
4983 }
4984 if (verbose) {
4985 std::ostringstream os;
4986 os << *prefix << "Reassign k_rowPtrs_; old size: "
4987 << rowPtrsUnpacked_dev_.extent(0) << ", new size: "
4988 << row_ptrs_beg.extent(0) << endl;
4989 std::cerr << os.str();
4990 TEUCHOS_ASSERT( rowPtrsUnpacked_dev_.extent(0) == row_ptrs_beg.extent(0) );
4991 }
4992
4993 setRowPtrsUnpacked(row_ptrs_beg);
4994
4995 set_need_sync_host_uvm_access(); // need fence before host UVM access of k_rowPtrs_
4996 }
4997
4998 template <class LocalOrdinal, class GlobalOrdinal, class Node>
4999 std::unique_ptr<
5000 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5001 >
5002 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5003 computeCrsPadding(
5004 const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5005 const size_t numSameIDs,
5006 const Kokkos::DualView<const local_ordinal_type*,
5007 buffer_device_type>& permuteToLIDs,
5008 const Kokkos::DualView<const local_ordinal_type*,
5009 buffer_device_type>& permuteFromLIDs,
5010 const bool verbose) const
5011 {
5012 using LO = local_ordinal_type;
5013 using std::endl;
5014
5015 std::unique_ptr<std::string> prefix;
5016 if (verbose) {
5017 prefix = this->createPrefix("CrsGraph",
5018 "computeCrsPadding(same & permute)");
5019 std::ostringstream os;
5020 os << *prefix << "{numSameIDs: " << numSameIDs
5021 << ", numPermutes: " << permuteFromLIDs.extent(0) << "}"
5022 << endl;
5023 std::cerr << os.str();
5024 }
5025
5026 const int myRank = [&] () {
5027 auto comm = rowMap_.is_null() ? Teuchos::null :
5028 rowMap_->getComm();
5029 return comm.is_null() ? -1 : comm->getRank();
5030 } ();
5031 std::unique_ptr<padding_type> padding(
5032 new padding_type(myRank, numSameIDs,
5033 permuteFromLIDs.extent(0)));
5034
5035 computeCrsPaddingForSameIDs(*padding, source,
5036 static_cast<LO>(numSameIDs));
5037 computeCrsPaddingForPermutedIDs(*padding, source, permuteToLIDs,
5038 permuteFromLIDs);
5039 return padding;
5040 }
5041
5042 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5043 void
5044 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5045 computeCrsPaddingForSameIDs(
5046 padding_type& padding,
5047 const RowGraph<local_ordinal_type, global_ordinal_type,
5048 node_type>& source,
5049 const local_ordinal_type numSameIDs) const
5050 {
5051 using LO = local_ordinal_type;
5052 using GO = global_ordinal_type;
5053 using Details::Impl::getRowGraphGlobalRow;
5054 using std::endl;
5055 const char tfecfFuncName[] = "computeCrsPaddingForSameIds";
5056
5057 std::unique_ptr<std::string> prefix;
5058 const bool verbose = verbose_;
5059 if (verbose) {
5060 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5061 std::ostringstream os;
5062 os << *prefix << "numSameIDs: " << numSameIDs << endl;
5063 std::cerr << os.str();
5064 }
5065
5066 if (numSameIDs == 0) {
5067 return;
5068 }
5069
5070 const map_type& srcRowMap = *(source.getRowMap());
5071 const map_type& tgtRowMap = *rowMap_;
5072 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5073 const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
5074 const bool src_is_unique =
5075 srcCrs == nullptr ? false : srcCrs->isMerged();
5076 const bool tgt_is_unique = this->isMerged();
5077
5078 std::vector<GO> srcGblColIndsScratch;
5079 std::vector<GO> tgtGblColIndsScratch;
5080
5081 execute_sync_host_uvm_access(); // protect host UVM access
5082 for (LO lclRowInd = 0; lclRowInd < numSameIDs; ++lclRowInd) {
5083 const GO srcGblRowInd = srcRowMap.getGlobalElement(lclRowInd);
5084 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(lclRowInd);
5085 auto srcGblColInds = getRowGraphGlobalRow(
5086 srcGblColIndsScratch, source, srcGblRowInd);
5087 auto tgtGblColInds = getRowGraphGlobalRow(
5088 tgtGblColIndsScratch, *this, tgtGblRowInd);
5089 padding.update_same(lclRowInd, tgtGblColInds.getRawPtr(),
5090 tgtGblColInds.size(), tgt_is_unique,
5091 srcGblColInds.getRawPtr(),
5092 srcGblColInds.size(), src_is_unique);
5093 }
5094 if (verbose) {
5095 std::ostringstream os;
5096 os << *prefix << "Done" << endl;
5097 std::cerr << os.str();
5098 }
5099 }
5100
5101 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5102 void
5103 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5104 computeCrsPaddingForPermutedIDs(
5105 padding_type& padding,
5106 const RowGraph<local_ordinal_type, global_ordinal_type,
5107 node_type>& source,
5108 const Kokkos::DualView<const local_ordinal_type*,
5109 buffer_device_type>& permuteToLIDs,
5110 const Kokkos::DualView<const local_ordinal_type*,
5111 buffer_device_type>& permuteFromLIDs) const
5112 {
5113 using LO = local_ordinal_type;
5114 using GO = global_ordinal_type;
5115 using Details::Impl::getRowGraphGlobalRow;
5116 using std::endl;
5117 const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds";
5118
5119 std::unique_ptr<std::string> prefix;
5120 const bool verbose = verbose_;
5121 if (verbose) {
5122 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5123 std::ostringstream os;
5124 os << *prefix << "permuteToLIDs.extent(0): "
5125 << permuteToLIDs.extent(0)
5126 << ", permuteFromLIDs.extent(0): "
5127 << permuteFromLIDs.extent(0) << endl;
5128 std::cerr << os.str();
5129 }
5130
5131 if (permuteToLIDs.extent(0) == 0) {
5132 return;
5133 }
5134
5135 const map_type& srcRowMap = *(source.getRowMap());
5136 const map_type& tgtRowMap = *rowMap_;
5137 using this_CRS_type = CrsGraph<LocalOrdinal, GlobalOrdinal, Node>;
5138 const this_CRS_type* srcCrs = dynamic_cast<const this_CRS_type*>(&source);
5139 const bool src_is_unique =
5140 srcCrs == nullptr ? false : srcCrs->isMerged();
5141 const bool tgt_is_unique = this->isMerged();
5142
5143 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host() );
5144 auto permuteToLIDs_h = permuteToLIDs.view_host();
5145 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host() );
5146 auto permuteFromLIDs_h = permuteFromLIDs.view_host();
5147
5148 std::vector<GO> srcGblColIndsScratch;
5149 std::vector<GO> tgtGblColIndsScratch;
5150 const LO numPermutes = static_cast<LO>(permuteToLIDs_h.extent(0));
5151
5152 execute_sync_host_uvm_access(); // protect host UVM access
5153 for (LO whichPermute = 0; whichPermute < numPermutes; ++whichPermute) {
5154 const LO srcLclRowInd = permuteFromLIDs_h[whichPermute];
5155 const GO srcGblRowInd = srcRowMap.getGlobalElement(srcLclRowInd);
5156 auto srcGblColInds = getRowGraphGlobalRow(
5157 srcGblColIndsScratch, source, srcGblRowInd);
5158 const LO tgtLclRowInd = permuteToLIDs_h[whichPermute];
5159 const GO tgtGblRowInd = tgtRowMap.getGlobalElement(tgtLclRowInd);
5160 auto tgtGblColInds = getRowGraphGlobalRow(
5161 tgtGblColIndsScratch, *this, tgtGblRowInd);
5162 padding.update_permute(whichPermute, tgtLclRowInd,
5163 tgtGblColInds.getRawPtr(),
5164 tgtGblColInds.size(), tgt_is_unique,
5165 srcGblColInds.getRawPtr(),
5166 srcGblColInds.size(), src_is_unique);
5167 }
5168
5169 if (verbose) {
5170 std::ostringstream os;
5171 os << *prefix << "Done" << endl;
5172 std::cerr << os.str();
5173 }
5174 }
5175
5176 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5177 std::unique_ptr<
5178 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5179 >
5180 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5181 computeCrsPaddingForImports(
5182 const Kokkos::DualView<const local_ordinal_type*,
5183 buffer_device_type>& importLIDs,
5184 Kokkos::DualView<packet_type*, buffer_device_type> imports,
5185 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5186 const bool verbose) const
5187 {
5188 using Details::Impl::getRowGraphGlobalRow;
5189 using std::endl;
5190 using LO = local_ordinal_type;
5191 using GO = global_ordinal_type;
5192 const char tfecfFuncName[] = "computeCrsPaddingForImports";
5193
5194 std::unique_ptr<std::string> prefix;
5195 if (verbose) {
5196 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5197 std::ostringstream os;
5198 os << *prefix << "importLIDs.extent(0): "
5199 << importLIDs.extent(0)
5200 << ", imports.extent(0): "
5201 << imports.extent(0)
5202 << ", numPacketsPerLID.extent(0): "
5203 << numPacketsPerLID.extent(0) << endl;
5204 std::cerr << os.str();
5205 }
5206
5207 const LO numImports = static_cast<LO>(importLIDs.extent(0));
5208 const int myRank = [&] () {
5209 auto comm = rowMap_.is_null() ? Teuchos::null :
5210 rowMap_->getComm();
5211 return comm.is_null() ? -1 : comm->getRank();
5212 } ();
5213 std::unique_ptr<padding_type> padding(
5214 new padding_type(myRank, numImports));
5215
5216 if (imports.need_sync_host()) {
5217 imports.sync_host();
5218 }
5219 auto imports_h = imports.view_host();
5220 if (numPacketsPerLID.need_sync_host ()) {
5221 numPacketsPerLID.sync_host();
5222 }
5223 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5224
5225 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5226 auto importLIDs_h = importLIDs.view_host();
5227
5228 const map_type& tgtRowMap = *rowMap_;
5229 // Always merge source column indices, since isMerged() is
5230 // per-process state, and we don't know its value on other
5231 // processes that sent us data.
5232 constexpr bool src_is_unique = false;
5233 const bool tgt_is_unique = isMerged();
5234
5235 std::vector<GO> tgtGblColIndsScratch;
5236 size_t offset = 0;
5237 execute_sync_host_uvm_access(); // protect host UVM access
5238 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5239 // CrsGraph packs just global column indices, while CrsMatrix
5240 // packs bytes (first the number of entries in the row, then the
5241 // global column indices, then other stuff like the matrix
5242 // values in that row).
5243 const LO origSrcNumEnt =
5244 static_cast<LO>(numPacketsPerLID_h[whichImport]);
5245 GO* const srcGblColInds = imports_h.data() + offset;
5246
5247 const LO tgtLclRowInd = importLIDs_h[whichImport];
5248 const GO tgtGblRowInd =
5249 tgtRowMap.getGlobalElement(tgtLclRowInd);
5250 auto tgtGblColInds = getRowGraphGlobalRow(
5251 tgtGblColIndsScratch, *this, tgtGblRowInd);
5252 const size_t origTgtNumEnt(tgtGblColInds.size());
5253
5254 padding->update_import(whichImport, tgtLclRowInd,
5255 tgtGblColInds.getRawPtr(),
5256 origTgtNumEnt, tgt_is_unique,
5257 srcGblColInds,
5258 origSrcNumEnt, src_is_unique);
5259 offset += origSrcNumEnt;
5260 }
5261
5262 if (verbose) {
5263 std::ostringstream os;
5264 os << *prefix << "Done" << endl;
5265 std::cerr << os.str();
5266 }
5267 return padding;
5268 }
5269
5270 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5271 std::unique_ptr<
5272 typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::padding_type
5273 >
5274 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5275 computePaddingForCrsMatrixUnpack(
5276 const Kokkos::DualView<const local_ordinal_type*,
5277 buffer_device_type>& importLIDs,
5278 Kokkos::DualView<char*, buffer_device_type> imports,
5279 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
5280 const bool verbose) const
5281 {
5282 using Details::Impl::getRowGraphGlobalRow;
5283 using Details::PackTraits;
5284 using std::endl;
5285 using LO = local_ordinal_type;
5286 using GO = global_ordinal_type;
5287 const char tfecfFuncName[] = "computePaddingForCrsMatrixUnpack";
5288
5289 std::unique_ptr<std::string> prefix;
5290 if (verbose) {
5291 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5292 std::ostringstream os;
5293 os << *prefix << "importLIDs.extent(0): "
5294 << importLIDs.extent(0)
5295 << ", imports.extent(0): "
5296 << imports.extent(0)
5297 << ", numPacketsPerLID.extent(0): "
5298 << numPacketsPerLID.extent(0) << endl;
5299 std::cerr << os.str();
5300 }
5301 const bool extraVerbose =
5302 verbose && Details::Behavior::verbose("CrsPadding");
5303
5304 const LO numImports = static_cast<LO>(importLIDs.extent(0));
5305 TEUCHOS_ASSERT( LO(numPacketsPerLID.extent(0)) >= numImports );
5306 const int myRank = [&] () {
5307 auto comm = rowMap_.is_null() ? Teuchos::null :
5308 rowMap_->getComm();
5309 return comm.is_null() ? -1 : comm->getRank();
5310 } ();
5311 std::unique_ptr<padding_type> padding(
5312 new padding_type(myRank, numImports));
5313
5314 if (imports.need_sync_host()) {
5315 imports.sync_host();
5316 }
5317 auto imports_h = imports.view_host();
5318 if (numPacketsPerLID.need_sync_host ()) {
5319 numPacketsPerLID.sync_host();
5320 }
5321 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
5322
5323 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
5324 auto importLIDs_h = importLIDs.view_host();
5325
5326 const map_type& tgtRowMap = *rowMap_;
5327 // Always merge source column indices, since isMerged() is
5328 // per-process state, and we don't know its value on other
5329 // processes that sent us data.
5330 constexpr bool src_is_unique = false;
5331 const bool tgt_is_unique = isMerged();
5332
5333 std::vector<GO> srcGblColIndsScratch;
5334 std::vector<GO> tgtGblColIndsScratch;
5335 size_t offset = 0;
5336 execute_sync_host_uvm_access(); // protect host UVM access
5337 for (LO whichImport = 0; whichImport < numImports; ++whichImport) {
5338 // CrsGraph packs just global column indices, while CrsMatrix
5339 // packs bytes (first the number of entries in the row, then the
5340 // global column indices, then other stuff like the matrix
5341 // values in that row).
5342 const size_t numBytes = numPacketsPerLID_h[whichImport];
5343 if (extraVerbose) {
5344 std::ostringstream os;
5345 os << *prefix << "whichImport=" << whichImport
5346 << ", numImports=" << numImports
5347 << ", numBytes=" << numBytes << endl;
5348 std::cerr << os.str();
5349 }
5350 if (numBytes == 0) {
5351 continue; // special case: no entries to unpack for this row
5352 }
5353 LO origSrcNumEnt = 0;
5354 const size_t numEntBeg = offset;
5355 const size_t numEntLen =
5356 PackTraits<LO>::packValueCount(origSrcNumEnt);
5357 TEUCHOS_ASSERT( numBytes >= numEntLen );
5358 TEUCHOS_ASSERT( imports_h.extent(0) >= numEntBeg + numEntLen );
5359 PackTraits<LO>::unpackValue(origSrcNumEnt,
5360 imports_h.data() + numEntBeg);
5361 if (extraVerbose) {
5362 std::ostringstream os;
5363 os << *prefix << "whichImport=" << whichImport
5364 << ", numImports=" << numImports
5365 << ", origSrcNumEnt=" << origSrcNumEnt << endl;
5366 std::cerr << os.str();
5367 }
5368 TEUCHOS_ASSERT( origSrcNumEnt >= LO(0) );
5369 TEUCHOS_ASSERT( numBytes >= size_t(numEntLen + origSrcNumEnt * sizeof(GO)) );
5370 const size_t gidsBeg = numEntBeg + numEntLen;
5371 if (srcGblColIndsScratch.size() < size_t(origSrcNumEnt)) {
5372 srcGblColIndsScratch.resize(origSrcNumEnt);
5373 }
5374 GO* const srcGblColInds = srcGblColIndsScratch.data();
5375 PackTraits<GO>::unpackArray(srcGblColInds,
5376 imports_h.data() + gidsBeg,
5377 origSrcNumEnt);
5378 const LO tgtLclRowInd = importLIDs_h[whichImport];
5379 const GO tgtGblRowInd =
5380 tgtRowMap.getGlobalElement(tgtLclRowInd);
5381 auto tgtGblColInds = getRowGraphGlobalRow(
5382 tgtGblColIndsScratch, *this, tgtGblRowInd);
5383 const size_t origNumTgtEnt(tgtGblColInds.size());
5384
5385 if (extraVerbose) {
5386 std::ostringstream os;
5387 os << *prefix << "whichImport=" << whichImport
5388 << ", numImports=" << numImports
5389 << ": Call padding->update_import" << endl;
5390 std::cerr << os.str();
5391 }
5392 padding->update_import(whichImport, tgtLclRowInd,
5393 tgtGblColInds.getRawPtr(),
5394 origNumTgtEnt, tgt_is_unique,
5395 srcGblColInds,
5396 origSrcNumEnt, src_is_unique);
5397 offset += numBytes;
5398 }
5399
5400 if (verbose) {
5401 std::ostringstream os;
5402 os << *prefix << "Done" << endl;
5403 std::cerr << os.str();
5404 }
5405 return padding;
5406 }
5407
5408 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5409 void
5410 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5411 packAndPrepare
5412 (const SrcDistObject& source,
5413 const Kokkos::DualView<const local_ordinal_type*,
5414 buffer_device_type>& exportLIDs,
5415 Kokkos::DualView<packet_type*,
5416 buffer_device_type>& exports,
5417 Kokkos::DualView<size_t*,
5418 buffer_device_type> numPacketsPerLID,
5419 size_t& constantNumPackets)
5420 {
5422 using GO = global_ordinal_type;
5423 using std::endl;
5424 using crs_graph_type =
5425 CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
5426 const char tfecfFuncName[] = "packAndPrepare: ";
5427 ProfilingRegion region_papn ("Tpetra::CrsGraph::packAndPrepare");
5428
5429 const bool verbose = verbose_;
5430 std::unique_ptr<std::string> prefix;
5431 if (verbose) {
5432 prefix = this->createPrefix("CrsGraph", "packAndPrepare");
5433 std::ostringstream os;
5434 os << *prefix << "Start" << endl;
5435 std::cerr << os.str();
5436 }
5437
5438 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5439 (exportLIDs.extent (0) != numPacketsPerLID.extent (0),
5440 std::runtime_error,
5441 "exportLIDs.extent(0) = " << exportLIDs.extent (0)
5442 << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0)
5443 << ".");
5444 const row_graph_type* srcRowGraphPtr =
5445 dynamic_cast<const row_graph_type*> (&source);
5446 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5447 (srcRowGraphPtr == nullptr, std::invalid_argument, "Source of an Export "
5448 "or Import operation to a CrsGraph must be a RowGraph with the same "
5449 "template parameters.");
5450 // We don't check whether src_graph has had fillComplete called,
5451 // because it doesn't matter whether the *source* graph has been
5452 // fillComplete'd. The target graph can not be fillComplete'd yet.
5453 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5454 (this->isFillComplete (), std::runtime_error,
5455 "The target graph of an Import or Export must not be fill complete.");
5456
5457 const crs_graph_type* srcCrsGraphPtr =
5458 dynamic_cast<const crs_graph_type*> (&source);
5459
5460 if (srcCrsGraphPtr == nullptr) {
5461 using Teuchos::ArrayView;
5462 using LO = local_ordinal_type;
5463
5464 if (verbose) {
5465 std::ostringstream os;
5466 os << *prefix << "Source is a RowGraph but not a CrsGraph"
5467 << endl;
5468 std::cerr << os.str();
5469 }
5470 // RowGraph::pack serves the "old" DistObject interface. It
5471 // takes Teuchos::ArrayView and Teuchos::Array&. The latter
5472 // entails deep-copying the exports buffer on output. RowGraph
5473 // is a convenience interface when not a CrsGraph, so we accept
5474 // the performance hit.
5475 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5476 auto exportLIDs_h = exportLIDs.view_host ();
5477 ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
5478 exportLIDs_h.extent (0));
5479 Teuchos::Array<GO> exports_a;
5480
5481 numPacketsPerLID.clear_sync_state ();
5482 numPacketsPerLID.modify_host ();
5483 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5484 ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
5485 numPacketsPerLID_h.extent (0));
5486 srcRowGraphPtr->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
5487 constantNumPackets);
5488 const size_t newSize = static_cast<size_t> (exports_a.size ());
5489 if (static_cast<size_t> (exports.extent (0)) != newSize) {
5490 using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
5491 exports = exports_dv_type ("exports", newSize);
5492 }
5493 Kokkos::View<const packet_type*, Kokkos::HostSpace,
5494 Kokkos::MemoryUnmanaged> exports_a_h (exports_a.getRawPtr (), newSize);
5495 exports.clear_sync_state ();
5496 exports.modify_host ();
5497 // DEEP_COPY REVIEW - NOT TESTED
5498 Kokkos::deep_copy (exports.view_host (), exports_a_h);
5499 }
5500 // packCrsGraphNew requires k_rowPtrsPacked_ to be set
5501 else if (! getColMap ().is_null () &&
5502 (rowPtrsPacked_dev_.extent (0) != 0 ||
5503 getRowMap ()->getLocalNumElements () == 0)) {
5504 if (verbose) {
5505 std::ostringstream os;
5506 os << *prefix << "packCrsGraphNew path" << endl;
5507 std::cerr << os.str();
5508 }
5509 using export_pids_type =
5510 Kokkos::DualView<const int*, buffer_device_type>;
5511 export_pids_type exportPIDs; // not filling it; needed for syntax
5512 using LO = local_ordinal_type;
5513 using NT = node_type;
5515 packCrsGraphNew<LO,GO,NT> (*srcCrsGraphPtr, exportLIDs, exportPIDs,
5516 exports, numPacketsPerLID,
5517 constantNumPackets, false);
5518 }
5519 else {
5520 srcCrsGraphPtr->packFillActiveNew (exportLIDs, exports, numPacketsPerLID,
5521 constantNumPackets);
5522 }
5523
5524 if (verbose) {
5525 std::ostringstream os;
5526 os << *prefix << "Done" << endl;
5527 std::cerr << os.str();
5528 }
5529 }
5530
5531 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5532 void
5534 pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5535 Teuchos::Array<GlobalOrdinal>& exports,
5536 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5537 size_t& constantNumPackets) const
5538 {
5539 auto col_map = this->getColMap();
5540 // packCrsGraph requires k_rowPtrsPacked to be set
5541 if( !col_map.is_null() && (rowPtrsPacked_dev_.extent(0) != 0 || getRowMap()->getLocalNumElements() ==0)) {
5543 packCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(*this, exports, numPacketsPerLID,
5544 exportLIDs, constantNumPackets);
5545 }
5546 else {
5547 this->packFillActive(exportLIDs, exports, numPacketsPerLID,
5548 constantNumPackets);
5549 }
5550 }
5551
5552 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5553 void
5555 packFillActive (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
5556 Teuchos::Array<GlobalOrdinal>& exports,
5557 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
5558 size_t& constantNumPackets) const
5559 {
5560 using std::endl;
5561 using LO = LocalOrdinal;
5562 using GO = GlobalOrdinal;
5563 using host_execution_space =
5564 typename Kokkos::View<size_t*, device_type>::
5565 HostMirror::execution_space;
5566 const char tfecfFuncName[] = "packFillActive: ";
5567 const bool verbose = verbose_;
5568
5569 const auto numExportLIDs = exportLIDs.size ();
5570 std::unique_ptr<std::string> prefix;
5571 if (verbose) {
5572 prefix = this->createPrefix("CrsGraph", "allocateIndices");
5573 std::ostringstream os;
5574 os << *prefix << "numExportLIDs=" << numExportLIDs << endl;
5575 std::cerr << os.str();
5576 }
5577 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5578 (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
5579 "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
5580 " = " << numPacketsPerLID.size () << ".");
5581
5582 const map_type& rowMap = * (this->getRowMap ());
5583 const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5584 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5585 (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5586 "This graph claims to be locally indexed, but its column Map is nullptr. "
5587 "This should never happen. Please report this bug to the Tpetra "
5588 "developers.");
5589
5590 // We may pack different amounts of data for different rows.
5591 constantNumPackets = 0;
5592
5593 // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
5594 // it might be now, but we might as well be safe).
5595 size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
5596 const LO* const exportLIDs_raw = exportLIDs.getRawPtr ();
5597
5598 // Count the total number of packets (column indices, in the case
5599 // of a CrsGraph) to pack. While doing so, set
5600 // numPacketsPerLID[i] to the number of entries owned by the
5601 // calling process in (local) row exportLIDs[i] of the graph, that
5602 // the caller wants us to send out.
5603 Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
5604 size_t totalNumPackets = 0;
5605 size_t errCount = 0;
5606 // lambdas turn what they capture const, so we can't
5607 // atomic_add(&errCount,1). Instead, we need a View to modify.
5608 typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
5609 host_device_type;
5610 Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5611 constexpr size_t ONE = 1;
5612
5613 execute_sync_host_uvm_access(); // protect host UVM access
5614 Kokkos::parallel_reduce ("Tpetra::CrsGraph::pack: totalNumPackets",
5615 inputRange,
5616 [=] (const LO& i, size_t& curTotalNumPackets) {
5617 const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5618 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5619 Kokkos::atomic_add (&errCountView(), ONE);
5620 numPacketsPerLID_raw[i] = 0;
5621 }
5622 else {
5623 const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5624 numPacketsPerLID_raw[i] = numEnt;
5625 curTotalNumPackets += numEnt;
5626 }
5627 },
5628 totalNumPackets);
5629
5630 if (verbose) {
5631 std::ostringstream os;
5632 os << *prefix << "totalNumPackets=" << totalNumPackets << endl;
5633 std::cerr << os.str();
5634 }
5635 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5636 (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5637 "one or more errors! errCount = " << errCount
5638 << ", totalNumPackets = " << totalNumPackets << ".");
5639 errCount = 0;
5640
5641 // Allocate space for all the column indices to pack.
5642 exports.resize (totalNumPackets);
5643
5644 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5645 (! this->supportsRowViews (), std::logic_error,
5646 "this->supportsRowViews() returns false; this should never happen. "
5647 "Please report this bug to the Tpetra developers.");
5648
5649 // Loop again over the rows to export, and pack rows of indices
5650 // into the output buffer.
5651
5652 if (verbose) {
5653 std::ostringstream os;
5654 os << *prefix << "Pack into exports" << endl;
5655 std::cerr << os.str();
5656 }
5657
5658 // Teuchos::ArrayView may not be thread safe, or may not be
5659 // efficiently thread safe. Better to use the raw pointer.
5660 GO* const exports_raw = exports.getRawPtr ();
5661 errCount = 0;
5662 Kokkos::parallel_scan ("Tpetra::CrsGraph::pack: pack from views",
5663 inputRange, [=, &prefix]
5664 (const LO i, size_t& exportsOffset, const bool final) {
5665 const size_t curOffset = exportsOffset;
5666 const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
5667 const RowInfo rowInfo =
5668 this->getRowInfoFromGlobalRowIndex (gblRow);
5669
5670 using TDO = Tpetra::Details::OrdinalTraits<size_t>;
5671 if (rowInfo.localRow == TDO::invalid ()) {
5672 if (verbose) {
5673 std::ostringstream os;
5674 os << *prefix << ": INVALID rowInfo: i=" << i
5675 << ", lclRow=" << exportLIDs_raw[i] << endl;
5676 std::cerr << os.str();
5677 }
5678 Kokkos::atomic_add (&errCountView(), ONE);
5679 }
5680 else if (curOffset + rowInfo.numEntries > totalNumPackets) {
5681 if (verbose) {
5682 std::ostringstream os;
5683 os << *prefix << ": UH OH! For i=" << i << ", lclRow="
5684 << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
5685 "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
5686 << ") > totalNumPackets (= " << totalNumPackets << ")."
5687 << endl;
5688 std::cerr << os.str();
5689 }
5690 Kokkos::atomic_add (&errCountView(), ONE);
5691 }
5692 else {
5693 const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5694 if (this->isLocallyIndexed ()) {
5695 auto lclColInds = getLocalIndsViewHost (rowInfo);
5696 if (final) {
5697 for (LO k = 0; k < numEnt; ++k) {
5698 const LO lclColInd = lclColInds(k);
5699 const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5700 // Pack it, even if it's wrong. Let the receiving
5701 // process deal with it. Otherwise, we'll miss out
5702 // on any correct data.
5703 exports_raw[curOffset + k] = gblColInd;
5704 } // for each entry in the row
5705 } // final pass?
5706 exportsOffset = curOffset + numEnt;
5707 }
5708 else if (this->isGloballyIndexed ()) {
5709 auto gblColInds = getGlobalIndsViewHost (rowInfo);
5710 if (final) {
5711 for (LO k = 0; k < numEnt; ++k) {
5712 const GO gblColInd = gblColInds(k);
5713 // Pack it, even if it's wrong. Let the receiving
5714 // process deal with it. Otherwise, we'll miss out
5715 // on any correct data.
5716 exports_raw[curOffset + k] = gblColInd;
5717 } // for each entry in the row
5718 } // final pass?
5719 exportsOffset = curOffset + numEnt;
5720 }
5721 // If neither globally nor locally indexed, then the graph
5722 // has no entries in this row (or indeed, in any row on this
5723 // process) to pack.
5724 }
5725 });
5726
5727 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5728 (errCount != 0, std::logic_error, "Packing encountered "
5729 "one or more errors! errCount = " << errCount
5730 << ", totalNumPackets = " << totalNumPackets << ".");
5731
5732 if (verbose) {
5733 std::ostringstream os;
5734 os << *prefix << "Done" << endl;
5735 std::cerr << os.str();
5736 }
5737 }
5738
5739 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5740 void
5741 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5742 packFillActiveNew (const Kokkos::DualView<const local_ordinal_type*,
5743 buffer_device_type>& exportLIDs,
5744 Kokkos::DualView<packet_type*,
5745 buffer_device_type>& exports,
5746 Kokkos::DualView<size_t*,
5747 buffer_device_type> numPacketsPerLID,
5748 size_t& constantNumPackets) const
5749 {
5750 using std::endl;
5751 using LO = local_ordinal_type;
5752 using GO = global_ordinal_type;
5753 using host_execution_space = typename Kokkos::View<size_t*,
5754 device_type>::HostMirror::execution_space;
5755 using host_device_type =
5756 Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
5757 using exports_dv_type =
5758 Kokkos::DualView<packet_type*, buffer_device_type>;
5759 const char tfecfFuncName[] = "packFillActiveNew: ";
5760 const bool verbose = verbose_;
5761
5762 const auto numExportLIDs = exportLIDs.extent (0);
5763 std::unique_ptr<std::string> prefix;
5764 if (verbose) {
5765 prefix = this->createPrefix("CrsGraph", "packFillActiveNew");
5766 std::ostringstream os;
5767 os << *prefix << "numExportLIDs: " << numExportLIDs
5768 << ", numPacketsPerLID.extent(0): "
5769 << numPacketsPerLID.extent(0) << endl;
5770 std::cerr << os.str();
5771 }
5772 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5773 (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error,
5774 "exportLIDs.extent(0) = " << numExportLIDs
5775 << " != numPacketsPerLID.extent(0) = "
5776 << numPacketsPerLID.extent (0) << ".");
5777 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
5778 auto exportLIDs_h = exportLIDs.view_host ();
5779
5780 const map_type& rowMap = * (this->getRowMap ());
5781 const map_type* const colMapPtr = this->colMap_.getRawPtr ();
5782 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5783 (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
5784 "This graph claims to be locally indexed, but its column Map is nullptr. "
5785 "This should never happen. Please report this bug to the Tpetra "
5786 "developers.");
5787
5788 // We may pack different amounts of data for different rows.
5789 constantNumPackets = 0;
5790
5791 numPacketsPerLID.clear_sync_state ();
5792 numPacketsPerLID.modify_host ();
5793 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
5794
5795 // Count the total number of packets (column indices, in the case
5796 // of a CrsGraph) to pack. While doing so, set
5797 // numPacketsPerLID[i] to the number of entries owned by the
5798 // calling process in (local) row exportLIDs[i] of the graph, that
5799 // the caller wants us to send out.
5800 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
5801 range_type inputRange (0, numExportLIDs);
5802 size_t totalNumPackets = 0;
5803 size_t errCount = 0;
5804 // lambdas turn what they capture const, so we can't
5805 // atomic_add(&errCount,1). Instead, we need a View to modify.
5806 Kokkos::View<size_t, host_device_type> errCountView (&errCount);
5807 constexpr size_t ONE = 1;
5808
5809 if (verbose) {
5810 std::ostringstream os;
5811 os << *prefix << "Compute totalNumPackets" << endl;
5812 std::cerr << os.str ();
5813 }
5814
5815 execute_sync_host_uvm_access(); // protect host UVM access
5816 Kokkos::parallel_reduce
5817 ("Tpetra::CrsGraph::pack: totalNumPackets",
5818 inputRange,
5819 [=, &prefix] (const LO i, size_t& curTotalNumPackets) {
5820 const LO lclRow = exportLIDs_h[i];
5821 const GO gblRow = rowMap.getGlobalElement (lclRow);
5822 if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
5823 if (verbose) {
5824 std::ostringstream os;
5825 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5826 << " not in row Map on this process" << endl;
5827 std::cerr << os.str();
5828 }
5829 Kokkos::atomic_add (&errCountView(), ONE);
5830 numPacketsPerLID_h(i) = 0;
5831 }
5832 else {
5833 const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
5834 numPacketsPerLID_h(i) = numEnt;
5835 curTotalNumPackets += numEnt;
5836 }
5837 },
5838 totalNumPackets);
5839
5840 if (verbose) {
5841 std::ostringstream os;
5842 os << *prefix << "totalNumPackets: " << totalNumPackets
5843 << ", errCount: " << errCount << endl;
5844 std::cerr << os.str ();
5845 }
5846 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5847 (errCount != 0, std::logic_error, "totalNumPackets count encountered "
5848 "one or more errors! totalNumPackets: " << totalNumPackets
5849 << ", errCount: " << errCount << ".");
5850
5851 // Allocate space for all the column indices to pack.
5852 if (size_t(exports.extent (0)) < totalNumPackets) {
5853 // FIXME (mfh 09 Apr 2019) Create without initializing.
5854 exports = exports_dv_type ("exports", totalNumPackets);
5855 }
5856
5857 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5858 (! this->supportsRowViews (), std::logic_error,
5859 "this->supportsRowViews() returns false; this should never happen. "
5860 "Please report this bug to the Tpetra developers.");
5861
5862 // Loop again over the rows to export, and pack rows of indices
5863 // into the output buffer.
5864
5865 if (verbose) {
5866 std::ostringstream os;
5867 os << *prefix << "Pack into exports buffer" << endl;
5868 std::cerr << os.str();
5869 }
5870
5871 exports.clear_sync_state ();
5872 exports.modify_host ();
5873 auto exports_h = exports.view_host ();
5874
5875 errCount = 0;
5876 Kokkos::parallel_scan
5877 ("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
5878 inputRange, [=, &prefix]
5879 (const LO i, size_t& exportsOffset, const bool final) {
5880 const size_t curOffset = exportsOffset;
5881 const LO lclRow = exportLIDs_h(i);
5882 const GO gblRow = rowMap.getGlobalElement (lclRow);
5883 if (gblRow == Details::OrdinalTraits<GO>::invalid ()) {
5884 if (verbose) {
5885 std::ostringstream os;
5886 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5887 << " not in row Map on this process" << endl;
5888 std::cerr << os.str();
5889 }
5890 Kokkos::atomic_add (&errCountView(), ONE);
5891 return;
5892 }
5893
5894 const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
5895 if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid ()) {
5896 if (verbose) {
5897 std::ostringstream os;
5898 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5899 << ", gblRow=" << gblRow << ": invalid rowInfo"
5900 << endl;
5901 std::cerr << os.str();
5902 }
5903 Kokkos::atomic_add (&errCountView(), ONE);
5904 return;
5905 }
5906
5907 if (curOffset + rowInfo.numEntries > totalNumPackets) {
5908 if (verbose) {
5909 std::ostringstream os;
5910 os << *prefix << "For i=" << i << ", lclRow=" << lclRow
5911 << ", gblRow=" << gblRow << ", curOffset (= "
5912 << curOffset << ") + numEnt (= " << rowInfo.numEntries
5913 << ") > totalNumPackets (= " << totalNumPackets
5914 << ")." << endl;
5915 std::cerr << os.str();
5916 }
5917 Kokkos::atomic_add (&errCountView(), ONE);
5918 return;
5919 }
5920
5921 const LO numEnt = static_cast<LO> (rowInfo.numEntries);
5922 if (this->isLocallyIndexed ()) {
5923 auto lclColInds = getLocalIndsViewHost(rowInfo);
5924 if (final) {
5925 for (LO k = 0; k < numEnt; ++k) {
5926 const LO lclColInd = lclColInds(k);
5927 const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
5928 // Pack it, even if it's wrong. Let the receiving
5929 // process deal with it. Otherwise, we'll miss out
5930 // on any correct data.
5931 exports_h(curOffset + k) = gblColInd;
5932 } // for each entry in the row
5933 } // final pass?
5934 exportsOffset = curOffset + numEnt;
5935 }
5936 else if (this->isGloballyIndexed ()) {
5937 auto gblColInds = getGlobalIndsViewHost(rowInfo);
5938 if (final) {
5939 for (LO k = 0; k < numEnt; ++k) {
5940 const GO gblColInd = gblColInds(k);
5941 // Pack it, even if it's wrong. Let the receiving
5942 // process deal with it. Otherwise, we'll miss out
5943 // on any correct data.
5944 exports_h(curOffset + k) = gblColInd;
5945 } // for each entry in the row
5946 } // final pass?
5947 exportsOffset = curOffset + numEnt;
5948 }
5949 // If neither globally nor locally indexed, then the graph
5950 // has no entries in this row (or indeed, in any row on this
5951 // process) to pack.
5952 });
5953
5954 // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5955 // (errCount != 0, std::logic_error, "Packing encountered "
5956 // "one or more errors! errCount = " << errCount
5957 // << ", totalNumPackets = " << totalNumPackets << ".");
5958
5959 if (verbose) {
5960 std::ostringstream os;
5961 os << *prefix << "errCount=" << errCount << "; Done" << endl;
5962 std::cerr << os.str();
5963 }
5964 }
5965
5966 template <class LocalOrdinal, class GlobalOrdinal, class Node>
5967 void
5968 CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5969 unpackAndCombine
5970 (const Kokkos::DualView<const local_ordinal_type*,
5971 buffer_device_type>& importLIDs,
5972 Kokkos::DualView<packet_type*,
5973 buffer_device_type> imports,
5974 Kokkos::DualView<size_t*,
5975 buffer_device_type> numPacketsPerLID,
5976 const size_t /* constantNumPackets */,
5977 const CombineMode /* combineMode */ )
5978 {
5979 using Details::ProfilingRegion;
5980 using std::endl;
5981 using LO = local_ordinal_type;
5982 using GO = global_ordinal_type;
5983 const char tfecfFuncName[] = "unpackAndCombine";
5984
5985 ProfilingRegion regionCGC("Tpetra::CrsGraph::unpackAndCombine");
5986 const bool verbose = verbose_;
5987
5988 std::unique_ptr<std::string> prefix;
5989 if (verbose) {
5990 prefix = this->createPrefix("CrsGraph", tfecfFuncName);
5991 std::ostringstream os;
5992 os << *prefix << "Start" << endl;
5993 std::cerr << os.str ();
5994 }
5995 {
5996 auto padding = computeCrsPaddingForImports(
5997 importLIDs, imports, numPacketsPerLID, verbose);
5998 applyCrsPadding(*padding, verbose);
5999 if (verbose) {
6000 std::ostringstream os;
6001 os << *prefix << "Done computing & applying padding" << endl;
6002 std::cerr << os.str ();
6003 }
6004 }
6005
6006 // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
6007 // reasonable meaning, whether or not the matrix is fill complete.
6008 // It's just more work to implement.
6009
6010 // We are not checking the value of the CombineMode input
6011 // argument. For CrsGraph, we only support import/export
6012 // operations if fillComplete has not yet been called. Any
6013 // incoming column-indices are inserted into the target graph. In
6014 // this context, CombineMode values of ADD vs INSERT are
6015 // equivalent. What is the meaning of REPLACE for CrsGraph? If a
6016 // duplicate column-index is inserted, it will be compressed out
6017 // when fillComplete is called.
6018 //
6019 // Note: I think REPLACE means that an existing row is replaced by
6020 // the imported row, i.e., the existing indices are cleared. CGB,
6021 // 6/17/2010
6022
6023 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6024 (importLIDs.extent (0) != numPacketsPerLID.extent (0),
6025 std::runtime_error, ": importLIDs.extent(0) = "
6026 << importLIDs.extent (0) << " != numPacketsPerLID.extent(0) = "
6027 << numPacketsPerLID.extent (0) << ".");
6028 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6029 (isFillComplete (), std::runtime_error,
6030 ": Import or Export operations are not allowed on a target "
6031 "CrsGraph that is fillComplete.");
6032
6033 const size_t numImportLIDs(importLIDs.extent(0));
6034 if (numPacketsPerLID.need_sync_host()) {
6035 numPacketsPerLID.sync_host();
6036 }
6037 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
6038 if (imports.need_sync_host()) {
6039 imports.sync_host();
6040 }
6041 auto imports_h = imports.view_host();
6042 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
6043 auto importLIDs_h = importLIDs.view_host();
6044
6045 // If we're inserting in local indices, let's pre-allocate
6046 Teuchos::Array<LO> lclColInds;
6047 if (isLocallyIndexed()) {
6048 if (verbose) {
6049 std::ostringstream os;
6050 os << *prefix << "Preallocate local indices scratch" << endl;
6051 std::cerr << os.str();
6052 }
6053 size_t maxNumInserts = 0;
6054 for (size_t i = 0; i < numImportLIDs; ++i) {
6055 maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]);
6056 }
6057 if (verbose) {
6058 std::ostringstream os;
6059 os << *prefix << "Local indices scratch size: "
6060 << maxNumInserts << endl;
6061 std::cerr << os.str();
6062 }
6063 lclColInds.resize (maxNumInserts);
6064 }
6065 else {
6066 if (verbose) {
6067 std::ostringstream os;
6068 os << *prefix;
6069 if (isGloballyIndexed()) {
6070 os << "Graph is globally indexed";
6071 }
6072 else {
6073 os << "Graph is neither locally nor globally indexed";
6074 }
6075 os << endl;
6076 std::cerr << os.str();
6077 }
6078 }
6079
6080 TEUCHOS_ASSERT( ! rowMap_.is_null() );
6081 const map_type& rowMap = *rowMap_;
6082
6083 try {
6084 size_t importsOffset = 0;
6085 for (size_t i = 0; i < numImportLIDs; ++i) {
6086 if (verbose) {
6087 std::ostringstream os;
6088 os << *prefix << "i=" << i << ", numImportLIDs="
6089 << numImportLIDs << endl;
6090 std::cerr << os.str();
6091 }
6092 // We can only unpack into owned rows, since we only have
6093 // local row indices.
6094 const LO lclRow = importLIDs_h[i];
6095 const GO gblRow = rowMap.getGlobalElement(lclRow);
6096 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6097 (gblRow == Teuchos::OrdinalTraits<GO>::invalid(),
6098 std::logic_error, "importLIDs[i=" << i << "]="
6099 << lclRow << " is not in the row Map on the calling "
6100 "process.");
6101 const LO numEnt = numPacketsPerLID_h[i];
6102 const GO* const gblColInds = (numEnt == 0) ? nullptr :
6103 imports_h.data() + importsOffset;
6104 if (! isLocallyIndexed()) {
6105 insertGlobalIndicesFiltered(lclRow, gblColInds, numEnt);
6106 }
6107 else {
6108 // FIXME (mfh 09 Feb 2020) Now would be a good time to do
6109 // column Map filtering.
6110 for (LO j = 0; j < numEnt; j++) {
6111 lclColInds[j] = colMap_->getLocalElement(gblColInds[j]);
6112 }
6113 insertLocalIndices(lclRow, numEnt, lclColInds.data());
6114 }
6115 importsOffset += numEnt;
6116 }
6117 }
6118 catch (std::exception& e) {
6119 TEUCHOS_TEST_FOR_EXCEPTION
6120 (true, std::runtime_error,
6121 "Tpetra::CrsGraph::unpackAndCombine: Insert loop threw an "
6122 "exception: " << endl << e.what());
6123 }
6124
6125 if (verbose) {
6126 std::ostringstream os;
6127 os << *prefix << "Done" << endl;
6128 std::cerr << os.str();
6129 }
6130 }
6131
6132 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6133 void
6135 removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6136 {
6137 using Teuchos::Comm;
6138 using Teuchos::null;
6139 using Teuchos::ParameterList;
6140 using Teuchos::RCP;
6141
6142 // We'll set all the state "transactionally," so that this method
6143 // satisfies the strong exception guarantee. This object's state
6144 // won't be modified until the end of this method.
6145 RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
6146 RCP<import_type> importer;
6147 RCP<export_type> exporter;
6148
6149 rowMap = newMap;
6150 RCP<const Comm<int> > newComm =
6151 (newMap.is_null ()) ? null : newMap->getComm ();
6152
6153 if (! domainMap_.is_null ()) {
6154 if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6155 // Common case: original domain and row Maps are identical.
6156 // In that case, we need only replace the original domain Map
6157 // with the new Map. This ensures that the new domain and row
6158 // Maps _stay_ identical.
6159 domainMap = newMap;
6160 } else {
6161 domainMap = domainMap_->replaceCommWithSubset (newComm);
6162 }
6163 }
6164 if (! rangeMap_.is_null ()) {
6165 if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6166 // Common case: original range and row Maps are identical. In
6167 // that case, we need only replace the original range Map with
6168 // the new Map. This ensures that the new range and row Maps
6169 // _stay_ identical.
6170 rangeMap = newMap;
6171 } else {
6172 rangeMap = rangeMap_->replaceCommWithSubset (newComm);
6173 }
6174 }
6175 if (! colMap_.is_null ()) {
6176 colMap = colMap_->replaceCommWithSubset (newComm);
6177 }
6178
6179 // (Re)create the Export and / or Import if necessary.
6180 if (! newComm.is_null ()) {
6181 RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
6182 //
6183 // The operations below are collective on the new communicator.
6184 //
6185 // (Re)create the Export object if necessary. If I haven't
6186 // called fillComplete yet, I don't have a rangeMap, so I must
6187 // first check if the _original_ rangeMap is not null. Ditto
6188 // for the Import object and the domain Map.
6189 if (! rangeMap_.is_null () &&
6190 rangeMap != rowMap &&
6191 ! rangeMap->isSameAs (*rowMap)) {
6192 if (params.is_null () || ! params->isSublist ("Export")) {
6193 exporter = rcp (new export_type (rowMap, rangeMap));
6194 }
6195 else {
6196 RCP<ParameterList> exportSublist = sublist (params, "Export", true);
6197 exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
6198 }
6199 }
6200 // (Re)create the Import object if necessary.
6201 if (! domainMap_.is_null () &&
6202 domainMap != colMap &&
6203 ! domainMap->isSameAs (*colMap)) {
6204 if (params.is_null () || ! params->isSublist ("Import")) {
6205 importer = rcp (new import_type (domainMap, colMap));
6206 } else {
6207 RCP<ParameterList> importSublist = sublist (params, "Import", true);
6208 importer = rcp (new import_type (domainMap, colMap, importSublist));
6209 }
6210 }
6211 } // if newComm is not null
6212
6213 // Defer side effects until the end. If no destructors throw
6214 // exceptions (they shouldn't anyway), then this method satisfies
6215 // the strong exception guarantee.
6216 exporter_ = exporter;
6217 importer_ = importer;
6218 rowMap_ = rowMap;
6219 // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
6220 // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
6221 // the same object. We might want to get rid of this redundant
6222 // pointer sometime, but for now, we'll leave it alone and just
6223 // set map_ to the same object.
6224 this->map_ = rowMap;
6225 domainMap_ = domainMap;
6226 rangeMap_ = rangeMap;
6227 colMap_ = colMap;
6228 }
6229
6230 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6231 void
6233 getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
6234 {
6235 using std::endl;
6236 using LO = LocalOrdinal;
6237 using GO = GlobalOrdinal;
6238 const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6239 const bool verbose = verbose_;
6240
6241 std::unique_ptr<std::string> prefix;
6242 if (verbose) {
6243 prefix = this->createPrefix("CrsGraph", "getLocalDiagOffsets");
6244 std::ostringstream os;
6245 os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6246 << endl;
6247 std::cerr << os.str();
6248 }
6249
6250 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6251 (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6252 const LO lclNumRows = static_cast<LO> (this->getLocalNumRows ());
6253 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6254 (static_cast<LO> (offsets.extent (0)) < lclNumRows,
6255 std::invalid_argument, "offsets.extent(0) = " <<
6256 offsets.extent (0) << " < getLocalNumRows() = " << lclNumRows << ".");
6257
6258 const map_type& rowMap = * (this->getRowMap ());
6259 const map_type& colMap = * (this->getColMap ());
6260
6261 // We only use these in debug mode, but since debug mode is a
6262 // run-time option, they need to exist here. That's why we create
6263 // the vector with explicit size zero, to avoid overhead if debug
6264 // mode is off.
6265 bool allRowMapDiagEntriesInColMap = true;
6266 bool allDiagEntriesFound = true;
6267 bool allOffsetsCorrect = true;
6268 bool noOtherWeirdness = true;
6269 using wrong_offsets_type = std::vector<std::pair<LO, size_t> >;
6270 wrong_offsets_type wrongOffsets(0);
6271
6272 // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6273 // the subset of Map functionality that we need below.
6274 auto lclRowMap = rowMap.getLocalMap ();
6275 auto lclColMap = colMap.getLocalMap ();
6276
6277 // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6278 // setup, at least on the host. For CUDA, we have to use LocalMap
6279 // (that comes from each of the two Maps).
6280
6281 const bool sorted = this->isSorted ();
6282 if (isFillComplete ()) {
6283 auto lclGraph = this->getLocalGraphDevice ();
6284 ::Tpetra::Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
6285 lclGraph.row_map,
6286 lclGraph.entries, sorted);
6287 }
6288 else {
6289 // NOTE (mfh 22 Feb 2017): We have to run this code on host,
6290 // since the graph is not fill complete. The previous version
6291 // of this code assumed UVM; this version does not.
6292 auto offsets_h = Kokkos::create_mirror_view (offsets);
6293
6294 for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
6295 // Find the diagonal entry. Since the row Map and column Map
6296 // may differ, we have to compare global row and column
6297 // indices, not local.
6298 const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
6299 const GO gblColInd = gblRowInd;
6300 const LO lclColInd = lclColMap.getLocalElement (gblColInd);
6301
6302 if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6303 allRowMapDiagEntriesInColMap = false;
6304 offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6305 }
6306 else {
6307 const RowInfo rowInfo = this->getRowInfo (lclRowInd);
6308 if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
6309 rowInfo.numEntries > 0) {
6310
6311 auto colInds = this->getLocalIndsViewHost (rowInfo);
6312 const size_t hint = 0; // not needed for this algorithm
6313 const size_t offset =
6314 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
6315 lclColInd, hint, sorted);
6316 offsets_h(lclRowInd) = offset;
6317
6318 if (debug_) {
6319 // Now that we have what we think is an offset, make sure
6320 // that it really does point to the diagonal entry. Offsets
6321 // are _relative_ to each row, not absolute (for the whole
6322 // (local) graph).
6323 typename local_inds_dualv_type::t_host::const_type lclColInds;
6324 try {
6325 lclColInds = this->getLocalIndsViewHost (rowInfo);
6326 }
6327 catch (...) {
6328 noOtherWeirdness = false;
6329 }
6330 // Don't continue with error checking if the above failed.
6331 if (noOtherWeirdness) {
6332 const size_t numEnt = lclColInds.extent (0);
6333 if (offset >= numEnt) {
6334 // Offsets are relative to each row, so this means that
6335 // the offset is out of bounds.
6336 allOffsetsCorrect = false;
6337 wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6338 } else {
6339 const LO actualLclColInd = lclColInds(offset);
6340 const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
6341 if (actualGblColInd != gblColInd) {
6342 allOffsetsCorrect = false;
6343 wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6344 }
6345 }
6346 }
6347 } // debug_
6348 }
6349 else { // either row is empty, or something went wrong w/ getRowInfo()
6350 offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6351 allDiagEntriesFound = false;
6352 }
6353 } // whether lclColInd is a valid local column index
6354 } // for each local row
6355 // DEEP_COPY REVIEW - NOT TESTED
6356 Kokkos::deep_copy (offsets, offsets_h);
6357 } // whether the graph is fill complete
6358
6359 if (verbose && wrongOffsets.size () != 0) {
6360 std::ostringstream os;
6361 os << *prefix << "Wrong offsets: [";
6362 for (size_t k = 0; k < wrongOffsets.size (); ++k) {
6363 os << "(" << wrongOffsets[k].first << ","
6364 << wrongOffsets[k].second << ")";
6365 if (k + 1 < wrongOffsets.size ()) {
6366 os << ", ";
6367 }
6368 }
6369 os << "]" << endl;
6370 std::cerr << os.str();
6371 }
6372
6373 if (debug_) {
6374 using Teuchos::reduceAll;
6375 using std::endl;
6376 Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
6377 const bool localSuccess =
6378 allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
6379 const int numResults = 5;
6380 int lclResults[5];
6381 lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
6382 lclResults[1] = allDiagEntriesFound ? 1 : 0;
6383 lclResults[2] = allOffsetsCorrect ? 1 : 0;
6384 lclResults[3] = noOtherWeirdness ? 1 : 0;
6385 // min-all-reduce will compute least rank of all the processes
6386 // that didn't succeed.
6387 lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
6388
6389 int gblResults[5];
6390 gblResults[0] = 0;
6391 gblResults[1] = 0;
6392 gblResults[2] = 0;
6393 gblResults[3] = 0;
6394 gblResults[4] = 0;
6395 reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
6396 numResults, lclResults, gblResults);
6397
6398 if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
6399 || gblResults[3] != 1) {
6400 std::ostringstream os; // build error message
6401 os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
6402 "possibly among others): " << endl;
6403 if (gblResults[0] == 0) {
6404 os << " - The column Map does not contain at least one diagonal entry "
6405 "of the graph." << endl;
6406 }
6407 if (gblResults[1] == 0) {
6408 os << " - On one or more processes, some row does not contain a "
6409 "diagonal entry." << endl;
6410 }
6411 if (gblResults[2] == 0) {
6412 os << " - On one or more processes, some offsets are incorrect."
6413 << endl;
6414 }
6415 if (gblResults[3] == 0) {
6416 os << " - One or more processes had some other error."
6417 << endl;
6418 }
6419 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
6420 }
6421 } // debug_
6422 }
6423
6424 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6425 void
6427 getLocalOffRankOffsets (offset_device_view_type& offsets) const
6428 {
6429 using std::endl;
6430 const char tfecfFuncName[] = "getLocalOffRankOffsets: ";
6431 const bool verbose = verbose_;
6432
6433 std::unique_ptr<std::string> prefix;
6434 if (verbose) {
6435 prefix = this->createPrefix("CrsGraph", "getLocalOffRankOffsets");
6436 std::ostringstream os;
6437 os << *prefix << "offsets.extent(0)=" << offsets.extent(0)
6438 << endl;
6439 std::cerr << os.str();
6440 }
6441
6442 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6443 (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6444 // Instead of throwing, we could also copy the rowPtr to k_offRankOffsets_.
6445
6446 const size_t lclNumRows = this->getLocalNumRows ();
6447
6448 if (haveLocalOffRankOffsets_ && k_offRankOffsets_.extent(0) == lclNumRows+1) {
6449 offsets = k_offRankOffsets_;
6450 return;
6451 }
6452 haveLocalOffRankOffsets_ = false;
6453 k_offRankOffsets_ = offset_device_view_type(Kokkos::ViewAllocateWithoutInitializing("offRankOffset"), lclNumRows+1);
6454 offsets = k_offRankOffsets_;
6455
6456 const map_type& colMap = * (this->getColMap ());
6457 const map_type& domMap = * (this->getDomainMap ());
6458
6459 // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6460 // the subset of Map functionality that we need below.
6461 auto lclColMap = colMap.getLocalMap ();
6462 auto lclDomMap = domMap.getLocalMap ();
6463
6464 // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6465 // setup, at least on the host. For CUDA, we have to use LocalMap
6466 // (that comes from each of the two Maps).
6467
6468 TEUCHOS_ASSERT(this->isSorted ());
6469 if (isFillComplete ()) {
6470 auto lclGraph = this->getLocalGraphDevice ();
6471 ::Tpetra::Details::getGraphOffRankOffsets (k_offRankOffsets_,
6472 lclColMap, lclDomMap,
6473 lclGraph);
6474 haveLocalOffRankOffsets_ = true;
6475 }
6476 }
6477
6478 namespace { // (anonymous)
6479
6480 // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
6481 // below). The point is to avoid the deep copy between the input
6482 // Teuchos::ArrayRCP and the internally used Kokkos::View. We
6483 // can't use UVM to avoid the deep copy with CUDA, because the
6484 // ArrayRCP is a host pointer, while the input to the graph's
6485 // getLocalDiagOffsets method is a device pointer. Assigning a
6486 // host pointer to a device pointer is incorrect unless the host
6487 // pointer points to host pinned memory. The goal is to get rid
6488 // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
6489 // copy for backwards compatibility.
6490 //
6491 // We have to use template magic because
6492 // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
6493 // if device_type::memory_space is not Kokkos::HostSpace (as is
6494 // the case with CUDA).
6495
6496 template<class DeviceType,
6497 const bool memSpaceIsHostSpace =
6498 std::is_same<typename DeviceType::memory_space,
6499 Kokkos::HostSpace>::value>
6500 struct HelpGetLocalDiagOffsets {};
6501
6502 template<class DeviceType>
6503 struct HelpGetLocalDiagOffsets<DeviceType, true> {
6504 typedef DeviceType device_type;
6505 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6506 Kokkos::MemoryUnmanaged> device_offsets_type;
6507 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6508 Kokkos::MemoryUnmanaged> host_offsets_type;
6509
6510 static device_offsets_type
6511 getDeviceOffsets (const host_offsets_type& hostOffsets)
6512 {
6513 // Host and device are the same; no need to allocate a
6514 // temporary device View.
6515 return hostOffsets;
6516 }
6517
6518 static void
6519 copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
6520 const device_offsets_type& /* deviceOffsets */)
6521 { /* copy back not needed; host and device are the same */ }
6522 };
6523
6524 template<class DeviceType>
6525 struct HelpGetLocalDiagOffsets<DeviceType, false> {
6526 typedef DeviceType device_type;
6527 // We have to do a deep copy, since host memory space != device
6528 // memory space. Thus, the device View is managed (we need to
6529 // allocate a temporary device View).
6530 typedef Kokkos::View<size_t*, device_type> device_offsets_type;
6531 typedef Kokkos::View<size_t*, Kokkos::HostSpace,
6532 Kokkos::MemoryUnmanaged> host_offsets_type;
6533
6534 static device_offsets_type
6535 getDeviceOffsets (const host_offsets_type& hostOffsets)
6536 {
6537 // Host memory space != device memory space, so we must
6538 // allocate a temporary device View for the graph.
6539 return device_offsets_type ("offsets", hostOffsets.extent (0));
6540 }
6541
6542 static void
6543 copyBackIfNeeded (const host_offsets_type& hostOffsets,
6544 const device_offsets_type& deviceOffsets)
6545 {
6546 // DEEP_COPY REVIEW - NOT TESTED
6547 Kokkos::deep_copy (hostOffsets, deviceOffsets);
6548 }
6549 };
6550 } // namespace (anonymous)
6551
6552
6553 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6554 void
6556 getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
6557 {
6558 typedef LocalOrdinal LO;
6559 const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6560 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6561 (! this->hasColMap (), std::runtime_error,
6562 "The graph does not yet have a column Map.");
6563 const LO myNumRows = static_cast<LO> (this->getLocalNumRows ());
6564 if (static_cast<LO> (offsets.size ()) != myNumRows) {
6565 // NOTE (mfh 21 Jan 2016) This means that the method does not
6566 // satisfy the strong exception guarantee (no side effects
6567 // unless successful).
6568 offsets.resize (myNumRows);
6569 }
6570
6571 // mfh 21 Jan 2016: This method unfortunately takes a
6572 // Teuchos::ArrayRCP, which is host memory. The graph wants a
6573 // device pointer. We can't access host memory from the device;
6574 // that's the wrong direction for UVM. (It's the right direction
6575 // for inefficient host pinned memory, but we don't want to use
6576 // that here.) Thus, if device memory space != host memory space,
6577 // we allocate and use a temporary device View to get the offsets.
6578 // If the two spaces are equal, the template magic makes the deep
6579 // copy go away.
6580 typedef HelpGetLocalDiagOffsets<device_type> helper_type;
6581 typedef typename helper_type::host_offsets_type host_offsets_type;
6582 // Unmanaged host View that views the output array.
6583 host_offsets_type hostOffsets (offsets.getRawPtr (), myNumRows);
6584 // Allocate temp device View if host != device, else reuse host array.
6585 auto deviceOffsets = helper_type::getDeviceOffsets (hostOffsets);
6586 // NOT recursion; this calls the overload that takes a device View.
6587 this->getLocalDiagOffsets (deviceOffsets);
6588 helper_type::copyBackIfNeeded (hostOffsets, deviceOffsets);
6589 }
6590
6591 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6592 bool
6594 supportsRowViews () const {
6595 return true;
6596 }
6597
6598 template <class LocalOrdinal, class GlobalOrdinal, class Node>
6599 void
6602 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
6603 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
6604 const Teuchos::RCP<const map_type>& domainMap,
6605 const Teuchos::RCP<const map_type>& rangeMap,
6606 const Teuchos::RCP<Teuchos::ParameterList>& params) const
6607 {
6612 using Teuchos::ArrayRCP;
6613 using Teuchos::ArrayView;
6614 using Teuchos::Comm;
6615 using Teuchos::ParameterList;
6616 using Teuchos::rcp;
6617 using Teuchos::RCP;
6618#ifdef HAVE_TPETRA_MMM_TIMINGS
6619 using std::string;
6620 using Teuchos::TimeMonitor;
6621#endif
6622
6623 using LO = LocalOrdinal;
6624 using GO = GlobalOrdinal;
6625 using NT = node_type;
6626 using this_CRS_type = CrsGraph<LO, GO, NT>;
6627 using ivector_type = Vector<int, LO, GO, NT>;
6628
6629 const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
6630
6631#ifdef HAVE_TPETRA_MMM_TIMINGS
6632 string label;
6633 if(!params.is_null()) label = params->get("Timer Label", label);
6634 string prefix2 = string("Tpetra ")+ label + std::string(": CrsGraph TAFC ");
6635 RCP<TimeMonitor> MM =
6636 rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Pack-1"))));
6637#endif
6638
6639 // Make sure that the input argument rowTransfer is either an
6640 // Import or an Export. Import and Export are the only two
6641 // subclasses of Transfer that we defined, but users might
6642 // (unwisely, for now at least) decide to implement their own
6643 // subclasses. Exclude this possibility.
6644 const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
6645 const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
6646 TEUCHOS_TEST_FOR_EXCEPTION(
6647 xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
6648 prefix << "The 'rowTransfer' input argument must be either an Import or "
6649 "an Export, and its template parameters must match the corresponding "
6650 "template parameters of the CrsGraph.");
6651
6652 // Make sure that the input argument domainTransfer is either an
6653 // Import or an Export. Import and Export are the only two
6654 // subclasses of Transfer that we defined, but users might
6655 // (unwisely, for now at least) decide to implement their own
6656 // subclasses. Exclude this possibility.
6657 Teuchos::RCP<const import_type> xferDomainAsImport =
6658 Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
6659 Teuchos::RCP<const export_type> xferDomainAsExport =
6660 Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
6661
6662 if(! domainTransfer.is_null()) {
6663
6664 TEUCHOS_TEST_FOR_EXCEPTION(
6665 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
6666 prefix << "The 'domainTransfer' input argument must be either an "
6667 "Import or an Export, and its template parameters must match the "
6668 "corresponding template parameters of the CrsGraph.");
6669
6670 TEUCHOS_TEST_FOR_EXCEPTION(
6671 ( xferAsImport != nullptr || ! xferDomainAsImport.is_null() ) &&
6672 (( xferAsImport != nullptr && xferDomainAsImport.is_null() ) ||
6673 ( xferAsImport == nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
6674 prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6675 "must be of the same type (either Import or Export).");
6676
6677 TEUCHOS_TEST_FOR_EXCEPTION(
6678 ( xferAsExport != nullptr || ! xferDomainAsExport.is_null() ) &&
6679 (( xferAsExport != nullptr && xferDomainAsExport.is_null() ) ||
6680 ( xferAsExport == nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
6681 prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
6682 "must be of the same type (either Import or Export).");
6683
6684 } // domainTransfer != null
6685
6686
6687 // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
6688 // if the source Map is not distributed but the target Map is?
6689 const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
6690
6691 //
6692 // Get the caller's parameters
6693 //
6694
6695 bool reverseMode = false; // Are we in reverse mode?
6696 bool restrictComm = false; // Do we need to restrict the communicator?
6697 RCP<ParameterList> graphparams; // parameters for the destination graph
6698 if (! params.is_null()) {
6699 reverseMode = params->get("Reverse Mode", reverseMode);
6700 restrictComm = params->get("Restrict Communicator", restrictComm);
6701 graphparams = sublist(params, "CrsGraph");
6702 }
6703
6704 // Get the new domain and range Maps. We need some of them for error
6705 // checking, now that we have the reverseMode parameter.
6706 RCP<const map_type> MyRowMap = reverseMode ?
6707 rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
6708 RCP<const map_type> MyColMap; // create this below
6709 RCP<const map_type> MyDomainMap = ! domainMap.is_null() ? domainMap : getDomainMap();
6710 RCP<const map_type> MyRangeMap = ! rangeMap.is_null() ? rangeMap : getRangeMap();
6711 RCP<const map_type> BaseRowMap = MyRowMap;
6712 RCP<const map_type> BaseDomainMap = MyDomainMap;
6713
6714 // If the user gave us a nonnull destGraph, then check whether it's
6715 // "pristine." That means that it has no entries.
6716 //
6717 // FIXME (mfh 15 May 2014) If this is not true on all processes,
6718 // then this exception test may hang. It would be better to
6719 // forward an error flag to the next communication phase.
6720 if (! destGraph.is_null()) {
6721 // FIXME (mfh 15 May 2014): The Epetra idiom for checking
6722 // whether a graph or matrix has no entries on the calling
6723 // process, is that it is neither locally nor globally indexed.
6724 // This may change eventually with the Kokkos refactor version
6725 // of Tpetra, so it would be better just to check the quantity
6726 // of interest directly. Note that with the Kokkos refactor
6727 // version of Tpetra, asking for the total number of entries in
6728 // a graph or matrix that is not fill complete might require
6729 // computation (kernel launch), since it is not thread scalable
6730 // to update a count every time an entry is inserted.
6731 const bool NewFlag =
6732 ! destGraph->isLocallyIndexed() && ! destGraph->isGloballyIndexed();
6733 TEUCHOS_TEST_FOR_EXCEPTION(! NewFlag, std::invalid_argument,
6734 prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
6735 "if its graph is empty (neither locally nor globally indexed).");
6736
6737 // FIXME (mfh 15 May 2014) At some point, we want to change
6738 // graphs and matrices so that their DistObject Map
6739 // (this->getMap()) may differ from their row Map. This will
6740 // make redistribution for 2-D distributions more efficient. I
6741 // hesitate to change this check, because I'm not sure how much
6742 // the code here depends on getMap() and getRowMap() being the
6743 // same.
6744 TEUCHOS_TEST_FOR_EXCEPTION(
6745 ! destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
6746 prefix << "The (row) Map of the input argument 'destGraph' is not the "
6747 "same as the (row) Map specified by the input argument 'rowTransfer'.");
6748
6749 TEUCHOS_TEST_FOR_EXCEPTION(
6750 ! destGraph->checkSizes(*this), std::invalid_argument,
6751 prefix << "You provided a nonnull destination graph, but checkSizes() "
6752 "indicates that it is not a legal legal target for redistribution from "
6753 "the source graph (*this). This may mean that they do not have the "
6754 "same dimensions.");
6755 }
6756
6757 // If forward mode (the default), then *this's (row) Map must be
6758 // the same as the source Map of the Transfer. If reverse mode,
6759 // then *this's (row) Map must be the same as the target Map of
6760 // the Transfer.
6761 //
6762 // FIXME (mfh 15 May 2014) At some point, we want to change graphs
6763 // and matrices so that their DistObject Map (this->getMap()) may
6764 // differ from their row Map. This will make redistribution for
6765 // 2-D distributions more efficient. I hesitate to change this
6766 // check, because I'm not sure how much the code here depends on
6767 // getMap() and getRowMap() being the same.
6768 TEUCHOS_TEST_FOR_EXCEPTION(
6769 ! (reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
6770 std::invalid_argument, prefix <<
6771 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
6772
6773 TEUCHOS_TEST_FOR_EXCEPTION(
6774 ! (! reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
6775 std::invalid_argument, prefix <<
6776 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
6777
6778 // checks for domainTransfer
6779 TEUCHOS_TEST_FOR_EXCEPTION(
6780 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
6781 std::invalid_argument,
6782 prefix << "The target map of the 'domainTransfer' input argument must be "
6783 "the same as the rebalanced domain map 'domainMap'");
6784
6785 TEUCHOS_TEST_FOR_EXCEPTION(
6786 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
6787 std::invalid_argument,
6788 prefix << "The source map of the 'domainTransfer' input argument must be "
6789 "the same as the rebalanced domain map 'domainMap'");
6790
6791 // The basic algorithm here is:
6792 //
6793 // 1. Call the moral equivalent of "Distor.do" to handle the import.
6794 // 2. Copy all the Imported and Copy/Permuted data into the raw
6795 // CrsGraph pointers, still using GIDs.
6796 // 3. Call an optimized version of MakeColMap that avoids the
6797 // Directory lookups (since the importer knows who owns all the
6798 // GIDs) AND reindexes to LIDs.
6799 // 4. Call expertStaticFillComplete()
6800
6801 // Get information from the Importer
6802 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
6803 ArrayView<const LO> ExportLIDs = reverseMode ?
6804 rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
6805 ArrayView<const LO> RemoteLIDs = reverseMode ?
6806 rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
6807 ArrayView<const LO> PermuteToLIDs = reverseMode ?
6808 rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
6809 ArrayView<const LO> PermuteFromLIDs = reverseMode ?
6810 rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
6811 Distributor& Distor = rowTransfer.getDistributor();
6812
6813 // Owning PIDs
6814 Teuchos::Array<int> SourcePids;
6815 Teuchos::Array<int> TargetPids;
6816 int MyPID = getComm()->getRank();
6817
6818 // Temp variables for sub-communicators
6819 RCP<const map_type> ReducedRowMap, ReducedColMap,
6820 ReducedDomainMap, ReducedRangeMap;
6821 RCP<const Comm<int> > ReducedComm;
6822
6823 // If the user gave us a null destGraph, then construct the new
6824 // destination graph. We will replace its column Map later.
6825 if (destGraph.is_null()) {
6826 destGraph = rcp(new this_CRS_type(MyRowMap, 0, graphparams));
6827 }
6828
6829 /***************************************************/
6830 /***** 1) First communicator restriction phase ****/
6831 /***************************************************/
6832 if (restrictComm) {
6833 ReducedRowMap = MyRowMap->removeEmptyProcesses();
6834 ReducedComm = ReducedRowMap.is_null() ?
6835 Teuchos::null :
6836 ReducedRowMap->getComm();
6837 destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
6838
6839 ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ?
6840 ReducedRowMap :
6841 MyDomainMap->replaceCommWithSubset(ReducedComm);
6842 ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ?
6843 ReducedRowMap :
6844 MyRangeMap->replaceCommWithSubset(ReducedComm);
6845
6846 // Reset the "my" maps
6847 MyRowMap = ReducedRowMap;
6848 MyDomainMap = ReducedDomainMap;
6849 MyRangeMap = ReducedRangeMap;
6850
6851 // Update my PID, if we've restricted the communicator
6852 if (! ReducedComm.is_null()) {
6853 MyPID = ReducedComm->getRank();
6854 }
6855 else {
6856 MyPID = -2; // For debugging
6857 }
6858 }
6859 else {
6860 ReducedComm = MyRowMap->getComm();
6861 }
6862
6863 /***************************************************/
6864 /***** 2) From Tpera::DistObject::doTransfer() ****/
6865 /***************************************************/
6866#ifdef HAVE_TPETRA_MMM_TIMINGS
6867 MM = Teuchos::null;
6868 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ImportSetup"))));
6869#endif
6870 // Get the owning PIDs
6871 RCP<const import_type> MyImporter = getImporter();
6872
6873 // check whether domain maps of source graph and base domain map is the same
6874 bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
6875
6876 if (! restrictComm && ! MyImporter.is_null() && bSameDomainMap ) {
6877 // Same domain map as source graph
6878 //
6879 // NOTE: This won't work for restrictComm (because the Import
6880 // doesn't know the restricted PIDs), though writing an
6881 // optimized version for that case would be easy (Import an
6882 // IntVector of the new PIDs). Might want to add this later.
6883 Import_Util::getPids(*MyImporter, SourcePids, false);
6884 }
6885 else if (restrictComm && ! MyImporter.is_null() && bSameDomainMap) {
6886 // Same domain map as source graph (restricted communicator)
6887 // We need one import from the domain to the column map
6888 ivector_type SourceDomain_pids(getDomainMap(),true);
6889 ivector_type SourceCol_pids(getColMap());
6890 // SourceDomain_pids contains the restricted pids
6891 SourceDomain_pids.putScalar(MyPID);
6892
6893 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6894 SourcePids.resize(getColMap()->getLocalNumElements());
6895 SourceCol_pids.get1dCopy(SourcePids());
6896 }
6897 else if (MyImporter.is_null() && bSameDomainMap) {
6898 // Graph has no off-process entries
6899 SourcePids.resize(getColMap()->getLocalNumElements());
6900 SourcePids.assign(getColMap()->getLocalNumElements(), MyPID);
6901 }
6902 else if ( ! MyImporter.is_null() &&
6903 ! domainTransfer.is_null() ) {
6904 // general implementation for rectangular matrices with
6905 // domain map different than SourceGraph domain map.
6906 // User has to provide a DomainTransfer object. We need
6907 // to communications (import/export)
6908
6909 // TargetDomain_pids lives on the rebalanced new domain map
6910 ivector_type TargetDomain_pids(domainMap);
6911 TargetDomain_pids.putScalar(MyPID);
6912
6913 // SourceDomain_pids lives on the non-rebalanced old domain map
6914 ivector_type SourceDomain_pids(getDomainMap());
6915
6916 // SourceCol_pids lives on the non-rebalanced old column map
6917 ivector_type SourceCol_pids(getColMap());
6918
6919 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
6920 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6921 }
6922 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
6923 SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6924 }
6925 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
6926 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
6927 }
6928 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
6929 SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
6930 }
6931 else {
6932 TEUCHOS_TEST_FOR_EXCEPTION(
6933 true, std::logic_error,
6934 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6935 }
6936 SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
6937 SourcePids.resize(getColMap()->getLocalNumElements());
6938 SourceCol_pids.get1dCopy(SourcePids());
6939 }
6940 else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
6941 getDomainMap()->isSameAs(*getRowMap())) {
6942 // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
6943 ivector_type TargetRow_pids(domainMap);
6944 ivector_type SourceRow_pids(getRowMap());
6945 ivector_type SourceCol_pids(getColMap());
6946
6947 TargetRow_pids.putScalar(MyPID);
6948 if (! reverseMode && xferAsImport != nullptr) {
6949 SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
6950 }
6951 else if (reverseMode && xferAsExport != nullptr) {
6952 SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
6953 }
6954 else if (! reverseMode && xferAsExport != nullptr) {
6955 SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
6956 }
6957 else if (reverseMode && xferAsImport != nullptr) {
6958 SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
6959 }
6960 else {
6961 TEUCHOS_TEST_FOR_EXCEPTION(
6962 true, std::logic_error,
6963 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
6964 }
6965 SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
6966 SourcePids.resize(getColMap()->getLocalNumElements());
6967 SourceCol_pids.get1dCopy(SourcePids());
6968 }
6969 else {
6970 TEUCHOS_TEST_FOR_EXCEPTION(
6971 true, std::invalid_argument,
6972 prefix << "This method only allows either domainMap == getDomainMap(), "
6973 "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
6974 }
6975
6976 // Tpetra-specific stuff
6977 size_t constantNumPackets = destGraph->constantNumberOfPackets();
6978 if (constantNumPackets == 0) {
6979 destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
6980 RemoteLIDs.size());
6981 }
6982 else {
6983 // There are a constant number of packets per element. We
6984 // already know (from the number of "remote" (incoming)
6985 // elements) how many incoming elements we expect, so we can
6986 // resize the buffer accordingly.
6987 const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
6988 destGraph->reallocImportsIfNeeded(rbufLen, false, nullptr);
6989 }
6990
6991 {
6992 // packAndPrepare* methods modify numExportPacketsPerLID_.
6993 destGraph->numExportPacketsPerLID_.modify_host();
6994 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
6995 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
6996
6997 // Pack & Prepare w/ owning PIDs
6998 packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
6999 numExportPacketsPerLID, ExportLIDs,
7000 SourcePids, constantNumPackets);
7001 }
7002
7003 // Do the exchange of remote data.
7004#ifdef HAVE_TPETRA_MMM_TIMINGS
7005 MM = Teuchos::null;
7006 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Transfer"))));
7007#endif
7008
7009 if (communication_needed) {
7010 if (reverseMode) {
7011 if (constantNumPackets == 0) { // variable number of packets per LID
7012 // Make sure that host has the latest version, since we're
7013 // using the version on host. If host has the latest
7014 // version, syncing to host does nothing.
7015 destGraph->numExportPacketsPerLID_.sync_host();
7016 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7017 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7018 destGraph->numImportPacketsPerLID_.sync_host();
7019 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7020 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7021
7022 Distor.doReversePostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
7023 destGraph->numImportPacketsPerLID_.view_host());
7024 size_t totalImportPackets = 0;
7025 for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7026 totalImportPackets += numImportPacketsPerLID[i];
7027 }
7028
7029 // Reallocation MUST go before setting the modified flag,
7030 // because it may clear out the flags.
7031 destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7032 destGraph->imports_.modify_host();
7033 auto hostImports = destGraph->imports_.view_host();
7034 // This is a legacy host pack/unpack path, so use the host
7035 // version of exports_.
7036 destGraph->exports_.sync_host();
7037 auto hostExports = destGraph->exports_.view_host();
7038 Distor.doReversePostsAndWaits(hostExports,
7039 numExportPacketsPerLID,
7040 hostImports,
7041 numImportPacketsPerLID);
7042 }
7043 else { // constant number of packets per LI
7044 destGraph->imports_.modify_host();
7045 auto hostImports = destGraph->imports_.view_host();
7046 // This is a legacy host pack/unpack path, so use the host
7047 // version of exports_.
7048 destGraph->exports_.sync_host();
7049 auto hostExports = destGraph->exports_.view_host();
7050 Distor.doReversePostsAndWaits(hostExports,
7051 constantNumPackets,
7052 hostImports);
7053 }
7054 }
7055 else { // forward mode (the default)
7056 if (constantNumPackets == 0) { // variable number of packets per LID
7057 // Make sure that host has the latest version, since we're
7058 // using the version on host. If host has the latest
7059 // version, syncing to host does nothing.
7060 destGraph->numExportPacketsPerLID_.sync_host();
7061 destGraph->numImportPacketsPerLID_.sync_host();
7062 Distor.doPostsAndWaits(destGraph->numExportPacketsPerLID_.view_host(), 1,
7063 destGraph->numImportPacketsPerLID_.view_host());
7064
7065 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7066 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7067 size_t totalImportPackets = 0;
7068 for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7069 totalImportPackets += numImportPacketsPerLID[i];
7070 }
7071
7072 // Reallocation MUST go before setting the modified flag,
7073 // because it may clear out the flags.
7074 destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7075 destGraph->imports_.modify_host();
7076 auto hostImports = destGraph->imports_.view_host();
7077 // This is a legacy host pack/unpack path, so use the host
7078 // version of exports_.
7079 destGraph->exports_.sync_host();
7080 auto hostExports = destGraph->exports_.view_host();
7081 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7082 getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7083 Distor.doPostsAndWaits(hostExports, numExportPacketsPerLID, hostImports, numImportPacketsPerLID);
7084 }
7085 else { // constant number of packets per LID
7086 destGraph->imports_.modify_host();
7087 auto hostImports = destGraph->imports_.view_host();
7088 // This is a legacy host pack/unpack path, so use the host
7089 // version of exports_.
7090 destGraph->exports_.sync_host();
7091 auto hostExports = destGraph->exports_.view_host();
7092 Distor.doPostsAndWaits(hostExports, constantNumPackets, hostImports);
7093 }
7094 }
7095 }
7096
7097 /*********************************************************************/
7098 /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
7099 /*********************************************************************/
7100
7101#ifdef HAVE_TPETRA_MMM_TIMINGS
7102 MM = Teuchos::null;
7103 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-1"))));
7104#endif
7105
7106 // Backwards compatibility measure. We'll use this again below.
7107 destGraph->numImportPacketsPerLID_.sync_host();
7108 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7109 getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7110 destGraph->imports_.sync_host();
7111 Teuchos::ArrayView<const packet_type> hostImports =
7112 getArrayViewFromDualView(destGraph->imports_);
7113 size_t mynnz =
7114 unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
7115 numImportPacketsPerLID,
7116 constantNumPackets, INSERT,
7117 NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
7118 size_t N = BaseRowMap->getLocalNumElements();
7119
7120 // Allocations
7121 ArrayRCP<size_t> CSR_rowptr(N+1);
7122 ArrayRCP<GO> CSR_colind_GID;
7123 ArrayRCP<LO> CSR_colind_LID;
7124 CSR_colind_GID.resize(mynnz);
7125
7126 // If LO and GO are the same, we can reuse memory when
7127 // converting the column indices from global to local indices.
7128 if (typeid(LO) == typeid(GO)) {
7129 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
7130 }
7131 else {
7132 CSR_colind_LID.resize(mynnz);
7133 }
7134
7135 // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
7136 // unpackAndCombine method on a "CrsArrays" object? This passing
7137 // in a huge list of arrays is icky. Can't we have a bit of an
7138 // abstraction? Implementing a concrete DistObject subclass only
7139 // takes five methods.
7140 unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
7141 numImportPacketsPerLID, constantNumPackets,
7142 INSERT, NumSameIDs, PermuteToLIDs,
7143 PermuteFromLIDs, N, mynnz, MyPID,
7144 CSR_rowptr(), CSR_colind_GID(),
7145 SourcePids(), TargetPids);
7146
7147 /**************************************************************/
7148 /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
7149 /**************************************************************/
7150#ifdef HAVE_TPETRA_MMM_TIMINGS
7151 MM = Teuchos::null;
7152 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-2"))));
7153#endif
7154 // Call an optimized version of makeColMap that avoids the
7155 // Directory lookups (since the Import object knows who owns all
7156 // the GIDs).
7157 Teuchos::Array<int> RemotePids;
7158 Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
7159 CSR_colind_LID(),
7160 CSR_colind_GID(),
7161 BaseDomainMap,
7162 TargetPids, RemotePids,
7163 MyColMap);
7164
7165 /*******************************************************/
7166 /**** 4) Second communicator restriction phase ****/
7167 /*******************************************************/
7168 if (restrictComm) {
7169 ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ?
7170 ReducedRowMap :
7171 MyColMap->replaceCommWithSubset(ReducedComm);
7172 MyColMap = ReducedColMap; // Reset the "my" maps
7173 }
7174
7175 // Replace the col map
7176 destGraph->replaceColMap(MyColMap);
7177
7178 // Short circuit if the processor is no longer in the communicator
7179 //
7180 // NOTE: Epetra replaces modifies all "removed" processes so they
7181 // have a dummy (serial) Map that doesn't touch the original
7182 // communicator. Duplicating that here might be a good idea.
7183 if (ReducedComm.is_null()) {
7184 return;
7185 }
7186
7187 /***************************************************/
7188 /**** 5) Sort ****/
7189 /***************************************************/
7190 if ((! reverseMode && xferAsImport != nullptr) ||
7191 (reverseMode && xferAsExport != nullptr)) {
7192 Import_Util::sortCrsEntries(CSR_rowptr(),
7193 CSR_colind_LID());
7194 }
7195 else if ((! reverseMode && xferAsExport != nullptr) ||
7196 (reverseMode && xferAsImport != nullptr)) {
7197 Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
7198 CSR_colind_LID());
7199 if (CSR_rowptr[N] != mynnz) {
7200 CSR_colind_LID.resize(CSR_rowptr[N]);
7201 }
7202 }
7203 else {
7204 TEUCHOS_TEST_FOR_EXCEPTION(
7205 true, std::logic_error,
7206 prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7207 }
7208 /***************************************************/
7209 /**** 6) Reset the colmap and the arrays ****/
7210 /***************************************************/
7211
7212 // Call constructor for the new graph (restricted as needed)
7213 //
7214 destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
7215
7216 /***************************************************/
7217 /**** 7) Build Importer & Call ESFC ****/
7218 /***************************************************/
7219 // Pre-build the importer using the existing PIDs
7220 Teuchos::ParameterList esfc_params;
7221#ifdef HAVE_TPETRA_MMM_TIMINGS
7222 MM = Teuchos::null;
7223 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("CreateImporter"))));
7224#endif
7225 RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
7226#ifdef HAVE_TPETRA_MMM_TIMINGS
7227 MM = Teuchos::null;
7228 MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ESFC"))));
7229
7230 esfc_params.set("Timer Label",prefix + std::string("TAFC"));
7231#endif
7232 if(!params.is_null())
7233 esfc_params.set("compute global constants",params->get("compute global constants",true));
7234
7235 destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
7236 MyImport, Teuchos::null, rcp(&esfc_params,false));
7237
7238 }
7239
7240 template <class LocalOrdinal, class GlobalOrdinal, class Node>
7241 void
7244 const import_type& importer,
7245 const Teuchos::RCP<const map_type>& domainMap,
7246 const Teuchos::RCP<const map_type>& rangeMap,
7247 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7248 {
7249 transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
7250 }
7251
7252 template <class LocalOrdinal, class GlobalOrdinal, class Node>
7253 void
7256 const import_type& rowImporter,
7257 const import_type& domainImporter,
7258 const Teuchos::RCP<const map_type>& domainMap,
7259 const Teuchos::RCP<const map_type>& rangeMap,
7260 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7261 {
7262 transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
7263 }
7264
7265 template <class LocalOrdinal, class GlobalOrdinal, class Node>
7266 void
7269 const export_type& exporter,
7270 const Teuchos::RCP<const map_type>& domainMap,
7271 const Teuchos::RCP<const map_type>& rangeMap,
7272 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7273 {
7274 transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
7275 }
7276
7277 template <class LocalOrdinal, class GlobalOrdinal, class Node>
7278 void
7281 const export_type& rowExporter,
7282 const export_type& domainExporter,
7283 const Teuchos::RCP<const map_type>& domainMap,
7284 const Teuchos::RCP<const map_type>& rangeMap,
7285 const Teuchos::RCP<Teuchos::ParameterList>& params) const
7286 {
7287 transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
7288 }
7289
7290
7291 template<class LocalOrdinal, class GlobalOrdinal, class Node>
7292 void
7295 {
7296 std::swap(graph.need_sync_host_uvm_access, this->need_sync_host_uvm_access);
7297
7298 std::swap(graph.rowMap_, this->rowMap_);
7299 std::swap(graph.colMap_, this->colMap_);
7300 std::swap(graph.rangeMap_, this->rangeMap_);
7301 std::swap(graph.domainMap_, this->domainMap_);
7302
7303 std::swap(graph.importer_, this->importer_);
7304 std::swap(graph.exporter_, this->exporter_);
7305
7306 std::swap(graph.rowPtrsPacked_dev_, this->rowPtrsPacked_dev_);
7307 std::swap(graph.rowPtrsPacked_host_, this->rowPtrsPacked_host_);
7308
7309 std::swap(graph.nodeMaxNumRowEntries_, this->nodeMaxNumRowEntries_);
7310
7311 std::swap(graph.globalNumEntries_, this->globalNumEntries_);
7312 std::swap(graph.globalMaxNumRowEntries_, this->globalMaxNumRowEntries_);
7313
7314 std::swap(graph.numAllocForAllRows_, this->numAllocForAllRows_);
7315
7316 std::swap(graph.rowPtrsUnpacked_dev_, this->rowPtrsUnpacked_dev_);
7317 std::swap(graph.rowPtrsUnpacked_host_, this->rowPtrsUnpacked_host_);
7318 std::swap(graph.k_offRankOffsets_, this->k_offRankOffsets_);
7319
7320 std::swap(graph.lclIndsUnpacked_wdv, this->lclIndsUnpacked_wdv);
7321 std::swap(graph.gblInds_wdv, this->gblInds_wdv);
7322 std::swap(graph.lclIndsPacked_wdv, this->lclIndsPacked_wdv);
7323
7324 std::swap(graph.storageStatus_, this->storageStatus_);
7325
7326 std::swap(graph.indicesAreAllocated_, this->indicesAreAllocated_);
7327 std::swap(graph.indicesAreLocal_, this->indicesAreLocal_);
7328 std::swap(graph.indicesAreGlobal_, this->indicesAreGlobal_);
7329 std::swap(graph.fillComplete_, this->fillComplete_);
7330 std::swap(graph.indicesAreSorted_, this->indicesAreSorted_);
7331 std::swap(graph.noRedundancies_, this->noRedundancies_);
7332 std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
7333 std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
7334 std::swap(graph.haveLocalOffRankOffsets_, this->haveLocalOffRankOffsets_);
7335
7336 std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);
7337
7338 std::swap(graph.k_numAllocPerRow_, this->k_numAllocPerRow_); // View
7339 std::swap(graph.k_numRowEntries_, this->k_numRowEntries_); // View
7340 std::swap(graph.nonlocals_, this->nonlocals_); // std::map
7341 }
7342
7343
7344 template<class LocalOrdinal, class GlobalOrdinal, class Node>
7345 bool
7348 {
7349 auto compare_nonlocals = [&] (const nonlocals_type & m1, const nonlocals_type & m2) {
7350 bool output = true;
7351 output = m1.size() == m2.size() ? output : false;
7352 for(auto & it_m: m1)
7353 {
7354 size_t key = it_m.first;
7355 output = m2.find(key) != m2.end() ? output : false;
7356 if(output)
7357 {
7358 auto v1 = m1.find(key)->second;
7359 auto v2 = m2.find(key)->second;
7360 std::sort(v1.begin(), v1.end());
7361 std::sort(v2.begin(), v2.end());
7362
7363 output = v1.size() == v2.size() ? output : false;
7364 for(size_t i=0; output && i<v1.size(); i++)
7365 {
7366 output = v1[i]==v2[i] ? output : false;
7367 }
7368 }
7369 }
7370 return output;
7371 };
7372
7373 bool output = true;
7374
7375 output = this->rowMap_->isSameAs( *(graph.rowMap_) ) ? output : false;
7376 output = this->colMap_->isSameAs( *(graph.colMap_) ) ? output : false;
7377 output = this->rangeMap_->isSameAs( *(graph.rangeMap_) ) ? output : false;
7378 output = this->domainMap_->isSameAs( *(graph.domainMap_) ) ? output : false;
7379
7380 output = this->nodeMaxNumRowEntries_ == graph.nodeMaxNumRowEntries_ ? output : false;
7381
7382 output = this->globalNumEntries_ == graph.globalNumEntries_ ? output : false;
7383 output = this->globalMaxNumRowEntries_ == graph.globalMaxNumRowEntries_ ? output : false;
7384
7385 output = this->numAllocForAllRows_ == graph.numAllocForAllRows_ ? output : false;
7386
7387 output = this->storageStatus_ == graph.storageStatus_ ? output : false; // EStorageStatus is an enum
7388
7389 output = this->indicesAreAllocated_ == graph.indicesAreAllocated_ ? output : false;
7390 output = this->indicesAreLocal_ == graph.indicesAreLocal_ ? output : false;
7391 output = this->indicesAreGlobal_ == graph.indicesAreGlobal_ ? output : false;
7392 output = this->fillComplete_ == graph.fillComplete_ ? output : false;
7393 output = this->indicesAreSorted_ == graph.indicesAreSorted_ ? output : false;
7394 output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
7395 output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
7396 output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
7397 output = this->haveLocalOffRankOffsets_ == graph.haveLocalOffRankOffsets_ ? output : false;
7398 output = this->sortGhostsAssociatedWithEachProcessor_ == this->sortGhostsAssociatedWithEachProcessor_ ? output : false;
7399
7400 // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
7401 // nonlocals_ isa std::map<GO, std::vector<GO> >
7402 output = compare_nonlocals(this->nonlocals_, graph.nonlocals_) ? output : false;
7403
7404 // Compare k_numAllocPerRow_ isa Kokkos::View::HostMirror
7405 // - since this is a HostMirror type, it should be in host memory already
7406 output = this->k_numAllocPerRow_.extent(0) == graph.k_numAllocPerRow_.extent(0) ? output : false;
7407 if(output && this->k_numAllocPerRow_.extent(0) > 0)
7408 {
7409 for(size_t i=0; output && i<this->k_numAllocPerRow_.extent(0); i++)
7410 output = this->k_numAllocPerRow_(i) == graph.k_numAllocPerRow_(i) ? output : false;
7411 }
7412
7413 // Compare k_numRowEntries_ isa Kokkos::View::HostMirror
7414 // - since this is a HostMirror type, it should be in host memory already
7415 output = this->k_numRowEntries_.extent(0) == graph.k_numRowEntries_.extent(0) ? output : false;
7416 if(output && this->k_numRowEntries_.extent(0) > 0)
7417 {
7418 for(size_t i = 0; output && i < this->k_numRowEntries_.extent(0); i++)
7419 output = this->k_numRowEntries_(i) == graph.k_numRowEntries_(i) ? output : false;
7420 }
7421
7422 // Compare this->k_rowPtrs_ isa Kokkos::View<LocalOrdinal*, ...>
7423 output = this->rowPtrsUnpacked_host_.extent(0) == graph.rowPtrsUnpacked_host_.extent(0) ? output : false;
7424 if(output && this->rowPtrsUnpacked_host_.extent(0) > 0)
7425 {
7426 auto rowPtrsThis = this->rowPtrsUnpacked_host_;
7427 auto rowPtrsGraph = graph.rowPtrsUnpacked_host_;
7428 for(size_t i=0; output && i<rowPtrsThis.extent(0); i++)
7429 output = rowPtrsThis(i) == rowPtrsGraph(i) ? output : false;
7430 }
7431
7432 // Compare lclIndsUnpacked_wdv isa Kokkos::View<LocalOrdinal*, ...>
7433 output = this->lclIndsUnpacked_wdv.extent(0) == graph.lclIndsUnpacked_wdv.extent(0) ? output : false;
7434 if(output && this->lclIndsUnpacked_wdv.extent(0) > 0)
7435 {
7436 auto indThis = this->lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7437 auto indGraph = graph.lclIndsUnpacked_wdv.getHostView(Access::ReadOnly);
7438 for(size_t i=0; output && i < indThis.extent(0); i++)
7439 output = indThis(i) == indGraph(i) ? output : false;
7440 }
7441
7442 // Compare gblInds_wdv isa Kokkos::View<GlobalOrdinal*, ...>
7443 output = this->gblInds_wdv.extent(0) == graph.gblInds_wdv.extent(0) ? output : false;
7444 if(output && this->gblInds_wdv.extent(0) > 0)
7445 {
7446 auto indtThis = this->gblInds_wdv.getHostView(Access::ReadOnly);
7447 auto indtGraph = graph.gblInds_wdv.getHostView(Access::ReadOnly);
7448 for(size_t i=0; output && i<indtThis.extent(0); i++)
7449 output = indtThis(i) == indtGraph(i) ? output : false;
7450 }
7451
7452 // Check lclGraph_ isa
7453 // Kokkos::StaticCrsGraph<LocalOrdinal, Kokkos::LayoutLeft, execution_space>
7454 // Kokkos::StaticCrsGraph has 3 data members in it:
7455 // Kokkos::View<size_type*, ...> row_map
7456 // (local_graph_device_type::row_map_type)
7457 // Kokkos::View<data_type*, ...> entries
7458 // (local_graph_device_type::entries_type)
7459 // Kokkos::View<size_type*, ...> row_block_offsets
7460 // (local_graph_device_type::row_block_type)
7461 // There is currently no Kokkos::StaticCrsGraph comparison function
7462 // that's built-in, so we will just compare
7463 // the three data items here. This can be replaced if Kokkos ever
7464 // puts in its own comparison routine.
7465 local_graph_host_type thisLclGraph = this->getLocalGraphHost();
7466 local_graph_host_type graphLclGraph = graph.getLocalGraphHost();
7467
7468 output = thisLclGraph.row_map.extent(0) == graphLclGraph.row_map.extent(0)
7469 ? output : false;
7470 if(output && thisLclGraph.row_map.extent(0) > 0)
7471 {
7472 auto lclGraph_rowmap_host_this = thisLclGraph.row_map;
7473 auto lclGraph_rowmap_host_graph = graphLclGraph.row_map;
7474 for (size_t i=0; output && i < lclGraph_rowmap_host_this.extent(0); i++)
7475 output = lclGraph_rowmap_host_this(i) == lclGraph_rowmap_host_graph(i)
7476 ? output : false;
7477 }
7478
7479 output = thisLclGraph.entries.extent(0) == graphLclGraph.entries.extent(0)
7480 ? output : false;
7481 if(output && thisLclGraph.entries.extent(0) > 0)
7482 {
7483 auto lclGraph_entries_host_this = thisLclGraph.entries;
7484 auto lclGraph_entries_host_graph = graphLclGraph.entries;
7485 for (size_t i=0; output && i < lclGraph_entries_host_this.extent(0); i++)
7486 output = lclGraph_entries_host_this(i) == lclGraph_entries_host_graph(i)
7487 ? output : false;
7488 }
7489
7490 output =
7491 thisLclGraph.row_block_offsets.extent(0) ==
7492 graphLclGraph.row_block_offsets.extent(0) ? output : false;
7493 if(output && thisLclGraph.row_block_offsets.extent(0) > 0)
7494 {
7495 auto lclGraph_rbo_host_this = thisLclGraph.row_block_offsets;
7496 auto lclGraph_rbo_host_graph = graphLclGraph.row_block_offsets;
7497 for (size_t i=0; output && i < lclGraph_rbo_host_this.extent(0); i++)
7498 output = lclGraph_rbo_host_this(i) == lclGraph_rbo_host_graph(i)
7499 ? output : false;
7500 }
7501
7502 // For Importer and Exporter, we don't need to explicitly check them since
7503 // they will be consistent with the maps.
7504 // Note: importer_ isa Teuchos::RCP<const import_type>
7505 // exporter_ isa Teuchos::RCP<const export_type>
7506
7507 return output;
7508 }
7509
7510
7511
7512} // namespace Tpetra
7513
7514//
7515// Explicit instantiation macros
7516//
7517// Must be expanded from within the Tpetra namespace!
7518//
7519
7520#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7521 template<> \
7522 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7523 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7524 const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7525 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7526 CrsGraph<LO,GO,NODE>::node_type>& importer, \
7527 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7528 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7529 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7530 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7531 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7532 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7533 const Teuchos::RCP<Teuchos::ParameterList>& params);
7534
7535#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7536 template<> \
7537 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7538 importAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7539 const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7540 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7541 CrsGraph<LO,GO,NODE>::node_type>& rowImporter, \
7542 const Import<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7543 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7544 CrsGraph<LO,GO,NODE>::node_type>& domainImporter, \
7545 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7546 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7547 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7548 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7549 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7550 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7551 const Teuchos::RCP<Teuchos::ParameterList>& params);
7552
7553
7554#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7555 template<> \
7556 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7557 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7558 const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7559 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7560 CrsGraph<LO,GO,NODE>::node_type>& exporter, \
7561 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7562 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7563 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7564 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7565 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7566 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7567 const Teuchos::RCP<Teuchos::ParameterList>& params);
7568
7569#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7570 template<> \
7571 Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
7572 exportAndFillCompleteCrsGraph(const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
7573 const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7574 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7575 CrsGraph<LO,GO,NODE>::node_type>& rowExporter, \
7576 const Export<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7577 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7578 CrsGraph<LO,GO,NODE>::node_type>& domainExporter, \
7579 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7580 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7581 CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
7582 const Teuchos::RCP<const Map<CrsGraph<LO,GO,NODE>::local_ordinal_type, \
7583 CrsGraph<LO,GO,NODE>::global_ordinal_type, \
7584 CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
7585 const Teuchos::RCP<Teuchos::ParameterList>& params);
7586
7587
7588#define TPETRA_CRSGRAPH_INSTANT( LO, GO, NODE ) \
7589 template class CrsGraph<LO, GO, NODE>; \
7590 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7591 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
7592 TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
7593 TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
7594
7595
7596#endif // TPETRA_CRSGRAPH_DEF_HPP
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular,...
Functions for manipulating CRS arrays.
Declaration of a function that prints strings from each process.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Utility functions for packing and unpacking sparse matrix entries.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
Stand-alone utility functions and macros.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
bool isMerged() const
Whether duplicate column indices in each row have been merged.
local_inds_dualv_type::t_dev::const_type getLocalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
global_size_t globalMaxNumRowEntries_
Global maximum of the number of entries in each row.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const override
Default parameter list suitable for validation.
Details::EStorageStatus storageStatus_
Status of the graph's storage, when not in a fill-complete state.
GlobalOrdinal global_ordinal_type
The type of the graph's global indices.
void insertGlobalIndicesIntoNonownedRows(const global_ordinal_type gblRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Implementation of insertGlobalIndices for nonowned rows.
Teuchos::RCP< const map_type > rangeMap_
The Map describing the range of the (matrix corresponding to the) graph.
std::pair< size_t, std::string > makeIndicesLocal(const bool verbose=false)
Convert column indices from global to local.
local_inds_device_view_type getLocalIndicesDevice() const
Get a device view of the packed column indicies.
global_size_t getGlobalNumEntries() const override
Returns the global number of entries in the graph.
bool isIdenticalTo(const CrsGraph< LocalOrdinal, GlobalOrdinal, Node > &graph) const
Create a cloned CrsGraph for a different Node type.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows Valid when isLocallyIndexed is true If OptimizedStorage...
void globalAssemble()
Communicate nonlocal contributions to other processes.
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the graph.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Tell the graph that you are done changing its structure.
global_inds_wdv_type gblInds_wdv
Global ordinals of column indices for all rows.
size_t nodeMaxNumRowEntries_
Local maximum of the number of entries in each row.
size_t sortAndMergeRowIndices(const RowInfo &rowInfo, const bool sorted, const bool merged)
Sort and merge duplicate column indices in the given row.
Teuchos::RCP< const import_type > importer_
The Import from the domain Map to the column Map.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
size_t numAllocForAllRows_
The maximum number of entries to allow in each locally owned row.
bool hasColMap() const override
Whether the graph has a column Map.
LocalOrdinal local_ordinal_type
The type of the graph's local indices.
std::string description() const override
Return a one-line human-readable description of this object.
bool isStorageOptimized() const
Returns true if storage has been optimized.
void getGlobalRowCopy(global_ordinal_type gblRow, nonconst_global_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using global indices.
void removeLocalIndices(local_ordinal_type localRow)
Remove all graph indices from the specified local row.
void importAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const import_type &importer, const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Import from this to the given destination graph, and make the result fill complete.
global_size_t getGlobalNumRows() const override
Returns the number of global rows in the graph.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
void replaceRangeMapAndExporter(const Teuchos::RCP< const map_type > &newRangeMap, const Teuchos::RCP< const export_type > &newExporter)
Replace the current Range Map and Export with the given parameters.
void computeLocalConstants()
Compute local constants, if they have not yet been computed.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const override
Print this object to the given output stream with the given verbosity level.
void setParameterList(const Teuchos::RCP< Teuchos::ParameterList > &params) override
Set the given list of parameters (must be nonnull).
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume fill operations.
size_t insertIndices(RowInfo &rowInfo, const SLocalGlobalViews &newInds, const ELocalGlobal lg, const ELocalGlobal I)
Insert indices into the given row.
typename Node::device_type device_type
This class' Kokkos device type.
void insertGlobalIndicesFiltered(const local_ordinal_type lclRow, const global_ordinal_type gblColInds[], const local_ordinal_type numGblColInds)
Like insertGlobalIndices(), but with column Map filtering.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
global_inds_dualv_type::t_dev::const_type getGlobalIndsViewDevice(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
typename local_graph_device_type::HostMirror local_graph_host_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
row_ptrs_host_view_type getLocalRowPtrsHost() const
Get a host view of the packed row offsets.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
void setAllIndices(const typename local_graph_device_type::row_map_type &rowPointers, const typename local_graph_device_type::entries_type::non_const_type &columnIndices)
Set the graph's data directly, using 1-D storage.
void insertLocalIndices(const local_ordinal_type localRow, const Teuchos::ArrayView< const local_ordinal_type > &indices)
Insert local indices into the graph.
local_inds_host_view_type getLocalIndicesHost() const
Get a host view of the packed column indicies.
bool supportsRowViews() const override
Whether this class implements getLocalRowView() and getGlobalRowView() (it does).
size_t getNumEntriesInGlobalRow(global_ordinal_type globalRow) const override
Returns the current number of entries on this node in the specified global row.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
void setDomainRangeMaps(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap)
void swap(CrsGraph< local_ordinal_type, global_ordinal_type, Node > &graph)
Swaps the data from *this with the data and maps from graph.
void getGlobalRowView(const global_ordinal_type gblRow, global_inds_host_view_type &gblColInds) const override
Get a const view of the given global row's global column indices.
void exportAndFillComplete(Teuchos::RCP< CrsGraph< local_ordinal_type, global_ordinal_type, Node > > &destGraph, const export_type &exporter, const Teuchos::RCP< const map_type > &domainMap=Teuchos::null, const Teuchos::RCP< const map_type > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null) const
Export from this to the given destination graph, and make the result fill complete.
void makeColMap(Teuchos::Array< int > &remotePIDs)
Make and set the graph's column Map.
bool haveGlobalConstants_
Whether all processes have computed global constants.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator.
void checkInternalState() const
Throw an exception if the internal state is not consistent.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the domain of this graph.
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Perform a fillComplete on a graph that already has data, via setAllIndices().
bool sortGhostsAssociatedWithEachProcessor_
Whether to require makeColMap() (and therefore fillComplete()) to order column Map GIDs associated wi...
size_t getNumAllocatedEntriesInGlobalRow(global_ordinal_type globalRow) const
Current number of allocated entries in the given row on the calling (MPI) process,...
Teuchos::RCP< const export_type > getExporter() const override
Returns the exporter associated with this graph.
typename device_type::execution_space execution_space
This class' Kokkos execution space.
void makeImportExport(Teuchos::Array< int > &remotePIDs, const bool useRemotePIDs)
Make the Import and Export objects, if needed.
global_ordinal_type getIndexBase() const override
Returns the index base for global indices for this graph.
row_ptrs_device_view_type getLocalRowPtrsDevice() const
Get a device view of the packed row offsets.
void getLocalRowCopy(local_ordinal_type gblRow, nonconst_local_inds_host_view_type &gblColInds, size_t &numColInds) const override
Get a copy of the given row, using local indices.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
bool isFillActive() const
Whether resumeFill() has been called and the graph is in edit mode.
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
global_size_t globalNumEntries_
Global number of entries in the graph.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
size_t getLocalNumEntries() const override
The local number of entries in the graph.
Teuchos::RCP< const import_type > getImporter() const override
Returns the importer associated with this graph.
local_inds_wdv_type lclIndsPacked_wdv
Local ordinals of column indices for all rows Valid when isLocallyIndexed is true Built during fillCo...
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, device_type, void, size_t > local_graph_device_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< const map_type > domainMap_
The Map describing the domain of the (matrix corresponding to the) graph.
size_t getLocalNumCols() const override
Returns the number of columns connected to the locally owned rows of this graph.
nonlocals_type nonlocals_
Nonlocal data given to insertGlobalIndices.
virtual void pack(const Teuchos::ArrayView< const local_ordinal_type > &exportLIDs, Teuchos::Array< global_ordinal_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, size_t &constantNumPackets) const override
Pack this object's data for Import or Export.
void getLocalOffRankOffsets(offset_device_view_type &offsets) const
Get offsets of the off-rank entries in the graph.
global_size_t getGlobalNumCols() const override
Returns the number of global columns in the graph.
typename row_graph_type::local_inds_device_view_type local_inds_device_view_type
The Kokkos::View type for views of local ordinals on device and host.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
Node node_type
This class' Kokkos Node type.
Teuchos::RCP< const export_type > exporter_
The Export from the row Map to the range Map.
void insertGlobalIndices(const global_ordinal_type globalRow, const Teuchos::ArrayView< const global_ordinal_type > &indices)
Insert global indices into the graph.
local_inds_dualv_type::t_host getLocalIndsViewHostNonConst(const RowInfo &rowinfo)
Get a ReadWrite locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(m...
void replaceDomainMap(const Teuchos::RCP< const map_type > &newDomainMap)
Replace the current domain Map with the given objects.
Kokkos::View< constsize_t *, device_type >::HostMirror k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
void computeGlobalConstants()
Compute global constants, if they have not yet been computed.
size_t getNumAllocatedEntriesInLocalRow(local_ordinal_type localRow) const
Current number of allocated entries in the given row on the calling (MPI) process,...
offset_device_view_type k_offRankOffsets_
The offsets for off-rank entries.
void replaceDomainMapAndImporter(const Teuchos::RCP< const map_type > &newDomainMap, const Teuchos::RCP< const import_type > &newImporter)
Replace the current domain Map and Import with the given parameters.
void setLocallyModified()
Report that we made a local modification to its structure.
size_t getLocalAllocationSize() const
The local number of indices allocated for the graph, over all rows on the calling (MPI) process.
void replaceRangeMap(const Teuchos::RCP< const map_type > &newRangeMap)
Replace the current Range Map with the given objects.
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
virtual void removeEmptyProcessesInPlace(const Teuchos::RCP< const map_type > &newMap) override
Remove processes owning zero rows from the Maps and their communicator.
void getLocalRowView(const LocalOrdinal lclRow, local_inds_host_view_type &lclColInds) const override
Get a const view of the given local row's local column indices.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, on this process.
virtual bool checkSizes(const SrcDistObject &source) override
Compare the source and target (this) objects for compatibility.
local_graph_device_type getLocalGraphDevice() const
Get the local graph.
size_t getLocalNumRows() const override
Returns the number of graph rows owned on the calling node.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the graph's current column Map with the given Map.
bool haveLocalConstants_
Whether this process has computed local constants.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
"Local" part of Map suitable for Kokkos kernels.
Sets up and executes a communication plan for a Tpetra DistObject.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
A parallel distribution of indices over processes.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
Abstract base class for objects that can be the source of an Import or Export operation.
A distributed dense vector.
Implementation details of Tpetra.
int local_ordinal_type
Default value of Scalar template parameter.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries....
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices in to current list of indices.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interf...
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a CrsGraph's global column indices into local column indices.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType count)
Compute offsets from a constant count.
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets in to current list of indices.
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse graph for communication.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified,...
CombineMode
Rule for combining data in an Import or Export.
@ INSERT
Insert new values that don't currently exist.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.