40#ifndef TPETRA_CRSMATRIX_DEF_HPP
41#define TPETRA_CRSMATRIX_DEF_HPP
53#include "Tpetra_RowMatrix.hpp"
54#include "Tpetra_LocalCrsMatrixOperator.hpp"
62#include "Tpetra_Details_getDiagCopyWithoutOffsets.hpp"
67#include "KokkosSparse_getDiagCopy.hpp"
71#include "Tpetra_Details_packCrsMatrix.hpp"
72#include "Tpetra_Details_unpackCrsMatrixAndCombine.hpp"
74#include "Teuchos_FancyOStream.hpp"
75#include "Teuchos_RCP.hpp"
76#include "Teuchos_DataAccess.hpp"
77#include "Teuchos_SerialDenseMatrix.hpp"
78#include "KokkosBlas.hpp"
90 template<
class T,
class BinaryFunction>
91 T atomic_binary_function_update (
volatile T*
const dest,
105 T newVal = f (assume, inputVal);
106 oldVal = Kokkos::atomic_compare_exchange (dest, assume, newVal);
107 }
while (assume != oldVal);
127template<
class Scalar>
131 typedef Teuchos::ScalarTraits<Scalar> STS;
132 return std::max (STS::magnitude (x), STS::magnitude (y));
141 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
143 CrsMatrix (
const Teuchos::RCP<const map_type>& rowMap,
144 size_t maxNumEntriesPerRow,
145 const Teuchos::RCP<Teuchos::ParameterList>& params) :
148 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, size_t "
149 "[, RCP<ParameterList>]): ";
150 Teuchos::RCP<crs_graph_type> graph;
152 graph = Teuchos::rcp (
new crs_graph_type (rowMap, maxNumEntriesPerRow,
155 catch (std::exception& e) {
156 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
157 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
158 "size_t [, RCP<ParameterList>]) threw an exception: "
165 staticGraph_ = myGraph_;
170 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
172 CrsMatrix (
const Teuchos::RCP<const map_type>& rowMap,
173 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
174 const Teuchos::RCP<Teuchos::ParameterList>& params) :
177 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, "
178 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
179 Teuchos::RCP<crs_graph_type> graph;
185 catch (std::exception& e) {
186 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
187 (
true, std::runtime_error,
"CrsGraph constructor "
188 "(RCP<const Map>, ArrayView<const size_t>"
189 "[, RCP<ParameterList>]) threw an exception: "
196 staticGraph_ = graph;
201 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
203 CrsMatrix (
const Teuchos::RCP<const map_type>& rowMap,
204 const Teuchos::RCP<const map_type>& colMap,
205 const size_t maxNumEntPerRow,
206 const Teuchos::RCP<Teuchos::ParameterList>& params) :
209 const char tfecfFuncName[] =
"CrsMatrix(RCP<const Map>, "
210 "RCP<const Map>, size_t[, RCP<ParameterList>]): ";
211 const char suffix[] =
212 " Please report this bug to the Tpetra developers.";
215 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
216 (! staticGraph_.is_null (), std::logic_error,
217 "staticGraph_ is not null at the beginning of the constructor."
219 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
220 (! myGraph_.is_null (), std::logic_error,
221 "myGraph_ is not null at the beginning of the constructor."
223 Teuchos::RCP<crs_graph_type> graph;
229 catch (std::exception& e) {
230 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
231 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
232 "RCP<const Map>, size_t[, RCP<ParameterList>]) threw an "
233 "exception: " << e.what ());
239 staticGraph_ = myGraph_;
244 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
246 CrsMatrix (
const Teuchos::RCP<const map_type>& rowMap,
247 const Teuchos::RCP<const map_type>& colMap,
248 const Teuchos::ArrayView<const size_t>& numEntPerRowToAlloc,
249 const Teuchos::RCP<Teuchos::ParameterList>& params) :
252 const char tfecfFuncName[] =
253 "CrsMatrix(RCP<const Map>, RCP<const Map>, "
254 "ArrayView<const size_t>[, RCP<ParameterList>]): ";
255 Teuchos::RCP<crs_graph_type> graph;
261 catch (std::exception& e) {
262 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
263 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
264 "RCP<const Map>, ArrayView<const size_t>[, "
265 "RCP<ParameterList>]) threw an exception: " << e.what ());
271 staticGraph_ = graph;
277 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
279 CrsMatrix (
const Teuchos::RCP<const crs_graph_type>& graph,
280 const Teuchos::RCP<Teuchos::ParameterList>& ) :
282 staticGraph_ (graph),
283 storageStatus_ (
Details::STORAGE_1D_PACKED)
286 typedef typename local_matrix_device_type::values_type values_type;
287 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>[, "
288 "RCP<ParameterList>]): ";
291 std::unique_ptr<std::string> prefix;
293 prefix = this->createPrefix(
"CrsMatrix",
"CrsMatrix(graph,params)");
294 std::ostringstream os;
295 os << *prefix <<
"Start" << endl;
296 std::cerr << os.str ();
299 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
300 (graph.is_null (), std::runtime_error,
"Input graph is null.");
301 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
302 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
303 "is not fill complete. You must call fillComplete on the "
304 "graph before using it to construct a CrsMatrix. Note that "
305 "calling resumeFill on the graph makes it not fill complete, "
306 "even if you had previously called fillComplete. In that "
307 "case, you must call fillComplete on the graph again.");
315 const size_t numEnt = graph->lclIndsPacked_wdv.extent (0);
317 std::ostringstream os;
318 os << *prefix <<
"Allocate values: " << numEnt << endl;
319 std::cerr << os.str ();
322 values_type val (
"Tpetra::CrsMatrix::values", numEnt);
323 valuesPacked_wdv = values_wdv_type(val);
324 valuesUnpacked_wdv = valuesPacked_wdv;
329 std::ostringstream os;
330 os << *prefix <<
"Done" << endl;
331 std::cerr << os.str ();
335 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
338 const Teuchos::RCP<const crs_graph_type>& graph,
339 const Teuchos::RCP<Teuchos::ParameterList>& params) :
341 staticGraph_ (graph),
342 storageStatus_ (matrix.storageStatus_)
344 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>, "
345 "local_matrix_device_type::values_type, "
346 "[,RCP<ParameterList>]): ";
347 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
348 (graph.is_null (), std::runtime_error,
"Input graph is null.");
349 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
350 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
351 "is not fill complete. You must call fillComplete on the "
352 "graph before using it to construct a CrsMatrix. Note that "
353 "calling resumeFill on the graph makes it not fill complete, "
354 "even if you had previously called fillComplete. In that "
355 "case, you must call fillComplete on the graph again.");
357 size_t numValuesPacked = graph->lclIndsPacked_wdv.extent(0);
358 valuesPacked_wdv = values_wdv_type(matrix.valuesPacked_wdv, 0, numValuesPacked);
360 size_t numValuesUnpacked = graph->lclIndsUnpacked_wdv.extent(0);
361 valuesUnpacked_wdv = values_wdv_type(matrix.valuesUnpacked_wdv, 0, numValuesUnpacked);
367 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
369 CrsMatrix (
const Teuchos::RCP<const crs_graph_type>& graph,
370 const typename local_matrix_device_type::values_type& values,
371 const Teuchos::RCP<Teuchos::ParameterList>& ) :
373 staticGraph_ (graph),
374 storageStatus_ (
Details::STORAGE_1D_PACKED)
376 const char tfecfFuncName[] =
"CrsMatrix(RCP<const CrsGraph>, "
377 "local_matrix_device_type::values_type, "
378 "[,RCP<ParameterList>]): ";
379 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
380 (graph.is_null (), std::runtime_error,
"Input graph is null.");
381 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
382 (! graph->isFillComplete (), std::runtime_error,
"Input graph "
383 "is not fill complete. You must call fillComplete on the "
384 "graph before using it to construct a CrsMatrix. Note that "
385 "calling resumeFill on the graph makes it not fill complete, "
386 "even if you had previously called fillComplete. In that "
387 "case, you must call fillComplete on the graph again.");
395 valuesPacked_wdv = values_wdv_type(values);
396 valuesUnpacked_wdv = valuesPacked_wdv;
407 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
409 CrsMatrix (
const Teuchos::RCP<const map_type>& rowMap,
410 const Teuchos::RCP<const map_type>& colMap,
411 const typename local_graph_device_type::row_map_type& rowPointers,
412 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
413 const typename local_matrix_device_type::values_type& values,
414 const Teuchos::RCP<Teuchos::ParameterList>& params) :
416 storageStatus_ (
Details::STORAGE_1D_PACKED)
418 using Details::getEntryOnHost;
421 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
422 "RCP<const Map>, ptr, ind, val[, params]): ";
423 const char suffix[] =
424 ". Please report this bug to the Tpetra developers.";
428 std::unique_ptr<std::string> prefix;
430 prefix = this->createPrefix(
431 "CrsMatrix",
"CrsMatrix(rowMap,colMap,ptr,ind,val[,params])");
432 std::ostringstream os;
433 os << *prefix <<
"Start" << endl;
434 std::cerr << os.str ();
441 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
442 (values.extent(0) != columnIndices.extent(0),
443 std::invalid_argument,
"values.extent(0)=" << values.extent(0)
444 <<
" != columnIndices.extent(0) = " << columnIndices.extent(0)
446 if (debug && rowPointers.extent(0) != 0) {
447 const size_t numEnt =
448 getEntryOnHost(rowPointers, rowPointers.extent(0) - 1);
449 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
450 (numEnt !=
size_t(columnIndices.extent(0)) ||
451 numEnt !=
size_t(values.extent(0)),
452 std::invalid_argument,
"Last entry of rowPointers says that "
453 "the matrix has " << numEnt <<
" entr"
454 << (numEnt != 1 ?
"ies" :
"y") <<
", but the dimensions of "
455 "columnIndices and values don't match this. "
456 "columnIndices.extent(0)=" << columnIndices.extent (0)
457 <<
" and values.extent(0)=" << values.extent (0) <<
".");
460 RCP<crs_graph_type> graph;
462 graph = Teuchos::rcp (
new crs_graph_type (rowMap, colMap, rowPointers,
463 columnIndices, params));
465 catch (std::exception& e) {
466 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
467 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
468 "RCP<const Map>, ptr, ind[, params]) threw an exception: "
476 auto lclGraph = graph->getLocalGraphDevice ();
477 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
478 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
479 lclGraph.entries.extent (0) != columnIndices.extent (0),
480 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, ptr, "
481 "ind[, params]) did not set the local graph correctly." << suffix);
482 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
483 (lclGraph.entries.extent (0) != values.extent (0),
484 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, ptr, ind[, "
485 "params]) did not set the local graph correctly. "
486 "lclGraph.entries.extent(0) = " << lclGraph.entries.extent (0)
487 <<
" != values.extent(0) = " << values.extent (0) << suffix);
493 staticGraph_ = graph;
502 valuesPacked_wdv = values_wdv_type(values);
503 valuesUnpacked_wdv = valuesPacked_wdv;
512 std::ostringstream os;
513 os << *prefix <<
"Done" << endl;
514 std::cerr << os.str();
518 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
520 CrsMatrix (
const Teuchos::RCP<const map_type>& rowMap,
521 const Teuchos::RCP<const map_type>& colMap,
522 const Teuchos::ArrayRCP<size_t>& ptr,
523 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
524 const Teuchos::ArrayRCP<Scalar>& val,
525 const Teuchos::RCP<Teuchos::ParameterList>& params) :
527 storageStatus_ (
Details::STORAGE_1D_PACKED)
529 using Kokkos::Compat::getKokkosViewDeepCopy;
530 using Teuchos::av_reinterpret_cast;
532 using values_type =
typename local_matrix_device_type::values_type;
534 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
535 "RCP<const Map>, ptr, ind, val[, params]): ";
537 RCP<crs_graph_type> graph;
542 catch (std::exception& e) {
543 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
544 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
545 "RCP<const Map>, ArrayRCP<size_t>, ArrayRCP<LocalOrdinal>[, "
546 "RCP<ParameterList>]) threw an exception: " << e.what ());
552 staticGraph_ = graph;
565 auto lclGraph = staticGraph_->getLocalGraphDevice ();
566 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
567 (
size_t (lclGraph.row_map.extent (0)) != size_t (ptr.size ()) ||
568 size_t (lclGraph.entries.extent (0)) != size_t (ind.size ()),
569 std::logic_error,
"CrsGraph's constructor (rowMap, colMap, "
570 "ptr, ind[, params]) did not set the local graph correctly. "
571 "Please report this bug to the Tpetra developers.");
574 getKokkosViewDeepCopy<device_type> (av_reinterpret_cast<IST> (val ()));
575 valuesPacked_wdv = values_wdv_type(valIn);
576 valuesUnpacked_wdv = valuesPacked_wdv;
586 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
588 CrsMatrix (
const Teuchos::RCP<const map_type>& rowMap,
589 const Teuchos::RCP<const map_type>& colMap,
591 const Teuchos::RCP<Teuchos::ParameterList>& params) :
593 storageStatus_ (
Details::STORAGE_1D_PACKED),
596 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
597 "RCP<const Map>, local_matrix_device_type[, RCP<ParameterList>]): ";
598 const char suffix[] =
599 " Please report this bug to the Tpetra developers.";
601 Teuchos::RCP<crs_graph_type> graph;
604 lclMatrix.graph, params));
606 catch (std::exception& e) {
607 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
608 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
609 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) threw an "
610 "exception: " << e.what ());
612 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
613 (!graph->isFillComplete (), std::logic_error,
"CrsGraph constructor (RCP"
614 "<const Map>, RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) "
615 "did not produce a fill-complete graph. Please report this bug to the "
616 "Tpetra developers.");
621 staticGraph_ = graph;
623 valuesPacked_wdv = values_wdv_type(lclMatrix.values);
624 valuesUnpacked_wdv = valuesPacked_wdv;
626 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
628 "At the end of a CrsMatrix constructor that should produce "
629 "a fillComplete matrix, isFillActive() is true." << suffix);
630 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
632 "CrsMatrix constructor that should produce a fillComplete "
633 "matrix, isFillComplete() is false." << suffix);
637 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
640 const Teuchos::RCP<const map_type>& rowMap,
641 const Teuchos::RCP<const map_type>& colMap,
642 const Teuchos::RCP<const map_type>& domainMap,
643 const Teuchos::RCP<const map_type>& rangeMap,
644 const Teuchos::RCP<Teuchos::ParameterList>& params) :
646 storageStatus_ (
Details::STORAGE_1D_PACKED),
649 const char tfecfFuncName[] =
"Tpetra::CrsMatrix(RCP<const Map>, "
650 "RCP<const Map>, RCP<const Map>, RCP<const Map>, "
651 "local_matrix_device_type[, RCP<ParameterList>]): ";
652 const char suffix[] =
653 " Please report this bug to the Tpetra developers.";
655 Teuchos::RCP<crs_graph_type> graph;
657 graph = Teuchos::rcp (
new crs_graph_type (lclMatrix.graph, rowMap, colMap,
658 domainMap, rangeMap, params));
660 catch (std::exception& e) {
661 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
662 (
true, std::runtime_error,
"CrsGraph constructor (RCP<const Map>, "
663 "RCP<const Map>, RCP<const Map>, RCP<const Map>, local_graph_device_type[, "
664 "RCP<ParameterList>]) threw an exception: " << e.what ());
666 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
667 (! graph->isFillComplete (), std::logic_error,
"CrsGraph "
668 "constructor (RCP<const Map>, RCP<const Map>, RCP<const Map>, "
669 "RCP<const Map>, local_graph_device_type[, RCP<ParameterList>]) did "
670 "not produce a fillComplete graph." << suffix);
675 staticGraph_ = graph;
677 valuesPacked_wdv = values_wdv_type(lclMatrix.values);
678 valuesUnpacked_wdv = valuesPacked_wdv;
680 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
682 "At the end of a CrsMatrix constructor that should produce "
683 "a fillComplete matrix, isFillActive() is true." << suffix);
684 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
686 "CrsMatrix constructor that should produce a fillComplete "
687 "matrix, isFillComplete() is false." << suffix);
691 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
694 const Teuchos::RCP<const map_type>& rowMap,
695 const Teuchos::RCP<const map_type>& colMap,
696 const Teuchos::RCP<const map_type>& domainMap,
697 const Teuchos::RCP<const map_type>& rangeMap,
698 const Teuchos::RCP<const import_type>& importer,
699 const Teuchos::RCP<const export_type>& exporter,
700 const Teuchos::RCP<Teuchos::ParameterList>& params) :
702 storageStatus_ (
Details::STORAGE_1D_PACKED),
706 const char tfecfFuncName[] =
"Tpetra::CrsMatrix"
707 "(lclMat,Map,Map,Map,Map,Import,Export,params): ";
708 const char suffix[] =
709 " Please report this bug to the Tpetra developers.";
711 Teuchos::RCP<crs_graph_type> graph;
714 domainMap, rangeMap, importer,
717 catch (std::exception& e) {
718 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
719 (
true, std::runtime_error,
"CrsGraph constructor "
720 "(local_graph_device_type, Map, Map, Map, Map, Import, Export, "
721 "params) threw: " << e.what ());
723 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
724 (!graph->isFillComplete (), std::logic_error,
"CrsGraph "
725 "constructor (local_graph_device_type, Map, Map, Map, Map, Import, "
726 "Export, params) did not produce a fill-complete graph. "
727 "Please report this bug to the Tpetra developers.");
732 staticGraph_ = graph;
734 valuesPacked_wdv = values_wdv_type(lclMatrix.values);
735 valuesUnpacked_wdv = valuesPacked_wdv;
737 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
739 "At the end of a CrsMatrix constructor that should produce "
740 "a fillComplete matrix, isFillActive() is true." << suffix);
741 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
743 "CrsMatrix constructor that should produce a fillComplete "
744 "matrix, isFillComplete() is false." << suffix);
748 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
751 const Teuchos::DataAccess copyOrView):
753 staticGraph_ (source.getCrsGraph()),
754 storageStatus_ (source.storageStatus_)
756 const char tfecfFuncName[] =
"Tpetra::CrsMatrix("
757 "const CrsMatrix&, const Teuchos::DataAccess): ";
758 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
760 "Source graph must be fillComplete().");
762 if (copyOrView == Teuchos::Copy) {
763 using values_type =
typename local_matrix_device_type::values_type;
765 using Kokkos::view_alloc;
766 using Kokkos::WithoutInitializing;
767 values_type newvals (view_alloc (
"val", WithoutInitializing),
770 Kokkos::deep_copy (newvals, vals);
771 valuesPacked_wdv = values_wdv_type(newvals);
772 valuesUnpacked_wdv = valuesPacked_wdv;
775 else if (copyOrView == Teuchos::View) {
776 valuesPacked_wdv = values_wdv_type(source.valuesPacked_wdv);
777 valuesUnpacked_wdv = values_wdv_type(source.valuesUnpacked_wdv);
781 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
782 (
true, std::invalid_argument,
"Second argument 'copyOrView' "
783 "has an invalid value " << copyOrView <<
". Valid values "
784 "include Teuchos::Copy = " << Teuchos::Copy <<
" and "
785 "Teuchos::View = " << Teuchos::View <<
".");
787 checkInternalState();
790 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
795 std::swap(crs_matrix.
importMV_, this->importMV_);
796 std::swap(crs_matrix.
exportMV_, this->exportMV_);
797 std::swap(crs_matrix.staticGraph_, this->staticGraph_);
798 std::swap(crs_matrix.myGraph_, this->myGraph_);
799 std::swap(crs_matrix.valuesPacked_wdv, this->valuesPacked_wdv);
800 std::swap(crs_matrix.valuesUnpacked_wdv, this->valuesUnpacked_wdv);
803 std::swap(crs_matrix.
nonlocals_, this->nonlocals_);
806 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
807 Teuchos::RCP<const Teuchos::Comm<int> >
810 return getCrsGraphRef ().
getComm ();
813 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
817 return fillComplete_;
820 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
824 return ! fillComplete_;
827 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
834 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
841 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
848 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
855 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
862 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
869 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
876 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
883 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
891 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
899 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
906 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
913 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
920 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
927 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
934 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
935 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
941 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
942 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
948 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
949 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
955 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
956 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
962 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
963 Teuchos::RCP<const RowGraph<LocalOrdinal, GlobalOrdinal, Node> >
966 if (staticGraph_ != Teuchos::null) {
972 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
973 Teuchos::RCP<const CrsGraph<LocalOrdinal, GlobalOrdinal, Node> >
976 if (staticGraph_ != Teuchos::null) {
982 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
983 const CrsGraph<LocalOrdinal, GlobalOrdinal, Node>&
984 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
985 getCrsGraphRef ()
const
987#ifdef HAVE_TPETRA_DEBUG
988 constexpr bool debug =
true;
990 constexpr bool debug =
false;
993 if (! this->staticGraph_.is_null ()) {
994 return * (this->staticGraph_);
998 const char tfecfFuncName[] =
"getCrsGraphRef: ";
999 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1000 (this->myGraph_.is_null (), std::logic_error,
1001 "Both staticGraph_ and myGraph_ are null. "
1002 "Please report this bug to the Tpetra developers.");
1004 return * (this->myGraph_);
1008 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1009 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_device_type
1013 auto numCols = staticGraph_->
getColMap()->getLocalNumElements();
1016 valuesPacked_wdv.getDeviceView(Access::ReadWrite),
1017 staticGraph_->getLocalGraphDevice());
1020 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1021 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_matrix_host_type
1025 auto numCols = staticGraph_->
getColMap()->getLocalNumElements();
1026 return local_matrix_host_type(
"Tpetra::CrsMatrix::lclMatrixHost", numCols,
1027 valuesPacked_wdv.getHostView(Access::ReadWrite),
1028 staticGraph_->getLocalGraphHost());
1032 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1033 std::shared_ptr<typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::local_multiply_op_type>
1037 auto localMatrix = getLocalMatrixDevice();
1038#ifdef HAVE_TPETRACORE_CUDA
1039#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
1040 if(this->getLocalNumEntries() <=
size_t(Teuchos::OrdinalTraits<LocalOrdinal>::max()) &&
1041 std::is_same<Node, Kokkos::Compat::KokkosCudaWrapperNode>::value)
1043 if(this->ordinalRowptrs.data() ==
nullptr)
1045 auto originalRowptrs = localMatrix.graph.row_map;
1048 this->ordinalRowptrs = ordinal_rowptrs_type(
1049 Kokkos::ViewAllocateWithoutInitializing(
"CrsMatrix::ordinalRowptrs"), originalRowptrs.extent(0));
1050 auto ordinalRowptrs_ = this->ordinalRowptrs;
1051 Kokkos::parallel_for(
"CrsMatrix::getLocalMultiplyOperator::convertRowptrs",
1052 Kokkos::RangePolicy<execution_space>(0, originalRowptrs.extent(0)),
1053 KOKKOS_LAMBDA(LocalOrdinal i)
1055 ordinalRowptrs_(i) = originalRowptrs(i);
1059 return std::make_shared<local_multiply_op_type>(
1060 std::make_shared<local_matrix_device_type>(localMatrix), this->ordinalRowptrs);
1065 return std::make_shared<local_multiply_op_type>(
1066 std::make_shared<local_matrix_device_type>(localMatrix));
1069 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1073 return myGraph_.is_null ();
1076 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1083 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1090 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1099 const char tfecfFuncName[] =
"allocateValues: ";
1100 const char suffix[] =
1101 " Please report this bug to the Tpetra developers.";
1102 ProfilingRegion region(
"Tpetra::CrsMatrix::allocateValues");
1104 std::unique_ptr<std::string> prefix;
1106 prefix = this->createPrefix(
"CrsMatrix",
"allocateValues");
1107 std::ostringstream os;
1108 os << *prefix <<
"lg: "
1109 << (lg == LocalIndices ?
"Local" :
"Global") <<
"Indices"
1111 << (gas == GraphAlreadyAllocated ?
"Already" :
"NotYet")
1112 <<
"Allocated" << endl;
1113 std::cerr << os.str();
1116 const bool debug = Behavior::debug(
"CrsMatrix");
1118 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1119 (this->staticGraph_.is_null (), std::logic_error,
1120 "staticGraph_ is null." << suffix);
1125 if ((gas == GraphAlreadyAllocated) !=
1126 staticGraph_->indicesAreAllocated ()) {
1127 const char err1[] =
"The caller has asserted that the graph "
1129 const char err2[] =
"already allocated, but the static graph "
1130 "says that its indices are ";
1131 const char err3[] =
"already allocated. ";
1132 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1133 (gas == GraphAlreadyAllocated &&
1134 ! staticGraph_->indicesAreAllocated (), std::logic_error,
1135 err1 << err2 <<
"not " << err3 << suffix);
1136 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1137 (gas != GraphAlreadyAllocated &&
1138 staticGraph_->indicesAreAllocated (), std::logic_error,
1139 err1 <<
"not " << err2 << err3 << suffix);
1147 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1148 (! this->staticGraph_->indicesAreAllocated () &&
1149 this->myGraph_.is_null (), std::logic_error,
1150 "The static graph says that its indices are not allocated, "
1151 "but the graph is not owned by the matrix." << suffix);
1154 if (gas == GraphNotYetAllocated) {
1156 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1157 (this->myGraph_.is_null (), std::logic_error,
1158 "gas = GraphNotYetAllocated, but myGraph_ is null." << suffix);
1161 this->myGraph_->allocateIndices (lg, verbose);
1163 catch (std::exception& e) {
1164 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1165 (
true, std::runtime_error,
"CrsGraph::allocateIndices "
1166 "threw an exception: " << e.what ());
1169 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1170 (
true, std::runtime_error,
"CrsGraph::allocateIndices "
1171 "threw an exception not a subclass of std::exception.");
1176 const size_t lclNumRows = this->staticGraph_->getLocalNumRows ();
1177 typename Graph::local_graph_device_type::row_map_type k_ptrs =
1178 this->staticGraph_->rowPtrsUnpacked_dev_;
1180 const size_t lclTotalNumEntries =
1181 this->staticGraph_->rowPtrsUnpacked_host_(lclNumRows);
1184 using values_type =
typename local_matrix_device_type::values_type;
1186 std::ostringstream os;
1187 os << *prefix <<
"Allocate values_wdv: Pre "
1188 << valuesUnpacked_wdv.extent(0) <<
", post "
1189 << lclTotalNumEntries << endl;
1190 std::cerr << os.str();
1193 valuesUnpacked_wdv = values_wdv_type(
1194 values_type(
"Tpetra::CrsMatrix::values",
1195 lclTotalNumEntries));
1199 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1204 using ::Tpetra::Details::computeOffsetsFromCounts;
1205 using ::Tpetra::Details::getEntryOnHost;
1206 using Teuchos::arcp_const_cast;
1207 using Teuchos::Array;
1208 using Teuchos::ArrayRCP;
1209 using Teuchos::null;
1213 using row_map_type =
typename local_graph_device_type::row_map_type;
1214 using lclinds_1d_type =
typename Graph::local_graph_device_type::entries_type::non_const_type;
1215 using values_type =
typename local_matrix_device_type::values_type;
1217 (
"Tpetra::CrsMatrix::fillLocalGraphAndMatrix");
1219 const char tfecfFuncName[] =
"fillLocalGraphAndMatrix (called from "
1220 "fillComplete or expertStaticFillComplete): ";
1221 const char suffix[] =
1222 " Please report this bug to the Tpetra developers.";
1223 const bool debug = Details::Behavior::debug(
"CrsMatrix");
1224 const bool verbose = Details::Behavior::verbose(
"CrsMatrix");
1226 std::unique_ptr<std::string> prefix;
1228 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalGraphAndMatrix");
1229 std::ostringstream os;
1230 os << *prefix << endl;
1231 std::cerr << os.str ();
1237 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1238 (myGraph_.is_null (), std::logic_error,
"The nonconst graph "
1239 "(myGraph_) is null. This means that the matrix has a "
1240 "const (a.k.a. \"static\") graph. fillComplete or "
1241 "expertStaticFillComplete should never call "
1242 "fillLocalGraphAndMatrix in that case." << suffix);
1245 const size_t lclNumRows = this->getLocalNumRows ();
1255 typedef decltype (myGraph_->k_numRowEntries_) row_entries_type;
1257 typename Graph::local_graph_device_type::row_map_type curRowOffsets =
1258 myGraph_->rowPtrsUnpacked_dev_;
1261 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1262 (curRowOffsets.extent (0) == 0, std::logic_error,
1263 "curRowOffsets.extent(0) == 0.");
1264 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1265 (curRowOffsets.extent (0) != lclNumRows + 1, std::logic_error,
1266 "curRowOffsets.extent(0) = "
1267 << curRowOffsets.extent (0) <<
" != lclNumRows + 1 = "
1268 << (lclNumRows + 1) <<
".");
1269 const size_t numOffsets = curRowOffsets.extent (0);
1270 const auto valToCheck = myGraph_->rowPtrsUnpacked_host_(numOffsets - 1);
1271 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1273 myGraph_->lclIndsUnpacked_wdv.extent (0) != valToCheck,
1274 std::logic_error,
"numOffsets = " <<
1275 numOffsets <<
" != 0 and myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1276 << myGraph_->lclIndsUnpacked_wdv.extent (0) <<
" != curRowOffsets("
1277 << numOffsets <<
") = " << valToCheck <<
".");
1280 if (myGraph_->getLocalNumEntries() !=
1281 myGraph_->getLocalAllocationSize()) {
1285 typename row_map_type::non_const_type k_ptrs;
1286 row_map_type k_ptrs_const;
1287 lclinds_1d_type k_inds;
1291 std::ostringstream os;
1292 const auto numEnt = myGraph_->getLocalNumEntries();
1293 const auto allocSize = myGraph_->getLocalAllocationSize();
1294 os << *prefix <<
"Unpacked 1-D storage: numEnt=" << numEnt
1295 <<
", allocSize=" << allocSize << endl;
1296 std::cerr << os.str ();
1304 if (debug && curRowOffsets.extent (0) != 0) {
1305 const size_t numOffsets =
1306 static_cast<size_t> (curRowOffsets.extent (0));
1307 const auto valToCheck = myGraph_->rowPtrsUnpacked_host_(numOffsets - 1);
1308 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1309 (
static_cast<size_t> (valToCheck) !=
1310 static_cast<size_t> (valuesUnpacked_wdv.extent (0)),
1311 std::logic_error,
"(unpacked branch) Before "
1312 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1313 <<
") = " << valToCheck <<
" != valuesUnpacked_wdv.extent(0)"
1314 " = " << valuesUnpacked_wdv.extent (0) <<
".");
1315 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1316 (
static_cast<size_t> (valToCheck) !=
1317 static_cast<size_t> (myGraph_->lclIndsUnpacked_wdv.extent (0)),
1318 std::logic_error,
"(unpacked branch) Before "
1319 "allocating or packing, curRowOffsets(" << (numOffsets-1)
1320 <<
") = " << valToCheck
1321 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0) = "
1322 << myGraph_->lclIndsUnpacked_wdv.extent (0) <<
".");
1330 size_t lclTotalNumEntries = 0;
1336 std::ostringstream os;
1337 os << *prefix <<
"Allocate packed row offsets: "
1338 << (lclNumRows+1) << endl;
1339 std::cerr << os.str ();
1341 typename row_map_type::non_const_type
1342 packedRowOffsets (
"Tpetra::CrsGraph::ptr", lclNumRows + 1);
1343 typename row_entries_type::const_type numRowEnt_h =
1344 myGraph_->k_numRowEntries_;
1347 lclTotalNumEntries =
1348 computeOffsetsFromCounts (packedRowOffsets, numRowEnt_h);
1351 k_ptrs = packedRowOffsets;
1352 k_ptrs_const = k_ptrs;
1356 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1357 (
static_cast<size_t> (k_ptrs.extent (0)) != lclNumRows + 1,
1359 "(unpacked branch) After packing k_ptrs, "
1360 "k_ptrs.extent(0) = " << k_ptrs.extent (0) <<
" != "
1361 "lclNumRows+1 = " << (lclNumRows+1) <<
".");
1362 const auto valToCheck = getEntryOnHost (k_ptrs, lclNumRows);
1363 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1364 (valToCheck != lclTotalNumEntries, std::logic_error,
1365 "(unpacked branch) After filling k_ptrs, "
1366 "k_ptrs(lclNumRows=" << lclNumRows <<
") = " << valToCheck
1367 <<
" != total number of entries on the calling process = "
1368 << lclTotalNumEntries <<
".");
1373 std::ostringstream os;
1374 os << *prefix <<
"Allocate packed local column indices: "
1375 << lclTotalNumEntries << endl;
1376 std::cerr << os.str ();
1378 k_inds = lclinds_1d_type (
"Tpetra::CrsGraph::lclInds", lclTotalNumEntries);
1380 std::ostringstream os;
1381 os << *prefix <<
"Allocate packed values: "
1382 << lclTotalNumEntries << endl;
1383 std::cerr << os.str ();
1385 k_vals = values_type (
"Tpetra::CrsMatrix::values", lclTotalNumEntries);
1397 using inds_packer_type = pack_functor<
1398 typename Graph::local_graph_device_type::entries_type::non_const_type,
1399 typename Graph::local_inds_dualv_type::t_dev::const_type,
1400 typename Graph::local_graph_device_type::row_map_type::non_const_type,
1401 typename Graph::local_graph_device_type::row_map_type>;
1402 inds_packer_type indsPacker (
1404 myGraph_->lclIndsUnpacked_wdv.getDeviceView(Access::ReadOnly),
1405 k_ptrs, curRowOffsets);
1406 using exec_space =
typename decltype (k_inds)::execution_space;
1407 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1408 Kokkos::parallel_for
1409 (
"Tpetra::CrsMatrix pack column indices",
1410 range_type (0, lclNumRows), indsPacker);
1414 using vals_packer_type = pack_functor<
1415 typename values_type::non_const_type,
1416 typename values_type::const_type,
1417 typename row_map_type::non_const_type,
1418 typename row_map_type::const_type>;
1419 vals_packer_type valsPacker (
1421 this->valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1422 k_ptrs, curRowOffsets);
1423 Kokkos::parallel_for (
"Tpetra::CrsMatrix pack values",
1424 range_type (0, lclNumRows), valsPacker);
1427 const char myPrefix[] =
"(\"Optimize Storage\""
1428 "=true branch) After packing, ";
1429 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1430 (k_ptrs.extent (0) == 0, std::logic_error, myPrefix
1431 <<
"k_ptrs.extent(0) = 0. This probably means that "
1432 "rowPtrsUnpacked_ was never allocated.");
1433 if (k_ptrs.extent (0) != 0) {
1434 const size_t numOffsets (k_ptrs.extent (0));
1435 const auto valToCheck =
1436 getEntryOnHost (k_ptrs, numOffsets - 1);
1437 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1438 (
size_t (valToCheck) != k_vals.extent (0),
1439 std::logic_error, myPrefix <<
1440 "k_ptrs(" << (numOffsets-1) <<
") = " << valToCheck <<
1441 " != k_vals.extent(0) = " << k_vals.extent (0) <<
".");
1442 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1443 (
size_t (valToCheck) != k_inds.extent (0),
1444 std::logic_error, myPrefix <<
1445 "k_ptrs(" << (numOffsets-1) <<
") = " << valToCheck <<
1446 " != k_inds.extent(0) = " << k_inds.extent (0) <<
".");
1450 myGraph_->setRowPtrsPacked(k_ptrs_const);
1451 myGraph_->lclIndsPacked_wdv =
1452 typename crs_graph_type::local_inds_wdv_type(k_inds);
1453 valuesPacked_wdv = values_wdv_type(k_vals);
1458 myGraph_->rowPtrsPacked_dev_ = myGraph_->rowPtrsUnpacked_dev_;
1459 myGraph_->rowPtrsPacked_host_ = myGraph_->rowPtrsUnpacked_host_;
1460 myGraph_->lclIndsPacked_wdv = myGraph_->lclIndsUnpacked_wdv;
1461 valuesPacked_wdv = valuesUnpacked_wdv;
1464 std::ostringstream os;
1465 os << *prefix <<
"Storage already packed: rowPtrsUnpacked_: "
1466 << myGraph_->rowPtrsUnpacked_host_.extent(0) <<
", lclIndsUnpacked_wdv: "
1467 << myGraph_->lclIndsUnpacked_wdv.extent(0) <<
", valuesUnpacked_wdv: "
1468 << valuesUnpacked_wdv.extent(0) << endl;
1469 std::cerr << os.str();
1473 const char myPrefix[] =
1474 "(\"Optimize Storage\"=false branch) ";
1475 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1476 (myGraph_->rowPtrsUnpacked_dev_.extent (0) == 0, std::logic_error, myPrefix
1477 <<
"myGraph->rowPtrsUnpacked_dev_.extent(0) = 0. This probably means "
1478 "that rowPtrsUnpacked_ was never allocated.");
1479 if (myGraph_->rowPtrsUnpacked_dev_.extent (0) != 0) {
1480 const size_t numOffsets (myGraph_->rowPtrsUnpacked_host_.extent (0));
1481 const auto valToCheck = myGraph_->rowPtrsUnpacked_host_(numOffsets - 1);
1482 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1483 (
size_t (valToCheck) != valuesPacked_wdv.extent (0),
1484 std::logic_error, myPrefix <<
1485 "k_ptrs_const(" << (numOffsets-1) <<
") = " << valToCheck
1486 <<
" != valuesPacked_wdv.extent(0) = "
1487 << valuesPacked_wdv.extent (0) <<
".");
1488 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1489 (
size_t (valToCheck) != myGraph_->lclIndsPacked_wdv.extent (0),
1490 std::logic_error, myPrefix <<
1491 "k_ptrs_const(" << (numOffsets-1) <<
") = " << valToCheck
1492 <<
" != myGraph_->lclIndsPacked.extent(0) = "
1493 << myGraph_->lclIndsPacked_wdv.extent (0) <<
".");
1499 const char myPrefix[] =
"After packing, ";
1500 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1501 (
size_t (myGraph_->rowPtrsPacked_host_.extent (0)) != size_t (lclNumRows + 1),
1502 std::logic_error, myPrefix <<
"myGraph_->rowPtrsPacked_host_.extent(0) = "
1503 << myGraph_->rowPtrsPacked_host_.extent (0) <<
" != lclNumRows+1 = " <<
1504 (lclNumRows+1) <<
".");
1505 if (myGraph_->rowPtrsPacked_host_.extent (0) != 0) {
1506 const size_t numOffsets (myGraph_->rowPtrsPacked_host_.extent (0));
1507 const size_t valToCheck = myGraph_->rowPtrsPacked_host_(numOffsets-1);
1508 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1509 (valToCheck !=
size_t (valuesPacked_wdv.extent (0)),
1510 std::logic_error, myPrefix <<
"k_ptrs_const(" <<
1511 (numOffsets-1) <<
") = " << valToCheck
1512 <<
" != valuesPacked_wdv.extent(0) = "
1513 << valuesPacked_wdv.extent (0) <<
".");
1514 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1515 (valToCheck !=
size_t (myGraph_->lclIndsPacked_wdv.extent (0)),
1516 std::logic_error, myPrefix <<
"k_ptrs_const(" <<
1517 (numOffsets-1) <<
") = " << valToCheck
1518 <<
" != myGraph_->lclIndsPacked_wdvk_inds.extent(0) = "
1519 << myGraph_->lclIndsPacked_wdv.extent (0) <<
".");
1527 const bool defaultOptStorage =
1528 ! isStaticGraph () || staticGraph_->isStorageOptimized ();
1529 const bool requestOptimizedStorage =
1530 (! params.is_null () &&
1531 params->get (
"Optimize Storage", defaultOptStorage)) ||
1532 (params.is_null () && defaultOptStorage);
1537 if (requestOptimizedStorage) {
1542 std::ostringstream os;
1543 os << *prefix <<
"Optimizing storage: free k_numRowEntries_: "
1544 << myGraph_->k_numRowEntries_.extent(0) << endl;
1545 std::cerr << os.str();
1548 myGraph_->k_numRowEntries_ = row_entries_type ();
1553 myGraph_->rowPtrsUnpacked_dev_ = myGraph_->rowPtrsPacked_dev_;
1554 myGraph_->rowPtrsUnpacked_host_ = myGraph_->rowPtrsPacked_host_;
1555 myGraph_->lclIndsUnpacked_wdv = myGraph_->lclIndsPacked_wdv;
1556 valuesUnpacked_wdv = valuesPacked_wdv;
1558 myGraph_->storageStatus_ = Details::STORAGE_1D_PACKED;
1559 this->storageStatus_ = Details::STORAGE_1D_PACKED;
1563 std::ostringstream os;
1564 os << *prefix <<
"User requested NOT to optimize storage"
1566 std::cerr << os.str();
1571 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1576 using ::Tpetra::Details::ProfilingRegion;
1577 using Teuchos::ArrayRCP;
1578 using Teuchos::Array;
1579 using Teuchos::null;
1583 using row_map_type =
typename Graph::local_graph_device_type::row_map_type;
1584 using non_const_row_map_type =
typename row_map_type::non_const_type;
1585 using values_type =
typename local_matrix_device_type::values_type;
1586 ProfilingRegion regionFLM(
"Tpetra::CrsMatrix::fillLocalMatrix");
1587 const size_t lclNumRows = getLocalNumRows();
1589 const bool verbose = Details::Behavior::verbose(
"CrsMatrix");
1590 std::unique_ptr<std::string> prefix;
1592 prefix = this->createPrefix(
"CrsMatrix",
"fillLocalMatrix");
1593 std::ostringstream os;
1594 os << *prefix <<
"lclNumRows: " << lclNumRows << endl;
1595 std::cerr << os.str ();
1607 size_t nodeNumEntries = staticGraph_->getLocalNumEntries ();
1608 size_t nodeNumAllocated = staticGraph_->getLocalAllocationSize ();
1609 row_map_type k_rowPtrs = staticGraph_->rowPtrsPacked_dev_;
1611 row_map_type k_ptrs;
1617 bool requestOptimizedStorage =
true;
1618 const bool default_OptimizeStorage =
1619 ! isStaticGraph() || staticGraph_->isStorageOptimized();
1620 if (! params.is_null() &&
1621 ! params->get(
"Optimize Storage", default_OptimizeStorage)) {
1622 requestOptimizedStorage =
false;
1629 if (! staticGraph_->isStorageOptimized () &&
1630 requestOptimizedStorage) {
1632 (
true, std::runtime_error,
"You requested optimized storage "
1633 "by setting the \"Optimize Storage\" flag to \"true\" in "
1634 "the ParameterList, or by virtue of default behavior. "
1635 "However, the associated CrsGraph was filled separately and "
1636 "requested not to optimize storage. Therefore, the "
1637 "CrsMatrix cannot optimize storage.");
1638 requestOptimizedStorage =
false;
1641 using row_entries_type =
decltype (staticGraph_->k_numRowEntries_);
1660 if (nodeNumEntries != nodeNumAllocated) {
1662 std::ostringstream os;
1663 os << *prefix <<
"Unpacked 1-D storage: numEnt="
1664 << nodeNumEntries <<
", allocSize=" << nodeNumAllocated
1666 std::cerr << os.str();
1671 std::ostringstream os;
1672 os << *prefix <<
"Allocate packed row offsets: "
1673 << (lclNumRows+1) << endl;
1674 std::cerr << os.str();
1676 non_const_row_map_type tmpk_ptrs (
"Tpetra::CrsGraph::ptr",
1681 size_t lclTotalNumEntries = 0;
1684 typename row_entries_type::const_type numRowEnt_h =
1685 staticGraph_->k_numRowEntries_;
1687 lclTotalNumEntries =
1688 Details::computeOffsetsFromCounts (tmpk_ptrs, numRowEnt_h);
1694 std::ostringstream os;
1695 os << *prefix <<
"Allocate packed values: "
1696 << lclTotalNumEntries << endl;
1697 std::cerr << os.str ();
1699 k_vals = values_type (
"Tpetra::CrsMatrix::val", lclTotalNumEntries);
1703 typename values_type::non_const_type,
1704 typename values_type::const_type,
1705 typename row_map_type::non_const_type,
1706 typename row_map_type::const_type> valsPacker
1707 (k_vals, valuesUnpacked_wdv.getDeviceView(Access::ReadOnly),
1708 tmpk_ptrs, k_rowPtrs);
1710 using exec_space =
typename decltype (k_vals)::execution_space;
1711 using range_type = Kokkos::RangePolicy<exec_space, LocalOrdinal>;
1712 Kokkos::parallel_for (
"Tpetra::CrsMatrix pack values",
1713 range_type (0, lclNumRows), valsPacker);
1714 valuesPacked_wdv = values_wdv_type(k_vals);
1717 valuesPacked_wdv = valuesUnpacked_wdv;
1719 std::ostringstream os;
1720 os << *prefix <<
"Storage already packed: "
1721 <<
"valuesUnpacked_wdv: " << valuesUnpacked_wdv.extent(0) << endl;
1722 std::cerr << os.str();
1727 if (requestOptimizedStorage) {
1730 valuesUnpacked_wdv = valuesPacked_wdv;
1732 this->storageStatus_ = Details::STORAGE_1D_PACKED;
1736 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1741 const typename crs_graph_type::SLocalGlobalViews& newInds,
1742 const Teuchos::ArrayView<impl_scalar_type>& oldRowVals,
1743 const Teuchos::ArrayView<const impl_scalar_type>& newRowVals,
1744 const ELocalGlobal lg,
1745 const ELocalGlobal I)
1747 const size_t oldNumEnt = rowInfo.numEntries;
1748 const size_t numInserted = graph.insertIndices (rowInfo, newInds, lg, I);
1754 if (numInserted > 0) {
1755 const size_t startOffset = oldNumEnt;
1756 memcpy (&oldRowVals[startOffset], &newRowVals[0],
1757 numInserted *
sizeof (impl_scalar_type));
1761 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1765 const Teuchos::ArrayView<const LocalOrdinal>& indices,
1766 const Teuchos::ArrayView<const Scalar>& values,
1770 const char tfecfFuncName[] =
"insertLocalValues: ";
1772 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1773 (! this->isFillActive (), std::runtime_error,
1774 "Fill is not active. After calling fillComplete, you must call "
1775 "resumeFill before you may insert entries into the matrix again.");
1776 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1777 (this->isStaticGraph (), std::runtime_error,
1778 "Cannot insert indices with static graph; use replaceLocalValues() "
1782 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1783 (graph.
colMap_.is_null (), std::runtime_error,
1784 "Cannot insert local indices without a column map.");
1785 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1787 std::runtime_error,
"Graph indices are global; use "
1788 "insertGlobalValues().");
1789 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1790 (values.size () != indices.size (), std::runtime_error,
1791 "values.size() = " << values.size ()
1792 <<
" != indices.size() = " << indices.size () <<
".");
1793 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1794 ! graph.
rowMap_->isNodeLocalElement (lclRow), std::runtime_error,
1795 "Local row index " << lclRow <<
" does not belong to this process.");
1797 if (! graph.indicesAreAllocated ()) {
1800 const bool verbose = Details::Behavior::verbose(
"CrsMatrix");
1801 this->allocateValues (LocalIndices, GraphNotYetAllocated, verbose);
1804#ifdef HAVE_TPETRA_DEBUG
1805 const size_t numEntriesToAdd =
static_cast<size_t> (indices.size ());
1810 using Teuchos::toString;
1813 Teuchos::Array<LocalOrdinal> badColInds;
1814 bool allInColMap =
true;
1815 for (
size_t k = 0; k < numEntriesToAdd; ++k) {
1817 allInColMap =
false;
1818 badColInds.push_back (indices[k]);
1821 if (! allInColMap) {
1822 std::ostringstream os;
1823 os <<
"You attempted to insert entries in owned row " << lclRow
1824 <<
", at the following column indices: " << toString (indices)
1826 os <<
"Of those, the following indices are not in the column Map on "
1827 "this process: " << toString (badColInds) <<
"." << endl <<
"Since "
1828 "the matrix has a column Map already, it is invalid to insert "
1829 "entries at those locations.";
1830 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1831 (
true, std::invalid_argument, os.str ());
1838 auto valsView = this->getValuesViewHostNonConst(rowInfo);
1840 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1841 valsView[offset] += values[k]; };
1842 std::function<void(
size_t const,
size_t const,
size_t const)> cb(std::ref(fun));
1843 graph.insertLocalIndicesImpl(lclRow, indices, cb);
1844 }
else if (CM == INSERT) {
1845 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset) {
1846 valsView[offset] = values[k]; };
1847 std::function<void(
size_t const,
size_t const,
size_t const)> cb(std::ref(fun));
1848 graph.insertLocalIndicesImpl(lclRow, indices, cb);
1850 std::ostringstream os;
1851 os <<
"You attempted to use insertLocalValues with CombineMode " <<
combineModeToString(CM)
1852 <<
"but this has not been implemented." << endl;
1853 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1854 (
true, std::invalid_argument, os.str ());
1858 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1862 const LocalOrdinal numEnt,
1863 const Scalar vals[],
1864 const LocalOrdinal cols[],
1867 Teuchos::ArrayView<const LocalOrdinal> colsT (cols, numEnt);
1868 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
1869 this->insertLocalValues (localRow, colsT, valsT, CM);
1872 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1877 const GlobalOrdinal gblColInds[],
1879 const size_t numInputEnt)
1881#ifdef HAVE_TPETRA_DEBUG
1882 const char tfecfFuncName[] =
"insertGlobalValuesImpl: ";
1884 const size_t curNumEnt = rowInfo.numEntries;
1887 if (! graph.indicesAreAllocated ()) {
1890 using ::Tpetra::Details::Behavior;
1891 const bool verbose = Behavior::verbose(
"CrsMatrix");
1892 this->allocateValues (GlobalIndices, GraphNotYetAllocated, verbose);
1897 rowInfo = graph.
getRowInfo (rowInfo.localRow);
1900 auto valsView = this->getValuesViewHostNonConst(rowInfo);
1901 auto fun = [&](
size_t const k,
size_t const ,
size_t const offset){
1902 valsView[offset] += vals[k];
1904 std::function<void(
size_t const,
size_t const,
size_t const)> cb(std::ref(fun));
1905#ifdef HAVE_TPETRA_DEBUG
1911#ifdef HAVE_TPETRA_DEBUG
1912 size_t newNumEnt = curNumEnt + numInserted;
1913 const size_t chkNewNumEnt =
1915 if (chkNewNumEnt != newNumEnt) {
1916 std::ostringstream os;
1917 os << std::endl <<
"newNumEnt = " << newNumEnt
1918 <<
" != graph.getNumEntriesInLocalRow(" << rowInfo.localRow
1919 <<
") = " << chkNewNumEnt <<
"." << std::endl
1920 <<
"\torigNumEnt: " << origNumEnt << std::endl
1921 <<
"\tnumInputEnt: " << numInputEnt << std::endl
1922 <<
"\tgblColInds: [";
1923 for (
size_t k = 0; k < numInputEnt; ++k) {
1924 os << gblColInds[k];
1925 if (k +
size_t (1) < numInputEnt) {
1929 os <<
"]" << std::endl
1931 for (
size_t k = 0; k < numInputEnt; ++k) {
1933 if (k +
size_t (1) < numInputEnt) {
1937 os <<
"]" << std::endl;
1939 if (this->supportsRowViews ()) {
1940 values_host_view_type vals2;
1941 if (this->isGloballyIndexed ()) {
1942 global_inds_host_view_type gblColInds2;
1943 const GlobalOrdinal gblRow =
1944 graph.
rowMap_->getGlobalElement (rowInfo.localRow);
1946 Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()) {
1947 os <<
"Local row index " << rowInfo.localRow <<
" is invalid!"
1951 bool getViewThrew =
false;
1953 this->getGlobalRowView (gblRow, gblColInds2, vals2);
1955 catch (std::exception& e) {
1956 getViewThrew =
true;
1957 os <<
"getGlobalRowView threw exception:" << std::endl
1958 << e.what () << std::endl;
1960 if (! getViewThrew) {
1961 os <<
"\tNew global column indices: ";
1962 for (
size_t jjj = 0; jjj < gblColInds2.extent(0); jjj++)
1963 os << gblColInds2[jjj] <<
" ";
1965 os <<
"\tNew values: ";
1966 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1967 os << vals2[jjj] <<
" ";
1972 else if (this->isLocallyIndexed ()) {
1973 local_inds_host_view_type lclColInds2;
1974 this->getLocalRowView (rowInfo.localRow, lclColInds2, vals2);
1975 os <<
"\tNew local column indices: ";
1976 for (
size_t jjj = 0; jjj < lclColInds2.extent(0); jjj++)
1977 os << lclColInds2[jjj] <<
" ";
1979 os <<
"\tNew values: ";
1980 for (
size_t jjj = 0; jjj < vals2.extent(0); jjj++)
1981 os << vals2[jjj] <<
" ";
1986 os <<
"Please report this bug to the Tpetra developers.";
1987 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1988 (
true, std::logic_error, os.str ());
1993 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1997 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
1998 const Teuchos::ArrayView<const Scalar>& values)
2000 using Teuchos::toString;
2003 typedef LocalOrdinal LO;
2004 typedef GlobalOrdinal GO;
2005 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2006 typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
2007 const char tfecfFuncName[] =
"insertGlobalValues: ";
2009#ifdef HAVE_TPETRA_DEBUG
2010 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2011 (values.size () != indices.size (), std::runtime_error,
2012 "values.size() = " << values.size () <<
" != indices.size() = "
2013 << indices.size () <<
".");
2018 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2021 if (lclRow == OTLO::invalid ()) {
2028 this->insertNonownedGlobalValues (gblRow, indices, values);
2031 if (this->isStaticGraph ()) {
2033 const int myRank = rowMap.
getComm ()->getRank ();
2034 const int numProcs = rowMap.
getComm ()->getSize ();
2035 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2036 (
true, std::runtime_error,
2037 "The matrix was constructed with a constant (\"static\") graph, "
2038 "yet the given global row index " << gblRow <<
" is in the row "
2039 "Map on the calling process (with rank " << myRank <<
", of " <<
2040 numProcs <<
" process(es)). In this case, you may not insert "
2041 "new entries into rows owned by the calling process.");
2044 crs_graph_type& graph = * (this->myGraph_);
2045 const IST*
const inputVals =
2046 reinterpret_cast<const IST*
> (values.getRawPtr ());
2047 const GO*
const inputGblColInds = indices.getRawPtr ();
2048 const size_t numInputEnt = indices.size ();
2049 RowInfo rowInfo = graph.getRowInfo (lclRow);
2057 if (! graph.colMap_.is_null ()) {
2058 const map_type& colMap = * (graph.colMap_);
2063#ifdef HAVE_TPETRA_DEBUG
2064 Teuchos::Array<GO> badColInds;
2066 const size_type numEntriesToInsert = indices.size ();
2067 bool allInColMap =
true;
2068 for (size_type k = 0; k < numEntriesToInsert; ++k) {
2070 allInColMap =
false;
2071#ifdef HAVE_TPETRA_DEBUG
2072 badColInds.push_back (indices[k]);
2078 if (! allInColMap) {
2079 std::ostringstream os;
2080 os <<
"You attempted to insert entries in owned row " << gblRow
2081 <<
", at the following column indices: " << toString (indices)
2083#ifdef HAVE_TPETRA_DEBUG
2084 os <<
"Of those, the following indices are not in the column Map "
2085 "on this process: " << toString (badColInds) <<
"." << endl
2086 <<
"Since the matrix has a column Map already, it is invalid "
2087 "to insert entries at those locations.";
2089 os <<
"At least one of those indices is not in the column Map "
2090 "on this process." << endl <<
"It is invalid to insert into "
2091 "columns not in the column Map on the process that owns the "
2094 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2095 (
true, std::invalid_argument, os.str ());
2099 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2100 inputVals, numInputEnt);
2105 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2109 const LocalOrdinal numEnt,
2110 const Scalar vals[],
2111 const GlobalOrdinal inds[])
2113 Teuchos::ArrayView<const GlobalOrdinal> indsT (inds, numEnt);
2114 Teuchos::ArrayView<const Scalar> valsT (vals, numEnt);
2115 this->insertGlobalValues (globalRow, indsT, valsT);
2119 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2123 const GlobalOrdinal gblRow,
2124 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2125 const Teuchos::ArrayView<const Scalar>& values,
2128 typedef impl_scalar_type IST;
2129 typedef LocalOrdinal LO;
2130 typedef GlobalOrdinal GO;
2131 typedef Tpetra::Details::OrdinalTraits<LO> OTLO;
2132 const char tfecfFuncName[] =
"insertGlobalValuesFiltered: ";
2135 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2136 (values.size () != indices.size (), std::runtime_error,
2137 "values.size() = " << values.size () <<
" != indices.size() = "
2138 << indices.size () <<
".");
2143 const map_type& rowMap = * (this->getCrsGraphRef ().rowMap_);
2145 if (lclRow == OTLO::invalid ()) {
2152 this->insertNonownedGlobalValues (gblRow, indices, values);
2155 if (this->isStaticGraph ()) {
2157 const int myRank = rowMap.getComm ()->getRank ();
2158 const int numProcs = rowMap.getComm ()->getSize ();
2159 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2160 (
true, std::runtime_error,
2161 "The matrix was constructed with a constant (\"static\") graph, "
2162 "yet the given global row index " << gblRow <<
" is in the row "
2163 "Map on the calling process (with rank " << myRank <<
", of " <<
2164 numProcs <<
" process(es)). In this case, you may not insert "
2165 "new entries into rows owned by the calling process.");
2168 crs_graph_type& graph = * (this->myGraph_);
2169 const IST*
const inputVals =
2170 reinterpret_cast<const IST*
> (values.getRawPtr ());
2171 const GO*
const inputGblColInds = indices.getRawPtr ();
2172 const size_t numInputEnt = indices.size ();
2173 RowInfo rowInfo = graph.getRowInfo (lclRow);
2175 if (!graph.colMap_.is_null() && graph.isLocallyIndexed()) {
2182 const map_type& colMap = * (graph.colMap_);
2183 size_t curOffset = 0;
2184 while (curOffset < numInputEnt) {
2188 Teuchos::Array<LO> lclIndices;
2189 size_t endOffset = curOffset;
2190 for ( ; endOffset < numInputEnt; ++endOffset) {
2192 if (lclIndex != OTLO::invalid())
2193 lclIndices.push_back(lclIndex);
2200 const LO numIndInSeq = (endOffset - curOffset);
2201 if (numIndInSeq != 0) {
2202 this->insertLocalValues(lclRow, lclIndices(), values(curOffset, numIndInSeq));
2208 const bool invariant = endOffset == numInputEnt ||
2209 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2210 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2211 (! invariant, std::logic_error, std::endl <<
"Invariant failed!");
2213 curOffset = endOffset + 1;
2216 else if (! graph.colMap_.is_null ()) {
2217 const map_type& colMap = * (graph.colMap_);
2218 size_t curOffset = 0;
2219 while (curOffset < numInputEnt) {
2223 size_t endOffset = curOffset;
2224 for ( ; endOffset < numInputEnt &&
2225 colMap.getLocalElement (inputGblColInds[endOffset]) != OTLO::invalid ();
2231 const LO numIndInSeq = (endOffset - curOffset);
2232 if (numIndInSeq != 0) {
2233 rowInfo = graph.getRowInfo(lclRow);
2234 this->insertGlobalValuesImpl (graph, rowInfo,
2235 inputGblColInds + curOffset,
2236 inputVals + curOffset,
2243 const bool invariant = endOffset == numInputEnt ||
2244 colMap.getLocalElement (inputGblColInds[endOffset]) == OTLO::invalid ();
2245 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2246 (! invariant, std::logic_error, std::endl <<
"Invariant failed!");
2248 curOffset = endOffset + 1;
2252 this->insertGlobalValuesImpl (graph, rowInfo, inputGblColInds,
2253 inputVals, numInputEnt);
2258 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2260 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2261 insertGlobalValuesFilteredChecked(
2262 const GlobalOrdinal gblRow,
2263 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2264 const Teuchos::ArrayView<const Scalar>& values,
2265 const char*
const prefix,
2269 using Details::verbosePrintArray;
2273 insertGlobalValuesFiltered(gblRow, indices, values, debug);
2275 catch(std::exception& e) {
2276 std::ostringstream os;
2278 const size_t maxNumToPrint =
2279 Details::Behavior::verbosePrintCountThreshold();
2280 os << *prefix <<
": insertGlobalValuesFiltered threw an "
2281 "exception: " << e.what() << endl
2282 <<
"Global row index: " << gblRow << endl;
2283 verbosePrintArray(os, indices,
"Global column indices",
2286 verbosePrintArray(os, values,
"Values", maxNumToPrint);
2290 os <<
": insertGlobalValuesFiltered threw an exception: "
2293 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error, os.str());
2297 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2303 const LocalOrdinal inds[],
2305 const LocalOrdinal numElts)
2307 typedef LocalOrdinal LO;
2308 typedef GlobalOrdinal GO;
2309 const bool sorted = graph.
isSorted ();
2319 for (LO j = 0; j < numElts; ++j) {
2320 const LO lclColInd = inds[j];
2321 const size_t offset =
2322 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2323 lclColInd, hint, sorted);
2324 if (offset != rowInfo.numEntries) {
2325 rowVals[offset] = newVals[j];
2332 if (graph.
colMap_.is_null ()) {
2333 return Teuchos::OrdinalTraits<LO>::invalid ();
2335 const map_type colMap = * (graph.
colMap_);
2341 for (LO j = 0; j < numElts; ++j) {
2342 const GO gblColInd = colMap.getGlobalElement (inds[j]);
2343 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
2344 const size_t offset =
2345 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2346 gblColInd, hint, sorted);
2347 if (offset != rowInfo.numEntries) {
2348 rowVals[offset] = newVals[j];
2367 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2371 const Teuchos::ArrayView<const LocalOrdinal>& lclCols,
2372 const Teuchos::ArrayView<const Scalar>& vals)
2374 typedef LocalOrdinal LO;
2376 const LO numInputEnt =
static_cast<LO
> (lclCols.size ());
2377 if (
static_cast<LO
> (vals.size ()) != numInputEnt) {
2378 return Teuchos::OrdinalTraits<LO>::invalid ();
2380 const LO*
const inputInds = lclCols.getRawPtr ();
2381 const Scalar*
const inputVals = vals.getRawPtr ();
2382 return this->replaceLocalValues (localRow, numInputEnt,
2383 inputVals, inputInds);
2386 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2392 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2393 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
2396 const LO numInputEnt = inputInds.extent(0);
2397 if (numInputEnt !=
static_cast<LO
>(inputVals.extent(0))) {
2398 return Teuchos::OrdinalTraits<LO>::invalid();
2400 const Scalar*
const inVals =
2401 reinterpret_cast<const Scalar*
>(inputVals.data());
2402 return this->replaceLocalValues(localRow, numInputEnt,
2403 inVals, inputInds.data());
2406 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2410 const LocalOrdinal numEnt,
2411 const Scalar inputVals[],
2412 const LocalOrdinal inputCols[])
2415 typedef LocalOrdinal LO;
2417 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2419 return Teuchos::OrdinalTraits<LO>::invalid ();
2424 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2427 return static_cast<LO
> (0);
2429 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2430 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2431 return this->replaceLocalValuesImpl (curRowVals.data (), graph, rowInfo,
2432 inputCols, inVals, numEnt);
2435 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2441 const GlobalOrdinal inds[],
2443 const LocalOrdinal numElts)
2445 Teuchos::ArrayView<const GlobalOrdinal> indsT(inds, numElts);
2447 [&](
size_t const k,
size_t const ,
size_t const offset) {
2448 rowVals[offset] = newVals[k];
2450 std::function<void(
size_t const,
size_t const,
size_t const)> cb(std::ref(fun));
2454 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2458 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2459 const Teuchos::ArrayView<const Scalar>& inputVals)
2461 typedef LocalOrdinal LO;
2463 const LO numInputEnt =
static_cast<LO
> (inputGblColInds.size ());
2464 if (
static_cast<LO
> (inputVals.size ()) != numInputEnt) {
2465 return Teuchos::OrdinalTraits<LO>::invalid ();
2467 return this->replaceGlobalValues (globalRow, numInputEnt,
2468 inputVals.getRawPtr (),
2469 inputGblColInds.getRawPtr ());
2472 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2476 const LocalOrdinal numEnt,
2477 const Scalar inputVals[],
2478 const GlobalOrdinal inputGblColInds[])
2481 typedef LocalOrdinal LO;
2483 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2485 return Teuchos::OrdinalTraits<LO>::invalid ();
2490 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2493 return static_cast<LO
> (0);
2496 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2497 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2498 return this->replaceGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2499 inputGblColInds, inVals, numEnt);
2502 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2508 const Kokkos::View<const global_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
2509 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals)
2518 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
2519 if (
static_cast<LO
>(inputVals.extent(0)) != numInputEnt) {
2520 return Teuchos::OrdinalTraits<LO>::invalid();
2522 const Scalar*
const inVals =
2523 reinterpret_cast<const Scalar*
>(inputVals.data());
2524 return this->replaceGlobalValues(globalRow, numInputEnt, inVals,
2528 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2534 const GlobalOrdinal inds[],
2536 const LocalOrdinal numElts,
2539 typedef LocalOrdinal LO;
2540 typedef GlobalOrdinal GO;
2542 const bool sorted = graph.
isSorted ();
2551 if (graph.
colMap_.is_null ()) {
2562 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2564 for (LO j = 0; j < numElts; ++j) {
2566 if (lclColInd != LINV) {
2567 const size_t offset =
2568 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2569 lclColInd, hint, sorted);
2570 if (offset != rowInfo.numEntries) {
2572 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2575 rowVals[offset] += newVals[j];
2588 for (LO j = 0; j < numElts; ++j) {
2589 const GO gblColInd = inds[j];
2590 const size_t offset =
2591 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2592 gblColInd, hint, sorted);
2593 if (offset != rowInfo.numEntries) {
2595 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
2598 rowVals[offset] += newVals[j];
2612 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2616 const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds,
2617 const Teuchos::ArrayView<const Scalar>& inputVals,
2620 typedef LocalOrdinal LO;
2622 const LO numInputEnt =
static_cast<LO
> (inputGblColInds.size ());
2623 if (
static_cast<LO
> (inputVals.size ()) != numInputEnt) {
2624 return Teuchos::OrdinalTraits<LO>::invalid ();
2626 return this->sumIntoGlobalValues (gblRow, numInputEnt,
2627 inputVals.getRawPtr (),
2628 inputGblColInds.getRawPtr (),
2632 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2636 const LocalOrdinal numInputEnt,
2637 const Scalar inputVals[],
2638 const GlobalOrdinal inputGblColInds[],
2642 typedef LocalOrdinal LO;
2643 typedef GlobalOrdinal GO;
2645 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2647 return Teuchos::OrdinalTraits<LO>::invalid ();
2652 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2657 using Teuchos::ArrayView;
2658 ArrayView<const GO> inputGblColInds_av(
2659 numInputEnt == 0 ?
nullptr : inputGblColInds,
2661 ArrayView<const Scalar> inputVals_av(
2662 numInputEnt == 0 ?
nullptr :
2663 inputVals, numInputEnt);
2668 this->insertNonownedGlobalValues (gblRow, inputGblColInds_av,
2679 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2680 const IST*
const inVals =
reinterpret_cast<const IST*
> (inputVals);
2681 return this->sumIntoGlobalValuesImpl (curRowVals.data (), graph, rowInfo,
2682 inputGblColInds, inVals,
2683 numInputEnt, atomic);
2687 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2689 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2690 transformLocalValues (
const LocalOrdinal lclRow,
2691 const LocalOrdinal numInputEnt,
2692 const impl_scalar_type inputVals[],
2693 const LocalOrdinal inputCols[],
2694 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2697 using Tpetra::Details::OrdinalTraits;
2698 typedef LocalOrdinal LO;
2700 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2702 return Teuchos::OrdinalTraits<LO>::invalid ();
2704 const crs_graph_type& graph = * (this->staticGraph_);
2705 const RowInfo rowInfo = graph.getRowInfo (lclRow);
2707 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2710 return static_cast<LO
> (0);
2712 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2713 return this->transformLocalValues (curRowVals.data (), graph,
2714 rowInfo, inputCols, inputVals,
2715 numInputEnt, f, atomic);
2718 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2722 const LocalOrdinal numInputEnt,
2723 const impl_scalar_type inputVals[],
2724 const GlobalOrdinal inputCols[],
2725 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2728 using Tpetra::Details::OrdinalTraits;
2729 typedef LocalOrdinal LO;
2731 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
2733 return OrdinalTraits<LO>::invalid ();
2735 const crs_graph_type& graph = * (this->staticGraph_);
2736 const RowInfo rowInfo = graph.getRowInfoFromGlobalRowIndex (gblRow);
2738 if (rowInfo.localRow == OrdinalTraits<size_t>::invalid ()) {
2741 return static_cast<LO
> (0);
2743 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
2744 return this->transformGlobalValues (curRowVals.data (), graph,
2745 rowInfo, inputCols, inputVals,
2746 numInputEnt, f, atomic);
2749 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2751 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
2752 transformLocalValues (impl_scalar_type rowVals[],
2753 const crs_graph_type& graph,
2754 const RowInfo& rowInfo,
2755 const LocalOrdinal inds[],
2756 const impl_scalar_type newVals[],
2757 const LocalOrdinal numElts,
2758 std::function<impl_scalar_type (
const impl_scalar_type&,
const impl_scalar_type&) > f,
2761 typedef impl_scalar_type ST;
2762 typedef LocalOrdinal LO;
2763 typedef GlobalOrdinal GO;
2770 const bool sorted = graph.isSorted ();
2775 if (graph.isLocallyIndexed ()) {
2778 auto colInds = graph.getLocalIndsViewHost (rowInfo);
2780 for (LO j = 0; j < numElts; ++j) {
2781 const LO lclColInd = inds[j];
2782 const size_t offset =
2783 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2784 lclColInd, hint, sorted);
2785 if (offset != rowInfo.numEntries) {
2794 volatile ST*
const dest = &rowVals[offset];
2795 (void) atomic_binary_function_update (dest, newVals[j], f);
2799 rowVals[offset] = f (rowVals[offset], newVals[j]);
2806 else if (graph.isGloballyIndexed ()) {
2810 if (graph.colMap_.is_null ()) {
2817 const map_type& colMap = * (graph.colMap_);
2820 auto colInds = graph.getGlobalIndsViewHost (rowInfo);
2822 const GO GINV = Teuchos::OrdinalTraits<GO>::invalid ();
2823 for (LO j = 0; j < numElts; ++j) {
2824 const GO gblColInd = colMap.getGlobalElement (inds[j]);
2825 if (gblColInd != GINV) {
2826 const size_t offset =
2827 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2828 gblColInd, hint, sorted);
2829 if (offset != rowInfo.numEntries) {
2838 volatile ST*
const dest = &rowVals[offset];
2839 (void) atomic_binary_function_update (dest, newVals[j], f);
2843 rowVals[offset] = f (rowVals[offset], newVals[j]);
2858 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2864 const GlobalOrdinal inds[],
2866 const LocalOrdinal numElts,
2871 typedef LocalOrdinal LO;
2872 typedef GlobalOrdinal GO;
2879 const bool sorted = graph.
isSorted ();
2889 for (LO j = 0; j < numElts; ++j) {
2890 const GO gblColInd = inds[j];
2891 const size_t offset =
2892 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2893 gblColInd, hint, sorted);
2894 if (offset != rowInfo.numEntries) {
2903 volatile ST*
const dest = &rowVals[offset];
2904 (void) atomic_binary_function_update (dest, newVals[j], f);
2908 rowVals[offset] = f (rowVals[offset], newVals[j]);
2919 if (graph.
colMap_.is_null ()) {
2925 const map_type& colMap = * (graph.
colMap_);
2930 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
2931 for (LO j = 0; j < numElts; ++j) {
2932 const LO lclColInd = colMap.getLocalElement (inds[j]);
2933 if (lclColInd != LINV) {
2934 const size_t offset =
2935 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2936 lclColInd, hint, sorted);
2937 if (offset != rowInfo.numEntries) {
2946 volatile ST*
const dest = &rowVals[offset];
2947 (void) atomic_binary_function_update (dest, newVals[j], f);
2951 rowVals[offset] = f (rowVals[offset], newVals[j]);
2966 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2972 const LocalOrdinal inds[],
2974 const LocalOrdinal numElts,
2977 typedef LocalOrdinal LO;
2978 typedef GlobalOrdinal GO;
2980 const bool sorted = graph.
isSorted ();
2990 for (LO j = 0; j < numElts; ++j) {
2991 const LO lclColInd = inds[j];
2992 const size_t offset =
2993 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
2994 lclColInd, hint, sorted);
2995 if (offset != rowInfo.numEntries) {
2997 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
3000 rowVals[offset] += newVals[j];
3008 if (graph.
colMap_.is_null ()) {
3009 return Teuchos::OrdinalTraits<LO>::invalid ();
3017 for (LO j = 0; j < numElts; ++j) {
3019 if (gblColInd != Teuchos::OrdinalTraits<GO>::invalid ()) {
3020 const size_t offset =
3021 KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
3022 gblColInd, hint, sorted);
3023 if (offset != rowInfo.numEntries) {
3025 Kokkos::atomic_add (&rowVals[offset], newVals[j]);
3028 rowVals[offset] += newVals[j];
3048 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3052 const Teuchos::ArrayView<const LocalOrdinal>& indices,
3053 const Teuchos::ArrayView<const Scalar>& values,
3057 const LO numInputEnt =
static_cast<LO
>(indices.size());
3058 if (
static_cast<LO
>(values.size()) != numInputEnt) {
3059 return Teuchos::OrdinalTraits<LO>::invalid();
3061 const LO*
const inputInds = indices.getRawPtr();
3062 const scalar_type*
const inputVals = values.getRawPtr();
3063 return this->sumIntoLocalValues(localRow, numInputEnt,
3064 inputVals, inputInds, atomic);
3067 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3073 const Kokkos::View<const local_ordinal_type*, Kokkos::AnonymousSpace>& inputInds,
3074 const Kokkos::View<const impl_scalar_type*, Kokkos::AnonymousSpace>& inputVals,
3078 const LO numInputEnt =
static_cast<LO
>(inputInds.extent(0));
3079 if (
static_cast<LO
>(inputVals.extent(0)) != numInputEnt) {
3080 return Teuchos::OrdinalTraits<LO>::invalid();
3083 reinterpret_cast<const scalar_type*
>(inputVals.data());
3084 return this->sumIntoLocalValues(localRow, numInputEnt, inVals,
3085 inputInds.data(), atomic);
3088 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3092 const LocalOrdinal numEnt,
3093 const Scalar vals[],
3094 const LocalOrdinal cols[],
3098 typedef LocalOrdinal LO;
3100 if (! this->isFillActive () || this->staticGraph_.is_null ()) {
3102 return Teuchos::OrdinalTraits<LO>::invalid ();
3107 if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
3110 return static_cast<LO
> (0);
3112 auto curRowVals = this->getValuesViewHostNonConst (rowInfo);
3113 const IST*
const inputVals =
reinterpret_cast<const IST*
> (vals);
3114 return this->sumIntoLocalValuesImpl (curRowVals.data (), graph, rowInfo,
3115 cols, inputVals, numEnt, atomic);
3118 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3120 values_dualv_type::t_host::const_type
3124 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3125 return typename values_dualv_type::t_host::const_type ();
3127 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
3132 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3134 values_dualv_type::t_host
3138 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3139 return typename values_dualv_type::t_host ();
3141 return valuesUnpacked_wdv.getHostSubview(rowinfo.offset1D,
3146 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3148 values_dualv_type::t_dev::const_type
3152 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3153 return typename values_dualv_type::t_dev::const_type ();
3155 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
3160 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3162 values_dualv_type::t_dev
3166 if (rowinfo.allocSize == 0 || valuesUnpacked_wdv.extent(0) == 0)
3167 return typename values_dualv_type::t_dev ();
3169 return valuesUnpacked_wdv.getDeviceSubview(rowinfo.offset1D,
3175 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3179 nonconst_local_inds_host_view_type &indices,
3180 nonconst_values_host_view_type &values,
3181 size_t& numEntries)
const
3183 using Teuchos::ArrayView;
3184 using Teuchos::av_reinterpret_cast;
3185 const char tfecfFuncName[] =
"getLocalRowCopy: ";
3187 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3188 (! this->hasColMap (), std::runtime_error,
3189 "The matrix does not have a column Map yet. This means we don't have "
3190 "local indices for columns yet, so it doesn't make sense to call this "
3191 "method. If the matrix doesn't have a column Map yet, you should call "
3192 "fillComplete on it first.");
3194 const RowInfo rowinfo = staticGraph_->getRowInfo (localRow);
3195 const size_t theNumEntries = rowinfo.numEntries;
3196 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3197 (
static_cast<size_t> (indices.size ()) < theNumEntries ||
3198 static_cast<size_t> (values.size ()) < theNumEntries,
3199 std::runtime_error,
"Row with local index " << localRow <<
" has " <<
3200 theNumEntries <<
" entry/ies, but indices.size() = " <<
3201 indices.size () <<
" and values.size() = " << values.size () <<
".");
3202 numEntries = theNumEntries;
3204 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3205 if (staticGraph_->isLocallyIndexed ()) {
3206 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3207 auto curVals = getValuesViewHost(rowinfo);
3209 for (
size_t j = 0; j < theNumEntries; ++j) {
3210 values[j] = curVals[j];
3211 indices[j] = curLclInds(j);
3214 else if (staticGraph_->isGloballyIndexed ()) {
3216 const map_type& colMap = * (staticGraph_->colMap_);
3217 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3218 auto curVals = getValuesViewHost(rowinfo);
3220 for (
size_t j = 0; j < theNumEntries; ++j) {
3221 values[j] = curVals[j];
3229template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3233 nonconst_global_inds_host_view_type &indices,
3234 nonconst_values_host_view_type &values,
3235 size_t& numEntries)
const
3237 using Teuchos::ArrayView;
3238 using Teuchos::av_reinterpret_cast;
3239 const char tfecfFuncName[] =
"getGlobalRowCopy: ";
3242 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3243 const size_t theNumEntries = rowinfo.numEntries;
3244 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3245 static_cast<size_t> (indices.size ()) < theNumEntries ||
3246 static_cast<size_t> (values.size ()) < theNumEntries,
3247 std::runtime_error,
"Row with global index " << globalRow <<
" has "
3248 << theNumEntries <<
" entry/ies, but indices.size() = " <<
3249 indices.size () <<
" and values.size() = " << values.size () <<
".");
3250 numEntries = theNumEntries;
3252 if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3253 if (staticGraph_->isLocallyIndexed ()) {
3254 const map_type& colMap = * (staticGraph_->colMap_);
3255 auto curLclInds = staticGraph_->getLocalIndsViewHost(rowinfo);
3256 auto curVals = getValuesViewHost(rowinfo);
3258 for (
size_t j = 0; j < theNumEntries; ++j) {
3259 values[j] = curVals[j];
3263 else if (staticGraph_->isGloballyIndexed ()) {
3264 auto curGblInds = staticGraph_->getGlobalIndsViewHost(rowinfo);
3265 auto curVals = getValuesViewHost(rowinfo);
3267 for (
size_t j = 0; j < theNumEntries; ++j) {
3268 values[j] = curVals[j];
3269 indices[j] = curGblInds(j);
3276 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3280 local_inds_host_view_type &indices,
3281 values_host_view_type &values)
const
3283 const char tfecfFuncName[] =
"getLocalRowView: ";
3285 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3286 isGloballyIndexed (), std::runtime_error,
"The matrix currently stores "
3287 "its indices as global indices, so you cannot get a view with local "
3288 "column indices. If the matrix has a column Map, you may call "
3289 "getLocalRowCopy() to get local column indices; otherwise, you may get "
3290 "a view with global column indices by calling getGlobalRowCopy().");
3292 const RowInfo rowInfo = staticGraph_->getRowInfo (localRow);
3293 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3294 rowInfo.numEntries > 0) {
3295 indices = staticGraph_->lclIndsUnpacked_wdv.getHostSubview(
3299 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
3306 indices = local_inds_host_view_type();
3307 values = values_host_view_type();
3310#ifdef HAVE_TPETRA_DEBUG
3311 const char suffix[] =
". This should never happen. Please report this "
3312 "bug to the Tpetra developers.";
3313 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3314 (
static_cast<size_t> (indices.size ()) !=
3315 static_cast<size_t> (values.size ()), std::logic_error,
3316 "At the end of this method, for local row " << localRow <<
", "
3317 "indices.size() = " << indices.size () <<
" != values.size () = "
3318 << values.size () << suffix);
3319 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3320 (
static_cast<size_t> (indices.size ()) !=
3321 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3322 "At the end of this method, for local row " << localRow <<
", "
3323 "indices.size() = " << indices.size () <<
" != rowInfo.numEntries = "
3324 << rowInfo.numEntries << suffix);
3325 const size_t expectedNumEntries = getNumEntriesInLocalRow (localRow);
3326 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3327 (rowInfo.numEntries != expectedNumEntries, std::logic_error,
"At the end "
3328 "of this method, for local row " << localRow <<
", rowInfo.numEntries = "
3329 << rowInfo.numEntries <<
" != getNumEntriesInLocalRow(localRow) = " <<
3330 expectedNumEntries << suffix);
3335 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3339 global_inds_host_view_type &indices,
3340 values_host_view_type &values)
const
3342 const char tfecfFuncName[] =
"getGlobalRowView: ";
3344 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3345 isLocallyIndexed (), std::runtime_error,
3346 "The matrix is locally indexed, so we cannot return a view of the row "
3347 "with global column indices. Use getGlobalRowCopy() instead.");
3352 staticGraph_->getRowInfoFromGlobalRowIndex (globalRow);
3353 if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3354 rowInfo.numEntries > 0) {
3355 indices = staticGraph_->gblInds_wdv.getHostSubview(rowInfo.offset1D,
3358 values = valuesUnpacked_wdv.getHostSubview(rowInfo.offset1D,
3363 indices = global_inds_host_view_type();
3364 values = values_host_view_type();
3367#ifdef HAVE_TPETRA_DEBUG
3368 const char suffix[] =
". This should never happen. Please report this "
3369 "bug to the Tpetra developers.";
3370 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3371 (
static_cast<size_t> (indices.size ()) !=
3372 static_cast<size_t> (values.size ()), std::logic_error,
3373 "At the end of this method, for global row " << globalRow <<
", "
3374 "indices.size() = " << indices.size () <<
" != values.size () = "
3375 << values.size () << suffix);
3376 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3377 (
static_cast<size_t> (indices.size ()) !=
3378 static_cast<size_t> (rowInfo.numEntries), std::logic_error,
3379 "At the end of this method, for global row " << globalRow <<
", "
3380 "indices.size() = " << indices.size () <<
" != rowInfo.numEntries = "
3381 << rowInfo.numEntries << suffix);
3382 const size_t expectedNumEntries = getNumEntriesInGlobalRow (globalRow);
3383 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3384 (rowInfo.numEntries != expectedNumEntries, std::logic_error,
"At the end "
3385 "of this method, for global row " << globalRow <<
", rowInfo.numEntries "
3386 "= " << rowInfo.numEntries <<
" != getNumEntriesInGlobalRow(globalRow) ="
3387 " " << expectedNumEntries << suffix);
3392 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3395 scale (
const Scalar& alpha)
3400 const size_t numEntries = staticGraph_->getLocalNumEntries ();
3401 if (! staticGraph_->indicesAreAllocated () ||
3402 nlrs == 0 || numEntries == 0) {
3407 auto vals = valuesPacked_wdv.getDeviceView(Access::ReadWrite);
3408 KokkosBlas::scal(vals, theAlpha, vals);
3413 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3425 if (! staticGraph_->indicesAreAllocated () || numEntries == 0) {
3430 Kokkos::deep_copy (
execution_space(), valuesUnpacked_wdv.getDeviceView(Access::OverwriteAll),
3437 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3440 setAllValues (
const typename local_graph_device_type::row_map_type& rowPointers,
3441 const typename local_graph_device_type::entries_type::non_const_type& columnIndices,
3442 const typename local_matrix_device_type::values_type& values)
3445 ProfilingRegion region (
"Tpetra::CrsMatrix::setAllValues");
3446 const char tfecfFuncName[] =
"setAllValues: ";
3447 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3448 (columnIndices.size () != values.size (), std::invalid_argument,
3449 "columnIndices.size() = " << columnIndices.size () <<
" != values.size()"
3450 " = " << values.size () <<
".");
3451 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3452 (myGraph_.is_null (), std::runtime_error,
"myGraph_ must not be null.");
3455 myGraph_->setAllIndices (rowPointers, columnIndices);
3457 catch (std::exception &e) {
3458 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3459 (
true, std::runtime_error,
"myGraph_->setAllIndices() threw an "
3460 "exception: " << e.what ());
3467 auto lclGraph = myGraph_->getLocalGraphDevice ();
3468 const size_t numEnt = lclGraph.entries.extent (0);
3469 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3470 (lclGraph.row_map.extent (0) != rowPointers.extent (0) ||
3471 numEnt !=
static_cast<size_t> (columnIndices.extent (0)),
3472 std::logic_error,
"myGraph_->setAllIndices() did not correctly create "
3473 "local graph. Please report this bug to the Tpetra developers.");
3475 valuesPacked_wdv = values_wdv_type(values);
3476 valuesUnpacked_wdv = valuesPacked_wdv;
3480 this->storageStatus_ = Details::STORAGE_1D_PACKED;
3482 checkInternalState ();
3485 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3491 ProfilingRegion region (
"Tpetra::CrsMatrix::setAllValues from KokkosSparse::CrsMatrix");
3493 auto graph = localDeviceMatrix.graph;
3496 auto rows = graph.row_map;
3497 auto columns = graph.entries;
3498 auto values = localDeviceMatrix.values;
3500 setAllValues(rows,columns,values);
3503 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3507 const Teuchos::ArrayRCP<LocalOrdinal>& ind,
3508 const Teuchos::ArrayRCP<Scalar>& val)
3510 using Kokkos::Compat::getKokkosViewDeepCopy;
3511 using Teuchos::ArrayRCP;
3512 using Teuchos::av_reinterpret_cast;
3515 typedef typename local_graph_device_type::row_map_type row_map_type;
3517 const char tfecfFuncName[] =
"setAllValues(ArrayRCP<size_t>, ArrayRCP<LO>, ArrayRCP<Scalar>): ";
3523 typename row_map_type::non_const_type ptrNative (
"ptr", ptr.size ());
3524 Kokkos::View<
const size_t*,
3525 typename row_map_type::array_layout,
3527 Kokkos::MemoryUnmanaged> ptrSizeT (ptr.getRawPtr (), ptr.size ());
3530 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3531 (ptrNative.extent (0) != ptrSizeT.extent (0),
3532 std::logic_error,
"ptrNative.extent(0) = " <<
3533 ptrNative.extent (0) <<
" != ptrSizeT.extent(0) = "
3534 << ptrSizeT.extent (0) <<
". Please report this bug to the "
3535 "Tpetra developers.");
3537 auto indIn = getKokkosViewDeepCopy<DT> (ind ());
3538 auto valIn = getKokkosViewDeepCopy<DT> (av_reinterpret_cast<IST> (val ()));
3539 this->setAllValues (ptrNative, indIn, valIn);
3542 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3547 const char tfecfFuncName[] =
"getLocalDiagOffsets: ";
3548 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3549 (staticGraph_.is_null (), std::runtime_error,
"The matrix has no graph.");
3556 const size_t lclNumRows = staticGraph_->getLocalNumRows ();
3557 if (
static_cast<size_t> (offsets.size ()) < lclNumRows) {
3558 offsets.resize (lclNumRows);
3564 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
3569 Kokkos::MemoryUnmanaged> output_type;
3570 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3571 staticGraph_->getLocalDiagOffsets (offsetsOut);
3574 Kokkos::View<size_t*, device_type> offsetsTmp (
"diagOffsets", lclNumRows);
3575 staticGraph_->getLocalDiagOffsets (offsetsTmp);
3576 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
3577 Kokkos::MemoryUnmanaged> output_type;
3578 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
3584 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3589 using Teuchos::ArrayRCP;
3590 using Teuchos::ArrayView;
3591 using Teuchos::av_reinterpret_cast;
3592 const char tfecfFuncName[] =
"getLocalDiagCopy (1-arg): ";
3596 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3597 staticGraph_.is_null (), std::runtime_error,
3598 "This method requires that the matrix have a graph.");
3599 auto rowMapPtr = this->getRowMap ();
3600 if (rowMapPtr.is_null () || rowMapPtr->getComm ().is_null ()) {
3606 auto colMapPtr = this->getColMap ();
3607 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3608 (! this->hasColMap () || colMapPtr.is_null (), std::runtime_error,
3609 "This method requires that the matrix have a column Map.");
3610 const map_type& rowMap = * rowMapPtr;
3611 const map_type& colMap = * colMapPtr;
3612 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3614#ifdef HAVE_TPETRA_DEBUG
3617 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3618 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3619 "The input Vector's Map must be compatible with the CrsMatrix's row "
3620 "Map. You may check this by using Map's isCompatible method: "
3621 "diag.getMap ()->isCompatible (A.getRowMap ());");
3624 if (this->isFillComplete ()) {
3627 const auto D_lcl_1d =
3628 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3632 using ::Tpetra::Details::getDiagCopyWithoutOffsets;
3633 (void) getDiagCopyWithoutOffsets (D_lcl_1d, lclRowMap,
3635 getLocalMatrixDevice ());
3638 using ::Tpetra::Details::getLocalDiagCopyWithoutOffsetsNotFillComplete;
3639 (void) getLocalDiagCopyWithoutOffsetsNotFillComplete (diag, *
this);
3643 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3648 Kokkos::MemoryUnmanaged>& offsets)
const
3650 typedef LocalOrdinal LO;
3652#ifdef HAVE_TPETRA_DEBUG
3653 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3654 const map_type& rowMap = * (this->getRowMap ());
3657 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3658 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3659 "The input Vector's Map must be compatible with (in the sense of Map::"
3660 "isCompatible) the CrsMatrix's row Map.");
3671 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3674 Kokkos::subview (D_lcl, Kokkos::make_pair (LO (0), myNumRows), 0);
3676 KokkosSparse::getDiagCopy (D_lcl_1d, offsets,
3677 getLocalMatrixDevice ());
3680 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3684 const Teuchos::ArrayView<const size_t>& offsets)
const
3686 using LO = LocalOrdinal;
3687 using host_execution_space = Kokkos::DefaultHostExecutionSpace;
3690#ifdef HAVE_TPETRA_DEBUG
3691 const char tfecfFuncName[] =
"getLocalDiagCopy: ";
3692 const map_type& rowMap = * (this->getRowMap ());
3695 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3696 ! diag.
getMap ()->isCompatible (rowMap), std::runtime_error,
3697 "The input Vector's Map must be compatible with (in the sense of Map::"
3698 "isCompatible) the CrsMatrix's row Map.");
3711 auto lclVecHost1d = Kokkos::subview (lclVecHost, Kokkos::ALL (), 0);
3713 using host_offsets_view_type =
3714 Kokkos::View<
const size_t*, Kokkos::HostSpace,
3715 Kokkos::MemoryTraits<Kokkos::Unmanaged> >;
3716 host_offsets_view_type h_offsets (offsets.getRawPtr (), offsets.size ());
3718 using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
3719 const LO myNumRows =
static_cast<LO
> (this->getLocalNumRows ());
3720 const size_t INV = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
3722 auto rowPtrsPackedHost = staticGraph_->rowPtrsPacked_host_;
3723 auto valuesPackedHost = valuesPacked_wdv.getHostView(Access::ReadOnly);
3724 Kokkos::parallel_for
3725 (
"Tpetra::CrsMatrix::getLocalDiagCopy",
3726 range_type (0, myNumRows),
3727 [&, INV, h_offsets] (
const LO lclRow) {
3728 lclVecHost1d(lclRow) = STS::zero ();
3729 if (h_offsets[lclRow] != INV) {
3730 auto curRowOffset = rowPtrsPackedHost (lclRow);
3731 lclVecHost1d(lclRow) =
3732 static_cast<IST
> (valuesPackedHost(curRowOffset+h_offsets[lclRow]));
3739 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3744 using ::Tpetra::Details::ProfilingRegion;
3745 using Teuchos::ArrayRCP;
3746 using Teuchos::ArrayView;
3747 using Teuchos::null;
3750 using Teuchos::rcpFromRef;
3752 const char tfecfFuncName[] =
"leftScale: ";
3754 ProfilingRegion region (
"Tpetra::CrsMatrix::leftScale");
3756 RCP<const vec_type> xp;
3757 if (this->getRangeMap ()->isSameAs (* (x.
getMap ()))) {
3760 auto exporter = this->getCrsGraphRef ().getExporter ();
3761 if (exporter.get () !=
nullptr) {
3762 RCP<vec_type> tempVec (
new vec_type (this->getRowMap ()));
3763 tempVec->doImport (x, *exporter,
REPLACE);
3767 xp = rcpFromRef (x);
3770 else if (this->getRowMap ()->isSameAs (* (x.
getMap ()))) {
3771 xp = rcpFromRef (x);
3774 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3775 (
true, std::invalid_argument,
"x's Map must be the same as "
3776 "either the row Map or the range Map of the CrsMatrix.");
3779 if (this->isFillComplete()) {
3780 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
3781 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
3782 using ::Tpetra::Details::leftScaleLocalCrsMatrix;
3783 leftScaleLocalCrsMatrix (getLocalMatrixDevice (),
3784 x_lcl_1d,
false,
false);
3788 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3789 (
true, std::runtime_error,
"CrsMatrix::leftScale requires matrix to be"
3794 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3799 using ::Tpetra::Details::ProfilingRegion;
3800 using Teuchos::ArrayRCP;
3801 using Teuchos::ArrayView;
3802 using Teuchos::null;
3805 using Teuchos::rcpFromRef;
3807 const char tfecfFuncName[] =
"rightScale: ";
3809 ProfilingRegion region (
"Tpetra::CrsMatrix::rightScale");
3811 RCP<const vec_type> xp;
3812 if (this->getDomainMap ()->isSameAs (* (x.
getMap ()))) {
3815 auto importer = this->getCrsGraphRef ().getImporter ();
3816 if (importer.get () !=
nullptr) {
3817 RCP<vec_type> tempVec (
new vec_type (this->getColMap ()));
3818 tempVec->doImport (x, *importer,
REPLACE);
3822 xp = rcpFromRef (x);
3825 else if (this->getColMap ()->isSameAs (* (x.
getMap ()))) {
3826 xp = rcpFromRef (x);
3828 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3829 (
true, std::runtime_error,
"x's Map must be the same as "
3830 "either the domain Map or the column Map of the CrsMatrix.");
3833 if (this->isFillComplete()) {
3834 auto x_lcl = xp->getLocalViewDevice (Access::ReadOnly);
3835 auto x_lcl_1d = Kokkos::subview (x_lcl, Kokkos::ALL (), 0);
3836 using ::Tpetra::Details::rightScaleLocalCrsMatrix;
3837 rightScaleLocalCrsMatrix (getLocalMatrixDevice (),
3838 x_lcl_1d,
false,
false);
3842 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3843 (
true, std::runtime_error,
"CrsMatrix::rightScale requires matrix to be"
3848 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3849 typename CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::mag_type
3853 using Teuchos::ArrayView;
3854 using Teuchos::outArg;
3855 using Teuchos::REDUCE_SUM;
3856 using Teuchos::reduceAll;
3864 if (getLocalNumEntries() > 0) {
3865 if (isStorageOptimized ()) {
3868 const size_t numEntries = getLocalNumEntries ();
3869 auto values = valuesPacked_wdv.getHostView(Access::ReadOnly);
3870 for (
size_t k = 0; k < numEntries; ++k) {
3871 auto val = values[k];
3875 const mag_type val_abs = STS::abs (val);
3876 mySum += val_abs * val_abs;
3880 const LocalOrdinal numRows =
3881 static_cast<LocalOrdinal
> (this->getLocalNumRows ());
3882 for (LocalOrdinal r = 0; r < numRows; ++r) {
3883 const RowInfo rowInfo = myGraph_->getRowInfo (r);
3884 const size_t numEntries = rowInfo.numEntries;
3885 auto A_r = this->getValuesViewHost(rowInfo);
3886 for (
size_t k = 0; k < numEntries; ++k) {
3888 const mag_type val_abs = STS::abs (val);
3889 mySum += val_abs * val_abs;
3894 mag_type totalSum = STM::zero ();
3895 reduceAll<int, mag_type> (* (getComm ()), REDUCE_SUM,
3896 mySum, outArg (totalSum));
3897 return STM::sqrt (totalSum);
3900 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3903 replaceColMap (
const Teuchos::RCP<const map_type>& newColMap)
3905 const char tfecfFuncName[] =
"replaceColMap: ";
3909 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3910 myGraph_.is_null (), std::runtime_error,
3911 "This method does not work if the matrix has a const graph. The whole "
3912 "idea of a const graph is that you are not allowed to change it, but "
3913 "this method necessarily must modify the graph, since the graph owns "
3914 "the matrix's column Map.");
3915 myGraph_->replaceColMap (newColMap);
3918 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3922 const Teuchos::RCP<const map_type>& newColMap,
3923 const Teuchos::RCP<const import_type>& newImport,
3924 const bool sortEachRow)
3926 const char tfecfFuncName[] =
"reindexColumns: ";
3927 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3928 graph ==
nullptr && myGraph_.is_null (), std::invalid_argument,
3929 "The input graph is null, but the matrix does not own its graph.");
3931 crs_graph_type& theGraph = (graph ==
nullptr) ? *myGraph_ : *graph;
3932 const bool sortGraph =
false;
3937 const LocalOrdinal lclNumRows =
3940 for (LocalOrdinal row = 0; row < lclNumRows; ++row) {
3944 auto vals = this->getValuesViewHostNonConst (rowInfo);
3946 sort2 (lclColInds.data (),
3947 lclColInds.data () + rowInfo.numEntries,
3954 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3959 const char tfecfFuncName[] =
"replaceDomainMap: ";
3960 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3961 myGraph_.is_null (), std::runtime_error,
3962 "This method does not work if the matrix has a const graph. The whole "
3963 "idea of a const graph is that you are not allowed to change it, but this"
3964 " method necessarily must modify the graph, since the graph owns the "
3965 "matrix's domain Map and Import objects.");
3966 myGraph_->replaceDomainMap (newDomainMap);
3969 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3973 Teuchos::RCP<const import_type>& newImporter)
3975 const char tfecfFuncName[] =
"replaceDomainMapAndImporter: ";
3976 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3977 myGraph_.is_null (), std::runtime_error,
3978 "This method does not work if the matrix has a const graph. The whole "
3979 "idea of a const graph is that you are not allowed to change it, but this"
3980 " method necessarily must modify the graph, since the graph owns the "
3981 "matrix's domain Map and Import objects.");
3982 myGraph_->replaceDomainMapAndImporter (newDomainMap, newImporter);
3985 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3990 const char tfecfFuncName[] =
"replaceRangeMap: ";
3991 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3992 myGraph_.is_null (), std::runtime_error,
3993 "This method does not work if the matrix has a const graph. The whole "
3994 "idea of a const graph is that you are not allowed to change it, but this"
3995 " method necessarily must modify the graph, since the graph owns the "
3996 "matrix's domain Map and Import objects.");
3997 myGraph_->replaceRangeMap (newRangeMap);
4000 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4004 Teuchos::RCP<const export_type>& newExporter)
4006 const char tfecfFuncName[] =
"replaceRangeMapAndExporter: ";
4007 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4008 myGraph_.is_null (), std::runtime_error,
4009 "This method does not work if the matrix has a const graph. The whole "
4010 "idea of a const graph is that you are not allowed to change it, but this"
4011 " method necessarily must modify the graph, since the graph owns the "
4012 "matrix's domain Map and Import objects.");
4013 myGraph_->replaceRangeMapAndExporter (newRangeMap, newExporter);
4016 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4020 const Teuchos::ArrayView<const GlobalOrdinal>& indices,
4021 const Teuchos::ArrayView<const Scalar>& values)
4023 using Teuchos::Array;
4024 typedef GlobalOrdinal GO;
4025 typedef typename Array<GO>::size_type size_type;
4027 const size_type numToInsert = indices.size ();
4030 std::pair<Array<GO>, Array<Scalar> >& curRow = nonlocals_[globalRow];
4031 Array<GO>& curRowInds = curRow.first;
4032 Array<Scalar>& curRowVals = curRow.second;
4033 const size_type newCapacity = curRowInds.size () + numToInsert;
4034 curRowInds.reserve (newCapacity);
4035 curRowVals.reserve (newCapacity);
4036 for (size_type k = 0; k < numToInsert; ++k) {
4037 curRowInds.push_back (indices[k]);
4038 curRowVals.push_back (values[k]);
4042 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4049 using Teuchos::Comm;
4050 using Teuchos::outArg;
4053 using Teuchos::REDUCE_MAX;
4054 using Teuchos::REDUCE_MIN;
4055 using Teuchos::reduceAll;
4059 typedef GlobalOrdinal GO;
4060 typedef typename Teuchos::Array<GO>::size_type size_type;
4061 const char tfecfFuncName[] =
"globalAssemble: ";
4062 ProfilingRegion regionGlobalAssemble (
"Tpetra::CrsMatrix::globalAssemble");
4064 const bool verbose = Behavior::verbose(
"CrsMatrix");
4065 std::unique_ptr<std::string> prefix;
4067 prefix = this->createPrefix(
"CrsMatrix",
"globalAssemble");
4068 std::ostringstream os;
4069 os << *prefix <<
"nonlocals_.size()=" << nonlocals_.size()
4071 std::cerr << os.str();
4073 RCP<const Comm<int> > comm = getComm ();
4075 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4076 (! isFillActive (), std::runtime_error,
"Fill must be active before "
4077 "you may call this method.");
4079 const size_t myNumNonlocalRows = nonlocals_.size ();
4086 const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
4087 int someoneHasNonlocalRows = 0;
4088 reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
4089 outArg (someoneHasNonlocalRows));
4090 if (someoneHasNonlocalRows == 0) {
4104 RCP<const map_type> nonlocalRowMap;
4105 Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
4107 Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
4108 size_type curPos = 0;
4109 for (
auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4110 ++mapIter, ++curPos) {
4111 myNonlocalGblRows[curPos] = mapIter->first;
4114 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4115 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4122 sort2 (gblCols.begin (), gblCols.end (), vals.begin ());
4123 typename Teuchos::Array<GO>::iterator gblCols_newEnd;
4124 typename Teuchos::Array<Scalar>::iterator vals_newEnd;
4125 merge2 (gblCols_newEnd, vals_newEnd,
4126 gblCols.begin (), gblCols.end (),
4127 vals.begin (), vals.end ());
4128 gblCols.erase (gblCols_newEnd, gblCols.end ());
4129 vals.erase (vals_newEnd, vals.end ());
4130 numEntPerNonlocalRow[curPos] = gblCols.size ();
4141 GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
4143 auto iter = std::min_element (myNonlocalGblRows.begin (),
4144 myNonlocalGblRows.end ());
4145 if (iter != myNonlocalGblRows.end ()) {
4146 myMinNonlocalGblRow = *iter;
4149 GO gblMinNonlocalGblRow = 0;
4150 reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
4151 outArg (gblMinNonlocalGblRow));
4152 const GO indexBase = gblMinNonlocalGblRow;
4153 const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
4154 nonlocalRowMap = rcp (
new map_type (INV, myNonlocalGblRows (), indexBase, comm));
4163 std::ostringstream os;
4164 os << *prefix <<
"Create nonlocal matrix" << endl;
4165 std::cerr << os.str();
4167 RCP<crs_matrix_type> nonlocalMatrix =
4168 rcp (
new crs_matrix_type (nonlocalRowMap, numEntPerNonlocalRow ()));
4170 size_type curPos = 0;
4171 for (
auto mapIter = nonlocals_.begin (); mapIter != nonlocals_.end ();
4172 ++mapIter, ++curPos) {
4173 const GO gblRow = mapIter->first;
4175 Teuchos::Array<GO>& gblCols = (mapIter->second).first;
4176 Teuchos::Array<Scalar>& vals = (mapIter->second).second;
4178 nonlocalMatrix->insertGlobalValues (gblRow, gblCols (), vals ());
4190 auto origRowMap = this->getRowMap ();
4191 const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
4193 int isLocallyComplete = 1;
4195 if (origRowMapIsOneToOne) {
4197 std::ostringstream os;
4198 os << *prefix <<
"Original row Map is 1-to-1" << endl;
4199 std::cerr << os.str();
4201 export_type exportToOrig (nonlocalRowMap, origRowMap);
4203 isLocallyComplete = 0;
4206 std::ostringstream os;
4207 os << *prefix <<
"doExport from nonlocalMatrix" << endl;
4208 std::cerr << os.str();
4210 this->doExport (*nonlocalMatrix, exportToOrig,
Tpetra::ADD);
4215 std::ostringstream os;
4216 os << *prefix <<
"Original row Map is NOT 1-to-1" << endl;
4217 std::cerr << os.str();
4224 export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
4226 isLocallyComplete = 0;
4234 std::ostringstream os;
4235 os << *prefix <<
"Create & doExport into 1-to-1 matrix"
4237 std::cerr << os.str();
4239 crs_matrix_type oneToOneMatrix (oneToOneRowMap, 0);
4241 oneToOneMatrix.doExport(*nonlocalMatrix, exportToOneToOne,
4247 std::ostringstream os;
4248 os << *prefix <<
"Free nonlocalMatrix" << endl;
4249 std::cerr << os.str();
4251 nonlocalMatrix = Teuchos::null;
4255 std::ostringstream os;
4256 os << *prefix <<
"doImport from 1-to-1 matrix" << endl;
4257 std::cerr << os.str();
4259 import_type importToOrig (oneToOneRowMap, origRowMap);
4260 this->doImport (oneToOneMatrix, importToOrig,
Tpetra::ADD);
4268 std::ostringstream os;
4269 os << *prefix <<
"Free nonlocals_ (std::map)" << endl;
4270 std::cerr << os.str();
4272 decltype (nonlocals_) newNonlocals;
4273 std::swap (nonlocals_, newNonlocals);
4282 int isGloballyComplete = 0;
4283 reduceAll<int, int> (*comm, REDUCE_MIN, isLocallyComplete,
4284 outArg (isGloballyComplete));
4285 TEUCHOS_TEST_FOR_EXCEPTION
4286 (isGloballyComplete != 1, std::runtime_error,
"On at least one process, "
4287 "you called insertGlobalValues with a global row index which is not in "
4288 "the matrix's row Map on any process in its communicator.");
4291 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4294 resumeFill (
const Teuchos::RCP<Teuchos::ParameterList>& params)
4296 if (! isStaticGraph ()) {
4299 fillComplete_ =
false;
4302 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4309 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4312 fillComplete (
const Teuchos::RCP<Teuchos::ParameterList>& params)
4314 const char tfecfFuncName[] =
"fillComplete(params): ";
4316 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4317 (this->getCrsGraph ().is_null (), std::logic_error,
4318 "getCrsGraph() returns null. This should not happen at this point. "
4319 "Please report this bug to the Tpetra developers.");
4329 Teuchos::RCP<const map_type> rangeMap = graph.
getRowMap ();
4330 Teuchos::RCP<const map_type> domainMap = rangeMap;
4331 this->fillComplete (domainMap, rangeMap, params);
4335 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4338 fillComplete (
const Teuchos::RCP<const map_type>& domainMap,
4339 const Teuchos::RCP<const map_type>& rangeMap,
4340 const Teuchos::RCP<Teuchos::ParameterList>& params)
4344 using Teuchos::ArrayRCP;
4348 const char tfecfFuncName[] =
"fillComplete: ";
4349 ProfilingRegion regionFillComplete
4350 (
"Tpetra::CrsMatrix::fillComplete");
4351 const bool verbose = Behavior::verbose(
"CrsMatrix");
4352 std::unique_ptr<std::string> prefix;
4354 prefix = this->createPrefix(
"CrsMatrix",
"fillComplete(dom,ran,p)");
4355 std::ostringstream os;
4356 os << *prefix << endl;
4357 std::cerr << os.str ();
4360 "Tpetra::CrsMatrix::fillCompete",
4363 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4364 (! this->isFillActive () || this->isFillComplete (), std::runtime_error,
4365 "Matrix fill state must be active (isFillActive() "
4366 "must be true) before you may call fillComplete().");
4367 const int numProcs = this->getComm ()->getSize ();
4377 bool assertNoNonlocalInserts =
false;
4380 bool sortGhosts =
true;
4382 if (! params.is_null ()) {
4383 assertNoNonlocalInserts = params->get (
"No Nonlocal Changes",
4384 assertNoNonlocalInserts);
4385 if (params->isParameter (
"sort column map ghost gids")) {
4386 sortGhosts = params->get (
"sort column map ghost gids", sortGhosts);
4388 else if (params->isParameter (
"Sort column Map ghost GIDs")) {
4389 sortGhosts = params->get (
"Sort column Map ghost GIDs", sortGhosts);
4394 const bool needGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
4396 if (! this->myGraph_.is_null ()) {
4397 this->myGraph_->sortGhostsAssociatedWithEachProcessor_ = sortGhosts;
4400 if (! this->getCrsGraphRef ().indicesAreAllocated ()) {
4401 if (this->hasColMap ()) {
4402 allocateValues(LocalIndices, GraphNotYetAllocated, verbose);
4405 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
4410 if (needGlobalAssemble) {
4411 this->globalAssemble ();
4414 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4415 (numProcs == 1 && nonlocals_.size() > 0,
4416 std::runtime_error,
"Cannot have nonlocal entries on a serial run. "
4417 "An invalid entry (i.e., with row index not in the row Map) must have "
4418 "been submitted to the CrsMatrix.");
4421 if (this->isStaticGraph ()) {
4429#ifdef HAVE_TPETRA_DEBUG
4447 const bool domainMapsMatch =
4448 this->staticGraph_->getDomainMap ()->isSameAs (*domainMap);
4449 const bool rangeMapsMatch =
4450 this->staticGraph_->getRangeMap ()->isSameAs (*rangeMap);
4452 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4453 (! domainMapsMatch, std::runtime_error,
4454 "The CrsMatrix's domain Map does not match the graph's domain Map. "
4455 "The graph cannot be changed because it was given to the CrsMatrix "
4456 "constructor as const. You can fix this by passing in the graph's "
4457 "domain Map and range Map to the matrix's fillComplete call.");
4459 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4460 (! rangeMapsMatch, std::runtime_error,
4461 "The CrsMatrix's range Map does not match the graph's range Map. "
4462 "The graph cannot be changed because it was given to the CrsMatrix "
4463 "constructor as const. You can fix this by passing in the graph's "
4464 "domain Map and range Map to the matrix's fillComplete call.");
4469 this->fillLocalMatrix (params);
4477 this->myGraph_->setDomainRangeMaps (domainMap, rangeMap);
4480 Teuchos::Array<int> remotePIDs (0);
4481 const bool mustBuildColMap = ! this->hasColMap ();
4482 if (mustBuildColMap) {
4483 this->myGraph_->makeColMap (remotePIDs);
4488 const std::pair<size_t, std::string> makeIndicesLocalResult =
4489 this->myGraph_->makeIndicesLocal(verbose);
4494 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4495 (makeIndicesLocalResult.first != 0, std::runtime_error,
4496 makeIndicesLocalResult.second);
4498 const bool sorted = this->myGraph_->isSorted ();
4499 const bool merged = this->myGraph_->isMerged ();
4500 this->sortAndMergeIndicesAndValues (sorted, merged);
4505 this->myGraph_->makeImportExport (remotePIDs, mustBuildColMap);
4509 this->fillLocalGraphAndMatrix (params);
4511 const bool callGraphComputeGlobalConstants = params.get () ==
nullptr ||
4512 params->get (
"compute global constants",
true);
4513 if (callGraphComputeGlobalConstants) {
4514 this->myGraph_->computeGlobalConstants ();
4517 this->myGraph_->computeLocalConstants ();
4519 this->myGraph_->fillComplete_ =
true;
4520 this->myGraph_->checkInternalState ();
4525 this->fillComplete_ =
true;
4528 "Tpetra::CrsMatrix::fillCompete",
"checkInternalState"
4530 this->checkInternalState ();
4534 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4538 const Teuchos::RCP<const map_type> & rangeMap,
4539 const Teuchos::RCP<const import_type>& importer,
4540 const Teuchos::RCP<const export_type>& exporter,
4541 const Teuchos::RCP<Teuchos::ParameterList> ¶ms)
4543#ifdef HAVE_TPETRA_MMM_TIMINGS
4545 if(!params.is_null())
4546 label = params->get(
"Timer Label",label);
4547 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
4548 using Teuchos::TimeMonitor;
4550 Teuchos::TimeMonitor all(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-all")));
4553 const char tfecfFuncName[] =
"expertStaticFillComplete: ";
4554 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( ! isFillActive() || isFillComplete(),
4555 std::runtime_error,
"Matrix fill state must be active (isFillActive() "
4556 "must be true) before calling fillComplete().");
4557 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4558 myGraph_.is_null (), std::logic_error,
"myGraph_ is null. This is not allowed.");
4561#ifdef HAVE_TPETRA_MMM_TIMINGS
4562 Teuchos::TimeMonitor graph(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-Graph")));
4565 myGraph_->expertStaticFillComplete (domainMap, rangeMap, importer, exporter,params);
4569#ifdef HAVE_TPETRA_MMM_TIMINGS
4570 TimeMonitor fLGAM(*TimeMonitor::getNewTimer(prefix + std::string(
"eSFC-M-fLGAM")));
4573 fillLocalGraphAndMatrix (params);
4578 fillComplete_ =
true;
4581#ifdef HAVE_TPETRA_DEBUG
4582 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(isFillActive(), std::logic_error,
4583 ": We're at the end of fillComplete(), but isFillActive() is true. "
4584 "Please report this bug to the Tpetra developers.");
4585 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(! isFillComplete(), std::logic_error,
4586 ": We're at the end of fillComplete(), but isFillActive() is true. "
4587 "Please report this bug to the Tpetra developers.");
4590#ifdef HAVE_TPETRA_MMM_TIMINGS
4591 Teuchos::TimeMonitor cIS(*TimeMonitor::getNewTimer(prefix + std::string(
"ESFC-M-cIS")));
4594 checkInternalState();
4598 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4604 LocalOrdinal* beg = cols;
4605 LocalOrdinal* end = cols + rowLen;
4606 LocalOrdinal* newend = beg;
4608 LocalOrdinal* cur = beg + 1;
4612 while (cur != end) {
4613 if (*cur != *newend) {
4630 return newend - beg;
4633 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4638 using ::Tpetra::Details::ProfilingRegion;
4639 typedef LocalOrdinal LO;
4640 typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
4641 host_execution_space;
4642 typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
4643 const char tfecfFuncName[] =
"sortAndMergeIndicesAndValues: ";
4644 ProfilingRegion regionSAM (
"Tpetra::CrsMatrix::sortAndMergeIndicesAndValues");
4646 if (! sorted || ! merged) {
4647 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4648 (this->isStaticGraph (), std::runtime_error,
"Cannot sort or merge with "
4649 "\"static\" (const) graph, since the matrix does not own the graph.");
4650 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4651 (this->myGraph_.is_null (), std::logic_error,
"myGraph_ is null, but "
4652 "this matrix claims ! isStaticGraph(). "
4653 "Please report this bug to the Tpetra developers.");
4654 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4655 (this->isStorageOptimized (), std::logic_error,
"It is invalid to call "
4656 "this method if the graph's storage has already been optimized. "
4657 "Please report this bug to the Tpetra developers.");
4660 const LO lclNumRows =
static_cast<LO
> (this->getLocalNumRows ());
4661 size_t totalNumDups = 0;
4664 auto rowBegins_ = graph.rowPtrsUnpacked_host_;
4666 auto vals_ = this->valuesUnpacked_wdv.getHostView(Access::ReadWrite);
4668 Kokkos::parallel_reduce (
"sortAndMergeIndicesAndValues", range_type (0, lclNumRows),
4669 [=] (
const LO lclRow,
size_t& numDups) {
4670 size_t rowBegin = rowBegins_(lclRow);
4671 size_t rowLen = rowLengths_(lclRow);
4672 LO* cols = cols_.data() + rowBegin;
4675 sort2 (cols, cols + rowLen, vals);
4678 size_t newRowLength = mergeRowIndicesAndValues (rowLen, cols, vals);
4679 rowLengths_(lclRow) = newRowLength;
4680 numDups += rowLen - newRowLength;
4693 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4704 using Teuchos::rcp_const_cast;
4705 using Teuchos::rcpFromRef;
4706 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4707 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one ();
4713 if (alpha ==
ZERO) {
4716 }
else if (beta != ONE) {
4730 RCP<const import_type> importer = this->getGraph ()->getImporter ();
4731 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4737 const bool Y_is_overwritten = (beta ==
ZERO);
4740 const bool Y_is_replicated =
4741 (! Y_in.
isDistributed () && this->getComm ()->getSize () != 1);
4749 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
4756 RCP<const MV> X_colMap;
4757 if (importer.is_null ()) {
4765 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in,
true);
4767 X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
4772 X_colMap = rcpFromRef (X_in);
4776 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply: Import");
4782 RCP<MV> X_colMapNonConst = getColumnMapMultiVector (X_in);
4785 X_colMapNonConst->doImport (X_in, *importer,
INSERT);
4786 X_colMap = rcp_const_cast<const MV> (X_colMapNonConst);
4793 RCP<MV> Y_rowMap = getRowMapMultiVector (Y_in);
4800 if (! exporter.is_null ()) {
4801 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha,
ZERO);
4803 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply: Export");
4809 if (Y_is_overwritten) {
4835 Y_rowMap = getRowMapMultiVector (Y_in,
true);
4842 this->localApply (*X_colMap, *Y_rowMap, Teuchos::NO_TRANS, alpha, beta);
4846 this->localApply (*X_colMap, Y_in, Teuchos::NO_TRANS, alpha, beta);
4854 if (Y_is_replicated) {
4855 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply: Reduce Y");
4860 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4865 const Teuchos::ETransp mode,
4870 using Teuchos::null;
4873 using Teuchos::rcp_const_cast;
4874 using Teuchos::rcpFromRef;
4875 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
4878 if (alpha ==
ZERO) {
4901 RCP<const import_type> importer = this->getGraph ()->getImporter ();
4902 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
4908 const bool Y_is_overwritten = (beta ==
ZERO);
4909 if (Y_is_replicated && this->getComm ()->getRank () > 0) {
4915 X = rcp (
new MV (X_in, Teuchos::Copy));
4917 X = rcpFromRef (X_in);
4921 if (importer != Teuchos::null) {
4922 if (importMV_ != Teuchos::null && importMV_->getNumVectors() != numVectors) {
4925 if (importMV_ == null) {
4926 importMV_ = rcp (
new MV (this->getColMap (), numVectors));
4929 if (exporter != Teuchos::null) {
4930 if (exportMV_ != Teuchos::null && exportMV_->getNumVectors() != numVectors) {
4933 if (exportMV_ == null) {
4934 exportMV_ = rcp (
new MV (this->getRowMap (), numVectors));
4940 if (! exporter.is_null ()) {
4941 ProfilingRegion regionImport (
"Tpetra::CrsMatrix::apply (transpose): Import");
4942 exportMV_->doImport (X_in, *exporter,
INSERT);
4949 if (importer != Teuchos::null) {
4950 ProfilingRegion regionExport (
"Tpetra::CrsMatrix::apply (transpose): Export");
4957 importMV_->putScalar (
ZERO);
4959 this->localApply (*X, *importMV_, mode, alpha,
ZERO);
4961 if (Y_is_overwritten) {
4978 MV Y (Y_in, Teuchos::Copy);
4979 this->localApply (*X, Y, mode, alpha, beta);
4982 this->localApply (*X, Y_in, mode, alpha, beta);
4989 if (Y_is_replicated) {
4990 ProfilingRegion regionReduce (
"Tpetra::CrsMatrix::apply (transpose): Reduce Y");
4995 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5000 const Teuchos::ETransp mode,
5001 const Scalar& alpha,
5002 const Scalar& beta)
const
5005 using Teuchos::NO_TRANS;
5006 ProfilingRegion regionLocalApply (
"Tpetra::CrsMatrix::localApply");
5010 auto matrix_lcl = getLocalMultiplyOperator();
5014 const char tfecfFuncName[] =
"localApply: ";
5015 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5019 const bool transpose = (mode != Teuchos::NO_TRANS);
5020 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5022 getColMap ()->getLocalNumElements (), std::runtime_error,
5023 "NO_TRANS case: X has the wrong number of local rows. "
5025 "getColMap()->getLocalNumElements() = " <<
5026 getColMap ()->getLocalNumElements () <<
".");
5027 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5029 getRowMap ()->getLocalNumElements (), std::runtime_error,
5030 "NO_TRANS case: Y has the wrong number of local rows. "
5032 "getRowMap()->getLocalNumElements() = " <<
5033 getRowMap ()->getLocalNumElements () <<
".");
5034 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5036 getRowMap ()->getLocalNumElements (), std::runtime_error,
5037 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5039 <<
" != getRowMap()->getLocalNumElements() = "
5040 << getRowMap ()->getLocalNumElements () <<
".");
5041 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5043 getColMap ()->getLocalNumElements (), std::runtime_error,
5044 "TRANS or CONJ_TRANS case: X has the wrong number of local "
5046 <<
" != getColMap()->getLocalNumElements() = "
5047 << getColMap ()->getLocalNumElements () <<
".");
5048 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5049 (! isFillComplete (), std::runtime_error,
"The matrix is not "
5050 "fill complete. You must call fillComplete() (possibly with "
5051 "domain and range Map arguments) without an intervening "
5052 "resumeFill() call before you may call this method.");
5053 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5055 std::runtime_error,
"X and Y must be constant stride.");
5060 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5061 (X_lcl.data () == Y_lcl.data () && X_lcl.data () !=
nullptr
5062 && X_lcl.extent(0) != 0,
5063 std::runtime_error,
"X and Y may not alias one another.");
5066 LocalOrdinal nrows = getLocalNumRows();
5067 LocalOrdinal maxRowImbalance = 0;
5069 maxRowImbalance = getLocalMaxNumRowEntries() - (getLocalNumEntries() / nrows);
5072 matrix_lcl->applyImbalancedRows (X_lcl, Y_lcl, mode, alpha, beta);
5074 matrix_lcl->apply (X_lcl, Y_lcl, mode, alpha, beta);
5077 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5082 Teuchos::ETransp mode,
5087 const char fnName[] =
"Tpetra::CrsMatrix::apply";
5089 TEUCHOS_TEST_FOR_EXCEPTION
5090 (! isFillComplete (), std::runtime_error,
5091 fnName <<
": Cannot call apply() until fillComplete() "
5092 "has been called.");
5094 if (mode == Teuchos::NO_TRANS) {
5095 ProfilingRegion regionNonTranspose (fnName);
5096 this->applyNonTranspose (X, Y, alpha, beta);
5099 ProfilingRegion regionTranspose (
"Tpetra::CrsMatrix::apply (transpose)");
5106 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
5110 this->applyTranspose (X, Y, mode, alpha, beta);
5115 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5117 Teuchos::RCP<CrsMatrix<T, LocalOrdinal, GlobalOrdinal, Node> >
5123 const char tfecfFuncName[] =
"convert: ";
5125 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5126 (! this->isFillComplete (), std::runtime_error,
"This matrix (the source "
5127 "of the conversion) is not fill complete. You must first call "
5128 "fillComplete() (possibly with the domain and range Map) without an "
5129 "intervening call to resumeFill(), before you may call this method.");
5131 RCP<output_matrix_type> newMatrix
5132 (
new output_matrix_type (this->getCrsGraph ()));
5135 using ::Tpetra::Details::copyConvert;
5136 copyConvert (newMatrix->getLocalMatrixDevice ().values,
5137 this->getLocalMatrixDevice ().values);
5141 newMatrix->fillComplete (this->getDomainMap (), this->getRangeMap ());
5147 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5154 const char tfecfFuncName[] =
"checkInternalState: ";
5155 const char err[] =
"Internal state is not consistent. "
5156 "Please report this bug to the Tpetra developers.";
5160 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5161 (staticGraph_.is_null (), std::logic_error, err);
5165 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5166 (! myGraph_.is_null () && myGraph_ != staticGraph_,
5167 std::logic_error, err);
5169 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5170 (isFillComplete () && ! staticGraph_->isFillComplete (),
5171 std::logic_error, err <<
" Specifically, the matrix is fill complete, "
5172 "but its graph is NOT fill complete.");
5175 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5176 (staticGraph_->indicesAreAllocated () &&
5177 staticGraph_->getLocalAllocationSize() > 0 &&
5178 staticGraph_->getLocalNumRows() > 0 &&
5179 valuesUnpacked_wdv.extent (0) == 0,
5180 std::logic_error, err);
5184 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5189 std::ostringstream os;
5191 os <<
"Tpetra::CrsMatrix (Kokkos refactor): {";
5192 if (this->getObjectLabel () !=
"") {
5193 os <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
5195 if (isFillComplete ()) {
5196 os <<
"isFillComplete: true"
5197 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5198 << getGlobalNumCols () <<
"]"
5199 <<
", global number of entries: " << getGlobalNumEntries ()
5203 os <<
"isFillComplete: false"
5204 <<
", global dimensions: [" << getGlobalNumRows () <<
", "
5205 << getGlobalNumCols () <<
"]}";
5210 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5213 describe (Teuchos::FancyOStream &out,
5214 const Teuchos::EVerbosityLevel verbLevel)
const
5218 using Teuchos::ArrayView;
5219 using Teuchos::Comm;
5221 using Teuchos::TypeNameTraits;
5222 using Teuchos::VERB_DEFAULT;
5223 using Teuchos::VERB_NONE;
5224 using Teuchos::VERB_LOW;
5225 using Teuchos::VERB_MEDIUM;
5226 using Teuchos::VERB_HIGH;
5227 using Teuchos::VERB_EXTREME;
5229 const Teuchos::EVerbosityLevel vl = (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
5231 if (vl == VERB_NONE) {
5236 Teuchos::OSTab tab0 (out);
5238 RCP<const Comm<int> > comm = this->getComm();
5239 const int myRank = comm->getRank();
5240 const int numProcs = comm->getSize();
5242 for (
size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5245 width = std::max<size_t> (width,
static_cast<size_t> (11)) + 2;
5255 out <<
"Tpetra::CrsMatrix (Kokkos refactor):" << endl;
5257 Teuchos::OSTab tab1 (out);
5260 if (this->getObjectLabel () !=
"") {
5261 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
5264 out <<
"Template parameters:" << endl;
5265 Teuchos::OSTab tab2 (out);
5266 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
5267 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
5268 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
5269 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
5271 if (isFillComplete()) {
5272 out <<
"isFillComplete: true" << endl
5273 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5274 << getGlobalNumCols () <<
"]" << endl
5275 <<
"Global number of entries: " << getGlobalNumEntries () << endl
5276 << endl <<
"Global max number of entries in a row: "
5277 << getGlobalMaxNumRowEntries () << endl;
5280 out <<
"isFillComplete: false" << endl
5281 <<
"Global dimensions: [" << getGlobalNumRows () <<
", "
5282 << getGlobalNumCols () <<
"]" << endl;
5286 if (vl < VERB_MEDIUM) {
5292 out << endl <<
"Row Map:" << endl;
5294 if (getRowMap ().is_null ()) {
5296 out <<
"null" << endl;
5303 getRowMap ()->describe (out, vl);
5308 out <<
"Column Map: ";
5310 if (getColMap ().is_null ()) {
5312 out <<
"null" << endl;
5314 }
else if (getColMap () == getRowMap ()) {
5316 out <<
"same as row Map" << endl;
5322 getColMap ()->describe (out, vl);
5327 out <<
"Domain Map: ";
5329 if (getDomainMap ().is_null ()) {
5331 out <<
"null" << endl;
5333 }
else if (getDomainMap () == getRowMap ()) {
5335 out <<
"same as row Map" << endl;
5337 }
else if (getDomainMap () == getColMap ()) {
5339 out <<
"same as column Map" << endl;
5345 getDomainMap ()->describe (out, vl);
5350 out <<
"Range Map: ";
5352 if (getRangeMap ().is_null ()) {
5354 out <<
"null" << endl;
5356 }
else if (getRangeMap () == getDomainMap ()) {
5358 out <<
"same as domain Map" << endl;
5360 }
else if (getRangeMap () == getRowMap ()) {
5362 out <<
"same as row Map" << endl;
5368 getRangeMap ()->describe (out, vl);
5372 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5373 if (myRank == curRank) {
5374 out <<
"Process rank: " << curRank << endl;
5375 Teuchos::OSTab tab2 (out);
5376 if (! staticGraph_->indicesAreAllocated ()) {
5377 out <<
"Graph indices not allocated" << endl;
5380 out <<
"Number of allocated entries: "
5381 << staticGraph_->getLocalAllocationSize () << endl;
5383 out <<
"Number of entries: " << getLocalNumEntries () << endl
5384 <<
"Max number of entries per row: " << getLocalMaxNumRowEntries ()
5393 if (vl < VERB_HIGH) {
5398 for (
int curRank = 0; curRank < numProcs; ++curRank) {
5399 if (myRank == curRank) {
5400 out << std::setw(width) <<
"Proc Rank"
5401 << std::setw(width) <<
"Global Row"
5402 << std::setw(width) <<
"Num Entries";
5403 if (vl == VERB_EXTREME) {
5404 out << std::setw(width) <<
"(Index,Value)";
5407 for (
size_t r = 0; r < getLocalNumRows (); ++r) {
5408 const size_t nE = getNumEntriesInLocalRow(r);
5409 GlobalOrdinal gid = getRowMap()->getGlobalElement(r);
5410 out << std::setw(width) << myRank
5411 << std::setw(width) << gid
5412 << std::setw(width) << nE;
5413 if (vl == VERB_EXTREME) {
5414 if (isGloballyIndexed()) {
5415 global_inds_host_view_type rowinds;
5416 values_host_view_type rowvals;
5417 getGlobalRowView (gid, rowinds, rowvals);
5418 for (
size_t j = 0; j < nE; ++j) {
5419 out <<
" (" << rowinds[j]
5420 <<
", " << rowvals[j]
5424 else if (isLocallyIndexed()) {
5425 local_inds_host_view_type rowinds;
5426 values_host_view_type rowvals;
5427 getLocalRowView (r, rowinds, rowvals);
5428 for (
size_t j=0; j < nE; ++j) {
5429 out <<
" (" << getColMap()->getGlobalElement(rowinds[j])
5430 <<
", " << rowvals[j]
5446 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5460 return (srcRowMat !=
nullptr);
5463 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5467 const typename crs_graph_type::padding_type& padding,
5471 using Details::padCrsArrays;
5473 using LO = local_ordinal_type;
5474 using row_ptrs_type =
5475 typename local_graph_device_type::row_map_type::non_const_type;
5476 using range_policy =
5477 Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LO>>;
5478 const char tfecfFuncName[] =
"applyCrsPadding";
5479 const char suffix[] =
5480 ". Please report this bug to the Tpetra developers.";
5481 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::applyCrsPadding");
5483 std::unique_ptr<std::string> prefix;
5485 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
5486 std::ostringstream os;
5487 os << *prefix <<
"padding: ";
5490 std::cerr << os.str();
5492 const int myRank = ! verbose ? -1 : [&] () {
5493 auto map = this->getMap();
5494 if (map.is_null()) {
5497 auto comm = map->getComm();
5498 if (comm.is_null()) {
5501 return comm->getRank();
5505 if (! myGraph_->indicesAreAllocated()) {
5507 std::ostringstream os;
5508 os << *prefix <<
"Call allocateIndices" << endl;
5509 std::cerr << os.str();
5511 allocateValues(GlobalIndices, GraphNotYetAllocated, verbose);
5523 std::ostringstream os;
5524 os << *prefix <<
"Allocate row_ptrs_beg: "
5525 << myGraph_->rowPtrsUnpacked_host_.extent(0) << endl;
5526 std::cerr << os.str();
5528 using Kokkos::view_alloc;
5529 using Kokkos::WithoutInitializing;
5530 row_ptrs_type row_ptr_beg(view_alloc(
"row_ptr_beg", WithoutInitializing),
5531 myGraph_->rowPtrsUnpacked_dev_.extent(0));
5533 Kokkos::deep_copy(execution_space(),row_ptr_beg, myGraph_->rowPtrsUnpacked_dev_);
5535 const size_t N = row_ptr_beg.extent(0) == 0 ? size_t(0) :
5536 size_t(row_ptr_beg.extent(0) - 1);
5538 std::ostringstream os;
5539 os << *prefix <<
"Allocate row_ptrs_end: " << N << endl;
5540 std::cerr << os.str();
5542 row_ptrs_type row_ptr_end(
5543 view_alloc(
"row_ptr_end", WithoutInitializing), N);
5545 row_ptrs_type num_row_entries_d;
5547 const bool refill_num_row_entries =
5548 myGraph_->k_numRowEntries_.extent(0) != 0;
5550 if (refill_num_row_entries) {
5553 num_row_entries_d = create_mirror_view_and_copy(memory_space(),
5554 myGraph_->k_numRowEntries_);
5555 Kokkos::parallel_for
5556 (
"Fill end row pointers", range_policy(0, N),
5557 KOKKOS_LAMBDA (
const size_t i) {
5558 row_ptr_end(i) = row_ptr_beg(i) + num_row_entries_d(i);
5565 Kokkos::parallel_for
5566 (
"Fill end row pointers", range_policy(0, N),
5567 KOKKOS_LAMBDA (
const size_t i) {
5568 row_ptr_end(i) = row_ptr_beg(i+1);
5572 if (myGraph_->isGloballyIndexed()) {
5574 myGraph_->gblInds_wdv,
5575 valuesUnpacked_wdv, padding, myRank, verbose);
5576 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5577 const auto newColIndsLen = myGraph_->gblInds_wdv.extent(0);
5578 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5579 (newValuesLen != newColIndsLen, std::logic_error,
5580 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5581 <<
" != myGraph_->gblInds_wdv.extent(0)=" << newColIndsLen
5586 myGraph_->lclIndsUnpacked_wdv,
5587 valuesUnpacked_wdv, padding, myRank, verbose);
5588 const auto newValuesLen = valuesUnpacked_wdv.extent(0);
5589 const auto newColIndsLen = myGraph_->lclIndsUnpacked_wdv.extent(0);
5590 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5591 (newValuesLen != newColIndsLen, std::logic_error,
5592 ": After padding, valuesUnpacked_wdv.extent(0)=" << newValuesLen
5593 <<
" != myGraph_->lclIndsUnpacked_wdv.extent(0)=" << newColIndsLen
5597 if (refill_num_row_entries) {
5598 Kokkos::parallel_for
5599 (
"Fill num entries", range_policy(0, N),
5600 KOKKOS_LAMBDA (
const size_t i) {
5601 num_row_entries_d(i) = row_ptr_end(i) - row_ptr_beg(i);
5603 Kokkos::deep_copy(myGraph_->k_numRowEntries_, num_row_entries_d);
5607 std::ostringstream os;
5608 os << *prefix <<
"Assign myGraph_->rowPtrsUnpacked_; "
5609 <<
"old size: " << myGraph_->rowPtrsUnpacked_host_.extent(0)
5610 <<
", new size: " << row_ptr_beg.extent(0) << endl;
5611 std::cerr << os.str();
5612 TEUCHOS_ASSERT( myGraph_->rowPtrsUnpacked_host_.extent(0) ==
5613 row_ptr_beg.extent(0) );
5615 myGraph_->setRowPtrsUnpacked(row_ptr_beg);
5618 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5620 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5621 copyAndPermuteStaticGraph(
5622 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5623 const size_t numSameIDs,
5624 const LocalOrdinal permuteToLIDs[],
5625 const LocalOrdinal permuteFromLIDs[],
5626 const size_t numPermutes)
5628 using Details::ProfilingRegion;
5629 using Teuchos::Array;
5630 using Teuchos::ArrayView;
5632 using LO = LocalOrdinal;
5633 using GO = GlobalOrdinal;
5634 const char tfecfFuncName[] =
"copyAndPermuteStaticGraph";
5635 const char suffix[] =
5636 " Please report this bug to the Tpetra developers.";
5637 ProfilingRegion regionCAP
5638 (
"Tpetra::CrsMatrix::copyAndPermuteStaticGraph");
5640 const bool debug = Details::Behavior::debug(
"CrsGraph");
5641 const bool verbose = Details::Behavior::verbose(
"CrsGraph");
5642 std::unique_ptr<std::string> prefix;
5644 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5645 std::ostringstream os;
5646 os << *prefix <<
"Start" << endl;
5648 const char*
const prefix_raw =
5649 verbose ? prefix.get()->c_str() :
nullptr;
5651 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
5656 const map_type& srcRowMap = * (srcMat.getRowMap ());
5657 nonconst_global_inds_host_view_type rowInds;
5658 nonconst_values_host_view_type rowVals;
5659 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5660 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5664 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5665 const GO targetGID = sourceGID;
5667 ArrayView<const GO>rowIndsConstView;
5668 ArrayView<const Scalar> rowValsConstView;
5670 if (sourceIsLocallyIndexed) {
5671 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5672 if (rowLength >
static_cast<size_t> (rowInds.size())) {
5673 Kokkos::resize(rowInds,rowLength);
5674 Kokkos::resize(rowVals,rowLength);
5678 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5679 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5684 size_t checkRowLength = 0;
5685 srcMat.getGlobalRowCopy (sourceGID, rowIndsView,
5686 rowValsView, checkRowLength);
5688 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5689 (rowLength != checkRowLength, std::logic_error,
"For "
5690 "global row index " << sourceGID <<
", the source "
5691 "matrix's getNumEntriesInGlobalRow returns a row length "
5692 "of " << rowLength <<
", but getGlobalRowCopy reports "
5693 "a row length of " << checkRowLength <<
"." << suffix);
5700 rowIndsConstView = Teuchos::ArrayView<const GO> (
5701 rowIndsView.data(), rowIndsView.extent(0),
5702 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5703 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5704 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5705 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5710 global_inds_host_view_type rowIndsView;
5711 values_host_view_type rowValsView;
5712 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5717 rowIndsConstView = Teuchos::ArrayView<const GO> (
5718 rowIndsView.data(), rowIndsView.extent(0),
5719 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5720 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5721 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5722 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5730 combineGlobalValues(targetGID, rowIndsConstView,
5731 rowValsConstView, REPLACE,
5732 prefix_raw, debug, verbose);
5736 std::ostringstream os;
5737 os << *prefix <<
"Do permutes" << endl;
5740 const map_type& tgtRowMap = * (this->getRowMap ());
5741 for (
size_t p = 0; p < numPermutes; ++p) {
5742 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5743 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5745 ArrayView<const GO> rowIndsConstView;
5746 ArrayView<const Scalar> rowValsConstView;
5748 if (sourceIsLocallyIndexed) {
5749 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5750 if (rowLength >
static_cast<size_t> (rowInds.size ())) {
5751 Kokkos::resize(rowInds,rowLength);
5752 Kokkos::resize(rowVals,rowLength);
5756 nonconst_global_inds_host_view_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5757 nonconst_values_host_view_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5762 size_t checkRowLength = 0;
5763 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5764 rowValsView, checkRowLength);
5766 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5767 (rowLength != checkRowLength, std::logic_error,
"For "
5768 "source matrix global row index " << sourceGID <<
", "
5769 "getNumEntriesInGlobalRow returns a row length of " <<
5770 rowLength <<
", but getGlobalRowCopy a row length of "
5771 << checkRowLength <<
"." << suffix);
5778 rowIndsConstView = Teuchos::ArrayView<const GO> (
5779 rowIndsView.data(), rowIndsView.extent(0),
5780 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5781 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5782 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5783 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5788 global_inds_host_view_type rowIndsView;
5789 values_host_view_type rowValsView;
5790 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5795 rowIndsConstView = Teuchos::ArrayView<const GO> (
5796 rowIndsView.data(), rowIndsView.extent(0),
5797 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5798 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5799 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5800 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5805 combineGlobalValues(targetGID, rowIndsConstView,
5806 rowValsConstView, REPLACE,
5807 prefix_raw, debug, verbose);
5811 std::ostringstream os;
5812 os << *prefix <<
"Done" << endl;
5816 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
5818 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
5819 copyAndPermuteNonStaticGraph(
5820 const RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>& srcMat,
5821 const size_t numSameIDs,
5822 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs_dv,
5823 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs_dv,
5824 const size_t numPermutes)
5826 using Details::ProfilingRegion;
5827 using Teuchos::Array;
5828 using Teuchos::ArrayView;
5830 using LO = LocalOrdinal;
5831 using GO = GlobalOrdinal;
5832 const char tfecfFuncName[] =
"copyAndPermuteNonStaticGraph";
5833 const char suffix[] =
5834 " Please report this bug to the Tpetra developers.";
5835 ProfilingRegion regionCAP
5836 (
"Tpetra::CrsMatrix::copyAndPermuteNonStaticGraph");
5838 const bool debug = Details::Behavior::debug(
"CrsGraph");
5839 const bool verbose = Details::Behavior::verbose(
"CrsGraph");
5840 std::unique_ptr<std::string> prefix;
5842 prefix = this->
createPrefix(
"CrsGraph", tfecfFuncName);
5843 std::ostringstream os;
5844 os << *prefix <<
"Start" << endl;
5846 const char*
const prefix_raw =
5847 verbose ? prefix.get()->c_str() :
nullptr;
5850 using row_graph_type = RowGraph<LO, GO, Node>;
5851 const row_graph_type& srcGraph = *(srcMat.getGraph());
5853 myGraph_->computeCrsPadding(srcGraph, numSameIDs,
5854 permuteToLIDs_dv, permuteFromLIDs_dv, verbose);
5855 applyCrsPadding(*padding, verbose);
5857 const bool sourceIsLocallyIndexed = srcMat.isLocallyIndexed ();
5862 const map_type& srcRowMap = * (srcMat.getRowMap ());
5863 const LO numSameIDs_as_LID =
static_cast<LO
> (numSameIDs);
5864 using gids_type = nonconst_global_inds_host_view_type;
5865 using vals_type = nonconst_values_host_view_type;
5868 for (LO sourceLID = 0; sourceLID < numSameIDs_as_LID; ++sourceLID) {
5872 const GO sourceGID = srcRowMap.getGlobalElement (sourceLID);
5873 const GO targetGID = sourceGID;
5875 ArrayView<const GO> rowIndsConstView;
5876 ArrayView<const Scalar> rowValsConstView;
5878 if (sourceIsLocallyIndexed) {
5880 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5881 if (rowLength >
static_cast<size_t> (rowInds.extent(0))) {
5882 Kokkos::resize(rowInds,rowLength);
5883 Kokkos::resize(rowVals,rowLength);
5887 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5888 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5893 size_t checkRowLength = 0;
5894 srcMat.getGlobalRowCopy (sourceGID, rowIndsView, rowValsView,
5897 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5898 (rowLength != checkRowLength, std::logic_error,
": For "
5899 "global row index " << sourceGID <<
", the source "
5900 "matrix's getNumEntriesInGlobalRow returns a row length "
5901 "of " << rowLength <<
", but getGlobalRowCopy reports "
5902 "a row length of " << checkRowLength <<
"." << suffix);
5904 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5905 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
5908 global_inds_host_view_type rowIndsView;
5909 values_host_view_type rowValsView;
5910 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5916 rowIndsConstView = Teuchos::ArrayView<const GO> (
5917 rowIndsView.data(), rowIndsView.extent(0),
5918 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5919 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5920 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5921 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5927 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
5928 rowValsConstView, prefix_raw, debug, verbose);
5932 std::ostringstream os;
5933 os << *prefix <<
"Do permutes" << endl;
5935 const LO*
const permuteFromLIDs = permuteFromLIDs_dv.view_host().data();
5936 const LO*
const permuteToLIDs = permuteToLIDs_dv.view_host().data();
5938 const map_type& tgtRowMap = * (this->getRowMap ());
5939 for (
size_t p = 0; p < numPermutes; ++p) {
5940 const GO sourceGID = srcRowMap.getGlobalElement (permuteFromLIDs[p]);
5941 const GO targetGID = tgtRowMap.getGlobalElement (permuteToLIDs[p]);
5943 ArrayView<const GO> rowIndsConstView;
5944 ArrayView<const Scalar> rowValsConstView;
5946 if (sourceIsLocallyIndexed) {
5947 const size_t rowLength = srcMat.getNumEntriesInGlobalRow (sourceGID);
5948 if (rowLength >
static_cast<size_t> (rowInds.extent(0))) {
5949 Kokkos::resize(rowInds,rowLength);
5950 Kokkos::resize(rowVals,rowLength);
5954 gids_type rowIndsView = Kokkos::subview(rowInds,std::make_pair((
size_t)0, rowLength));
5955 vals_type rowValsView = Kokkos::subview(rowVals,std::make_pair((
size_t)0, rowLength));
5960 size_t checkRowLength = 0;
5961 srcMat.getGlobalRowCopy(sourceGID, rowIndsView,
5962 rowValsView, checkRowLength);
5964 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5965 (rowLength != checkRowLength, std::logic_error,
"For "
5966 "source matrix global row index " << sourceGID <<
", "
5967 "getNumEntriesInGlobalRow returns a row length of " <<
5968 rowLength <<
", but getGlobalRowCopy a row length of "
5969 << checkRowLength <<
"." << suffix);
5971 rowIndsConstView = Teuchos::ArrayView<const GO>(rowIndsView.data(), rowLength);
5972 rowValsConstView = Teuchos::ArrayView<const Scalar>(
reinterpret_cast<Scalar *
>(rowValsView.data()), rowLength);
5975 global_inds_host_view_type rowIndsView;
5976 values_host_view_type rowValsView;
5977 srcMat.getGlobalRowView(sourceGID, rowIndsView, rowValsView);
5983 rowIndsConstView = Teuchos::ArrayView<const GO> (
5984 rowIndsView.data(), rowIndsView.extent(0),
5985 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5986 rowValsConstView = Teuchos::ArrayView<const Scalar> (
5987 reinterpret_cast<const Scalar*
>(rowValsView.data()), rowValsView.extent(0),
5988 Teuchos::RCP_DISABLE_NODE_LOOKUP);
5994 insertGlobalValuesFilteredChecked(targetGID, rowIndsConstView,
5995 rowValsConstView, prefix_raw, debug, verbose);
5999 std::ostringstream os;
6000 os << *prefix <<
"Done" << endl;
6004 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6006 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6008 const SrcDistObject& srcObj,
6009 const size_t numSameIDs,
6010 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
6011 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
6014 using Details::Behavior;
6015 using Details::dualViewStatusToString;
6016 using Details::ProfilingRegion;
6020 const char tfecfFuncName[] =
"copyAndPermute: ";
6021 ProfilingRegion regionCAP(
"Tpetra::CrsMatrix::copyAndPermute");
6023 const bool verbose = Behavior::verbose(
"CrsMatrix");
6024 std::unique_ptr<std::string> prefix;
6026 prefix = this->
createPrefix(
"CrsMatrix",
"copyAndPermute");
6027 std::ostringstream os;
6028 os << *prefix << endl
6029 << *prefix <<
" numSameIDs: " << numSameIDs << endl
6030 << *prefix <<
" numPermute: " << permuteToLIDs.extent(0)
6039 <<
"isStaticGraph: " << (isStaticGraph() ?
"true" :
"false")
6041 std::cerr << os.str ();
6044 const auto numPermute = permuteToLIDs.extent (0);
6045 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6046 (numPermute != permuteFromLIDs.extent (0),
6047 std::invalid_argument,
"permuteToLIDs.extent(0) = "
6048 << numPermute <<
"!= permuteFromLIDs.extent(0) = "
6049 << permuteFromLIDs.extent (0) <<
".");
6053 using RMT = RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>;
6054 const RMT& srcMat =
dynamic_cast<const RMT&
> (srcObj);
6055 if (isStaticGraph ()) {
6056 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
6057 auto permuteToLIDs_h = permuteToLIDs.view_host ();
6058 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
6059 auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
6061 copyAndPermuteStaticGraph(srcMat, numSameIDs,
6062 permuteToLIDs_h.data(),
6063 permuteFromLIDs_h.data(),
6067 copyAndPermuteNonStaticGraph(srcMat, numSameIDs, permuteToLIDs,
6068 permuteFromLIDs, numPermute);
6072 std::ostringstream os;
6073 os << *prefix <<
"Done" << endl;
6074 std::cerr << os.str();
6078 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6080 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6082 (
const SrcDistObject& source,
6083 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6084 Kokkos::DualView<char*, buffer_device_type>& exports,
6085 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6086 size_t& constantNumPackets)
6088 using Details::Behavior;
6089 using Details::dualViewStatusToString;
6090 using Details::ProfilingRegion;
6091 using Teuchos::outArg;
6092 using Teuchos::REDUCE_MAX;
6093 using Teuchos::reduceAll;
6095 typedef LocalOrdinal LO;
6096 typedef GlobalOrdinal GO;
6097 const char tfecfFuncName[] =
"packAndPrepare: ";
6098 ProfilingRegion regionPAP (
"Tpetra::CrsMatrix::packAndPrepare");
6100 const bool debug = Behavior::debug(
"CrsMatrix");
6101 const bool verbose = Behavior::verbose(
"CrsMatrix");
6104 Teuchos::RCP<const Teuchos::Comm<int> > pComm = this->getComm ();
6105 if (pComm.is_null ()) {
6108 const Teuchos::Comm<int>& comm = *pComm;
6109 const int myRank = comm.getSize ();
6111 std::unique_ptr<std::string> prefix;
6113 prefix = this->
createPrefix(
"CrsMatrix",
"packAndPrepare");
6114 std::ostringstream os;
6115 os << *prefix <<
"Start" << endl
6125 std::cerr << os.str ();
6148 std::ostringstream msg;
6151 using crs_matrix_type = CrsMatrix<Scalar, LO, GO, Node>;
6152 const crs_matrix_type* srcCrsMat =
6153 dynamic_cast<const crs_matrix_type*
> (&source);
6154 if (srcCrsMat !=
nullptr) {
6156 std::ostringstream os;
6157 os << *prefix <<
"Source matrix same (CrsMatrix) type as target; "
6158 "calling packNew" << endl;
6159 std::cerr << os.str ();
6162 srcCrsMat->packNew (exportLIDs, exports, numPacketsPerLID,
6163 constantNumPackets);
6165 catch (std::exception& e) {
6167 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
6171 using Kokkos::HostSpace;
6172 using Kokkos::subview;
6173 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6174 using range_type = Kokkos::pair<size_t, size_t>;
6177 std::ostringstream os;
6178 os << *prefix <<
"Source matrix NOT same (CrsMatrix) type as target"
6180 std::cerr << os.str ();
6183 const row_matrix_type* srcRowMat =
6184 dynamic_cast<const row_matrix_type*
> (&source);
6185 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6186 (srcRowMat ==
nullptr, std::invalid_argument,
6187 "The source object of the Import or Export operation is neither a "
6188 "CrsMatrix (with the same template parameters as the target object), "
6189 "nor a RowMatrix (with the same first four template parameters as the "
6200 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6201 auto exportLIDs_h = exportLIDs.view_host ();
6202 Teuchos::ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6203 exportLIDs_h.size ());
6207 Teuchos::Array<char> exports_a;
6213 numPacketsPerLID.clear_sync_state ();
6214 numPacketsPerLID.modify_host ();
6215 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6216 Teuchos::ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6217 numPacketsPerLID_h.size ());
6222 srcRowMat->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6223 constantNumPackets);
6225 catch (std::exception& e) {
6227 msg <<
"Proc " << myRank <<
": " << e.what () << std::endl;
6231 const size_t newAllocSize =
static_cast<size_t> (exports_a.size ());
6232 if (
static_cast<size_t> (exports.extent (0)) < newAllocSize) {
6233 const std::string oldLabel = exports.d_view.label ();
6234 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6235 exports = exports_type (newLabel, newAllocSize);
6240 exports.modify_host();
6242 auto exports_h = exports.view_host ();
6243 auto exports_h_sub = subview (exports_h, range_type (0, newAllocSize));
6247 typedef typename exports_type::t_host::execution_space HES;
6248 typedef Kokkos::Device<HES, HostSpace> host_device_type;
6249 Kokkos::View<const char*, host_device_type>
6250 exports_a_kv (exports_a.getRawPtr (), newAllocSize);
6252 Kokkos::deep_copy (exports_h_sub, exports_a_kv);
6257 reduceAll<int, int> (comm, REDUCE_MAX, lclBad, outArg (gblBad));
6260 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6261 (
true, std::logic_error,
"packNew() or pack() threw an exception on "
6262 "one or more participating processes.");
6266 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6267 (lclBad != 0, std::logic_error,
"packNew threw an exception on one "
6268 "or more participating processes. Here is this process' error "
6269 "message: " << msg.str ());
6273 std::ostringstream os;
6274 os << *prefix <<
"packAndPrepare: Done!" << endl
6284 std::cerr << os.str ();
6288 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6290 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6291 packRow (
char exports[],
6292 const size_t offset,
6293 const size_t numEnt,
6294 const GlobalOrdinal gidsIn[],
6295 const impl_scalar_type valsIn[],
6296 const size_t numBytesPerValue)
const
6299 using Kokkos::subview;
6301 typedef LocalOrdinal LO;
6302 typedef GlobalOrdinal GO;
6303 typedef impl_scalar_type ST;
6311 const LO numEntLO =
static_cast<size_t> (numEnt);
6313 const size_t numEntBeg = offset;
6314 const size_t numEntLen = PackTraits<LO>::packValueCount (numEntLO);
6315 const size_t gidsBeg = numEntBeg + numEntLen;
6316 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6317 const size_t valsBeg = gidsBeg + gidsLen;
6318 const size_t valsLen = numEnt * numBytesPerValue;
6320 char*
const numEntOut = exports + numEntBeg;
6321 char*
const gidsOut = exports + gidsBeg;
6322 char*
const valsOut = exports + valsBeg;
6324 size_t numBytesOut = 0;
6326 numBytesOut += PackTraits<LO>::packValue (numEntOut, numEntLO);
6329 Kokkos::pair<int, size_t> p;
6330 p = PackTraits<GO>::packArray (gidsOut, gidsIn, numEnt);
6331 errorCode += p.first;
6332 numBytesOut += p.second;
6334 p = PackTraits<ST>::packArray (valsOut, valsIn, numEnt);
6335 errorCode += p.first;
6336 numBytesOut += p.second;
6339 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6340 TEUCHOS_TEST_FOR_EXCEPTION
6341 (numBytesOut != expectedNumBytes, std::logic_error,
"packRow: "
6342 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6343 << expectedNumBytes <<
".");
6344 TEUCHOS_TEST_FOR_EXCEPTION
6345 (errorCode != 0, std::runtime_error,
"packRow: "
6346 "PackTraits::packArray returned a nonzero error code");
6351 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6353 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6354 unpackRow (GlobalOrdinal gidsOut[],
6355 impl_scalar_type valsOut[],
6356 const char imports[],
6357 const size_t offset,
6358 const size_t numBytes,
6359 const size_t numEnt,
6360 const size_t numBytesPerValue)
6363 using Kokkos::subview;
6365 typedef LocalOrdinal LO;
6366 typedef GlobalOrdinal GO;
6367 typedef impl_scalar_type ST;
6369 Details::ProfilingRegion region_upack_row(
6370 "Tpetra::CrsMatrix::unpackRow",
6374 if (numBytes == 0) {
6377 const int myRank = this->getMap ()->getComm ()->getRank ();
6378 TEUCHOS_TEST_FOR_EXCEPTION
6379 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6380 "unpackRow: The number of bytes to unpack numBytes=0, but the "
6381 "number of entries to unpack (as reported by numPacketsPerLID) "
6382 "for this row numEnt=" << numEnt <<
" != 0.");
6387 if (numEnt == 0 && numBytes != 0) {
6388 const int myRank = this->getMap ()->getComm ()->getRank ();
6389 TEUCHOS_TEST_FOR_EXCEPTION
6390 (
true, std::logic_error,
"(Proc " << myRank <<
") CrsMatrix::"
6391 "unpackRow: The number of entries to unpack (as reported by "
6392 "numPacketsPerLID) numEnt=0, but the number of bytes to unpack "
6393 "numBytes=" << numBytes <<
" != 0.");
6399 const size_t numEntBeg = offset;
6400 const size_t numEntLen = PackTraits<LO>::packValueCount (lid);
6401 const size_t gidsBeg = numEntBeg + numEntLen;
6402 const size_t gidsLen = numEnt * PackTraits<GO>::packValueCount (gid);
6403 const size_t valsBeg = gidsBeg + gidsLen;
6404 const size_t valsLen = numEnt * numBytesPerValue;
6406 const char*
const numEntIn = imports + numEntBeg;
6407 const char*
const gidsIn = imports + gidsBeg;
6408 const char*
const valsIn = imports + valsBeg;
6410 size_t numBytesOut = 0;
6413 numBytesOut += PackTraits<LO>::unpackValue (numEntOut, numEntIn);
6414 if (
static_cast<size_t> (numEntOut) != numEnt ||
6415 numEntOut ==
static_cast<LO
> (0)) {
6416 const int myRank = this->getMap ()->getComm ()->getRank ();
6417 std::ostringstream os;
6418 os <<
"(Proc " << myRank <<
") CrsMatrix::unpackRow: ";
6419 bool firstErrorCondition =
false;
6420 if (
static_cast<size_t> (numEntOut) != numEnt) {
6421 os <<
"Number of entries from numPacketsPerLID numEnt=" << numEnt
6422 <<
" does not equal number of entries unpacked from imports "
6423 "buffer numEntOut=" << numEntOut <<
".";
6424 firstErrorCondition =
true;
6426 if (numEntOut ==
static_cast<LO
> (0)) {
6427 if (firstErrorCondition) {
6430 os <<
"Number of entries unpacked from imports buffer numEntOut=0, "
6431 "but number of bytes to unpack for this row numBytes=" << numBytes
6432 <<
" != 0. This should never happen, since packRow should only "
6433 "ever pack rows with a nonzero number of entries. In this case, "
6434 "the number of entries from numPacketsPerLID is numEnt=" << numEnt
6437 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error, os.str ());
6441 Kokkos::pair<int, size_t> p;
6442 p = PackTraits<GO>::unpackArray (gidsOut, gidsIn, numEnt);
6443 errorCode += p.first;
6444 numBytesOut += p.second;
6446 p = PackTraits<ST>::unpackArray (valsOut, valsIn, numEnt);
6447 errorCode += p.first;
6448 numBytesOut += p.second;
6451 TEUCHOS_TEST_FOR_EXCEPTION
6452 (numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = "
6453 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
6455 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
6456 TEUCHOS_TEST_FOR_EXCEPTION
6457 (numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: "
6458 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = "
6459 << expectedNumBytes <<
".");
6461 TEUCHOS_TEST_FOR_EXCEPTION
6462 (errorCode != 0, std::runtime_error,
"unpackRow: "
6463 "PackTraits::unpackArray returned a nonzero error code");
6468 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6470 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6471 allocatePackSpaceNew (Kokkos::DualView<char*, buffer_device_type>& exports,
6472 size_t& totalNumEntries,
6473 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs)
const
6475 using Details::Behavior;
6476 using Details::dualViewStatusToString;
6478 typedef impl_scalar_type IST;
6479 typedef LocalOrdinal LO;
6480 typedef GlobalOrdinal GO;
6486 const bool verbose = Behavior::verbose(
"CrsMatrix");
6487 std::unique_ptr<std::string> prefix;
6489 prefix = this->
createPrefix(
"CrsMatrix",
"allocatePackSpaceNew");
6490 std::ostringstream os;
6491 os << *prefix <<
"Before:"
6499 std::cerr << os.str ();
6504 const LO numExportLIDs =
static_cast<LO
> (exportLIDs.extent (0));
6506 TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6507 auto exportLIDs_h = exportLIDs.view_host ();
6510 totalNumEntries = 0;
6511 for (LO i = 0; i < numExportLIDs; ++i) {
6512 const LO lclRow = exportLIDs_h[i];
6513 size_t curNumEntries = this->getNumEntriesInLocalRow (lclRow);
6516 if (curNumEntries == Teuchos::OrdinalTraits<size_t>::invalid ()) {
6519 totalNumEntries += curNumEntries;
6530 const size_t allocSize =
6531 static_cast<size_t> (numExportLIDs) *
sizeof (LO) +
6532 totalNumEntries * (
sizeof (IST) +
sizeof (GO));
6533 if (
static_cast<size_t> (exports.extent (0)) < allocSize) {
6534 using exports_type = Kokkos::DualView<char*, buffer_device_type>;
6536 const std::string oldLabel = exports.d_view.label ();
6537 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
6538 exports = exports_type (newLabel, allocSize);
6542 std::ostringstream os;
6543 os << *prefix <<
"After:"
6551 std::cerr << os.str ();
6555 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6558 packNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6559 Kokkos::DualView<char*, buffer_device_type>& exports,
6560 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6561 size_t& constantNumPackets)
const
6565 if (this->isStaticGraph ()) {
6566 using ::Tpetra::Details::packCrsMatrixNew;
6567 packCrsMatrixNew (*
this, exports, numPacketsPerLID, exportLIDs,
6568 constantNumPackets);
6571 this->packNonStaticNew (exportLIDs, exports, numPacketsPerLID,
6572 constantNumPackets);
6576 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6579 packNonStaticNew (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
6580 Kokkos::DualView<char*, buffer_device_type>& exports,
6581 const Kokkos::DualView<size_t*, buffer_device_type>& numPacketsPerLID,
6582 size_t& constantNumPackets)
const
6585 using Details::dualViewStatusToString;
6587 using Details::create_mirror_view_from_raw_host_array;
6590 using LO = LocalOrdinal;
6591 using GO = GlobalOrdinal;
6592 using ST = impl_scalar_type;
6593 const char tfecfFuncName[] =
"packNonStaticNew: ";
6595 const bool verbose = Behavior::verbose(
"CrsMatrix");
6596 std::unique_ptr<std::string> prefix;
6598 prefix = this->createPrefix(
"CrsMatrix",
"packNonStaticNew");
6599 std::ostringstream os;
6600 os << *prefix <<
"Start" << endl;
6601 std::cerr << os.str ();
6604 const size_t numExportLIDs =
static_cast<size_t> (exportLIDs.extent (0));
6605 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6606 (numExportLIDs !=
static_cast<size_t> (numPacketsPerLID.extent (0)),
6607 std::invalid_argument,
"exportLIDs.size() = " << numExportLIDs
6608 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.extent (0)
6614 constantNumPackets = 0;
6619 size_t totalNumEntries = 0;
6620 this->allocatePackSpaceNew (exports, totalNumEntries, exportLIDs);
6621 const size_t bufSize =
static_cast<size_t> (exports.extent (0));
6624 exports.clear_sync_state();
6625 exports.modify_host();
6626 auto exports_h = exports.view_host ();
6628 std::ostringstream os;
6629 os << *prefix <<
"After marking exports as modified on host, "
6630 << dualViewStatusToString (exports,
"exports") << endl;
6631 std::cerr << os.str ();
6635 auto exportLIDs_h = exportLIDs.view_host ();
6638 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->clear_sync_state();
6639 const_cast<Kokkos::DualView<size_t*, buffer_device_type>*
>(&numPacketsPerLID)->modify_host();
6640 auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6645 auto maxRowNumEnt = this->getLocalMaxNumRowEntries();
6649 typename global_inds_host_view_type::non_const_type gidsIn_k;
6650 if (this->isLocallyIndexed()) {
6652 typename global_inds_host_view_type::non_const_type(
"packGids",
6657 for (
size_t i = 0; i < numExportLIDs; ++i) {
6658 const LO lclRow = exportLIDs_h[i];
6660 size_t numBytes = 0;
6661 size_t numEnt = this->getNumEntriesInLocalRow (lclRow);
6668 numPacketsPerLID_h[i] = 0;
6672 if (this->isLocallyIndexed ()) {
6673 typename global_inds_host_view_type::non_const_type gidsIn;
6674 values_host_view_type valsIn;
6678 local_inds_host_view_type lidsIn;
6679 this->getLocalRowView (lclRow, lidsIn, valsIn);
6680 const map_type& colMap = * (this->getColMap ());
6681 for (
size_t k = 0; k < numEnt; ++k) {
6682 gidsIn_k[k] = colMap.getGlobalElement (lidsIn[k]);
6684 gidsIn = Kokkos::subview(gidsIn_k, Kokkos::make_pair(GO(0),GO(numEnt)));
6686 const size_t numBytesPerValue =
6687 PackTraits<ST>::packValueCount (valsIn[0]);
6688 numBytes = this->
packRow (exports_h.data (), offset, numEnt,
6689 gidsIn.data (), valsIn.data (),
6692 else if (this->isGloballyIndexed ()) {
6693 global_inds_host_view_type gidsIn;
6694 values_host_view_type valsIn;
6700 const map_type& rowMap = * (this->getRowMap ());
6701 const GO gblRow = rowMap.getGlobalElement (lclRow);
6702 this->getGlobalRowView (gblRow, gidsIn, valsIn);
6704 const size_t numBytesPerValue =
6705 PackTraits<ST>::packValueCount (valsIn[0]);
6706 numBytes = this->
packRow (exports_h.data (), offset, numEnt,
6707 gidsIn.data (), valsIn.data (),
6714 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6715 (offset > bufSize || offset + numBytes > bufSize, std::logic_error,
6716 "First invalid offset into 'exports' pack buffer at index i = " << i
6717 <<
". exportLIDs_h[i]: " << exportLIDs_h[i] <<
", bufSize: " <<
6718 bufSize <<
", offset: " << offset <<
", numBytes: " << numBytes <<
6723 numPacketsPerLID_h[i] = numBytes;
6728 std::ostringstream os;
6729 os << *prefix <<
"Tpetra::CrsMatrix::packNonStaticNew: After:" << endl
6736 std::cerr << os.str ();
6740 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6742 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6743 combineGlobalValuesRaw(
const LocalOrdinal lclRow,
6744 const LocalOrdinal numEnt,
6745 const impl_scalar_type vals[],
6746 const GlobalOrdinal cols[],
6748 const char*
const prefix,
6752 using GO = GlobalOrdinal;
6756 const GO gblRow = myGraph_->rowMap_->getGlobalElement(lclRow);
6757 Teuchos::ArrayView<const GO> cols_av
6758 (numEnt == 0 ?
nullptr : cols, numEnt);
6759 Teuchos::ArrayView<const Scalar> vals_av
6760 (numEnt == 0 ?
nullptr :
reinterpret_cast<const Scalar*
> (vals), numEnt);
6765 combineGlobalValues(gblRow, cols_av, vals_av, combMode,
6766 prefix, debug, verbose);
6770 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6772 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
6773 combineGlobalValues(
6774 const GlobalOrdinal globalRowIndex,
6775 const Teuchos::ArrayView<const GlobalOrdinal>& columnIndices,
6776 const Teuchos::ArrayView<const Scalar>& values,
6778 const char*
const prefix,
6782 const char tfecfFuncName[] =
"combineGlobalValues: ";
6784 if (isStaticGraph ()) {
6788 if (combineMode == ADD) {
6789 sumIntoGlobalValues (globalRowIndex, columnIndices, values);
6791 else if (combineMode == REPLACE) {
6792 replaceGlobalValues (globalRowIndex, columnIndices, values);
6794 else if (combineMode == ABSMAX) {
6795 using ::Tpetra::Details::AbsMax;
6797 this->
template transformGlobalValues<AbsMax<Scalar> > (globalRowIndex,
6801 else if (combineMode == INSERT) {
6802 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6803 (isStaticGraph() && combineMode == INSERT,
6804 std::invalid_argument,
"INSERT combine mode is forbidden "
6805 "if the matrix has a static (const) graph (i.e., was "
6806 "constructed with the CrsMatrix constructor that takes a "
6807 "const CrsGraph pointer).");
6810 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6811 (
true, std::logic_error,
"Invalid combine mode; should "
6813 "Please report this bug to the Tpetra developers.");
6817 if (combineMode == ADD || combineMode == INSERT) {
6824 insertGlobalValuesFilteredChecked(globalRowIndex,
6825 columnIndices, values, prefix, debug, verbose);
6836 else if (combineMode == ABSMAX) {
6837 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6838 ! isStaticGraph () && combineMode == ABSMAX, std::logic_error,
6839 "ABSMAX combine mode when the matrix has a dynamic graph is not yet "
6842 else if (combineMode == REPLACE) {
6843 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6844 ! isStaticGraph () && combineMode == REPLACE, std::logic_error,
6845 "REPLACE combine mode when the matrix has a dynamic graph is not yet "
6849 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
6850 true, std::logic_error,
"Should never get here! Please report this "
6851 "bug to the Tpetra developers.");
6856 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6860 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
6861 Kokkos::DualView<char*, buffer_device_type> imports,
6862 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6863 const size_t constantNumPackets,
6867 using Details::dualViewStatusToString;
6870 const char tfecfFuncName[] =
"unpackAndCombine: ";
6871 ProfilingRegion regionUAC (
"Tpetra::CrsMatrix::unpackAndCombine");
6873 const bool debug = Behavior::debug(
"CrsMatrix");
6874 const bool verbose = Behavior::verbose(
"CrsMatrix");
6875 constexpr int numValidModes = 5;
6878 const char* validModeNames[numValidModes] =
6879 {
"ADD",
"REPLACE",
"ABSMAX",
"INSERT",
"ZERO"};
6881 std::unique_ptr<std::string> prefix;
6883 prefix = this->createPrefix(
"CrsMatrix",
"unpackAndCombine");
6884 std::ostringstream os;
6885 os << *prefix <<
"Start:" << endl
6887 << dualViewStatusToString (importLIDs,
"importLIDs")
6890 << dualViewStatusToString (imports,
"imports")
6893 << dualViewStatusToString (numPacketsPerLID,
"numPacketsPerLID")
6895 << *prefix <<
" constantNumPackets: " << constantNumPackets
6899 std::cerr << os.str ();
6903 if (std::find (validModes, validModes+numValidModes, combineMode) ==
6904 validModes+numValidModes) {
6905 std::ostringstream os;
6906 os <<
"Invalid combine mode. Valid modes are {";
6907 for (
int k = 0; k < numValidModes; ++k) {
6908 os << validModeNames[k];
6909 if (k < numValidModes - 1) {
6914 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6915 (
true, std::invalid_argument, os.str ());
6917 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6918 (importLIDs.extent(0) != numPacketsPerLID.extent(0),
6919 std::invalid_argument,
"importLIDs.extent(0)="
6920 << importLIDs.extent(0)
6921 <<
" != numPacketsPerLID.extent(0)="
6922 << numPacketsPerLID.extent(0) <<
".");
6925 if (combineMode ==
ZERO) {
6930 using Teuchos::reduceAll;
6931 std::unique_ptr<std::ostringstream> msg (
new std::ostringstream ());
6934 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
6935 constantNumPackets, combineMode,
6937 }
catch (std::exception& e) {
6942 const Teuchos::Comm<int>& comm = * (this->getComm ());
6943 reduceAll<int, int> (comm, Teuchos::REDUCE_MAX,
6944 lclBad, Teuchos::outArg (gblBad));
6950 std::ostringstream os;
6951 os <<
"Proc " << comm.getRank () <<
": " << msg->str () << endl;
6952 msg = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
6954 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6955 (
true, std::logic_error, std::endl <<
"unpackAndCombineImpl "
6956 "threw an exception on one or more participating processes: "
6957 << endl << msg->str ());
6961 unpackAndCombineImpl(importLIDs, imports, numPacketsPerLID,
6962 constantNumPackets, combineMode,
6967 std::ostringstream os;
6968 os << *prefix <<
"Done!" << endl
6970 << dualViewStatusToString (importLIDs,
"importLIDs")
6973 << dualViewStatusToString (imports,
"imports")
6976 << dualViewStatusToString (numPacketsPerLID,
"numPacketsPerLID")
6978 std::cerr << os.str ();
6982 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
6986 const Kokkos::DualView<
const local_ordinal_type*,
6987 buffer_device_type>& importLIDs,
6988 Kokkos::DualView<char*, buffer_device_type> imports,
6989 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
6990 const size_t constantNumPackets,
6995 "Tpetra::CrsMatrix::unpackAndCombineImpl",
6999 const char tfecfFuncName[] =
"unpackAndCombineImpl";
7000 std::unique_ptr<std::string> prefix;
7002 prefix = this->createPrefix(
"CrsMatrix", tfecfFuncName);
7003 std::ostringstream os;
7004 os << *prefix <<
"isStaticGraph(): "
7005 << (isStaticGraph() ?
"true" :
"false")
7006 <<
", importLIDs.extent(0): "
7007 << importLIDs.extent(0)
7008 <<
", imports.extent(0): "
7009 << imports.extent(0)
7010 <<
", numPacketsPerLID.extent(0): "
7011 << numPacketsPerLID.extent(0)
7013 std::cerr << os.str();
7016 if (isStaticGraph ()) {
7017 using Details::unpackCrsMatrixAndCombineNew;
7018 unpackCrsMatrixAndCombineNew(*
this, imports, numPacketsPerLID,
7019 importLIDs, constantNumPackets,
7024 using padding_type =
typename crs_graph_type::padding_type;
7025 std::unique_ptr<padding_type> padding;
7027 padding = myGraph_->computePaddingForCrsMatrixUnpack(
7028 importLIDs, imports, numPacketsPerLID, verbose);
7030 catch (std::exception& e) {
7031 const auto rowMap = getRowMap();
7032 const auto comm = rowMap.is_null() ? Teuchos::null :
7034 const int myRank = comm.is_null() ? -1 : comm->getRank();
7035 TEUCHOS_TEST_FOR_EXCEPTION
7036 (
true, std::runtime_error,
"Proc " << myRank <<
": "
7037 "Tpetra::CrsGraph::computePaddingForCrsMatrixUnpack "
7038 "threw an exception: " << e.what());
7041 std::ostringstream os;
7042 os << *prefix <<
"Call applyCrsPadding" << endl;
7043 std::cerr << os.str();
7045 applyCrsPadding(*padding, verbose);
7048 std::ostringstream os;
7049 os << *prefix <<
"Call unpackAndCombineImplNonStatic" << endl;
7050 std::cerr << os.str();
7052 unpackAndCombineImplNonStatic(importLIDs, imports,
7059 std::ostringstream os;
7060 os << *prefix <<
"Done" << endl;
7061 std::cerr << os.str();
7065 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7067 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
7068 unpackAndCombineImplNonStatic(
7069 const Kokkos::DualView<
const local_ordinal_type*,
7070 buffer_device_type>& importLIDs,
7071 Kokkos::DualView<char*, buffer_device_type> imports,
7072 Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID,
7073 const size_t constantNumPackets,
7074 const CombineMode combineMode)
7077 using Kokkos::subview;
7078 using Kokkos::MemoryUnmanaged;
7079 using Details::Behavior;
7080 using Details::castAwayConstDualView;
7081 using Details::create_mirror_view_from_raw_host_array;
7082 using Details::PackTraits;
7083 using Details::ScalarViewTraits;
7085 using LO = LocalOrdinal;
7086 using GO = GlobalOrdinal;
7087 using ST = impl_scalar_type;
7088 using size_type =
typename Teuchos::ArrayView<LO>::size_type;
7090 typename View<int*, device_type>::HostMirror::execution_space;
7091 using pair_type = std::pair<typename View<int*, HES>::size_type,
7092 typename View<int*, HES>::size_type>;
7093 using gids_out_type = View<GO*, HES, MemoryUnmanaged>;
7094 using vals_out_type = View<ST*, HES, MemoryUnmanaged>;
7095 const char tfecfFuncName[] =
"unpackAndCombineImplNonStatic";
7097 const bool debug = Behavior::debug(
"CrsMatrix");
7098 const bool verbose = Behavior::verbose(
"CrsMatrix");
7099 std::unique_ptr<std::string> prefix;
7101 prefix = this->
createPrefix(
"CrsMatrix", tfecfFuncName);
7102 std::ostringstream os;
7103 os << *prefix << endl;
7104 std::cerr << os.str ();
7106 const char*
const prefix_raw =
7107 verbose ? prefix.get()->c_str() :
nullptr;
7109 const size_type numImportLIDs = importLIDs.extent (0);
7110 if (combineMode == ZERO || numImportLIDs == 0) {
7114 Details::ProfilingRegion region_unpack_and_combine_impl_non_static(
7115 "Tpetra::CrsMatrix::unpackAndCombineImplNonStatic",
7120 if (imports.need_sync_host()) {
7121 imports.sync_host ();
7123 auto imports_h = imports.view_host();
7126 if (numPacketsPerLID.need_sync_host()) {
7127 numPacketsPerLID.sync_host ();
7129 auto numPacketsPerLID_h = numPacketsPerLID.view_host();
7131 TEUCHOS_ASSERT( ! importLIDs.need_sync_host() );
7132 auto importLIDs_h = importLIDs.view_host();
7134 size_t numBytesPerValue;
7145 numBytesPerValue = PackTraits<ST>::packValueCount (val);
7150 size_t maxRowNumEnt = 0;
7151 for (size_type i = 0; i < numImportLIDs; ++i) {
7152 const size_t numBytes = numPacketsPerLID_h[i];
7153 if (numBytes == 0) {
7158 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7159 (offset + numBytes >
size_t(imports_h.extent (0)),
7160 std::logic_error,
": At local row index importLIDs_h[i="
7161 << i <<
"]=" << importLIDs_h[i] <<
", offset (=" << offset
7162 <<
") + numBytes (=" << numBytes <<
") > "
7163 "imports_h.extent(0)=" << imports_h.extent (0) <<
".");
7168 const size_t theNumBytes =
7169 PackTraits<LO>::packValueCount (numEntLO);
7170 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7171 (theNumBytes > numBytes, std::logic_error,
": theNumBytes="
7172 << theNumBytes <<
" > numBytes = " << numBytes <<
".");
7174 const char*
const inBuf = imports_h.data () + offset;
7175 const size_t actualNumBytes =
7176 PackTraits<LO>::unpackValue (numEntLO, inBuf);
7179 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7180 (actualNumBytes > numBytes, std::logic_error,
": At i=" << i
7181 <<
", actualNumBytes=" << actualNumBytes
7182 <<
" > numBytes=" << numBytes <<
".");
7183 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7184 (numEntLO == 0, std::logic_error,
": At local row index "
7185 "importLIDs_h[i=" << i <<
"]=" << importLIDs_h[i] <<
", "
7186 "the number of entries read from the packed data is "
7187 "numEntLO=" << numEntLO <<
", but numBytes=" << numBytes
7191 maxRowNumEnt = std::max(
size_t(numEntLO), maxRowNumEnt);
7199 View<GO*, HES> gblColInds;
7200 View<LO*, HES> lclColInds;
7201 View<ST*, HES> vals;
7214 gblColInds = ScalarViewTraits<GO, HES>::allocateArray(
7215 gid, maxRowNumEnt,
"gids");
7216 lclColInds = ScalarViewTraits<LO, HES>::allocateArray(
7217 lid, maxRowNumEnt,
"lids");
7218 vals = ScalarViewTraits<ST, HES>::allocateArray(
7219 val, maxRowNumEnt,
"vals");
7223 for (size_type i = 0; i < numImportLIDs; ++i) {
7224 const size_t numBytes = numPacketsPerLID_h[i];
7225 if (numBytes == 0) {
7229 const char*
const inBuf = imports_h.data () + offset;
7230 (void) PackTraits<LO>::unpackValue (numEntLO, inBuf);
7232 const size_t numEnt =
static_cast<size_t>(numEntLO);;
7233 const LO lclRow = importLIDs_h[i];
7235 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
7236 vals_out_type valsOut = subview (vals, pair_type (0, numEnt));
7238 const size_t numBytesOut =
7239 unpackRow (gidsOut.data (), valsOut.data (), imports_h.data (),
7240 offset, numBytes, numEnt, numBytesPerValue);
7241 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7242 (numBytes != numBytesOut, std::logic_error,
": At i=" << i
7243 <<
", numBytes=" << numBytes <<
" != numBytesOut="
7244 << numBytesOut <<
".");
7246 const ST*
const valsRaw =
const_cast<const ST*
> (valsOut.data ());
7247 const GO*
const gidsRaw =
const_cast<const GO*
> (gidsOut.data ());
7248 combineGlobalValuesRaw(lclRow, numEnt, valsRaw, gidsRaw,
7249 combineMode, prefix_raw, debug, verbose);
7255 std::ostringstream os;
7256 os << *prefix <<
"Done" << endl;
7257 std::cerr << os.str();
7261 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7262 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7265 const bool force)
const
7267 using Teuchos::null;
7271 TEUCHOS_TEST_FOR_EXCEPTION(
7272 ! this->hasColMap (), std::runtime_error,
"Tpetra::CrsMatrix::getColumn"
7273 "MapMultiVector: You may only call this method if the matrix has a "
7274 "column Map. If the matrix does not yet have a column Map, you should "
7275 "first call fillComplete (with domain and range Map if necessary).");
7279 TEUCHOS_TEST_FOR_EXCEPTION(
7280 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7281 "CrsMatrix::getColumnMapMultiVector: You may only call this method if "
7282 "this matrix's graph is fill complete.");
7285 RCP<const import_type> importer = this->getGraph ()->getImporter ();
7286 RCP<const map_type> colMap = this->getColMap ();
7299 if (! importer.is_null () || force) {
7300 if (importMV_.is_null () || importMV_->getNumVectors () != numVecs) {
7301 X_colMap = rcp (
new MV (colMap, numVecs));
7304 importMV_ = X_colMap;
7307 X_colMap = importMV_;
7318 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7319 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7322 const bool force)
const
7324 using Teuchos::null;
7330 TEUCHOS_TEST_FOR_EXCEPTION(
7331 ! this->getGraph ()->isFillComplete (), std::runtime_error,
"Tpetra::"
7332 "CrsMatrix::getRowMapMultiVector: You may only call this method if this "
7333 "matrix's graph is fill complete.");
7336 RCP<const export_type> exporter = this->getGraph ()->getExporter ();
7340 RCP<const map_type> rowMap = this->getRowMap ();
7352 if (! exporter.is_null () || force) {
7353 if (exportMV_.is_null () || exportMV_->getNumVectors () != numVecs) {
7354 Y_rowMap = rcp (
new MV (rowMap, numVecs));
7355 exportMV_ = Y_rowMap;
7358 Y_rowMap = exportMV_;
7364 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7369 TEUCHOS_TEST_FOR_EXCEPTION(
7370 myGraph_.is_null (), std::logic_error,
"Tpetra::CrsMatrix::"
7371 "removeEmptyProcessesInPlace: This method does not work when the matrix "
7372 "was created with a constant graph (that is, when it was created using "
7373 "the version of its constructor that takes an RCP<const CrsGraph>). "
7374 "This is because the matrix is not allowed to modify the graph in that "
7375 "case, but removing empty processes requires modifying the graph.");
7376 myGraph_->removeEmptyProcessesInPlace (newMap);
7380 this->map_ = this->getRowMap ();
7384 staticGraph_ = Teuchos::rcp_const_cast<const Graph> (myGraph_);
7387 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7388 Teuchos::RCP<RowMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
7390 add (
const Scalar& alpha,
7393 const Teuchos::RCP<const map_type>& domainMap,
7394 const Teuchos::RCP<const map_type>& rangeMap,
7395 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7397 using Teuchos::Array;
7398 using Teuchos::ArrayView;
7399 using Teuchos::ParameterList;
7402 using Teuchos::rcp_implicit_cast;
7403 using Teuchos::sublist;
7407 using crs_matrix_type =
7409 const char errPfx[] =
"Tpetra::CrsMatrix::add: ";
7411 const bool debug = Details::Behavior::debug(
"CrsMatrix");
7412 const bool verbose = Details::Behavior::verbose(
"CrsMatrix");
7413 std::unique_ptr<std::string> prefix;
7415 prefix = this->createPrefix(
"CrsMatrix",
"add");
7416 std::ostringstream os;
7417 os << *prefix <<
"Start" << endl;
7418 std::cerr << os.str ();
7421 const crs_matrix_type& B = *
this;
7422 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero();
7423 const Scalar ONE = Teuchos::ScalarTraits<Scalar>::one();
7430 RCP<const map_type> A_rangeMap = A.
getRangeMap ();
7431 RCP<const map_type> B_domainMap = B.getDomainMap ();
7432 RCP<const map_type> B_rangeMap = B.getRangeMap ();
7434 RCP<const map_type> theDomainMap = domainMap;
7435 RCP<const map_type> theRangeMap = rangeMap;
7437 if (domainMap.is_null ()) {
7438 if (B_domainMap.is_null ()) {
7439 TEUCHOS_TEST_FOR_EXCEPTION(
7440 A_domainMap.is_null (), std::invalid_argument,
7441 "Tpetra::CrsMatrix::add: If neither A nor B have a domain Map, "
7442 "then you must supply a nonnull domain Map to this method.");
7443 theDomainMap = A_domainMap;
7445 theDomainMap = B_domainMap;
7448 if (rangeMap.is_null ()) {
7449 if (B_rangeMap.is_null ()) {
7450 TEUCHOS_TEST_FOR_EXCEPTION(
7451 A_rangeMap.is_null (), std::invalid_argument,
7452 "Tpetra::CrsMatrix::add: If neither A nor B have a range Map, "
7453 "then you must supply a nonnull range Map to this method.");
7454 theRangeMap = A_rangeMap;
7456 theRangeMap = B_rangeMap;
7464 if (! A_domainMap.is_null() && ! A_rangeMap.is_null()) {
7465 if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7466 TEUCHOS_TEST_FOR_EXCEPTION
7467 (! B_domainMap->isSameAs(*A_domainMap),
7468 std::invalid_argument,
7469 errPfx <<
"The input RowMatrix A must have a domain Map "
7470 "which is the same as (isSameAs) this RowMatrix's "
7472 TEUCHOS_TEST_FOR_EXCEPTION
7473 (! B_rangeMap->isSameAs(*A_rangeMap), std::invalid_argument,
7474 errPfx <<
"The input RowMatrix A must have a range Map "
7475 "which is the same as (isSameAs) this RowMatrix's range "
7477 TEUCHOS_TEST_FOR_EXCEPTION
7478 (! domainMap.is_null() &&
7479 ! domainMap->isSameAs(*B_domainMap),
7480 std::invalid_argument,
7481 errPfx <<
"The input domain Map must be the same as "
7482 "(isSameAs) this RowMatrix's domain Map.");
7483 TEUCHOS_TEST_FOR_EXCEPTION
7484 (! rangeMap.is_null() &&
7485 ! rangeMap->isSameAs(*B_rangeMap),
7486 std::invalid_argument,
7487 errPfx <<
"The input range Map must be the same as "
7488 "(isSameAs) this RowMatrix's range Map.");
7491 else if (! B_domainMap.is_null() && ! B_rangeMap.is_null()) {
7492 TEUCHOS_TEST_FOR_EXCEPTION
7493 (! domainMap.is_null() &&
7494 ! domainMap->isSameAs(*B_domainMap),
7495 std::invalid_argument,
7496 errPfx <<
"The input domain Map must be the same as "
7497 "(isSameAs) this RowMatrix's domain Map.");
7498 TEUCHOS_TEST_FOR_EXCEPTION
7499 (! rangeMap.is_null() && ! rangeMap->isSameAs(*B_rangeMap),
7500 std::invalid_argument,
7501 errPfx <<
"The input range Map must be the same as "
7502 "(isSameAs) this RowMatrix's range Map.");
7505 TEUCHOS_TEST_FOR_EXCEPTION
7506 (domainMap.is_null() || rangeMap.is_null(),
7507 std::invalid_argument, errPfx <<
"If neither A nor B "
7508 "have a domain and range Map, then you must supply a "
7509 "nonnull domain and range Map to this method.");
7516 bool callFillComplete =
true;
7517 RCP<ParameterList> constructorSublist;
7518 RCP<ParameterList> fillCompleteSublist;
7519 if (! params.is_null()) {
7521 params->get(
"Call fillComplete", callFillComplete);
7522 constructorSublist = sublist(params,
"Constructor parameters");
7523 fillCompleteSublist = sublist(params,
"fillComplete parameters");
7526 RCP<const map_type> A_rowMap = A.
getRowMap ();
7527 RCP<const map_type> B_rowMap = B.getRowMap ();
7528 RCP<const map_type> C_rowMap = B_rowMap;
7529 RCP<crs_matrix_type> C;
7535 if (A_rowMap->isSameAs (*B_rowMap)) {
7536 const LO localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7537 Array<size_t> C_maxNumEntriesPerRow (localNumRows, 0);
7540 if (alpha !=
ZERO) {
7541 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7543 C_maxNumEntriesPerRow[localRow] += A_numEntries;
7548 for (LO localRow = 0; localRow < localNumRows; ++localRow) {
7549 const size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7550 C_maxNumEntriesPerRow[localRow] += B_numEntries;
7554 if (constructorSublist.is_null ()) {
7555 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow ()));
7557 C = rcp (
new crs_matrix_type (C_rowMap, C_maxNumEntriesPerRow (),
7558 constructorSublist));
7569 TEUCHOS_TEST_FOR_EXCEPTION
7570 (
true, std::invalid_argument, errPfx <<
"The row maps must "
7571 "be the same for statically allocated matrices, to ensure "
7572 "that there is sufficient space to do the addition.");
7575 TEUCHOS_TEST_FOR_EXCEPTION
7576 (C.is_null (), std::logic_error,
7577 errPfx <<
"C should not be null at this point. "
7578 "Please report this bug to the Tpetra developers.");
7581 std::ostringstream os;
7582 os << *prefix <<
"Compute C = alpha*A + beta*B" << endl;
7583 std::cerr << os.str ();
7585 using gids_type = nonconst_global_inds_host_view_type;
7586 using vals_type = nonconst_values_host_view_type;
7590 if (alpha !=
ZERO) {
7591 const LO A_localNumRows =
static_cast<LO
> (A_rowMap->getLocalNumElements ());
7592 for (LO localRow = 0; localRow < A_localNumRows; ++localRow) {
7594 const GO globalRow = A_rowMap->getGlobalElement (localRow);
7595 if (A_numEntries >
static_cast<size_t> (ind.size ())) {
7596 Kokkos::resize(ind,A_numEntries);
7597 Kokkos::resize(val,A_numEntries);
7599 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, A_numEntries));
7600 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, A_numEntries));
7604 for (
size_t k = 0; k < A_numEntries; ++k) {
7605 valView[k] *= alpha;
7608 C->insertGlobalValues (globalRow, A_numEntries,
7609 reinterpret_cast<Scalar *
>(valView.data()),
7615 const LO B_localNumRows =
static_cast<LO
> (B_rowMap->getLocalNumElements ());
7616 for (LO localRow = 0; localRow < B_localNumRows; ++localRow) {
7617 size_t B_numEntries = B.getNumEntriesInLocalRow (localRow);
7618 const GO globalRow = B_rowMap->getGlobalElement (localRow);
7619 if (B_numEntries >
static_cast<size_t> (ind.size ())) {
7620 Kokkos::resize(ind,B_numEntries);
7621 Kokkos::resize(val,B_numEntries);
7623 gids_type indView = Kokkos::subview(ind,std::make_pair((
size_t)0, B_numEntries));
7624 vals_type valView = Kokkos::subview(val,std::make_pair((
size_t)0, B_numEntries));
7625 B.getGlobalRowCopy (globalRow, indView, valView, B_numEntries);
7628 for (
size_t k = 0; k < B_numEntries; ++k) {
7632 C->insertGlobalValues (globalRow, B_numEntries,
7633 reinterpret_cast<Scalar *
>(valView.data()),
7638 if (callFillComplete) {
7640 std::ostringstream os;
7641 os << *prefix <<
"Call fillComplete on C" << endl;
7642 std::cerr << os.str ();
7644 if (fillCompleteSublist.is_null ()) {
7645 C->fillComplete (theDomainMap, theRangeMap);
7647 C->fillComplete (theDomainMap, theRangeMap, fillCompleteSublist);
7651 std::ostringstream os;
7652 os << *prefix <<
"Do NOT call fillComplete on C" << endl;
7653 std::cerr << os.str ();
7657 std::ostringstream os;
7658 os << *prefix <<
"Done" << endl;
7659 std::cerr << os.str ();
7661 return rcp_implicit_cast<row_matrix_type> (C);
7666 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
7670 const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
7671 const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
7672 const Teuchos::RCP<const map_type>& domainMap,
7673 const Teuchos::RCP<const map_type>& rangeMap,
7674 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
7677 using Details::getArrayViewFromDualView;
7678 using Details::packCrsMatrixWithOwningPIDs;
7679 using Details::unpackAndCombineWithOwningPIDsCount;
7680 using Details::unpackAndCombineIntoCrsArrays;
7681 using Teuchos::ArrayRCP;
7682 using Teuchos::ArrayView;
7683 using Teuchos::Comm;
7684 using Teuchos::ParameterList;
7687 typedef LocalOrdinal LO;
7688 typedef GlobalOrdinal GO;
7689 typedef node_type NT;
7694 const bool debug = Behavior::debug(
"CrsMatrix");
7695 const bool verbose = Behavior::verbose(
"CrsMatrix");
7696 int MyPID = getComm ()->getRank ();
7698 std::unique_ptr<std::string> verbosePrefix;
7701 this->createPrefix(
"CrsMatrix",
"transferAndFillComplete");
7702 std::ostringstream os;
7703 os <<
"Start" << endl;
7704 std::cerr << os.str();
7711 bool reverseMode =
false;
7712 bool restrictComm =
false;
7714 int mm_optimization_core_count =
7715 Behavior::TAFC_OptimizationCoreCount();
7716 RCP<ParameterList> matrixparams;
7717 bool overrideAllreduce =
false;
7718 if (! params.is_null ()) {
7719 matrixparams = sublist (params,
"CrsMatrix");
7720 reverseMode = params->get (
"Reverse Mode", reverseMode);
7721 restrictComm = params->get (
"Restrict Communicator", restrictComm);
7722 auto & slist = params->sublist(
"matrixmatrix: kernel params",
false);
7723 isMM = slist.get(
"isMatrixMatrix_TransferAndFillComplete",
false);
7724 mm_optimization_core_count = slist.get(
"MM_TAFC_OptimizationCoreCount",mm_optimization_core_count);
7726 overrideAllreduce = slist.get(
"MM_TAFC_OverrideAllreduceCheck",
false);
7727 if(getComm()->getSize() < mm_optimization_core_count && isMM) isMM =
false;
7728 if(reverseMode) isMM =
false;
7732 std::shared_ptr< ::Tpetra::Details::CommRequest> iallreduceRequest;
7734 int reduced_mismatch = 0;
7735 if (isMM && !overrideAllreduce) {
7738 const bool source_vals = ! getGraph ()->getImporter ().is_null();
7739 const bool target_vals = ! (rowTransfer.getExportLIDs ().size() == 0 ||
7740 rowTransfer.getRemoteLIDs ().size() == 0);
7741 mismatch = (source_vals != target_vals) ? 1 : 0;
7743 ::Tpetra::Details::iallreduce (mismatch, reduced_mismatch,
7744 Teuchos::REDUCE_MAX, * (getComm ()));
7747#ifdef HAVE_TPETRA_MMM_TIMINGS
7748 using Teuchos::TimeMonitor;
7750 if(!params.is_null())
7751 label = params->get(
"Timer Label",label);
7752 std::string prefix = std::string(
"Tpetra ")+ label + std::string(
": ");
7755 std::ostringstream os;
7756 if(isMM) os<<
":MMOpt";
7757 else os<<
":MMLegacy";
7761 Teuchos::TimeMonitor MMall(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC All") +tlstr ));
7769 const import_type* xferAsImport =
dynamic_cast<const import_type*
> (&rowTransfer);
7770 const export_type* xferAsExport =
dynamic_cast<const export_type*
> (&rowTransfer);
7771 TEUCHOS_TEST_FOR_EXCEPTION(
7772 xferAsImport ==
nullptr && xferAsExport ==
nullptr, std::invalid_argument,
7773 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' input "
7774 "argument must be either an Import or an Export, and its template "
7775 "parameters must match the corresponding template parameters of the "
7783 Teuchos::RCP<const import_type> xferDomainAsImport = Teuchos::rcp_dynamic_cast<const import_type> (domainTransfer);
7784 Teuchos::RCP<const export_type> xferDomainAsExport = Teuchos::rcp_dynamic_cast<const export_type> (domainTransfer);
7786 if(! domainTransfer.is_null()) {
7787 TEUCHOS_TEST_FOR_EXCEPTION(
7788 (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7789 "Tpetra::CrsMatrix::transferAndFillComplete: The 'domainTransfer' input "
7790 "argument must be either an Import or an Export, and its template "
7791 "parameters must match the corresponding template parameters of the "
7794 TEUCHOS_TEST_FOR_EXCEPTION(
7795 ( xferAsImport !=
nullptr || ! xferDomainAsImport.is_null() ) &&
7796 (( xferAsImport !=
nullptr && xferDomainAsImport.is_null() ) ||
7797 ( xferAsImport ==
nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
7798 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7799 "arguments must be of the same type (either Import or Export).");
7801 TEUCHOS_TEST_FOR_EXCEPTION(
7802 ( xferAsExport !=
nullptr || ! xferDomainAsExport.is_null() ) &&
7803 (( xferAsExport !=
nullptr && xferDomainAsExport.is_null() ) ||
7804 ( xferAsExport ==
nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
7805 "Tpetra::CrsMatrix::transferAndFillComplete: The 'rowTransfer' and 'domainTransfer' input "
7806 "arguments must be of the same type (either Import or Export).");
7812 const bool communication_needed = rowTransfer.getSourceMap ()->isDistributed ();
7816 RCP<const map_type> MyRowMap = reverseMode ?
7817 rowTransfer.getSourceMap () : rowTransfer.getTargetMap ();
7818 RCP<const map_type> MyColMap;
7819 RCP<const map_type> MyDomainMap = ! domainMap.is_null () ?
7820 domainMap : getDomainMap ();
7821 RCP<const map_type> MyRangeMap = ! rangeMap.is_null () ?
7822 rangeMap : getRangeMap ();
7823 RCP<const map_type> BaseRowMap = MyRowMap;
7824 RCP<const map_type> BaseDomainMap = MyDomainMap;
7832 if (! destMat.is_null ()) {
7843 const bool NewFlag = ! destMat->getGraph ()->isLocallyIndexed () &&
7844 ! destMat->getGraph ()->isGloballyIndexed ();
7845 TEUCHOS_TEST_FOR_EXCEPTION(
7846 ! NewFlag, std::invalid_argument,
"Tpetra::CrsMatrix::"
7847 "transferAndFillComplete: The input argument 'destMat' is only allowed "
7848 "to be nonnull, if its graph is empty (neither locally nor globally "
7857 TEUCHOS_TEST_FOR_EXCEPTION(
7858 ! destMat->getRowMap ()->isSameAs (*MyRowMap), std::invalid_argument,
7859 "Tpetra::CrsMatrix::transferAndFillComplete: The (row) Map of the "
7860 "input argument 'destMat' is not the same as the (row) Map specified "
7861 "by the input argument 'rowTransfer'.");
7862 TEUCHOS_TEST_FOR_EXCEPTION(
7863 ! destMat->checkSizes (*
this), std::invalid_argument,
7864 "Tpetra::CrsMatrix::transferAndFillComplete: You provided a nonnull "
7865 "destination matrix, but checkSizes() indicates that it is not a legal "
7866 "legal target for redistribution from the source matrix (*this). This "
7867 "may mean that they do not have the same dimensions.");
7881 TEUCHOS_TEST_FOR_EXCEPTION(
7882 ! (reverseMode || getRowMap ()->isSameAs (*rowTransfer.getSourceMap ())),
7883 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7884 "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7885 TEUCHOS_TEST_FOR_EXCEPTION(
7886 ! (! reverseMode || getRowMap ()->isSameAs (*rowTransfer.getTargetMap ())),
7887 std::invalid_argument,
"Tpetra::CrsMatrix::transferAndFillComplete: "
7888 "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7891 TEUCHOS_TEST_FOR_EXCEPTION(
7892 ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7893 std::invalid_argument,
7894 "Tpetra::CrsMatrix::transferAndFillComplete: The target map of the 'domainTransfer' input "
7895 "argument must be the same as the rebalanced domain map 'domainMap'");
7897 TEUCHOS_TEST_FOR_EXCEPTION(
7898 ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7899 std::invalid_argument,
7900 "Tpetra::CrsMatrix::transferAndFillComplete: The source map of the 'domainTransfer' input "
7901 "argument must be the same as the rebalanced domain map 'domainMap'");
7914 const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7915 ArrayView<const LO> ExportLIDs = reverseMode ?
7916 rowTransfer.getRemoteLIDs () : rowTransfer.getExportLIDs ();
7917 ArrayView<const LO> RemoteLIDs = reverseMode ?
7918 rowTransfer.getExportLIDs () : rowTransfer.getRemoteLIDs ();
7919 ArrayView<const LO> PermuteToLIDs = reverseMode ?
7920 rowTransfer.getPermuteFromLIDs () : rowTransfer.getPermuteToLIDs ();
7921 ArrayView<const LO> PermuteFromLIDs = reverseMode ?
7922 rowTransfer.getPermuteToLIDs () : rowTransfer.getPermuteFromLIDs ();
7923 Distributor& Distor = rowTransfer.getDistributor ();
7926 Teuchos::Array<int> SourcePids;
7927 Teuchos::Array<int> TargetPids;
7930 RCP<const map_type> ReducedRowMap, ReducedColMap,
7931 ReducedDomainMap, ReducedRangeMap;
7932 RCP<const Comm<int> > ReducedComm;
7936 if (destMat.is_null ()) {
7937 destMat = rcp (
new this_CRS_type (MyRowMap, 0, matrixparams));
7944 ReducedRowMap = MyRowMap->removeEmptyProcesses ();
7945 ReducedComm = ReducedRowMap.is_null () ?
7947 ReducedRowMap->getComm ();
7948 destMat->removeEmptyProcessesInPlace (ReducedRowMap);
7950 ReducedDomainMap = MyRowMap.getRawPtr () == MyDomainMap.getRawPtr () ?
7952 MyDomainMap->replaceCommWithSubset (ReducedComm);
7953 ReducedRangeMap = MyRowMap.getRawPtr () == MyRangeMap.getRawPtr () ?
7955 MyRangeMap->replaceCommWithSubset (ReducedComm);
7958 MyRowMap = ReducedRowMap;
7959 MyDomainMap = ReducedDomainMap;
7960 MyRangeMap = ReducedRangeMap;
7963 if (! ReducedComm.is_null ()) {
7964 MyPID = ReducedComm->getRank ();
7971 ReducedComm = MyRowMap->getComm ();
7980 RCP<const import_type> MyImporter = getGraph ()->getImporter ();
7983 bool bSameDomainMap = BaseDomainMap->isSameAs (*getDomainMap ());
7985 if (! restrictComm && ! MyImporter.is_null () && bSameDomainMap ) {
7992 Import_Util::getPids (*MyImporter, SourcePids,
false);
7994 else if (restrictComm && ! MyImporter.is_null () && bSameDomainMap) {
7997 IntVectorType SourceDomain_pids(getDomainMap (),
true);
7998 IntVectorType SourceCol_pids(getColMap());
8000 SourceDomain_pids.putScalar(MyPID);
8002 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter, INSERT);
8003 SourcePids.resize (getColMap ()->getLocalNumElements ());
8004 SourceCol_pids.get1dCopy (SourcePids ());
8006 else if (MyImporter.is_null ()) {
8008 SourcePids.resize (getColMap ()->getLocalNumElements ());
8009 SourcePids.assign (getColMap ()->getLocalNumElements (), MyPID);
8011 else if ( ! MyImporter.is_null () &&
8012 ! domainTransfer.is_null () ) {
8019 IntVectorType TargetDomain_pids (domainMap);
8020 TargetDomain_pids.putScalar (MyPID);
8023 IntVectorType SourceDomain_pids (getDomainMap ());
8026 IntVectorType SourceCol_pids (getColMap ());
8028 if (! reverseMode && ! xferDomainAsImport.is_null() ) {
8029 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsImport, INSERT);
8031 else if (reverseMode && ! xferDomainAsExport.is_null() ) {
8032 SourceDomain_pids.doExport (TargetDomain_pids, *xferDomainAsExport, INSERT);
8034 else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
8035 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsExport, INSERT);
8037 else if (reverseMode && ! xferDomainAsImport.is_null() ) {
8038 SourceDomain_pids.doImport (TargetDomain_pids, *xferDomainAsImport, INSERT);
8041 TEUCHOS_TEST_FOR_EXCEPTION(
8042 true, std::logic_error,
"Tpetra::CrsMatrix::"
8043 "transferAndFillComplete: Should never get here! "
8044 "Please report this bug to a Tpetra developer.");
8046 SourceCol_pids.doImport (SourceDomain_pids, *MyImporter, INSERT);
8047 SourcePids.resize (getColMap ()->getLocalNumElements ());
8048 SourceCol_pids.get1dCopy (SourcePids ());
8050 else if ( ! MyImporter.is_null () &&
8051 BaseDomainMap->isSameAs (*BaseRowMap) &&
8052 getDomainMap ()->isSameAs (*getRowMap ())) {
8055 IntVectorType TargetRow_pids (domainMap);
8056 IntVectorType SourceRow_pids (getRowMap ());
8057 IntVectorType SourceCol_pids (getColMap ());
8059 TargetRow_pids.putScalar (MyPID);
8060 if (! reverseMode && xferAsImport !=
nullptr) {
8061 SourceRow_pids.doExport (TargetRow_pids, *xferAsImport, INSERT);
8063 else if (reverseMode && xferAsExport !=
nullptr) {
8064 SourceRow_pids.doExport (TargetRow_pids, *xferAsExport, INSERT);
8066 else if (! reverseMode && xferAsExport !=
nullptr) {
8067 SourceRow_pids.doImport (TargetRow_pids, *xferAsExport, INSERT);
8069 else if (reverseMode && xferAsImport !=
nullptr) {
8070 SourceRow_pids.doImport (TargetRow_pids, *xferAsImport, INSERT);
8073 TEUCHOS_TEST_FOR_EXCEPTION(
8074 true, std::logic_error,
"Tpetra::CrsMatrix::"
8075 "transferAndFillComplete: Should never get here! "
8076 "Please report this bug to a Tpetra developer.");
8079 SourceCol_pids.doImport (SourceRow_pids, *MyImporter, INSERT);
8080 SourcePids.resize (getColMap ()->getLocalNumElements ());
8081 SourceCol_pids.get1dCopy (SourcePids ());
8084 TEUCHOS_TEST_FOR_EXCEPTION(
8085 true, std::invalid_argument,
"Tpetra::CrsMatrix::"
8086 "transferAndFillComplete: This method only allows either domainMap == "
8087 "getDomainMap (), or (domainMap == rowTransfer.getTargetMap () and "
8088 "getDomainMap () == getRowMap ()).");
8092 size_t constantNumPackets = destMat->constantNumberOfPackets ();
8093 if (constantNumPackets == 0) {
8094 destMat->reallocArraysForNumPacketsPerLid (ExportLIDs.size (),
8095 RemoteLIDs.size ());
8102 const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
8103 destMat->reallocImportsIfNeeded (rbufLen,
false,
nullptr);
8108 using Teuchos::outArg;
8109 using Teuchos::REDUCE_MAX;
8110 using Teuchos::reduceAll;
8113 RCP<const Teuchos::Comm<int> > comm = this->getComm ();
8114 const int myRank = comm->getRank ();
8116 std::ostringstream errStrm;
8120 Teuchos::ArrayView<size_t> numExportPacketsPerLID;
8123 destMat->numExportPacketsPerLID_.modify_host ();
8124 numExportPacketsPerLID =
8127 catch (std::exception& e) {
8128 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw: "
8129 << e.what () << std::endl;
8133 errStrm <<
"Proc " << myRank <<
": getArrayViewFromDualView threw "
8134 "an exception not a subclass of std::exception" << std::endl;
8138 if (! comm.is_null ()) {
8139 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8143 TEUCHOS_TEST_FOR_EXCEPTION(
8144 true, std::runtime_error,
"getArrayViewFromDualView threw an "
8145 "exception on at least one process.");
8149 std::ostringstream os;
8150 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8152 std::cerr << os.str ();
8157 numExportPacketsPerLID,
8160 constantNumPackets);
8162 catch (std::exception& e) {
8163 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw: "
8164 << e.what () << std::endl;
8168 errStrm <<
"Proc " << myRank <<
": packCrsMatrixWithOwningPIDs threw "
8169 "an exception not a subclass of std::exception" << std::endl;
8174 std::ostringstream os;
8175 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8177 std::cerr << os.str ();
8180 if (! comm.is_null ()) {
8181 reduceAll<int, int> (*comm, REDUCE_MAX, lclErr, outArg (gblErr));
8185 TEUCHOS_TEST_FOR_EXCEPTION(
8186 true, std::runtime_error,
"packCrsMatrixWithOwningPIDs threw an "
8187 "exception on at least one process.");
8192 destMat->numExportPacketsPerLID_.modify_host ();
8193 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
8196 std::ostringstream os;
8197 os << *verbosePrefix <<
"Calling packCrsMatrixWithOwningPIDs"
8199 std::cerr << os.str ();
8203 numExportPacketsPerLID,
8206 constantNumPackets);
8208 std::ostringstream os;
8209 os << *verbosePrefix <<
"Done with packCrsMatrixWithOwningPIDs"
8211 std::cerr << os.str ();
8216 if (! communication_needed) {
8218 std::ostringstream os;
8219 os << *verbosePrefix <<
"Communication not needed" << std::endl;
8220 std::cerr << os.str ();
8225 if (constantNumPackets == 0) {
8227 std::ostringstream os;
8228 os << *verbosePrefix <<
"Reverse mode, variable # packets / LID"
8230 std::cerr << os.str ();
8235 destMat->numExportPacketsPerLID_.sync_host ();
8236 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8238 destMat->numImportPacketsPerLID_.sync_host ();
8239 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8243 std::ostringstream os;
8244 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8246 std::cerr << os.str ();
8248 Distor.doReversePostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8249 destMat->numImportPacketsPerLID_.view_host());
8251 std::ostringstream os;
8252 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8254 std::cerr << os.str ();
8257 size_t totalImportPackets = 0;
8258 for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8259 totalImportPackets += numImportPacketsPerLID[i];
8264 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8265 verbosePrefix.get ());
8266 destMat->imports_.modify_host ();
8267 auto hostImports = destMat->imports_.view_host();
8270 destMat->exports_.sync_host ();
8271 auto hostExports = destMat->exports_.view_host();
8273 std::ostringstream os;
8274 os << *verbosePrefix <<
"Calling 4-arg doReversePostsAndWaits"
8276 std::cerr << os.str ();
8278 Distor.doReversePostsAndWaits (hostExports,
8279 numExportPacketsPerLID,
8281 numImportPacketsPerLID);
8283 std::ostringstream os;
8284 os << *verbosePrefix <<
"Finished 4-arg doReversePostsAndWaits"
8286 std::cerr << os.str ();
8291 std::ostringstream os;
8292 os << *verbosePrefix <<
"Reverse mode, constant # packets / LID"
8294 std::cerr << os.str ();
8296 destMat->imports_.modify_host ();
8297 auto hostImports = destMat->imports_.view_host();
8300 destMat->exports_.sync_host ();
8301 auto hostExports = destMat->exports_.view_host();
8303 std::ostringstream os;
8304 os << *verbosePrefix <<
"Calling 3-arg doReversePostsAndWaits"
8306 std::cerr << os.str ();
8308 Distor.doReversePostsAndWaits (hostExports,
8312 std::ostringstream os;
8313 os << *verbosePrefix <<
"Finished 3-arg doReversePostsAndWaits"
8315 std::cerr << os.str ();
8320 if (constantNumPackets == 0) {
8322 std::ostringstream os;
8323 os << *verbosePrefix <<
"Forward mode, variable # packets / LID"
8325 std::cerr << os.str ();
8330 destMat->numExportPacketsPerLID_.sync_host ();
8331 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
8333 destMat->numImportPacketsPerLID_.sync_host ();
8334 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
8337 std::ostringstream os;
8338 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8340 std::cerr << os.str ();
8342 Distor.doPostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1,
8343 destMat->numImportPacketsPerLID_.view_host());
8345 std::ostringstream os;
8346 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8348 std::cerr << os.str ();
8351 size_t totalImportPackets = 0;
8352 for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) {
8353 totalImportPackets += numImportPacketsPerLID[i];
8358 destMat->reallocImportsIfNeeded (totalImportPackets, verbose,
8359 verbosePrefix.get ());
8360 destMat->imports_.modify_host ();
8361 auto hostImports = destMat->imports_.view_host();
8364 destMat->exports_.sync_host ();
8365 auto hostExports = destMat->exports_.view_host();
8367 std::ostringstream os;
8368 os << *verbosePrefix <<
"Calling 4-arg doPostsAndWaits"
8370 std::cerr << os.str ();
8372 Distor.doPostsAndWaits (hostExports,
8373 numExportPacketsPerLID,
8375 numImportPacketsPerLID);
8377 std::ostringstream os;
8378 os << *verbosePrefix <<
"Finished 4-arg doPostsAndWaits"
8380 std::cerr << os.str ();
8385 std::ostringstream os;
8386 os << *verbosePrefix <<
"Forward mode, constant # packets / LID"
8388 std::cerr << os.str ();
8390 destMat->imports_.modify_host ();
8391 auto hostImports = destMat->imports_.view_host();
8394 destMat->exports_.sync_host ();
8395 auto hostExports = destMat->exports_.view_host();
8397 std::ostringstream os;
8398 os << *verbosePrefix <<
"Calling 3-arg doPostsAndWaits"
8400 std::cerr << os.str ();
8402 Distor.doPostsAndWaits (hostExports,
8406 std::ostringstream os;
8407 os << *verbosePrefix <<
"Finished 3-arg doPostsAndWaits"
8409 std::cerr << os.str ();
8420 destMat->numImportPacketsPerLID_.sync_host ();
8421 Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
8423 destMat->imports_.sync_host ();
8424 Teuchos::ArrayView<const char> hostImports =
8428 std::ostringstream os;
8429 os << *verbosePrefix <<
"Calling unpackAndCombineWithOwningPIDsCount"
8431 std::cerr << os.str ();
8437 numImportPacketsPerLID,
8444 std::ostringstream os;
8445 os << *verbosePrefix <<
"unpackAndCombineWithOwningPIDsCount returned "
8446 << mynnz << std::endl;
8447 std::cerr << os.str ();
8449 size_t N = BaseRowMap->getLocalNumElements ();
8452 ArrayRCP<size_t> CSR_rowptr(N+1);
8453 ArrayRCP<GO> CSR_colind_GID;
8454 ArrayRCP<LO> CSR_colind_LID;
8455 ArrayRCP<Scalar> CSR_vals;
8456 CSR_colind_GID.resize (mynnz);
8457 CSR_vals.resize (mynnz);
8461 if (
typeid (LO) ==
typeid (GO)) {
8462 CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO> (CSR_colind_GID);
8465 CSR_colind_LID.resize (mynnz);
8469 std::ostringstream os;
8470 os << *verbosePrefix <<
"Calling unpackAndCombineIntoCrsArrays"
8472 std::cerr << os.str ();
8482 numImportPacketsPerLID,
8493 Teuchos::av_reinterpret_cast<impl_scalar_type> (CSR_vals ()),
8499 for(
size_t i=0; i<static_cast<size_t>(TargetPids.size()); i++)
8501 if(TargetPids[i] == -1) TargetPids[i] = MyPID;
8509 Teuchos::Array<int> RemotePids;
8511 std::ostringstream os;
8512 os << *verbosePrefix <<
"Calling lowCommunicationMakeColMapAndReindex"
8514 std::cerr << os.str ();
8516 Import_Util::lowCommunicationMakeColMapAndReindex (CSR_rowptr (),
8525 std::ostringstream os;
8526 os << *verbosePrefix <<
"restrictComm="
8527 << (restrictComm ?
"true" :
"false") << std::endl;
8528 std::cerr << os.str ();
8535 ReducedColMap = (MyRowMap.getRawPtr () == MyColMap.getRawPtr ()) ?
8537 MyColMap->replaceCommWithSubset (ReducedComm);
8538 MyColMap = ReducedColMap;
8543 std::ostringstream os;
8544 os << *verbosePrefix <<
"Calling replaceColMap" << std::endl;
8545 std::cerr << os.str ();
8547 destMat->replaceColMap (MyColMap);
8554 if (ReducedComm.is_null ()) {
8556 std::ostringstream os;
8557 os << *verbosePrefix <<
"I am no longer in the communicator; "
8558 "returning" << std::endl;
8559 std::cerr << os.str ();
8567 if ((! reverseMode && xferAsImport !=
nullptr) ||
8568 (reverseMode && xferAsExport !=
nullptr)) {
8570 std::ostringstream os;
8571 os << *verbosePrefix <<
"Calling sortCrsEntries" << endl;
8572 std::cerr << os.str ();
8574 Import_Util::sortCrsEntries (CSR_rowptr (),
8578 else if ((! reverseMode && xferAsExport !=
nullptr) ||
8579 (reverseMode && xferAsImport !=
nullptr)) {
8581 std::ostringstream os;
8582 os << *verbosePrefix <<
"Calling sortAndMergeCrsEntries"
8584 std::cerr << os.str();
8586 Import_Util::sortAndMergeCrsEntries (CSR_rowptr (),
8589 if (CSR_rowptr[N] != mynnz) {
8590 CSR_colind_LID.resize (CSR_rowptr[N]);
8591 CSR_vals.resize (CSR_rowptr[N]);
8595 TEUCHOS_TEST_FOR_EXCEPTION(
8596 true, std::logic_error,
"Tpetra::CrsMatrix::"
8597 "transferAndFillComplete: Should never get here! "
8598 "Please report this bug to a Tpetra developer.");
8605 std::ostringstream os;
8606 os << *verbosePrefix <<
"Calling destMat->setAllValues" << endl;
8607 std::cerr << os.str ();
8615 destMat->setAllValues (CSR_rowptr, CSR_colind_LID, CSR_vals);
8621 Teuchos::ParameterList esfc_params;
8623 RCP<import_type> MyImport;
8626 if (iallreduceRequest.get () !=
nullptr) {
8628 std::ostringstream os;
8629 os << *verbosePrefix <<
"Calling iallreduceRequest->wait()"
8631 std::cerr << os.str ();
8633 iallreduceRequest->wait ();
8634 if (reduced_mismatch != 0) {
8640#ifdef HAVE_TPETRA_MMM_TIMINGS
8641 Teuchos::TimeMonitor MMisMM (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM Block")));
8646 std::ostringstream os;
8647 os << *verbosePrefix <<
"Getting CRS pointers" << endl;
8648 std::cerr << os.str ();
8651 Teuchos::ArrayRCP<LocalOrdinal> type3LIDs;
8652 Teuchos::ArrayRCP<int> type3PIDs;
8653 auto rowptr = getCrsGraph()->getLocalRowPtrsHost();
8654 auto colind = getCrsGraph()->getLocalIndicesHost();
8657 std::ostringstream os;
8658 os << *verbosePrefix <<
"Calling reverseNeighborDiscovery" << std::endl;
8659 std::cerr << os.str ();
8663#ifdef HAVE_TPETRA_MMM_TIMINGS
8664 TimeMonitor tm_rnd (*TimeMonitor::getNewTimer(prefix + std::string(
"isMMrevNeighDis")));
8666 Import_Util::reverseNeighborDiscovery(*
this,
8678 std::ostringstream os;
8679 os << *verbosePrefix <<
"Done with reverseNeighborDiscovery" << std::endl;
8680 std::cerr << os.str ();
8683 Teuchos::ArrayView<const int> EPID1 = MyImporter.is_null() ? Teuchos::ArrayView<const int>() : MyImporter->getExportPIDs();
8684 Teuchos::ArrayView<const LO> ELID1 = MyImporter.is_null() ? Teuchos::ArrayView<const LO>() : MyImporter->getExportLIDs();
8686 Teuchos::ArrayView<const int> TEPID2 = rowTransfer.getExportPIDs();
8687 Teuchos::ArrayView<const LO> TELID2 = rowTransfer.getExportLIDs();
8689 const int numCols = getGraph()->getColMap()->getLocalNumElements();
8691 std::vector<bool> IsOwned(numCols,
true);
8692 std::vector<int> SentTo(numCols,-1);
8693 if (! MyImporter.is_null ()) {
8694 for (
auto && rlid : MyImporter->getRemoteLIDs()) {
8695 IsOwned[rlid]=
false;
8699 std::vector<std::pair<int,GO> > usrtg;
8700 usrtg.reserve(TEPID2.size());
8703 const auto& colMap = * (this->getColMap ());
8704 for (Array_size_type i = 0; i < TEPID2.size (); ++i) {
8705 const LO row = TELID2[i];
8706 const int pid = TEPID2[i];
8707 for (
auto j = rowptr[row]; j < rowptr[row+1]; ++j) {
8708 const int col = colind[j];
8709 if (IsOwned[col] && SentTo[col] != pid) {
8711 GO gid = colMap.getGlobalElement (col);
8712 usrtg.push_back (std::pair<int,GO> (pid, gid));
8719 std::sort(usrtg.begin(),usrtg.end());
8720 auto eopg = std ::unique(usrtg.begin(),usrtg.end());
8722 usrtg.erase(eopg,usrtg.end());
8725 Teuchos::ArrayRCP<int> EPID2=Teuchos::arcp(
new int[type2_us_size],0,type2_us_size,
true);
8726 Teuchos::ArrayRCP< LO> ELID2=Teuchos::arcp(
new LO[type2_us_size],0,type2_us_size,
true);
8729 for(
auto && p : usrtg) {
8730 EPID2[pos]= p.first;
8731 ELID2[pos]= this->getDomainMap()->getLocalElement(p.second);
8735 Teuchos::ArrayView<int> EPID3 = type3PIDs();
8736 Teuchos::ArrayView< LO> ELID3 = type3LIDs();
8737 GO InfGID = std::numeric_limits<GO>::max();
8738 int InfPID = INT_MAX;
8742#define TPETRA_MIN3(x,y,z) ((x)<(y)?(std::min(x,z)):(std::min(y,z)))
8743 int i1=0, i2=0, i3=0;
8744 int Len1 = EPID1.size();
8745 int Len2 = EPID2.size();
8746 int Len3 = EPID3.size();
8748 int MyLen=Len1+Len2+Len3;
8749 Teuchos::ArrayRCP<LO> userExportLIDs = Teuchos::arcp(
new LO[MyLen],0,MyLen,
true);
8750 Teuchos::ArrayRCP<int> userExportPIDs = Teuchos::arcp(
new int[MyLen],0,MyLen,
true);
8753 while(i1 < Len1 || i2 < Len2 || i3 < Len3){
8754 int PID1 = (i1<Len1)?(EPID1[i1]):InfPID;
8755 int PID2 = (i2<Len2)?(EPID2[i2]):InfPID;
8756 int PID3 = (i3<Len3)?(EPID3[i3]):InfPID;
8758 GO GID1 = (i1<Len1)?getDomainMap()->getGlobalElement(ELID1[i1]):InfGID;
8759 GO GID2 = (i2<Len2)?getDomainMap()->getGlobalElement(ELID2[i2]):InfGID;
8760 GO GID3 = (i3<Len3)?getDomainMap()->getGlobalElement(ELID3[i3]):InfGID;
8762 int MIN_PID = TPETRA_MIN3(PID1,PID2,PID3);
8763 GO MIN_GID = TPETRA_MIN3( ((PID1==MIN_PID)?GID1:InfGID), ((PID2==MIN_PID)?GID2:InfGID), ((PID3==MIN_PID)?GID3:InfGID));
8767 bool added_entry=
false;
8769 if(PID1 == MIN_PID && GID1 == MIN_GID){
8770 userExportLIDs[iloc]=ELID1[i1];
8771 userExportPIDs[iloc]=EPID1[i1];
8776 if(PID2 == MIN_PID && GID2 == MIN_GID){
8778 userExportLIDs[iloc]=ELID2[i2];
8779 userExportPIDs[iloc]=EPID2[i2];
8785 if(PID3 == MIN_PID && GID3 == MIN_GID){
8787 userExportLIDs[iloc]=ELID3[i3];
8788 userExportPIDs[iloc]=EPID3[i3];
8796 std::ostringstream os;
8797 os << *verbosePrefix <<
"Create Import" << std::endl;
8798 std::cerr << os.str ();
8801#ifdef HAVE_TPETRA_MMM_TIMINGS
8802 auto ismmIctor(*TimeMonitor::getNewTimer(prefix + std::string(
"isMMIportCtor")));
8804 Teuchos::RCP<Teuchos::ParameterList> plist = rcp(
new Teuchos::ParameterList());
8806 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
8807 MyImport = rcp (
new import_type (MyDomainMap,
8810 userExportLIDs.view(0,iloc).getConst(),
8811 userExportPIDs.view(0,iloc).getConst(),
8816 std::ostringstream os;
8817 os << *verbosePrefix <<
"Call expertStaticFillComplete" << std::endl;
8818 std::cerr << os.str ();
8822#ifdef HAVE_TPETRA_MMM_TIMINGS
8823 TimeMonitor esfc (*TimeMonitor::getNewTimer(prefix + std::string(
"isMM::destMat->eSFC")));
8824 esfc_params.set(
"Timer Label",label+std::string(
"isMM eSFC"));
8826 if(!params.is_null())
8827 esfc_params.set(
"compute global constants",params->get(
"compute global constants",
true));
8828 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap, MyImport,Teuchos::null,rcp(
new Teuchos::ParameterList(esfc_params)));
8834#ifdef HAVE_TPETRA_MMM_TIMINGS
8835 TimeMonitor MMnotMMblock (*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMblock")));
8838 std::ostringstream os;
8839 os << *verbosePrefix <<
"Create Import" << std::endl;
8840 std::cerr << os.str ();
8843#ifdef HAVE_TPETRA_MMM_TIMINGS
8844 TimeMonitor notMMIcTor(*TimeMonitor::getNewTimer(prefix + std::string(
"TAFC notMMCreateImporter")));
8846 Teuchos::RCP<Teuchos::ParameterList> mypars = rcp(
new Teuchos::ParameterList);
8847 mypars->set(
"Timer Label",
"notMMFrom_tAFC");
8848 if ((MyDomainMap != MyColMap) && (!MyDomainMap->isSameAs(*MyColMap)))
8849 MyImport = rcp (
new import_type (MyDomainMap, MyColMap, RemotePids, mypars));
8852 std::ostringstream os;
8853 os << *verbosePrefix <<
"Call expertStaticFillComplete" << endl;
8854 std::cerr << os.str ();
8857#ifdef HAVE_TPETRA_MMM_TIMINGS
8858 TimeMonitor esfcnotmm(*TimeMonitor::getNewTimer(prefix + std::string(
"notMMdestMat->expertStaticFillComplete")));
8859 esfc_params.set(
"Timer Label",prefix+std::string(
"notMM eSFC"));
8861 esfc_params.set(
"Timer Label",std::string(
"notMM eSFC"));
8864 if (!params.is_null ()) {
8865 esfc_params.set (
"compute global constants",
8866 params->get (
"compute global constants",
true));
8868 destMat->expertStaticFillComplete (MyDomainMap, MyRangeMap,
8869 MyImport, Teuchos::null,
8870 rcp (
new Teuchos::ParameterList (esfc_params)));
8874 std::ostringstream os;
8875 os << *verbosePrefix <<
"Done" << endl;
8876 std::cerr << os.str ();
8881 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8886 const Teuchos::RCP<const map_type>& domainMap,
8887 const Teuchos::RCP<const map_type>& rangeMap,
8888 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
8890 transferAndFillComplete (destMatrix, importer, Teuchos::null, domainMap, rangeMap, params);
8893 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8899 const Teuchos::RCP<const map_type>& domainMap,
8900 const Teuchos::RCP<const map_type>& rangeMap,
8901 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
8903 transferAndFillComplete (destMatrix, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
8906 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8911 const Teuchos::RCP<const map_type>& domainMap,
8912 const Teuchos::RCP<const map_type>& rangeMap,
8913 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
8915 transferAndFillComplete (destMatrix, exporter, Teuchos::null, domainMap, rangeMap, params);
8918 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
8924 const Teuchos::RCP<const map_type>& domainMap,
8925 const Teuchos::RCP<const map_type>& rangeMap,
8926 const Teuchos::RCP<Teuchos::ParameterList>& params)
const
8928 transferAndFillComplete (destMatrix, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
8939#define TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR,LO,GO,NODE) \
8941 template class CrsMatrix< SCALAR , LO , GO , NODE >;
8943#define TPETRA_CRSMATRIX_CONVERT_INSTANT(SO,SI,LO,GO,NODE) \
8945 template Teuchos::RCP< CrsMatrix< SO , LO , GO , NODE > > \
8946 CrsMatrix< SI , LO , GO , NODE >::convert< SO > () const;
8948#define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
8950 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
8951 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
8952 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8953 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8954 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& importer, \
8955 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8956 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8957 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
8958 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8959 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8960 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
8961 const Teuchos::RCP<Teuchos::ParameterList>& params);
8963#define TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
8965 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
8966 importAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
8967 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8968 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8969 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowImporter, \
8970 const Import<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8971 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8972 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainImporter, \
8973 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8974 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8975 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
8976 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8977 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8978 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
8979 const Teuchos::RCP<Teuchos::ParameterList>& params);
8982#define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
8984 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
8985 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
8986 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8987 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8988 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& exporter, \
8989 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8990 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8991 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
8992 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
8993 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
8994 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
8995 const Teuchos::RCP<Teuchos::ParameterList>& params);
8997#define TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
8999 Teuchos::RCP<CrsMatrix<SCALAR, LO, GO, NODE> > \
9000 exportAndFillCompleteCrsMatrix (const Teuchos::RCP<const CrsMatrix<SCALAR, LO, GO, NODE> >& sourceMatrix, \
9001 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9002 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9003 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& rowExporter, \
9004 const Export<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9005 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9006 CrsMatrix<SCALAR, LO, GO, NODE>::node_type>& domainExporter, \
9007 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9008 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9009 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& domainMap, \
9010 const Teuchos::RCP<const Map<CrsMatrix<SCALAR, LO, GO, NODE>::local_ordinal_type, \
9011 CrsMatrix<SCALAR, LO, GO, NODE>::global_ordinal_type, \
9012 CrsMatrix<SCALAR, LO, GO, NODE>::node_type> >& rangeMap, \
9013 const Teuchos::RCP<Teuchos::ParameterList>& params);
9016#define TPETRA_CRSMATRIX_INSTANT(SCALAR, LO, GO ,NODE) \
9017 TPETRA_CRSMATRIX_MATRIX_INSTANT(SCALAR, LO, GO, NODE) \
9018 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9019 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT(SCALAR, LO, GO, NODE) \
9020 TPETRA_CRSMATRIX_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE) \
9021 TPETRA_CRSMATRIX_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(SCALAR, LO, GO, NODE)
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Declare and define Tpetra::Details::copyConvert, an implementation detail of Tpetra (in particular,...
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular,...
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary,...
Functions for manipulating CRS arrays.
Declaration of a function that prints strings from each process.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Declaration of Tpetra::Details::iallreduce.
Declaration and definition of Tpetra::Details::leftScaleLocalCrsMatrix.
KOKKOS_FUNCTION size_t packRow(const LocalMapType &col_map, const Kokkos::View< Packet *, BufferDeviceType > &exports, const InputLidsType &lids_in, const InputPidsType &pids_in, const size_t offset, const size_t num_ent, const bool pack_pids)
Packs a single row of the CrsGraph.
Declaration and definition of Tpetra::Details::rightScaleLocalCrsMatrix.
KOKKOS_FUNCTION int unpackRow(const Kokkos::View< GO *, Device, Kokkos::MemoryUnmanaged > &gids_out, const Kokkos::View< int *, Device, Kokkos::MemoryUnmanaged > &pids_out, const Kokkos::View< const Packet *, BufferDevice > &imports, const size_t offset, const size_t num_ent)
Unpack a single row of a CrsGraph.
Utility functions for packing and unpacking sparse matrix entries.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
#define TPETRA_ABUSE_WARNING(throw_exception_test, Exception, msg)
Handle an abuse warning, according to HAVE_TPETRA_THROW_ABUSE_WARNINGS and HAVE_TPETRA_PRINT_ABUSE_WA...
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
void reindexColumns(const Teuchos::RCP< const map_type > &newColMap, const Teuchos::RCP< const import_type > &newImport=Teuchos::null, const bool sortIndicesInEachRow=true)
Reindex the column indices in place, and replace the column Map. Optionally, replace the Import objec...
global_inds_dualv_type::t_host::const_type getGlobalIndsViewHost(const RowInfo &rowinfo) const
Get a const, globally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myR...
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Get the number of entries in the given row (local index).
local_inds_wdv_type lclIndsUnpacked_wdv
Local ordinals of column indices for all rows Valid when isLocallyIndexed is true If OptimizedStorage...
RowInfo getRowInfoFromGlobalRowIndex(const global_ordinal_type gblRow) const
Get information about the locally owned row with global index gblRow.
size_t findGlobalIndices(const RowInfo &rowInfo, const Teuchos::ArrayView< const global_ordinal_type > &indices, std::function< void(const size_t, const size_t, const size_t)> fun) const
Finds indices in the given row.
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
RowInfo getRowInfo(const local_ordinal_type myRow) const
Get information about the locally owned row with local index myRow.
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
bool isFillComplete() const override
Whether fillComplete() has been called and the graph is in compute mode.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the domain of this graph.
local_inds_dualv_type::t_host::const_type getLocalIndsViewHost(const RowInfo &rowinfo) const
Get a const, locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(myRo...
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
size_t insertGlobalIndicesImpl(const local_ordinal_type lclRow, const global_ordinal_type inputGblColInds[], const size_t numInputInds)
Insert global indices, using an input local row index.
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
local_inds_dualv_type::t_host getLocalIndsViewHostNonConst(const RowInfo &rowinfo)
Get a ReadWrite locally indexed view of the locally owned row myRow, such that rowinfo = getRowInfo(m...
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
bool isGloballyIndexed() const override
Whether the graph's column indices are stored as global indices.
bool isLocallyIndexed() const override
Whether the graph's column indices are stored as local indices.
size_t getLocalNumRows() const override
Returns the number of graph rows owned on the calling node.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries.
bool isGloballyIndexed() const override
Whether the matrix is globally indexed on the calling process.
std::map< GlobalOrdinal, std::pair< Teuchos::Array< GlobalOrdinal >, Teuchos::Array< Scalar > > > nonlocals_
Nonlocal data added using insertGlobalValues().
Details::EStorageStatus storageStatus_
Status of the matrix's storage, when not in a fill-complete state.
typename device_type::execution_space execution_space
The Kokkos execution space.
CrsGraph< LocalOrdinal, GlobalOrdinal, Node > crs_graph_type
The CrsGraph specialization suitable for this CrsMatrix specialization.
bool haveGlobalConstants() const
Returns true if globalConstants have been computed; false otherwise.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, over all processes in the matrix's communicator.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const override
Number of entries in the sparse matrix in the given global row, on the calling (MPI) process.
GlobalOrdinal global_ordinal_type
The type of each global index in the matrix.
size_t getLocalNumCols() const override
The number of columns connected to the locally owned rows of this matrix.
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
bool hasColMap() const override
Whether the matrix has a well-defined column Map.
size_t getNumEntriesInLocalRow(local_ordinal_type localRow) const override
Number of entries in the sparse matrix in the given local row, on the calling (MPI) process.
Teuchos::RCP< MV > exportMV_
Row Map MultiVector used in apply().
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
bool isFillActive() const
Whether the matrix is not fill complete.
global_size_t getGlobalNumCols() const override
The number of global columns in the matrix.
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
Teuchos::RCP< MV > importMV_
Column Map MultiVector used in apply().
size_t getLocalNumEntries() const override
The local number of entries in this matrix.
typename Node::device_type device_type
The Kokkos device type.
bool fillComplete_
Whether the matrix is fill complete.
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
size_t getLocalMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
void checkInternalState() const
Check that this object's state is sane; throw if it's not.
GlobalOrdinal getIndexBase() const override
The index base for global indices for this matrix.
Scalar scalar_type
The type of each entry in the matrix.
LocalOrdinal local_ordinal_type
The type of each local index in the matrix.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
typename Kokkos::ArithTraits< impl_scalar_type >::mag_type mag_type
Type of a norm result.
global_size_t getGlobalNumEntries() const override
The global number of entries in this matrix.
size_t getLocalNumRows() const override
The number of matrix rows owned by the calling process.
bool isFillComplete() const override
Whether the matrix is fill complete.
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
typename row_matrix_type::impl_scalar_type impl_scalar_type
The type used internally in place of Scalar.
local_matrix_device_type::values_type::const_type getLocalValuesDevice(Access::ReadOnlyStruct s) const
Get the Kokkos local values on device, read only.
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Resume operations that may change the values or structure of the matrix.
bool isStorageOptimized() const
Returns true if storage has been optimized.
Description of Tpetra's behavior.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t rowImbalanceThreshold()
Threshold for deciding if a local matrix is "imbalanced" in the number of entries per row....
bool isLocallyComplete() const
Is this Export or Import locally complete?
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, const CombineMode CM, const bool restrictedMode=false)
Export data into this object using an Export object ("forward mode").
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
bool isDistributed() const
Whether this is a globally distributed object.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
A parallel distribution of indices over processes.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
One or more distributed dense vectors.
void reduce()
Sum values of a locally replicated multivector across all processes.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
size_t getLocalLength() const
Local number of rows on the calling process.
size_t getNumVectors() const
Number of columns in the multivector.
dual_view_type::t_dev::const_type getLocalViewDevice(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on device. This requires that th...
dual_view_type::t_host::const_type getLocalViewHost(Access::ReadOnlyStruct) const
Return a read-only, up-to-date view of this MultiVector's local data on host. This requires that ther...
bool isConstantStride() const
Whether this multivector has constant stride between columns.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRangeMap() const =0
The Map associated with the range of this operator, which must be compatible with Y....
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getDomainMap() const =0
The Map associated with the domain of this operator, which must be compatible with X....
A read-only, row-oriented interface to a sparse matrix.
virtual Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > getRowMap() const =0
The Map that describes the distribution of rows over processes.
virtual void getGlobalRowCopy(GlobalOrdinal GlobalRow, nonconst_global_inds_host_view_type &Indices, nonconst_values_host_view_type &Values, size_t &NumEntries) const =0
Get a copy of the given global row's entries.
virtual size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const =0
The current number of entries on the calling process in the specified local row.
Abstract base class for objects that can be the source of an Import or Export operation.
A distributed dense vector.
Implementation details of Tpetra.
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices_wdv, const Padding &padding, const int my_rank, const bool verbose)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries....
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
void packCrsMatrixWithOwningPIDs(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, Kokkos::DualView< char *, typename DistObject< char, LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets)
Pack specified entries of the given local sparse matrix for communication.
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
size_t global_size_t
Global size_t object.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified,...
void merge2(IT1 &indResultOut, IT2 &valResultOut, IT1 indBeg, IT1 indEnd, IT2 valBeg, IT2)
Merge values in place, additively, with the same index.
CombineMode
Rule for combining data in an Import or Export.
@ REPLACE
Replace existing values with new values.
@ ABSMAX
Replace old value with maximum of magnitudes of old and new values.
@ ADD_ASSIGN
Accumulate new values into existing values (may not be supported in all classes)
@ INSERT
Insert new values that don't currently exist.
@ ZERO
Replace old values with zero.
Functor for the the ABSMAX CombineMode of Import and Export operations.
Scalar operator()(const Scalar &x, const Scalar &y)
Return the maximum of the magnitudes (absolute values) of x and y.
Traits class for packing / unpacking data of type T.
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.