Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Map_def.hpp
Go to the documentation of this file.
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// ************************************************************************
38// @HEADER
39
44
45#ifndef TPETRA_MAP_DEF_HPP
46#define TPETRA_MAP_DEF_HPP
47
48#include "Tpetra_Directory.hpp" // must include for implicit instantiation to work
50#include "Tpetra_Details_FixedHashTable.hpp"
53#include "Tpetra_Core.hpp"
54#include "Tpetra_Util.hpp"
55#include "Teuchos_as.hpp"
56#include "Teuchos_TypeNameTraits.hpp"
57#include "Teuchos_CommHelpers.hpp"
58#include "Tpetra_Details_mpiIsInitialized.hpp"
59#include "Tpetra_Details_extractMpiCommFromTeuchos.hpp" // teuchosCommIsAnMpiComm
61#include <memory>
62#include <sstream>
63#include <stdexcept>
64#include <typeinfo>
65
66namespace { // (anonymous)
67
68 void
69 checkMapInputArray (const char ctorName[],
70 const void* indexList,
71 const size_t indexListSize,
72 const Teuchos::Comm<int>* const comm)
73 {
75
76 const bool debug = Behavior::debug("Map");
77 if (debug) {
78 using Teuchos::outArg;
79 using Teuchos::REDUCE_MIN;
80 using Teuchos::reduceAll;
81 using std::endl;
82
83 const int myRank = comm == nullptr ? 0 : comm->getRank ();
84 const bool verbose = Behavior::verbose("Map");
85 std::ostringstream lclErrStrm;
86 int lclSuccess = 1;
87
88 if (indexListSize != 0 && indexList == nullptr) {
89 lclSuccess = 0;
90 if (verbose) {
91 lclErrStrm << "Proc " << myRank << ": indexList is null, "
92 "but indexListSize=" << indexListSize << " != 0." << endl;
93 }
94 }
95 int gblSuccess = 0; // output argument
96 reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
97 if (gblSuccess != 1) {
98 std::ostringstream gblErrStrm;
99 gblErrStrm << "Tpetra::Map constructor " << ctorName <<
100 " detected a problem with the input array "
101 "(raw array, Teuchos::ArrayView, or Kokkos::View) "
102 "of global indices." << endl;
103 if (verbose) {
104 using ::Tpetra::Details::gathervPrint;
105 gathervPrint (gblErrStrm, lclErrStrm.str (), *comm);
106 }
107 TEUCHOS_TEST_FOR_EXCEPTION
108 (true, std::invalid_argument, gblErrStrm.str ());
109 }
110 }
111 }
112} // namespace (anonymous)
113
114namespace Tpetra {
115
116 template <class LocalOrdinal, class GlobalOrdinal, class Node>
118 Map () :
119 comm_ (new Teuchos::SerialComm<int> ()),
120 indexBase_ (0),
121 numGlobalElements_ (0),
122 numLocalElements_ (0),
123 minMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
124 maxMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
125 minAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
126 maxAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
127 firstContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
128 lastContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
129 uniform_ (false), // trivially
130 contiguous_ (false),
131 distributed_ (false), // no communicator yet
132 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
133 {
135 }
136
137
138 template <class LocalOrdinal, class GlobalOrdinal, class Node>
140 Map (const global_size_t numGlobalElements,
141 const global_ordinal_type indexBase,
142 const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
143 const LocalGlobal lOrG) :
144 comm_ (comm),
145 uniform_ (true),
146 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
147 {
148 using Teuchos::as;
149 using Teuchos::broadcast;
150 using Teuchos::outArg;
151 using Teuchos::reduceAll;
152 using Teuchos::REDUCE_MIN;
153 using Teuchos::REDUCE_MAX;
154 using Teuchos::typeName;
155 using std::endl;
156 using GO = global_ordinal_type;
157 using GST = global_size_t;
158 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
159 const char funcName[] = "Map(gblNumInds,indexBase,comm,LG)";
160 const char exPfx[] =
161 "Tpetra::Map::Map(gblNumInds,indexBase,comm,LG): ";
162
163 const bool debug = Details::Behavior::debug("Map");
164 const bool verbose = Details::Behavior::verbose("Map");
165 std::unique_ptr<std::string> prefix;
166 if (verbose) {
167 prefix = Details::createPrefix(
168 comm_.getRawPtr(), "Map", funcName);
169 std::ostringstream os;
170 os << *prefix << "Start" << endl;
171 std::cerr << os.str();
172 }
174
175 // In debug mode only, check whether numGlobalElements and
176 // indexBase are the same over all processes in the communicator.
177 if (debug) {
178 GST proc0NumGlobalElements = numGlobalElements;
179 broadcast(*comm_, 0, outArg(proc0NumGlobalElements));
180 GST minNumGlobalElements = numGlobalElements;
181 GST maxNumGlobalElements = numGlobalElements;
182 reduceAll(*comm, REDUCE_MIN, numGlobalElements,
183 outArg(minNumGlobalElements));
184 reduceAll(*comm, REDUCE_MAX, numGlobalElements,
185 outArg(maxNumGlobalElements));
186 TEUCHOS_TEST_FOR_EXCEPTION
187 (minNumGlobalElements != maxNumGlobalElements ||
188 numGlobalElements != minNumGlobalElements,
189 std::invalid_argument, exPfx << "All processes must "
190 "provide the same number of global elements. Process 0 set "
191 "numGlobalElements="<< proc0NumGlobalElements << ". The "
192 "calling process " << comm->getRank() << " set "
193 "numGlobalElements=" << numGlobalElements << ". The min "
194 "and max values over all processes are "
195 << minNumGlobalElements << " resp. " << maxNumGlobalElements
196 << ".");
197
198 GO proc0IndexBase = indexBase;
199 broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
200 GO minIndexBase = indexBase;
201 GO maxIndexBase = indexBase;
202 reduceAll(*comm, REDUCE_MIN, indexBase, outArg(minIndexBase));
203 reduceAll(*comm, REDUCE_MAX, indexBase, outArg(maxIndexBase));
204 TEUCHOS_TEST_FOR_EXCEPTION
205 (minIndexBase != maxIndexBase || indexBase != minIndexBase,
206 std::invalid_argument, exPfx << "All processes must "
207 "provide the same indexBase argument. Process 0 set "
208 "indexBase=" << proc0IndexBase << ". The calling process "
209 << comm->getRank() << " set indexBase=" << indexBase
210 << ". The min and max values over all processes are "
211 << minIndexBase << " resp. " << maxIndexBase << ".");
212 }
213
214 // Distribute the elements across the processes in the given
215 // communicator so that global IDs (GIDs) are
216 //
217 // - Nonoverlapping (only one process owns each GID)
218 // - Contiguous (the sequence of GIDs is nondecreasing, and no two
219 // adjacent GIDs differ by more than one)
220 // - As evenly distributed as possible (the numbers of GIDs on two
221 // different processes do not differ by more than one)
222
223 // All processes have the same numGlobalElements, but we still
224 // need to check that it is valid. numGlobalElements must be
225 // positive and not the "invalid" value (GSTI).
226 //
227 // This comparison looks funny, but it avoids compiler warnings
228 // for comparing unsigned integers (numGlobalElements_in is a
229 // GST, which is unsigned) while still working if we
230 // later decide to make GST signed.
231 TEUCHOS_TEST_FOR_EXCEPTION(
232 (numGlobalElements < 1 && numGlobalElements != 0),
233 std::invalid_argument, exPfx << "numGlobalElements (= "
234 << numGlobalElements << ") must be nonnegative.");
235
236 TEUCHOS_TEST_FOR_EXCEPTION
237 (numGlobalElements == GSTI, std::invalid_argument, exPfx <<
238 "You provided numGlobalElements = Teuchos::OrdinalTraits<"
239 "Tpetra::global_size_t>::invalid(). This version of the "
240 "constructor requires a valid value of numGlobalElements. "
241 "You probably mistook this constructor for the \"contiguous "
242 "nonuniform\" constructor, which can compute the global "
243 "number of elements for you if you set numGlobalElements to "
244 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid().");
245
246 size_t numLocalElements = 0; // will set below
247 if (lOrG == GloballyDistributed) {
248 // Compute numLocalElements:
249 //
250 // If numGlobalElements == numProcs * B + remainder,
251 // then Proc r gets B+1 elements if r < remainder,
252 // and B elements if r >= remainder.
253 //
254 // This strategy is valid for any value of numGlobalElements and
255 // numProcs, including the following border cases:
256 // - numProcs == 1
257 // - numLocalElements < numProcs
258 //
259 // In the former case, remainder == 0 && numGlobalElements ==
260 // numLocalElements. In the latter case, remainder ==
261 // numGlobalElements && numLocalElements is either 0 or 1.
262 const GST numProcs = static_cast<GST> (comm_->getSize ());
263 const GST myRank = static_cast<GST> (comm_->getRank ());
264 const GST quotient = numGlobalElements / numProcs;
265 const GST remainder = numGlobalElements - quotient * numProcs;
266
267 GO startIndex;
268 if (myRank < remainder) {
269 numLocalElements = static_cast<size_t> (1) + static_cast<size_t> (quotient);
270 // myRank was originally an int, so it should never overflow
271 // reasonable GO types.
272 startIndex = as<GO> (myRank) * as<GO> (numLocalElements);
273 } else {
274 numLocalElements = as<size_t> (quotient);
275 startIndex = as<GO> (myRank) * as<GO> (numLocalElements) +
276 as<GO> (remainder);
277 }
278
279 minMyGID_ = indexBase + startIndex;
280 maxMyGID_ = indexBase + startIndex + numLocalElements - 1;
281 minAllGID_ = indexBase;
282 maxAllGID_ = indexBase + numGlobalElements - 1;
283 distributed_ = (numProcs > 1);
284 }
285 else { // lOrG == LocallyReplicated
286 numLocalElements = as<size_t> (numGlobalElements);
287 minMyGID_ = indexBase;
288 maxMyGID_ = indexBase + numGlobalElements - 1;
289 distributed_ = false;
290 }
291
292 minAllGID_ = indexBase;
293 maxAllGID_ = indexBase + numGlobalElements - 1;
294 indexBase_ = indexBase;
295 numGlobalElements_ = numGlobalElements;
296 numLocalElements_ = numLocalElements;
297 firstContiguousGID_ = minMyGID_;
298 lastContiguousGID_ = maxMyGID_;
299 contiguous_ = true;
300
301 // Create the Directory on demand in getRemoteIndexList().
302 //setupDirectory ();
303
304 if (verbose) {
305 std::ostringstream os;
306 os << *prefix << "Done" << endl;
307 std::cerr << os.str();
308 }
309 }
310
311
312 template <class LocalOrdinal, class GlobalOrdinal, class Node>
314 Map (const global_size_t numGlobalElements,
315 const size_t numLocalElements,
316 const global_ordinal_type indexBase,
317 const Teuchos::RCP<const Teuchos::Comm<int> > &comm) :
318 comm_ (comm),
319 uniform_ (false),
320 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
321 {
322 using Teuchos::as;
323 using Teuchos::broadcast;
324 using Teuchos::outArg;
325 using Teuchos::reduceAll;
326 using Teuchos::REDUCE_MIN;
327 using Teuchos::REDUCE_MAX;
328 using Teuchos::REDUCE_SUM;
329 using Teuchos::scan;
330 using std::endl;
331 using GO = global_ordinal_type;
332 using GST = global_size_t;
333 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
334 const char funcName[] =
335 "Map(gblNumInds,lclNumInds,indexBase,comm)";
336 const char exPfx[] =
337 "Tpetra::Map::Map(gblNumInds,lclNumInds,indexBase,comm): ";
338 const char suffix[] =
339 ". Please report this bug to the Tpetra developers.";
340
341 const bool debug = Details::Behavior::debug("Map");
342 const bool verbose = Details::Behavior::verbose("Map");
343 std::unique_ptr<std::string> prefix;
344 if (verbose) {
345 prefix = Details::createPrefix(
346 comm_.getRawPtr(), "Map", funcName);
347 std::ostringstream os;
348 os << *prefix << "Start" << endl;
349 std::cerr << os.str();
350 }
352
353 // Global sum of numLocalElements over all processes.
354 // Keep this for later debug checks.
355 GST debugGlobalSum {};
356 if (debug) {
357 debugGlobalSum = initialNonuniformDebugCheck(exPfx,
358 numGlobalElements, numLocalElements, indexBase, comm);
359 }
360
361 // Distribute the elements across the nodes so that they are
362 // - non-overlapping
363 // - contiguous
364
365 // This differs from the first Map constructor (that only takes a
366 // global number of elements) in that the user has specified the
367 // number of local elements, so that the elements are not
368 // (necessarily) evenly distributed over the processes.
369
370 // Compute my local offset. This is an inclusive scan, so to get
371 // the final offset, we subtract off the input.
372 GO scanResult = 0;
373 scan<int, GO> (*comm, REDUCE_SUM, numLocalElements, outArg (scanResult));
374 const GO myOffset = scanResult - numLocalElements;
375
376 if (numGlobalElements != GSTI) {
377 numGlobalElements_ = numGlobalElements; // Use the user's value.
378 }
379 else {
380 // Inclusive scan means that the last process has the final sum.
381 // Rather than doing a reduceAll to get the sum of
382 // numLocalElements, we can just have the last process broadcast
383 // its result. That saves us a round of log(numProcs) messages.
384 const int numProcs = comm->getSize ();
385 GST globalSum = scanResult;
386 if (numProcs > 1) {
387 broadcast (*comm, numProcs - 1, outArg (globalSum));
388 }
389 numGlobalElements_ = globalSum;
390
391 if (debug) {
392 // No need for an all-reduce here; both come from collectives.
393 TEUCHOS_TEST_FOR_EXCEPTION
394 (globalSum != debugGlobalSum, std::logic_error, exPfx <<
395 "globalSum = " << globalSum << " != debugGlobalSum = " <<
396 debugGlobalSum << suffix);
397 }
398 }
399 numLocalElements_ = numLocalElements;
400 indexBase_ = indexBase;
401 minAllGID_ = (numGlobalElements_ == 0) ?
402 std::numeric_limits<GO>::max () :
403 indexBase;
404 maxAllGID_ = (numGlobalElements_ == 0) ?
405 std::numeric_limits<GO>::lowest () :
406 indexBase + GO(numGlobalElements_) - GO(1);
407 minMyGID_ = (numLocalElements_ == 0) ?
408 std::numeric_limits<GO>::max () :
409 indexBase + GO(myOffset);
410 maxMyGID_ = (numLocalElements_ == 0) ?
411 std::numeric_limits<GO>::lowest () :
412 indexBase + myOffset + GO(numLocalElements) - GO(1);
413 firstContiguousGID_ = minMyGID_;
414 lastContiguousGID_ = maxMyGID_;
415 contiguous_ = true;
416 distributed_ = checkIsDist ();
417
418 // Create the Directory on demand in getRemoteIndexList().
419 //setupDirectory ();
420
421 if (verbose) {
422 std::ostringstream os;
423 os << *prefix << "Done" << endl;
424 std::cerr << os.str();
425 }
426 }
427
428 template <class LocalOrdinal, class GlobalOrdinal, class Node>
430 Map<LocalOrdinal,GlobalOrdinal,Node>::
431 initialNonuniformDebugCheck(
432 const char errorMessagePrefix[],
433 const global_size_t numGlobalElements,
434 const size_t numLocalElements,
435 const global_ordinal_type indexBase,
436 const Teuchos::RCP<const Teuchos::Comm<int>>& comm) const
437 {
438 const bool debug = Details::Behavior::debug("Map");
439 if (! debug) {
440 return global_size_t(0);
441 }
442
443 using Teuchos::broadcast;
444 using Teuchos::outArg;
445 using Teuchos::ptr;
446 using Teuchos::REDUCE_MAX;
447 using Teuchos::REDUCE_MIN;
448 using Teuchos::REDUCE_SUM;
449 using Teuchos::reduceAll;
450 using GO = global_ordinal_type;
451 using GST = global_size_t;
452 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
453
454 // The user has specified the distribution of indices over the
455 // processes. The distribution is not necessarily contiguous or
456 // equally shared over the processes.
457 //
458 // We assume that the number of local elements can be stored in a
459 // size_t. The instance member numLocalElements_ is a size_t, so
460 // this variable and that should have the same type.
461
462 GST debugGlobalSum = 0; // Will be global sum of numLocalElements
463 reduceAll<int, GST> (*comm, REDUCE_SUM, static_cast<GST> (numLocalElements),
464 outArg (debugGlobalSum));
465 // In debug mode only, check whether numGlobalElements and
466 // indexBase are the same over all processes in the communicator.
467 {
468 GST proc0NumGlobalElements = numGlobalElements;
469 broadcast<int, GST> (*comm_, 0, outArg (proc0NumGlobalElements));
470 GST minNumGlobalElements = numGlobalElements;
471 GST maxNumGlobalElements = numGlobalElements;
472 reduceAll<int, GST> (*comm, REDUCE_MIN, numGlobalElements,
473 outArg (minNumGlobalElements));
474 reduceAll<int, GST> (*comm, REDUCE_MAX, numGlobalElements,
475 outArg (maxNumGlobalElements));
476 TEUCHOS_TEST_FOR_EXCEPTION
477 (minNumGlobalElements != maxNumGlobalElements ||
478 numGlobalElements != minNumGlobalElements,
479 std::invalid_argument, errorMessagePrefix << "All processes "
480 "must provide the same number of global elements, even if "
481 "that argument is "
482 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
483 "(which signals that the Map should compute the global "
484 "number of elements). Process 0 set numGlobalElements"
485 "=" << proc0NumGlobalElements << ". The calling process "
486 << comm->getRank() << " set numGlobalElements=" <<
487 numGlobalElements << ". The min and max values over all "
488 "processes are " << minNumGlobalElements << " resp. " <<
489 maxNumGlobalElements << ".");
490
491 GO proc0IndexBase = indexBase;
492 broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
493 GO minIndexBase = indexBase;
494 GO maxIndexBase = indexBase;
495 reduceAll<int, GO> (*comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
496 reduceAll<int, GO> (*comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
497 TEUCHOS_TEST_FOR_EXCEPTION
498 (minIndexBase != maxIndexBase || indexBase != minIndexBase,
499 std::invalid_argument, errorMessagePrefix <<
500 "All processes must provide the same indexBase argument. "
501 "Process 0 set indexBase = " << proc0IndexBase << ". The "
502 "calling process " << comm->getRank() << " set indexBase="
503 << indexBase << ". The min and max values over all "
504 "processes are " << minIndexBase << " resp. " << maxIndexBase
505 << ".");
506
507 // Make sure that the sum of numLocalElements over all processes
508 // equals numGlobalElements.
509 TEUCHOS_TEST_FOR_EXCEPTION
510 (numGlobalElements != GSTI &&
511 debugGlobalSum != numGlobalElements, std::invalid_argument,
512 errorMessagePrefix << "The sum of each process' number of "
513 "indices over all processes, " << debugGlobalSum << ", != "
514 << "numGlobalElements=" << numGlobalElements << ". If you "
515 "would like this constructor to compute numGlobalElements "
516 "for you, you may set numGlobalElements="
517 "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() "
518 "on input. Please note that this is NOT necessarily -1.");
519 }
520 return debugGlobalSum;
521 }
522
523 template <class LocalOrdinal, class GlobalOrdinal, class Node>
524 void
525 Map<LocalOrdinal,GlobalOrdinal,Node>::
526 initWithNonownedHostIndexList(
527 const char errorMessagePrefix[],
528 const global_size_t numGlobalElements,
529 const Kokkos::View<const global_ordinal_type*,
530 Kokkos::LayoutLeft,
531 Kokkos::HostSpace,
532 Kokkos::MemoryUnmanaged>& entryList_host,
533 const global_ordinal_type indexBase,
534 const Teuchos::RCP<const Teuchos::Comm<int>>& comm)
535 {
536 using Kokkos::LayoutLeft;
537 using Kokkos::subview;
538 using Kokkos::View;
539 using Kokkos::view_alloc;
540 using Kokkos::WithoutInitializing;
541 using Teuchos::as;
542 using Teuchos::broadcast;
543 using Teuchos::outArg;
544 using Teuchos::ptr;
545 using Teuchos::REDUCE_MAX;
546 using Teuchos::REDUCE_MIN;
547 using Teuchos::REDUCE_SUM;
548 using Teuchos::reduceAll;
549 using LO = local_ordinal_type;
550 using GO = global_ordinal_type;
551 using GST = global_size_t;
552 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
553
554 // Make sure that Kokkos has been initialized (Github Issue #513).
555 TEUCHOS_TEST_FOR_EXCEPTION
556 (! Kokkos::is_initialized (), std::runtime_error,
557 errorMessagePrefix << "The Kokkos execution space "
558 << Teuchos::TypeNameTraits<execution_space>::name()
559 << " has not been initialized. "
560 "Please initialize it before creating a Map.")
561
562 // The user has specified the distribution of indices over the
563 // processes, via the input array of global indices on each
564 // process. The distribution is not necessarily contiguous or
565 // equally shared over the processes.
566
567 // The length of the input array on this process is the number of
568 // local indices to associate with this process, even though the
569 // input array contains global indices. We assume that the number
570 // of local indices on a process can be stored in a size_t;
571 // numLocalElements_ is a size_t, so this variable and that should
572 // have the same type.
573 const size_t numLocalElements(entryList_host.size());
574
575 initialNonuniformDebugCheck(errorMessagePrefix, numGlobalElements,
576 numLocalElements, indexBase, comm);
577
578 // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
579 // reduction is redundant, since the directory Map will have to do
580 // the same thing. Thus, we could do the scan and broadcast for
581 // the directory Map here, and give the computed offsets to the
582 // directory Map's constructor. However, a reduction costs less
583 // than a scan and broadcast, so this still saves time if users of
584 // this Map don't ever need the Directory (i.e., if they never
585 // call getRemoteIndexList on this Map).
586 if (numGlobalElements != GSTI) {
587 numGlobalElements_ = numGlobalElements; // Use the user's value.
588 }
589 else { // The user wants us to compute the sum.
590 reduceAll(*comm, REDUCE_SUM,
591 static_cast<GST>(numLocalElements),
592 outArg(numGlobalElements_));
593 }
594
595 // mfh 20 Feb 2013: We've never quite done the right thing for
596 // duplicate GIDs here. Duplicate GIDs have always been counted
597 // distinctly in numLocalElements_, and thus should get a
598 // different LID. However, we've always used std::map or a hash
599 // table for the GID -> LID lookup table, so distinct GIDs always
600 // map to the same LID. Furthermore, the order of the input GID
601 // list matters, so it's not desirable to sort for determining
602 // uniqueness.
603 //
604 // I've chosen for now to write this code as if the input GID list
605 // contains no duplicates. If this is not desired, we could use
606 // the lookup table itself to determine uniqueness: If we haven't
607 // seen the GID before, it gets a new LID and it's added to the
608 // LID -> GID and GID -> LID tables. If we have seen the GID
609 // before, it doesn't get added to either table. I would
610 // implement this, but it would cost more to do the double lookups
611 // in the table (one to check, and one to insert).
612 //
613 // More importantly, since we build the GID -> LID table in (a
614 // thread-) parallel (way), the order in which duplicate GIDs may
615 // get inserted is not defined. This would make the assignment of
616 // LID to GID nondeterministic.
617
618 numLocalElements_ = numLocalElements;
619 indexBase_ = indexBase;
620
621 minMyGID_ = indexBase_;
622 maxMyGID_ = indexBase_;
623
624 // NOTE (mfh 27 May 2015): While finding the initial contiguous
625 // GID range requires looking at all the GIDs in the range,
626 // dismissing an interval of GIDs only requires looking at the
627 // first and last GIDs. Thus, we could do binary search backwards
628 // from the end in order to catch the common case of a contiguous
629 // interval followed by noncontiguous entries. On the other hand,
630 // we could just expose this case explicitly as yet another Map
631 // constructor, and avoid the trouble of detecting it.
632 if (numLocalElements_ > 0) {
633 // Find contiguous GID range, with the restriction that the
634 // beginning of the range starts with the first entry. While
635 // doing so, fill in the LID -> GID table.
636 typename decltype (lgMap_)::non_const_type lgMap
637 (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
638 auto lgMap_host =
639 Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
640
641 // The input array entryList_host is already on host, so we
642 // don't need to take a host view of it.
643 // auto entryList_host =
644 // Kokkos::create_mirror_view (Kokkos::HostSpace (), entryList);
645 // Kokkos::deep_copy (entryList_host, entryList);
646
647 firstContiguousGID_ = entryList_host[0];
648 lastContiguousGID_ = firstContiguousGID_+1;
649
650 // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
651 // anyway, so we have to look at them all. The logical way to
652 // find the first noncontiguous entry would thus be to "reduce,"
653 // where the local reduction result is whether entryList[i] + 1
654 // == entryList[i+1].
655
656 lgMap_host[0] = firstContiguousGID_;
657 size_t i = 1;
658 for ( ; i < numLocalElements_; ++i) {
659 const GO curGid = entryList_host[i];
660 const LO curLid = as<LO> (i);
661
662 if (lastContiguousGID_ != curGid) break;
663
664 // Add the entry to the LID->GID table only after we know that
665 // the current GID is in the initial contiguous sequence, so
666 // that we don't repeat adding it in the first iteration of
667 // the loop below over the remaining noncontiguous GIDs.
668 lgMap_host[curLid] = curGid;
669 ++lastContiguousGID_;
670 }
671 --lastContiguousGID_;
672
673 // [firstContiguousGID_, lastContigousGID_] is the initial
674 // sequence of contiguous GIDs. We can start the min and max
675 // GID using this range.
676 minMyGID_ = firstContiguousGID_;
677 maxMyGID_ = lastContiguousGID_;
678
679 // Compute the GID -> LID lookup table, _not_ including the
680 // initial sequence of contiguous GIDs.
681 {
682 const std::pair<size_t, size_t> ncRange (i, entryList_host.extent (0));
683 auto nonContigGids_host = subview (entryList_host, ncRange);
684 TEUCHOS_TEST_FOR_EXCEPTION
685 (static_cast<size_t> (nonContigGids_host.extent (0)) !=
686 static_cast<size_t> (entryList_host.extent (0) - i),
687 std::logic_error, "Tpetra::Map noncontiguous constructor: "
688 "nonContigGids_host.extent(0) = "
689 << nonContigGids_host.extent (0)
690 << " != entryList_host.extent(0) - i = "
691 << (entryList_host.extent (0) - i) << " = "
692 << entryList_host.extent (0) << " - " << i
693 << ". Please report this bug to the Tpetra developers.");
694
695 // FixedHashTable's constructor expects an owned device View,
696 // so we must deep-copy the subview of the input indices.
697 View<GO*, LayoutLeft, device_type>
698 nonContigGids (view_alloc ("nonContigGids", WithoutInitializing),
699 nonContigGids_host.size ());
700 // DEEP_COPY REVIEW - HOST-TO-DEVICE
701 Kokkos::deep_copy (execution_space(), nonContigGids, nonContigGids_host);
702 Kokkos::fence(); // for UVM issues below - which will be refatored soon so FixedHashTable can build as pure CudaSpace - then I think remove this fence
703
704 glMap_ = global_to_local_table_type(nonContigGids,
705 firstContiguousGID_,
706 lastContiguousGID_,
707 static_cast<LO> (i));
708 // Make host version - when memory spaces match these just do trivial assignment
709 glMapHost_ = global_to_local_table_host_type(glMap_);
710 }
711
712 // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
713 // table above, we have to look at all the (noncontiguous) input
714 // indices anyway. Thus, why not have the constructor compute
715 // and return the min and max?
716
717 for ( ; i < numLocalElements_; ++i) {
718 const GO curGid = entryList_host[i];
719 const LO curLid = static_cast<LO> (i);
720 lgMap_host[curLid] = curGid; // LID -> GID table
722 // While iterating through entryList, we compute its
723 // (process-local) min and max elements.
724 if (curGid < minMyGID_) {
725 minMyGID_ = curGid;
726 }
727 if (curGid > maxMyGID_) {
728 maxMyGID_ = curGid;
729 }
730 }
731
732 // We filled lgMap on host above; now sync back to device.
733 // DEEP_COPY REVIEW - HOST-TO-DEVICE
734 Kokkos::deep_copy (execution_space(), lgMap, lgMap_host);
735
736 // "Commit" the local-to-global lookup table we filled in above.
737 lgMap_ = lgMap;
738 // We've already created this, so use it.
739 lgMapHost_ = lgMap_host;
740 }
741 else {
742 minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
743 maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
744 // This insures tests for GIDs in the range
745 // [firstContiguousGID_, lastContiguousGID_] fail for processes
746 // with no local elements.
747 firstContiguousGID_ = indexBase_+1;
748 lastContiguousGID_ = indexBase_;
749 // glMap_ was default constructed, so it's already empty.
750 }
751
752 // Compute the min and max of all processes' GIDs. If
753 // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
754 // are both indexBase_. This is wrong, but fixing it would
755 // require either a fancy sparse all-reduce, or a custom reduction
756 // operator that ignores invalid values ("invalid" means
757 // Tpetra::Details::OrdinalTraits<GO>::invalid()).
758 //
759 // Also, while we're at it, use the same all-reduce to figure out
760 // if the Map is distributed. "Distributed" means that there is
761 // at least one process with a number of local elements less than
762 // the number of global elements.
763 //
764 // We're computing the min and max of all processes' GIDs using a
765 // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
766 // and y are signed). (This lets us combine the min and max into
767 // a single all-reduce.) If each process sets localDist=1 if its
768 // number of local elements is strictly less than the number of
769 // global elements, and localDist=0 otherwise, then a MAX
770 // all-reduce on localDist tells us if the Map is distributed (1
771 // if yes, 0 if no). Thus, we can append localDist onto the end
772 // of the data and get the global result from the all-reduce.
773 if (std::numeric_limits<GO>::is_signed) {
774 // Does my process NOT own all the elements?
775 const GO localDist =
776 (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
777
778 GO minMaxInput[3];
779 minMaxInput[0] = -minMyGID_;
780 minMaxInput[1] = maxMyGID_;
781 minMaxInput[2] = localDist;
782
783 GO minMaxOutput[3];
784 minMaxOutput[0] = 0;
785 minMaxOutput[1] = 0;
786 minMaxOutput[2] = 0;
787 reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
788 minAllGID_ = -minMaxOutput[0];
789 maxAllGID_ = minMaxOutput[1];
790 const GO globalDist = minMaxOutput[2];
791 distributed_ = (comm_->getSize () > 1 && globalDist == 1);
792 }
793 else { // unsigned; use two reductions
794 // This is always correct, no matter the signedness of GO.
795 reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
796 reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
797 distributed_ = checkIsDist ();
798 }
799
800 contiguous_ = false; // "Contiguous" is conservative.
801
802 TEUCHOS_TEST_FOR_EXCEPTION(
803 minAllGID_ < indexBase_,
804 std::invalid_argument,
805 "Tpetra::Map constructor (noncontiguous): "
806 "Minimum global ID = " << minAllGID_ << " over all process(es) is "
807 "less than the given indexBase = " << indexBase_ << ".");
808
809 // Create the Directory on demand in getRemoteIndexList().
810 //setupDirectory ();
811 }
813 template <class LocalOrdinal, class GlobalOrdinal, class Node>
815 Map (const global_size_t numGlobalElements,
816 const GlobalOrdinal indexList[],
817 const LocalOrdinal indexListSize,
818 const GlobalOrdinal indexBase,
819 const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
820 comm_ (comm),
821 uniform_ (false),
822 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
823 {
824 using std::endl;
825 const char funcName[] =
826 "Map(gblNumInds,indexList,indexListSize,indexBase,comm)";
827
828 const bool verbose = Details::Behavior::verbose("Map");
829 std::unique_ptr<std::string> prefix;
830 if (verbose) {
831 prefix = Details::createPrefix(
832 comm_.getRawPtr(), "Map", funcName);
833 std::ostringstream os;
834 os << *prefix << "Start" << endl;
835 std::cerr << os.str();
836 }
838 checkMapInputArray ("(GST, const GO[], LO, GO, comm)",
839 indexList, static_cast<size_t> (indexListSize),
840 comm.getRawPtr ());
841 // Not quite sure if I trust all code to behave correctly if the
842 // pointer is nonnull but the array length is nonzero, so I'll
843 // make sure the raw pointer is null if the length is zero.
844 const GlobalOrdinal* const indsRaw = indexListSize == 0 ? NULL : indexList;
845 Kokkos::View<const GlobalOrdinal*,
846 Kokkos::LayoutLeft,
847 Kokkos::HostSpace,
848 Kokkos::MemoryUnmanaged> inds (indsRaw, indexListSize);
849 initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
850 indexBase, comm);
851 if (verbose) {
852 std::ostringstream os;
853 os << *prefix << "Done" << endl;
854 std::cerr << os.str();
855 }
856 }
857
858 template <class LocalOrdinal, class GlobalOrdinal, class Node>
860 Map (const global_size_t numGlobalElements,
861 const Teuchos::ArrayView<const GlobalOrdinal>& entryList,
862 const GlobalOrdinal indexBase,
863 const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
864 comm_ (comm),
865 uniform_ (false),
866 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
867 {
868 using std::endl;
869 const char funcName[] =
870 "Map(gblNumInds,entryList(Teuchos::ArrayView),indexBase,comm)";
871
872 const bool verbose = Details::Behavior::verbose("Map");
873 std::unique_ptr<std::string> prefix;
874 if (verbose) {
875 prefix = Details::createPrefix(
876 comm_.getRawPtr(), "Map", funcName);
877 std::ostringstream os;
878 os << *prefix << "Start" << endl;
879 std::cerr << os.str();
880 }
882 const size_t numLclInds = static_cast<size_t> (entryList.size ());
883 checkMapInputArray ("(GST, ArrayView, GO, comm)",
884 entryList.getRawPtr (), numLclInds,
885 comm.getRawPtr ());
886 // Not quite sure if I trust both ArrayView and View to behave
887 // correctly if the pointer is nonnull but the array length is
888 // nonzero, so I'll make sure it's null if the length is zero.
889 const GlobalOrdinal* const indsRaw =
890 numLclInds == 0 ? NULL : entryList.getRawPtr ();
891 Kokkos::View<const GlobalOrdinal*,
892 Kokkos::LayoutLeft,
893 Kokkos::HostSpace,
894 Kokkos::MemoryUnmanaged> inds (indsRaw, numLclInds);
895 initWithNonownedHostIndexList(funcName, numGlobalElements, inds,
896 indexBase, comm);
897 if (verbose) {
898 std::ostringstream os;
899 os << *prefix << "Done" << endl;
900 std::cerr << os.str();
901 }
902 }
903
904 template <class LocalOrdinal, class GlobalOrdinal, class Node>
906 Map (const global_size_t numGlobalElements,
907 const Kokkos::View<const GlobalOrdinal*, device_type>& entryList,
908 const GlobalOrdinal indexBase,
909 const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
910 comm_ (comm),
911 uniform_ (false),
912 directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
913 {
914 using Kokkos::LayoutLeft;
915 using Kokkos::subview;
916 using Kokkos::View;
917 using Kokkos::view_alloc;
918 using Kokkos::WithoutInitializing;
919 using Teuchos::arcp;
920 using Teuchos::ArrayView;
921 using Teuchos::as;
922 using Teuchos::broadcast;
923 using Teuchos::outArg;
924 using Teuchos::ptr;
925 using Teuchos::REDUCE_MAX;
926 using Teuchos::REDUCE_MIN;
927 using Teuchos::REDUCE_SUM;
928 using Teuchos::reduceAll;
929 using Teuchos::typeName;
930 using std::endl;
931 using LO = local_ordinal_type;
932 using GO = global_ordinal_type;
933 using GST = global_size_t;
934 const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
935 const char funcName[] =
936 "Map(gblNumInds,entryList(Kokkos::View),indexBase,comm)";
937
938 const bool verbose = Details::Behavior::verbose("Map");
939 std::unique_ptr<std::string> prefix;
940 if (verbose) {
941 prefix = Details::createPrefix(
942 comm_.getRawPtr(), "Map", funcName);
943 std::ostringstream os;
944 os << *prefix << "Start" << endl;
945 std::cerr << os.str();
946 }
948 checkMapInputArray ("(GST, Kokkos::View, GO, comm)",
949 entryList.data (),
950 static_cast<size_t> (entryList.extent (0)),
951 comm.getRawPtr ());
952
953 // The user has specified the distribution of indices over the
954 // processes, via the input array of global indices on each
955 // process. The distribution is not necessarily contiguous or
956 // equally shared over the processes.
957
958 // The length of the input array on this process is the number of
959 // local indices to associate with this process, even though the
960 // input array contains global indices. We assume that the number
961 // of local indices on a process can be stored in a size_t;
962 // numLocalElements_ is a size_t, so this variable and that should
963 // have the same type.
964 const size_t numLocalElements(entryList.size());
965
966 initialNonuniformDebugCheck(funcName, numGlobalElements,
967 numLocalElements, indexBase, comm);
968
969 // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
970 // reduction is redundant, since the directory Map will have to do
971 // the same thing. Thus, we could do the scan and broadcast for
972 // the directory Map here, and give the computed offsets to the
973 // directory Map's constructor. However, a reduction costs less
974 // than a scan and broadcast, so this still saves time if users of
975 // this Map don't ever need the Directory (i.e., if they never
976 // call getRemoteIndexList on this Map).
977 if (numGlobalElements != GSTI) {
978 numGlobalElements_ = numGlobalElements; // Use the user's value.
979 }
980 else { // The user wants us to compute the sum.
981 reduceAll(*comm, REDUCE_SUM,
982 static_cast<GST>(numLocalElements),
983 outArg(numGlobalElements_));
984 }
985
986 // mfh 20 Feb 2013: We've never quite done the right thing for
987 // duplicate GIDs here. Duplicate GIDs have always been counted
988 // distinctly in numLocalElements_, and thus should get a
989 // different LID. However, we've always used std::map or a hash
990 // table for the GID -> LID lookup table, so distinct GIDs always
991 // map to the same LID. Furthermore, the order of the input GID
992 // list matters, so it's not desirable to sort for determining
993 // uniqueness.
994 //
995 // I've chosen for now to write this code as if the input GID list
996 // contains no duplicates. If this is not desired, we could use
997 // the lookup table itself to determine uniqueness: If we haven't
998 // seen the GID before, it gets a new LID and it's added to the
999 // LID -> GID and GID -> LID tables. If we have seen the GID
1000 // before, it doesn't get added to either table. I would
1001 // implement this, but it would cost more to do the double lookups
1002 // in the table (one to check, and one to insert).
1003 //
1004 // More importantly, since we build the GID -> LID table in (a
1005 // thread-) parallel (way), the order in which duplicate GIDs may
1006 // get inserted is not defined. This would make the assignment of
1007 // LID to GID nondeterministic.
1008
1009 numLocalElements_ = numLocalElements;
1010 indexBase_ = indexBase;
1011
1012 minMyGID_ = indexBase_;
1013 maxMyGID_ = indexBase_;
1014
1015 // NOTE (mfh 27 May 2015): While finding the initial contiguous
1016 // GID range requires looking at all the GIDs in the range,
1017 // dismissing an interval of GIDs only requires looking at the
1018 // first and last GIDs. Thus, we could do binary search backwards
1019 // from the end in order to catch the common case of a contiguous
1020 // interval followed by noncontiguous entries. On the other hand,
1021 // we could just expose this case explicitly as yet another Map
1022 // constructor, and avoid the trouble of detecting it.
1023 if (numLocalElements_ > 0) {
1024 // Find contiguous GID range, with the restriction that the
1025 // beginning of the range starts with the first entry. While
1026 // doing so, fill in the LID -> GID table.
1027 typename decltype (lgMap_)::non_const_type lgMap
1028 (view_alloc ("lgMap", WithoutInitializing), numLocalElements_);
1029 auto lgMap_host =
1030 Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
1031
1032 using array_layout =
1033 typename View<const GO*, device_type>::array_layout;
1034 View<GO*, array_layout, Kokkos::HostSpace> entryList_host
1035 (view_alloc ("entryList_host", WithoutInitializing),
1036 entryList.extent(0));
1037 // DEEP_COPY REVIEW - DEVICE-TO-HOST
1038 Kokkos::deep_copy (execution_space(), entryList_host, entryList);
1039 Kokkos::fence(); // UVM follows
1040 firstContiguousGID_ = entryList_host[0];
1041 lastContiguousGID_ = firstContiguousGID_+1;
1042
1043 // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
1044 // anyway, so we have to look at them all. The logical way to
1045 // find the first noncontiguous entry would thus be to "reduce,"
1046 // where the local reduction result is whether entryList[i] + 1
1047 // == entryList[i+1].
1048
1049 lgMap_host[0] = firstContiguousGID_;
1050 size_t i = 1;
1051 for ( ; i < numLocalElements_; ++i) {
1052 const GO curGid = entryList_host[i];
1053 const LO curLid = as<LO> (i);
1054
1055 if (lastContiguousGID_ != curGid) break;
1056
1057 // Add the entry to the LID->GID table only after we know that
1058 // the current GID is in the initial contiguous sequence, so
1059 // that we don't repeat adding it in the first iteration of
1060 // the loop below over the remaining noncontiguous GIDs.
1061 lgMap_host[curLid] = curGid;
1062 ++lastContiguousGID_;
1063 }
1064 --lastContiguousGID_;
1065
1066 // [firstContiguousGID_, lastContigousGID_] is the initial
1067 // sequence of contiguous GIDs. We can start the min and max
1068 // GID using this range.
1069 minMyGID_ = firstContiguousGID_;
1070 maxMyGID_ = lastContiguousGID_;
1071
1072 // Compute the GID -> LID lookup table, _not_ including the
1073 // initial sequence of contiguous GIDs.
1074 {
1075 const std::pair<size_t, size_t> ncRange (i, entryList.extent (0));
1076 auto nonContigGids = subview (entryList, ncRange);
1077 TEUCHOS_TEST_FOR_EXCEPTION
1078 (static_cast<size_t> (nonContigGids.extent (0)) !=
1079 static_cast<size_t> (entryList.extent (0) - i),
1080 std::logic_error, "Tpetra::Map noncontiguous constructor: "
1081 "nonContigGids.extent(0) = "
1082 << nonContigGids.extent (0)
1083 << " != entryList.extent(0) - i = "
1084 << (entryList.extent (0) - i) << " = "
1085 << entryList.extent (0) << " - " << i
1086 << ". Please report this bug to the Tpetra developers.");
1087
1088 glMap_ = global_to_local_table_type(nonContigGids,
1089 firstContiguousGID_,
1090 lastContiguousGID_,
1091 static_cast<LO> (i));
1092 // Make host version - when memory spaces match these just do trivial assignment
1093 glMapHost_ = global_to_local_table_host_type(glMap_);
1094 }
1095
1096 // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
1097 // table above, we have to look at all the (noncontiguous) input
1098 // indices anyway. Thus, why not have the constructor compute
1099 // and return the min and max?
1100
1101 for ( ; i < numLocalElements_; ++i) {
1102 const GO curGid = entryList_host[i];
1103 const LO curLid = static_cast<LO> (i);
1104 lgMap_host[curLid] = curGid; // LID -> GID table
1105
1106 // While iterating through entryList, we compute its
1107 // (process-local) min and max elements.
1108 if (curGid < minMyGID_) {
1109 minMyGID_ = curGid;
1110 }
1111 if (curGid > maxMyGID_) {
1112 maxMyGID_ = curGid;
1113 }
1114 }
1115
1116 // We filled lgMap on host above; now sync back to device.
1117 // DEEP_COPY REVIEW - HOST-TO-DEVICE
1118 Kokkos::deep_copy (execution_space(), lgMap, lgMap_host);
1119
1120 // "Commit" the local-to-global lookup table we filled in above.
1121 lgMap_ = lgMap;
1122 // We've already created this, so use it.
1123 lgMapHost_ = lgMap_host;
1124 }
1125 else {
1126 minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
1127 maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
1128 // This insures tests for GIDs in the range
1129 // [firstContiguousGID_, lastContiguousGID_] fail for processes
1130 // with no local elements.
1131 firstContiguousGID_ = indexBase_+1;
1132 lastContiguousGID_ = indexBase_;
1133 // glMap_ was default constructed, so it's already empty.
1134 }
1135
1136 // Compute the min and max of all processes' GIDs. If
1137 // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
1138 // are both indexBase_. This is wrong, but fixing it would
1139 // require either a fancy sparse all-reduce, or a custom reduction
1140 // operator that ignores invalid values ("invalid" means
1141 // Tpetra::Details::OrdinalTraits<GO>::invalid()).
1142 //
1143 // Also, while we're at it, use the same all-reduce to figure out
1144 // if the Map is distributed. "Distributed" means that there is
1145 // at least one process with a number of local elements less than
1146 // the number of global elements.
1147 //
1148 // We're computing the min and max of all processes' GIDs using a
1149 // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
1150 // and y are signed). (This lets us combine the min and max into
1151 // a single all-reduce.) If each process sets localDist=1 if its
1152 // number of local elements is strictly less than the number of
1153 // global elements, and localDist=0 otherwise, then a MAX
1154 // all-reduce on localDist tells us if the Map is distributed (1
1155 // if yes, 0 if no). Thus, we can append localDist onto the end
1156 // of the data and get the global result from the all-reduce.
1157 if (std::numeric_limits<GO>::is_signed) {
1158 // Does my process NOT own all the elements?
1159 const GO localDist =
1160 (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
1161
1162 GO minMaxInput[3];
1163 minMaxInput[0] = -minMyGID_;
1164 minMaxInput[1] = maxMyGID_;
1165 minMaxInput[2] = localDist;
1166
1167 GO minMaxOutput[3];
1168 minMaxOutput[0] = 0;
1169 minMaxOutput[1] = 0;
1170 minMaxOutput[2] = 0;
1171 reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
1172 minAllGID_ = -minMaxOutput[0];
1173 maxAllGID_ = minMaxOutput[1];
1174 const GO globalDist = minMaxOutput[2];
1175 distributed_ = (comm_->getSize () > 1 && globalDist == 1);
1176 }
1177 else { // unsigned; use two reductions
1178 // This is always correct, no matter the signedness of GO.
1179 reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
1180 reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
1181 distributed_ = checkIsDist ();
1182 }
1183
1184 contiguous_ = false; // "Contiguous" is conservative.
1185
1186 TEUCHOS_TEST_FOR_EXCEPTION(
1187 minAllGID_ < indexBase_,
1188 std::invalid_argument,
1189 "Tpetra::Map constructor (noncontiguous): "
1190 "Minimum global ID = " << minAllGID_ << " over all process(es) is "
1191 "less than the given indexBase = " << indexBase_ << ".");
1192
1193 // Create the Directory on demand in getRemoteIndexList().
1194 //setupDirectory ();
1195
1196 if (verbose) {
1197 std::ostringstream os;
1198 os << *prefix << "Done" << endl;
1199 std::cerr << os.str();
1200 }
1201 }
1202
1203
1204 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1206 {
1207 if (! Kokkos::is_initialized ()) {
1208 std::ostringstream os;
1209 os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1210 "Kokkos::finalize() has been called. This is user error! There are "
1211 "two likely causes: " << std::endl <<
1212 " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1213 << std::endl <<
1214 " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1215 "of a Tpetra::Map) at the same scope in main() as Kokkos::finalize() "
1216 "or Tpetra::finalize()." << std::endl << std::endl <<
1217 "Don't do either of these! Please refer to GitHib Issue #2372."
1218 << std::endl;
1219 ::Tpetra::Details::printOnce (std::cerr, os.str (),
1220 this->getComm ().getRawPtr ());
1221 }
1222 else {
1223 using ::Tpetra::Details::mpiIsInitialized;
1224 using ::Tpetra::Details::mpiIsFinalized;
1225 using ::Tpetra::Details::teuchosCommIsAnMpiComm;
1226
1227 Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
1228 if (! comm.is_null () && teuchosCommIsAnMpiComm (*comm) &&
1229 mpiIsInitialized () && mpiIsFinalized ()) {
1230 // Tpetra itself does not require MPI, even if building with
1231 // MPI. It is legal to create Tpetra objects that do not use
1232 // MPI, even in an MPI program. However, calling Tpetra stuff
1233 // after MPI_Finalize() has been called is a bad idea, since
1234 // some Tpetra defaults may use MPI if available.
1235 std::ostringstream os;
1236 os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1237 "MPI_Finalize() has been called. This is user error! There are "
1238 "two likely causes: " << std::endl <<
1239 " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1240 << std::endl <<
1241 " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1242 "of a Tpetra::Map) at the same scope in main() as MPI_finalize() or "
1243 "Tpetra::finalize()." << std::endl << std::endl <<
1244 "Don't do either of these! Please refer to GitHib Issue #2372."
1245 << std::endl;
1246 ::Tpetra::Details::printOnce (std::cerr, os.str (), comm.getRawPtr ());
1247 }
1248 }
1249 // mfh 20 Mar 2018: We can't check Tpetra::isInitialized() yet,
1250 // because Tpetra does not yet require Tpetra::initialize /
1251 // Tpetra::finalize.
1252 }
1253
1254 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1255 bool
1257 {
1258 TEUCHOS_TEST_FOR_EXCEPTION(
1259 getComm ().is_null (), std::logic_error, "Tpetra::Map::isOneToOne: "
1260 "getComm() returns null. Please report this bug to the Tpetra "
1261 "developers.");
1262
1263 // This is a collective operation, if it hasn't been called before.
1264 setupDirectory ();
1265 return directory_->isOneToOne (*this);
1266 }
1267
1268 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1269 LocalOrdinal
1271 getLocalElement (GlobalOrdinal globalIndex) const
1272 {
1273 if (isContiguous ()) {
1274 if (globalIndex < getMinGlobalIndex () ||
1275 globalIndex > getMaxGlobalIndex ()) {
1276 return Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1277 }
1278 return static_cast<LocalOrdinal> (globalIndex - getMinGlobalIndex ());
1279 }
1280 else if (globalIndex >= firstContiguousGID_ &&
1281 globalIndex <= lastContiguousGID_) {
1282 return static_cast<LocalOrdinal> (globalIndex - firstContiguousGID_);
1283 }
1284 else {
1285 // If the given global index is not in the table, this returns
1286 // the same value as OrdinalTraits<LocalOrdinal>::invalid().
1287 // glMapHost_ is Host and does not assume UVM
1288 return glMapHost_.get (globalIndex);
1289 }
1290 }
1291
1292 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1293 GlobalOrdinal
1295 getGlobalElement (LocalOrdinal localIndex) const
1296 {
1297 if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1298 return Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ();
1299 }
1300 if (isContiguous ()) {
1301 return getMinGlobalIndex () + localIndex;
1302 }
1303 else {
1304 // This is a host Kokkos::View access, with no RCP or ArrayRCP
1305 // involvement. As a result, it is thread safe.
1306 //
1307 // lgMapHost_ is a host pointer; this does NOT assume UVM.
1308 return lgMapHost_[localIndex];
1309 }
1310 }
1311
1312 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1313 bool
1315 isNodeLocalElement (LocalOrdinal localIndex) const
1316 {
1317 if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1318 return false;
1319 } else {
1320 return true;
1321 }
1322 }
1323
1324 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1325 bool
1327 isNodeGlobalElement (GlobalOrdinal globalIndex) const {
1328 return this->getLocalElement (globalIndex) !=
1329 Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1330 }
1331
1332 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1334 return uniform_;
1335 }
1336
1337 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1339 return contiguous_;
1340 }
1341
1342
1343 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1346 getLocalMap () const
1347 {
1348 return local_map_type (glMap_, lgMap_, getIndexBase (),
1349 getMinGlobalIndex (), getMaxGlobalIndex (),
1350 firstContiguousGID_, lastContiguousGID_,
1351 getLocalNumElements (), isContiguous ());
1352 }
1353
1354 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1355 bool
1358 {
1359 using Teuchos::outArg;
1360 using Teuchos::REDUCE_MIN;
1361 using Teuchos::reduceAll;
1362 //
1363 // Tests that avoid the Boolean all-reduce below by using
1364 // globally consistent quantities.
1365 //
1366 if (this == &map) {
1367 // Pointer equality on one process always implies pointer
1368 // equality on all processes, since Map is immutable.
1369 return true;
1370 }
1371 else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1372 // The two communicators have different numbers of processes.
1373 // It's not correct to call isCompatible() in that case. This
1374 // may result in the all-reduce hanging below.
1375 return false;
1376 }
1377 else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1378 // Two Maps are definitely NOT compatible if they have different
1379 // global numbers of indices.
1380 return false;
1381 }
1382 else if (isContiguous () && isUniform () &&
1383 map.isContiguous () && map.isUniform ()) {
1384 // Contiguous uniform Maps with the same number of processes in
1385 // their communicators, and with the same global numbers of
1386 // indices, are always compatible.
1387 return true;
1388 }
1389 else if (! isContiguous () && ! map.isContiguous () &&
1390 lgMap_.extent (0) != 0 && map.lgMap_.extent (0) != 0 &&
1391 lgMap_.data () == map.lgMap_.data ()) {
1392 // Noncontiguous Maps whose global index lists are nonempty and
1393 // have the same pointer must be the same (and therefore
1394 // contiguous).
1395 //
1396 // Nonempty is important. For example, consider a communicator
1397 // with two processes, and two Maps that share this
1398 // communicator, with zero global indices on the first process,
1399 // and different nonzero numbers of global indices on the second
1400 // process. In that case, on the first process, the pointers
1401 // would both be NULL.
1402 return true;
1403 }
1404
1405 TEUCHOS_TEST_FOR_EXCEPTION(
1406 getGlobalNumElements () != map.getGlobalNumElements (), std::logic_error,
1407 "Tpetra::Map::isCompatible: There's a bug in this method. We've already "
1408 "checked that this condition is true above, but it's false here. "
1409 "Please report this bug to the Tpetra developers.");
1410
1411 // Do both Maps have the same number of indices on each process?
1412 const int locallyCompat =
1413 (getLocalNumElements () == map.getLocalNumElements ()) ? 1 : 0;
1414
1415 int globallyCompat = 0;
1416 reduceAll<int, int> (*comm_, REDUCE_MIN, locallyCompat, outArg (globallyCompat));
1417 return (globallyCompat == 1);
1418 }
1419
1420 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1421 bool
1424 {
1425 using Teuchos::ArrayView;
1426 using GO = global_ordinal_type;
1427 using size_type = typename ArrayView<const GO>::size_type;
1428
1429 // If both Maps are contiguous, we can compare their GID ranges
1430 // easily by looking at the min and max GID on this process.
1431 // Otherwise, we'll compare their GID lists. If only one Map is
1432 // contiguous, then we only have to call getLocalElementList() on
1433 // the noncontiguous Map. (It's best to avoid calling it on a
1434 // contiguous Map, since it results in unnecessary storage that
1435 // persists for the lifetime of the Map.)
1436
1437 if (this == &map) {
1438 // Pointer equality on one process always implies pointer
1439 // equality on all processes, since Map is immutable.
1440 return true;
1441 }
1442 else if (getLocalNumElements () != map.getLocalNumElements ()) {
1443 return false;
1444 }
1445 else if (getMinGlobalIndex () != map.getMinGlobalIndex () ||
1446 getMaxGlobalIndex () != map.getMaxGlobalIndex ()) {
1447 return false;
1448 }
1449 else {
1450 if (isContiguous ()) {
1451 if (map.isContiguous ()) {
1452 return true; // min and max match, so the ranges match.
1453 }
1454 else { // *this is contiguous, but map is not contiguous
1455 TEUCHOS_TEST_FOR_EXCEPTION(
1456 ! this->isContiguous () || map.isContiguous (), std::logic_error,
1457 "Tpetra::Map::locallySameAs: BUG");
1458 ArrayView<const GO> rhsElts = map.getLocalElementList ();
1459 const GO minLhsGid = this->getMinGlobalIndex ();
1460 const size_type numRhsElts = rhsElts.size ();
1461 for (size_type k = 0; k < numRhsElts; ++k) {
1462 const GO curLhsGid = minLhsGid + static_cast<GO> (k);
1463 if (curLhsGid != rhsElts[k]) {
1464 return false; // stop on first mismatch
1465 }
1466 }
1467 return true;
1468 }
1469 }
1470 else if (map.isContiguous ()) { // *this is not contiguous, but map is
1471 TEUCHOS_TEST_FOR_EXCEPTION(
1472 this->isContiguous () || ! map.isContiguous (), std::logic_error,
1473 "Tpetra::Map::locallySameAs: BUG");
1474 ArrayView<const GO> lhsElts = this->getLocalElementList ();
1475 const GO minRhsGid = map.getMinGlobalIndex ();
1476 const size_type numLhsElts = lhsElts.size ();
1477 for (size_type k = 0; k < numLhsElts; ++k) {
1478 const GO curRhsGid = minRhsGid + static_cast<GO> (k);
1479 if (curRhsGid != lhsElts[k]) {
1480 return false; // stop on first mismatch
1481 }
1482 }
1483 return true;
1484 }
1485 else if (this->lgMap_.data () == map.lgMap_.data ()) {
1486 // Pointers to LID->GID "map" (actually just an array) are the
1487 // same, and the number of GIDs are the same.
1488 return this->getLocalNumElements () == map.getLocalNumElements ();
1489 }
1490 else { // we actually have to compare the GIDs
1491 if (this->getLocalNumElements () != map.getLocalNumElements ()) {
1492 return false; // We already checked above, but check just in case
1493 }
1494 else {
1495 ArrayView<const GO> lhsElts = getLocalElementList ();
1496 ArrayView<const GO> rhsElts = map.getLocalElementList ();
1497
1498 // std::equal requires that the latter range is as large as
1499 // the former. We know the ranges have equal length, because
1500 // they have the same number of local entries.
1501 return std::equal (lhsElts.begin (), lhsElts.end (), rhsElts.begin ());
1502 }
1503 }
1504 }
1505 }
1506
1507 template <class LocalOrdinal,class GlobalOrdinal, class Node>
1508 bool
1511 {
1512 if (this == &map)
1513 return true;
1514
1515 // We are going to check if lmap1 is fitted into lmap2:
1516 // Is lmap1 (map) a subset of lmap2 (this)?
1517 // And do the first lmap1.getLocalNumElements() global elements
1518 // of lmap1,lmap2 owned on each process exactly match?
1519 auto lmap1 = map.getLocalMap();
1520 auto lmap2 = this->getLocalMap();
1521
1522 auto numLocalElements1 = lmap1.getLocalNumElements();
1523 auto numLocalElements2 = lmap2.getLocalNumElements();
1524
1525 if (numLocalElements1 > numLocalElements2) {
1526 // There are more indices in the first map on this process than in second map.
1527 return false;
1528 }
1529
1530 if (lmap1.isContiguous () && lmap2.isContiguous ()) {
1531 // When both Maps are contiguous, just check the interval inclusion.
1532 return ((lmap1.getMinGlobalIndex () == lmap2.getMinGlobalIndex ()) &&
1533 (lmap1.getMaxGlobalIndex () <= lmap2.getMaxGlobalIndex ()));
1534 }
1535
1536 if (lmap1.getMinGlobalIndex () < lmap2.getMinGlobalIndex () ||
1537 lmap1.getMaxGlobalIndex () > lmap2.getMaxGlobalIndex ()) {
1538 // The second map does not include the first map bounds, and thus some of
1539 // the first map global indices are not in the second map.
1540 return false;
1541 }
1542
1543 using LO = local_ordinal_type;
1544 using range_type =
1545 Kokkos::RangePolicy<LO, typename node_type::execution_space>;
1546
1547 // Check all elements.
1548 LO numDiff = 0;
1549 Kokkos::parallel_reduce(
1550 "isLocallyFitted",
1551 range_type(0, numLocalElements1),
1552 KOKKOS_LAMBDA (const LO i, LO& diff) {
1553 diff += (lmap1.getGlobalElement(i) != lmap2.getGlobalElement(i));
1554 }, numDiff);
1555
1556 return (numDiff == 0);
1557 }
1558
1559 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1560 bool
1563 {
1564 using Teuchos::outArg;
1565 using Teuchos::REDUCE_MIN;
1566 using Teuchos::reduceAll;
1567 //
1568 // Tests that avoid the Boolean all-reduce below by using
1569 // globally consistent quantities.
1570 //
1571 if (this == &map) {
1572 // Pointer equality on one process always implies pointer
1573 // equality on all processes, since Map is immutable.
1574 return true;
1575 }
1576 else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1577 // The two communicators have different numbers of processes.
1578 // It's not correct to call isSameAs() in that case. This
1579 // may result in the all-reduce hanging below.
1580 return false;
1581 }
1582 else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1583 // Two Maps are definitely NOT the same if they have different
1584 // global numbers of indices.
1585 return false;
1586 }
1587 else if (getMinAllGlobalIndex () != map.getMinAllGlobalIndex () ||
1588 getMaxAllGlobalIndex () != map.getMaxAllGlobalIndex () ||
1589 getIndexBase () != map.getIndexBase ()) {
1590 // If the global min or max global index doesn't match, or if
1591 // the index base doesn't match, then the Maps aren't the same.
1592 return false;
1593 }
1594 else if (isDistributed () != map.isDistributed ()) {
1595 // One Map is distributed and the other is not, which means that
1596 // the Maps aren't the same.
1597 return false;
1598 }
1599 else if (isContiguous () && isUniform () &&
1600 map.isContiguous () && map.isUniform ()) {
1601 // Contiguous uniform Maps with the same number of processes in
1602 // their communicators, with the same global numbers of indices,
1603 // and with matching index bases and ranges, must be the same.
1604 return true;
1605 }
1606
1607 // The two communicators must have the same number of processes,
1608 // with process ranks occurring in the same order. This uses
1609 // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says:
1610 // "Operations that access communicators are local and their
1611 // execution does not require interprocess communication."
1612 // However, just to be sure, I'll put this call after the above
1613 // tests that don't communicate.
1614 if (! ::Tpetra::Details::congruent (*comm_, * (map.getComm ()))) {
1615 return false;
1616 }
1617
1618 // If we get this far, we need to check local properties and then
1619 // communicate local sameness across all processes.
1620 const int isSame_lcl = locallySameAs (map) ? 1 : 0;
1621
1622 // Return true if and only if all processes report local sameness.
1623 int isSame_gbl = 0;
1624 reduceAll<int, int> (*comm_, REDUCE_MIN, isSame_lcl, outArg (isSame_gbl));
1625 return isSame_gbl == 1;
1626 }
1627
1628 namespace { // (anonymous)
1629 template <class LO, class GO, class DT>
1630 class FillLgMap {
1631 public:
1632 FillLgMap (const Kokkos::View<GO*, DT>& lgMap,
1633 const GO startGid) :
1634 lgMap_ (lgMap), startGid_ (startGid)
1635 {
1636 Kokkos::RangePolicy<LO, typename DT::execution_space>
1637 range (static_cast<LO> (0), static_cast<LO> (lgMap.size ()));
1638 Kokkos::parallel_for (range, *this);
1639 }
1640
1641 KOKKOS_INLINE_FUNCTION void operator () (const LO& lid) const {
1642 lgMap_(lid) = startGid_ + static_cast<GO> (lid);
1643 }
1644
1645 private:
1646 const Kokkos::View<GO*, DT> lgMap_;
1647 const GO startGid_;
1648 };
1649
1650 } // namespace (anonymous)
1651
1652
1653 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1654 typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_type
1656 {
1657 using std::endl;
1658 using LO = local_ordinal_type;
1659 using GO = global_ordinal_type;
1660 using const_lg_view_type = decltype(lgMap_);
1661 using lg_view_type = typename const_lg_view_type::non_const_type;
1662 const bool debug = Details::Behavior::debug("Map");
1663 const bool verbose = Details::Behavior::verbose("Map");
1664
1665 std::unique_ptr<std::string> prefix;
1666 if (verbose) {
1667 prefix = Details::createPrefix(
1668 comm_.getRawPtr(), "Map", "getMyGlobalIndices");
1669 std::ostringstream os;
1670 os << *prefix << "Start" << endl;
1671 std::cerr << os.str();
1672 }
1673
1674 // If the local-to-global mapping doesn't exist yet, and if we
1675 // have local entries, then create and fill the local-to-global
1676 // mapping.
1677 const bool needToCreateLocalToGlobalMapping =
1678 lgMap_.extent (0) == 0 && numLocalElements_ > 0;
1679
1680 if (needToCreateLocalToGlobalMapping) {
1681 if (verbose) {
1682 std::ostringstream os;
1683 os << *prefix << "Need to create lgMap" << endl;
1684 std::cerr << os.str();
1685 }
1686 if (debug) {
1687 // The local-to-global mapping should have been set up already
1688 // for a noncontiguous map.
1689 TEUCHOS_TEST_FOR_EXCEPTION
1690 (! isContiguous(), std::logic_error,
1691 "Tpetra::Map::getMyGlobalIndices: The local-to-global "
1692 "mapping (lgMap_) should have been set up already for a "
1693 "noncontiguous Map. Please report this bug to the Tpetra "
1694 "developers.");
1695 }
1696 const LO numElts = static_cast<LO> (getLocalNumElements ());
1697
1698 using Kokkos::view_alloc;
1699 using Kokkos::WithoutInitializing;
1700 lg_view_type lgMap ("lgMap", numElts);
1701 if (verbose) {
1702 std::ostringstream os;
1703 os << *prefix << "Fill lgMap" << endl;
1704 std::cerr << os.str();
1705 }
1706 FillLgMap<LO, GO, no_uvm_device_type> fillIt (lgMap, minMyGID_);
1707
1708 if (verbose) {
1709 std::ostringstream os;
1710 os << *prefix << "Copy lgMap to lgMapHost" << endl;
1711 std::cerr << os.str();
1712 }
1713
1714 auto lgMapHost =
1715 Kokkos::create_mirror_view (Kokkos::HostSpace (), lgMap);
1716 // DEEP_COPY REVIEW - DEVICE-TO-HOST
1717 Kokkos::deep_copy (execution_space(), lgMapHost, lgMap);
1718
1719 // "Commit" the local-to-global lookup table we filled in above.
1720 lgMap_ = lgMap;
1721 lgMapHost_ = lgMapHost;
1722 }
1723
1724 if (verbose) {
1725 std::ostringstream os;
1726 os << *prefix << "Done" << endl;
1727 std::cerr << os.str();
1728 }
1729 return lgMapHost_;
1730 }
1731
1732 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1733 Teuchos::ArrayView<const GlobalOrdinal>
1735 {
1736 using GO = global_ordinal_type;
1737
1738 // If the local-to-global mapping doesn't exist yet, and if we
1739 // have local entries, then create and fill the local-to-global
1740 // mapping.
1741 (void) this->getMyGlobalIndices ();
1742
1743 // This does NOT assume UVM; lgMapHost_ is a host pointer.
1744 const GO* lgMapHostRawPtr = lgMapHost_.data ();
1745 // The third argument forces ArrayView not to try to track memory
1746 // in a debug build. We have to use it because the memory does
1747 // not belong to a Teuchos memory management class.
1748 return Teuchos::ArrayView<const GO>(
1749 lgMapHostRawPtr,
1750 lgMapHost_.extent (0),
1751 Teuchos::RCP_DISABLE_NODE_LOOKUP);
1752 }
1753
1754 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1756 return distributed_;
1757 }
1758
1759 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1761 using Teuchos::TypeNameTraits;
1762 std::ostringstream os;
1763
1764 os << "Tpetra::Map: {"
1765 << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name ()
1766 << ", GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name ()
1767 << ", NodeType: " << TypeNameTraits<Node>::name ();
1768 if (this->getObjectLabel () != "") {
1769 os << ", Label: \"" << this->getObjectLabel () << "\"";
1770 }
1771 os << ", Global number of entries: " << getGlobalNumElements ()
1772 << ", Number of processes: " << getComm ()->getSize ()
1773 << ", Uniform: " << (isUniform () ? "true" : "false")
1774 << ", Contiguous: " << (isContiguous () ? "true" : "false")
1775 << ", Distributed: " << (isDistributed () ? "true" : "false")
1776 << "}";
1777 return os.str ();
1778 }
1779
1784 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1785 std::string
1787 localDescribeToString (const Teuchos::EVerbosityLevel vl) const
1788 {
1789 using LO = local_ordinal_type;
1790 using std::endl;
1791
1792 // This preserves current behavior of Map.
1793 if (vl < Teuchos::VERB_HIGH) {
1794 return std::string ();
1795 }
1796 auto outStringP = Teuchos::rcp (new std::ostringstream ());
1797 Teuchos::RCP<Teuchos::FancyOStream> outp =
1798 Teuchos::getFancyOStream (outStringP);
1799 Teuchos::FancyOStream& out = *outp;
1800
1801 auto comm = this->getComm ();
1802 const int myRank = comm->getRank ();
1803 const int numProcs = comm->getSize ();
1804 out << "Process " << myRank << " of " << numProcs << ":" << endl;
1805 Teuchos::OSTab tab1 (out);
1806
1807 const LO numEnt = static_cast<LO> (this->getLocalNumElements ());
1808 out << "My number of entries: " << numEnt << endl
1809 << "My minimum global index: " << this->getMinGlobalIndex () << endl
1810 << "My maximum global index: " << this->getMaxGlobalIndex () << endl;
1811
1812 if (vl == Teuchos::VERB_EXTREME) {
1813 out << "My global indices: [";
1814 const LO minLclInd = this->getMinLocalIndex ();
1815 for (LO k = 0; k < numEnt; ++k) {
1816 out << minLclInd + this->getGlobalElement (k);
1817 if (k + 1 < numEnt) {
1818 out << ", ";
1819 }
1820 }
1821 out << "]" << endl;
1822 }
1823
1824 out.flush (); // make sure the ostringstream got everything
1825 return outStringP->str ();
1826 }
1827
1828 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1829 void
1831 describe (Teuchos::FancyOStream &out,
1832 const Teuchos::EVerbosityLevel verbLevel) const
1833 {
1834 using Teuchos::TypeNameTraits;
1835 using Teuchos::VERB_DEFAULT;
1836 using Teuchos::VERB_NONE;
1837 using Teuchos::VERB_LOW;
1838 using Teuchos::VERB_HIGH;
1839 using std::endl;
1840 using LO = local_ordinal_type;
1841 using GO = global_ordinal_type;
1842 const Teuchos::EVerbosityLevel vl =
1843 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
1844
1845 if (vl == VERB_NONE) {
1846 return; // don't print anything
1847 }
1848 // If this Map's Comm is null, then the Map does not participate
1849 // in collective operations with the other processes. In that
1850 // case, it is not even legal to call this method. The reasonable
1851 // thing to do in that case is nothing.
1852 auto comm = this->getComm ();
1853 if (comm.is_null ()) {
1854 return;
1855 }
1856 const int myRank = comm->getRank ();
1857 const int numProcs = comm->getSize ();
1858
1859 // Only Process 0 should touch the output stream, but this method
1860 // in general may need to do communication. Thus, we may need to
1861 // preserve the current tab level across multiple "if (myRank ==
1862 // 0) { ... }" inner scopes. This is why we sometimes create
1863 // OSTab instances by pointer, instead of by value. We only need
1864 // to create them by pointer if the tab level must persist through
1865 // multiple inner scopes.
1866 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
1867
1868 if (myRank == 0) {
1869 // At every verbosity level but VERB_NONE, Process 0 prints.
1870 // By convention, describe() always begins with a tab before
1871 // printing.
1872 tab0 = Teuchos::rcp (new Teuchos::OSTab (out));
1873 out << "\"Tpetra::Map\":" << endl;
1874 tab1 = Teuchos::rcp (new Teuchos::OSTab (out));
1875 {
1876 out << "Template parameters:" << endl;
1877 Teuchos::OSTab tab2 (out);
1878 out << "LocalOrdinal: " << TypeNameTraits<LO>::name () << endl
1879 << "GlobalOrdinal: " << TypeNameTraits<GO>::name () << endl
1880 << "Node: " << TypeNameTraits<Node>::name () << endl;
1881 }
1882 const std::string label = this->getObjectLabel ();
1883 if (label != "") {
1884 out << "Label: \"" << label << "\"" << endl;
1885 }
1886 out << "Global number of entries: " << getGlobalNumElements () << endl
1887 << "Minimum global index: " << getMinAllGlobalIndex () << endl
1888 << "Maximum global index: " << getMaxAllGlobalIndex () << endl
1889 << "Index base: " << getIndexBase () << endl
1890 << "Number of processes: " << numProcs << endl
1891 << "Uniform: " << (isUniform () ? "true" : "false") << endl
1892 << "Contiguous: " << (isContiguous () ? "true" : "false") << endl
1893 << "Distributed: " << (isDistributed () ? "true" : "false") << endl;
1894 }
1895
1896 // This is collective over the Map's communicator.
1897 if (vl >= VERB_HIGH) { // VERB_HIGH or VERB_EXTREME
1898 const std::string lclStr = this->localDescribeToString (vl);
1899 Tpetra::Details::gathervPrint (out, lclStr, *comm);
1900 }
1901 }
1902
1903 template <class LocalOrdinal, class GlobalOrdinal, class Node>
1904 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
1906 replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const
1907 {
1908 using Teuchos::RCP;
1909 using Teuchos::rcp;
1910 using GST = global_size_t;
1911 using LO = local_ordinal_type;
1912 using GO = global_ordinal_type;
1913 using map_type = Map<LO, GO, Node>;
1914
1915 // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
1916 // the Map by calling its ordinary public constructor, using the
1917 // original Map's data. This only involves O(1) all-reduces over
1918 // the new communicator, which in the common case only includes a
1919 // small number of processes.
1920
1921 // Create the Map to return.
1922 if (newComm.is_null () || newComm->getSize () < 1) {
1923 return Teuchos::null; // my process does not participate in the new Map
1924 }
1925 else if (newComm->getSize () == 1) {
1926 // The case where the new communicator has only one process is
1927 // easy. We don't have to communicate to get all the
1928 // information we need. Use the default comm to create the new
1929 // Map, then fill in all the fields directly.
1930 RCP<map_type> newMap (new map_type ());
1931
1932 newMap->comm_ = newComm;
1933 // mfh 07 Oct 2016: Preserve original behavior, even though the
1934 // original index base may no longer be the globally min global
1935 // index. See #616 for why this doesn't matter so much anymore.
1936 newMap->indexBase_ = this->indexBase_;
1937 newMap->numGlobalElements_ = this->numLocalElements_;
1938 newMap->numLocalElements_ = this->numLocalElements_;
1939 newMap->minMyGID_ = this->minMyGID_;
1940 newMap->maxMyGID_ = this->maxMyGID_;
1941 newMap->minAllGID_ = this->minMyGID_;
1942 newMap->maxAllGID_ = this->maxMyGID_;
1943 newMap->firstContiguousGID_ = this->firstContiguousGID_;
1944 newMap->lastContiguousGID_ = this->lastContiguousGID_;
1945 // Since the new communicator has only one process, neither
1946 // uniformity nor contiguity have changed.
1947 newMap->uniform_ = this->uniform_;
1948 newMap->contiguous_ = this->contiguous_;
1949 // The new communicator only has one process, so the new Map is
1950 // not distributed.
1951 newMap->distributed_ = false;
1952 newMap->lgMap_ = this->lgMap_;
1953 newMap->lgMapHost_ = this->lgMapHost_;
1954 newMap->glMap_ = this->glMap_;
1955 newMap->glMapHost_ = this->glMapHost_;
1956 // It's OK not to initialize the new Map's Directory.
1957 // This is initialized lazily, on first call to getRemoteIndexList.
1958
1959 return newMap;
1960 }
1961 else { // newComm->getSize() != 1
1962 // Even if the original Map is contiguous, the new Map might not
1963 // be, especially if the excluded processes have ranks != 0 or
1964 // newComm->getSize()-1. The common case for this method is to
1965 // exclude many (possibly even all but one) processes, so it
1966 // likely doesn't pay to do the global communication (over the
1967 // original communicator) to figure out whether we can optimize
1968 // the result Map. Thus, we just set up the result Map as
1969 // noncontiguous.
1970 //
1971 // TODO (mfh 07 Oct 2016) We don't actually need to reconstruct
1972 // the global-to-local table, etc. Optimize this code path to
1973 // avoid unnecessary local work.
1974
1975 // Make Map (re)compute the global number of elements.
1976 const GST RECOMPUTE = Tpetra::Details::OrdinalTraits<GST>::invalid ();
1977 // TODO (mfh 07 Oct 2016) If we use any Map constructor, we have
1978 // to use the noncontiguous Map constructor, since the new Map
1979 // might not be contiguous. Even if the old Map was contiguous,
1980 // some process in the "middle" might have been excluded. If we
1981 // want to avoid local work, we either have to do the setup by
1982 // hand, or write a new Map constructor.
1983#if 1
1984 // The disabled code here throws the following exception in
1985 // Map's replaceCommWithSubset test:
1986 //
1987 // Throw test that evaluated to true: static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ())
1988 // 10:
1989 // 10: Tpetra::Details::FixedHashTable: The number of keys -3 is greater than the maximum representable ValueType value 2147483647. This means that it is not possible to use this constructor.
1990 // 10: Process 3: origComm->replaceCommWithSubset(subsetComm) threw an exception: /scratch/prj/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Details_FixedHashTable_def.hpp:1044:
1991
1992 auto lgMap = this->getMyGlobalIndices ();
1993 using size_type =
1994 typename std::decay<decltype (lgMap.extent (0)) >::type;
1995 const size_type lclNumInds =
1996 static_cast<size_type> (this->getLocalNumElements ());
1997 using Teuchos::TypeNameTraits;
1998 TEUCHOS_TEST_FOR_EXCEPTION
1999 (lgMap.extent (0) != lclNumInds, std::logic_error,
2000 "Tpetra::Map::replaceCommWithSubset: Result of getMyGlobalIndices() "
2001 "has length " << lgMap.extent (0) << " (of type " <<
2002 TypeNameTraits<size_type>::name () << ") != this->getLocalNumElements()"
2003 " = " << this->getLocalNumElements () << ". The latter, upon being "
2004 "cast to size_type = " << TypeNameTraits<size_type>::name () << ", "
2005 "becomes " << lclNumInds << ". Please report this bug to the Tpetra "
2006 "developers.");
2007#else
2008 Teuchos::ArrayView<const GO> lgMap = this->getLocalElementList ();
2009#endif // 1
2010
2011 const GO indexBase = this->getIndexBase ();
2012 // map stores HostSpace of CudaSpace but constructor is still CudaUVMSpace
2013 auto lgMap_device = Kokkos::create_mirror_view_and_copy(device_type(), lgMap);
2014 return rcp (new map_type (RECOMPUTE, lgMap_device, indexBase, newComm));
2015 }
2016 }
2017
2018 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2019 Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
2021 removeEmptyProcesses () const
2022 {
2023 using Teuchos::Comm;
2024 using Teuchos::null;
2025 using Teuchos::outArg;
2026 using Teuchos::RCP;
2027 using Teuchos::rcp;
2028 using Teuchos::REDUCE_MIN;
2029 using Teuchos::reduceAll;
2030
2031 // Create the new communicator. split() returns a valid
2032 // communicator on all processes. On processes where color == 0,
2033 // ignore the result. Passing key == 0 tells MPI to order the
2034 // processes in the new communicator by their rank in the old
2035 // communicator.
2036 const int color = (numLocalElements_ == 0) ? 0 : 1;
2037 // MPI_Comm_split must be called collectively over the original
2038 // communicator. We can't just call it on processes with color
2039 // one, even though we will ignore its result on processes with
2040 // color zero.
2041 RCP<const Comm<int> > newComm = comm_->split (color, 0);
2042 if (color == 0) {
2043 newComm = null;
2044 }
2045
2046 // Create the Map to return.
2047 if (newComm.is_null ()) {
2048 return null; // my process does not participate in the new Map
2049 } else {
2050 RCP<Map> map = rcp (new Map ());
2051
2052 map->comm_ = newComm;
2053 map->indexBase_ = indexBase_;
2054 map->numGlobalElements_ = numGlobalElements_;
2055 map->numLocalElements_ = numLocalElements_;
2056 map->minMyGID_ = minMyGID_;
2057 map->maxMyGID_ = maxMyGID_;
2058 map->minAllGID_ = minAllGID_;
2059 map->maxAllGID_ = maxAllGID_;
2060 map->firstContiguousGID_= firstContiguousGID_;
2061 map->lastContiguousGID_ = lastContiguousGID_;
2062
2063 // Uniformity and contiguity have not changed. The directory
2064 // has changed, but we've taken care of that above.
2065 map->uniform_ = uniform_;
2066 map->contiguous_ = contiguous_;
2067
2068 // If the original Map was NOT distributed, then the new Map
2069 // cannot be distributed.
2070 //
2071 // If the number of processes in the new communicator is 1, then
2072 // the new Map is not distributed.
2073 //
2074 // Otherwise, we have to check the new Map using an all-reduce
2075 // (over the new communicator). For example, the original Map
2076 // may have had some processes with zero elements, and all other
2077 // processes with the same number of elements as in the whole
2078 // Map. That Map is technically distributed, because of the
2079 // processes with zero elements. Removing those processes would
2080 // make the new Map locally replicated.
2081 if (! distributed_ || newComm->getSize () == 1) {
2082 map->distributed_ = false;
2083 } else {
2084 const int iOwnAllGids = (numLocalElements_ == numGlobalElements_) ? 1 : 0;
2085 int allProcsOwnAllGids = 0;
2086 reduceAll<int, int> (*newComm, REDUCE_MIN, iOwnAllGids, outArg (allProcsOwnAllGids));
2087 map->distributed_ = (allProcsOwnAllGids == 1) ? false : true;
2088 }
2089
2090 map->lgMap_ = lgMap_;
2091 map->lgMapHost_ = lgMapHost_;
2092 map->glMap_ = glMap_;
2093 map->glMapHost_ = glMapHost_;
2094
2095 // Map's default constructor creates an uninitialized Directory.
2096 // The Directory will be initialized on demand in
2097 // getRemoteIndexList().
2098 //
2099 // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
2100 // directory more efficiently than just recreating it. If
2101 // directory recreation proves a bottleneck, we can always
2102 // revisit this. On the other hand, Directory creation is only
2103 // collective over the new, presumably much smaller
2104 // communicator, so it may not be worth the effort to optimize.
2105
2106 return map;
2107 }
2108 }
2109
2110 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2111 void
2113 {
2114 TEUCHOS_TEST_FOR_EXCEPTION(
2115 directory_.is_null (), std::logic_error, "Tpetra::Map::setupDirectory: "
2116 "The Directory is null. "
2117 "Please report this bug to the Tpetra developers.");
2118
2119 // Only create the Directory if it hasn't been created yet.
2120 // This is a collective operation.
2121 if (! directory_->initialized ()) {
2122 directory_->initialize (*this);
2123 }
2124 }
2125
2126 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2129 getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
2130 const Teuchos::ArrayView<int>& PIDs,
2131 const Teuchos::ArrayView<LocalOrdinal>& LIDs) const
2132 {
2133 using Tpetra::Details::OrdinalTraits;
2135 using std::endl;
2136 using size_type = Teuchos::ArrayView<int>::size_type;
2137
2138 const bool verbose = Details::Behavior::verbose("Map");
2139 const size_t maxNumToPrint = verbose ?
2141 std::unique_ptr<std::string> prefix;
2142 if (verbose) {
2143 prefix = Details::createPrefix(comm_.getRawPtr(),
2144 "Map", "getRemoteIndexList(GIDs,PIDs,LIDs)");
2145 std::ostringstream os;
2146 os << *prefix << "Start: ";
2147 verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2148 os << endl;
2149 std::cerr << os.str();
2150 }
2151
2152 // Empty Maps (i.e., containing no indices on any processes in the
2153 // Map's communicator) are perfectly valid. In that case, if the
2154 // input GID list is nonempty, we fill the output arrays with
2155 // invalid values, and return IDNotPresent to notify the caller.
2156 // It's perfectly valid to give getRemoteIndexList GIDs that the
2157 // Map doesn't own. SubmapImport test 2 needs this functionality.
2158 if (getGlobalNumElements () == 0) {
2159 if (GIDs.size () == 0) {
2160 if (verbose) {
2161 std::ostringstream os;
2162 os << *prefix << "Done; both Map & input are empty" << endl;
2163 std::cerr << os.str();
2164 }
2165 return AllIDsPresent; // trivially
2166 }
2167 else {
2168 if (verbose) {
2169 std::ostringstream os;
2170 os << *prefix << "Done: Map is empty on all processes, "
2171 "so all output PIDs & LIDs are invalid (-1)." << endl;
2172 std::cerr << os.str();
2173 }
2174 for (size_type k = 0; k < PIDs.size (); ++k) {
2175 PIDs[k] = OrdinalTraits<int>::invalid ();
2176 }
2177 for (size_type k = 0; k < LIDs.size (); ++k) {
2178 LIDs[k] = OrdinalTraits<LocalOrdinal>::invalid ();
2179 }
2180 return IDNotPresent;
2181 }
2182 }
2183
2184 // getRemoteIndexList must be called collectively, and Directory
2185 // initialization is collective too, so it's OK to initialize the
2186 // Directory on demand.
2187
2188 if (verbose) {
2189 std::ostringstream os;
2190 os << *prefix << "Call setupDirectory" << endl;
2191 std::cerr << os.str();
2192 }
2193 setupDirectory();
2194 if (verbose) {
2195 std::ostringstream os;
2196 os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2197 std::cerr << os.str();
2198 }
2199 const Tpetra::LookupStatus retVal =
2200 directory_->getDirectoryEntries (*this, GIDs, PIDs, LIDs);
2201 if (verbose) {
2202 std::ostringstream os;
2203 os << *prefix << "Done; getDirectoryEntries returned "
2204 << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2205 << "; ";
2206 verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2207 os << ", ";
2208 verbosePrintArray(os, LIDs, "LIDs", maxNumToPrint);
2209 os << endl;
2210 std::cerr << os.str();
2211 }
2212 return retVal;
2213 }
2214
2215 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2218 getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal> & GIDs,
2219 const Teuchos::ArrayView<int> & PIDs) const
2220 {
2222 using std::endl;
2223
2224 const bool verbose = Details::Behavior::verbose("Map");
2225 const size_t maxNumToPrint = verbose ?
2227 std::unique_ptr<std::string> prefix;
2228 if (verbose) {
2229 prefix = Details::createPrefix(comm_.getRawPtr(),
2230 "Map", "getRemoteIndexList(GIDs,PIDs)");
2231 std::ostringstream os;
2232 os << *prefix << "Start: ";
2233 verbosePrintArray(os, GIDs, "GIDs", maxNumToPrint);
2234 os << endl;
2235 std::cerr << os.str();
2236 }
2237
2238 if (getGlobalNumElements () == 0) {
2239 if (GIDs.size () == 0) {
2240 if (verbose) {
2241 std::ostringstream os;
2242 os << *prefix << "Done; both Map & input are empty" << endl;
2243 std::cerr << os.str();
2244 }
2245 return AllIDsPresent; // trivially
2246 }
2247 else {
2248 if (verbose) {
2249 std::ostringstream os;
2250 os << *prefix << "Done: Map is empty on all processes, "
2251 "so all output PIDs are invalid (-1)." << endl;
2252 std::cerr << os.str();
2253 }
2254 for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size (); ++k) {
2255 PIDs[k] = Tpetra::Details::OrdinalTraits<int>::invalid ();
2256 }
2257 return IDNotPresent;
2258 }
2259 }
2260
2261 // getRemoteIndexList must be called collectively, and Directory
2262 // initialization is collective too, so it's OK to initialize the
2263 // Directory on demand.
2264
2265 if (verbose) {
2266 std::ostringstream os;
2267 os << *prefix << "Call setupDirectory" << endl;
2268 std::cerr << os.str();
2269 }
2270 setupDirectory();
2271 if (verbose) {
2272 std::ostringstream os;
2273 os << *prefix << "Call directory_->getDirectoryEntries" << endl;
2274 std::cerr << os.str();
2275 }
2276 const Tpetra::LookupStatus retVal =
2277 directory_->getDirectoryEntries(*this, GIDs, PIDs);
2278 if (verbose) {
2279 std::ostringstream os;
2280 os << *prefix << "Done; getDirectoryEntries returned "
2281 << (retVal == IDNotPresent ? "IDNotPresent" : "AllIDsPresent")
2282 << "; ";
2283 verbosePrintArray(os, PIDs, "PIDs", maxNumToPrint);
2284 os << endl;
2285 std::cerr << os.str();
2286 }
2287 return retVal;
2288 }
2289
2290 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2291 Teuchos::RCP<const Teuchos::Comm<int> >
2293 return comm_;
2294 }
2295
2296 template <class LocalOrdinal,class GlobalOrdinal, class Node>
2297 bool
2299 checkIsDist() const
2300 {
2301 using Teuchos::as;
2302 using Teuchos::outArg;
2303 using Teuchos::REDUCE_MIN;
2304 using Teuchos::reduceAll;
2305 using std::endl;
2306
2307 const bool verbose = Details::Behavior::verbose("Map");
2308 std::unique_ptr<std::string> prefix;
2309 if (verbose) {
2310 prefix = Details::createPrefix(
2311 comm_.getRawPtr(), "Map", "checkIsDist");
2312 std::ostringstream os;
2313 os << *prefix << "Start" << endl;
2314 std::cerr << os.str();
2315 }
2316
2317 bool global = false;
2318 if (comm_->getSize () > 1) {
2319 // The communicator has more than one process, but that doesn't
2320 // necessarily mean the Map is distributed.
2321 int localRep = 0;
2322 if (numGlobalElements_ == as<global_size_t> (numLocalElements_)) {
2323 // The number of local elements on this process equals the
2324 // number of global elements.
2325 //
2326 // NOTE (mfh 22 Nov 2011) Does this still work if there were
2327 // duplicates in the global ID list on input (the third Map
2328 // constructor), so that the number of local elements (which
2329 // are not duplicated) on this process could be less than the
2330 // number of global elements, even if this process owns all
2331 // the elements?
2332 localRep = 1;
2333 }
2334 int allLocalRep;
2335 reduceAll<int, int> (*comm_, REDUCE_MIN, localRep, outArg (allLocalRep));
2336 if (allLocalRep != 1) {
2337 // At least one process does not own all the elements.
2338 // This makes the Map a distributed Map.
2339 global = true;
2340 }
2341 }
2342 // If the communicator has only one process, then the Map is not
2343 // distributed.
2344
2345 if (verbose) {
2346 std::ostringstream os;
2347 os << *prefix << "Done; global=" << (global ? "true" : "false")
2348 << endl;
2349 std::cerr << os.str();
2350 }
2351 return global;
2352 }
2353
2354} // namespace Tpetra
2355
2356template <class LocalOrdinal, class GlobalOrdinal>
2357Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2358Tpetra::createLocalMap (const size_t numElements,
2359 const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2360{
2361 typedef LocalOrdinal LO;
2362 typedef GlobalOrdinal GO;
2363 using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2364 return createLocalMapWithNode<LO, GO, NT> (numElements, comm);
2365}
2366
2367template <class LocalOrdinal, class GlobalOrdinal>
2368Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2369Tpetra::createUniformContigMap (const global_size_t numElements,
2370 const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2371{
2372 typedef LocalOrdinal LO;
2373 typedef GlobalOrdinal GO;
2374 using NT = typename ::Tpetra::Map<LO, GO>::node_type;
2375 return createUniformContigMapWithNode<LO, GO, NT> (numElements, comm);
2376}
2377
2378template <class LocalOrdinal, class GlobalOrdinal, class Node>
2379Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2380Tpetra::createUniformContigMapWithNode (const global_size_t numElements,
2381 const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2382)
2383{
2384 using Teuchos::rcp;
2386 const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2387
2388 return rcp (new map_type (numElements, indexBase, comm, GloballyDistributed));
2389}
2390
2391template <class LocalOrdinal, class GlobalOrdinal, class Node>
2392Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2393Tpetra::createLocalMapWithNode (const size_t numElements,
2394 const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2395)
2396{
2398 using Teuchos::rcp;
2400 const GlobalOrdinal indexBase = 0;
2401 const global_size_t globalNumElts = static_cast<global_size_t> (numElements);
2402
2403 return rcp (new map_type (globalNumElts, indexBase, comm, LocallyReplicated));
2404}
2405
2406template <class LocalOrdinal, class GlobalOrdinal, class Node>
2407Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2409 const size_t localNumElements,
2410 const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2411)
2412{
2413 using Teuchos::rcp;
2415 const GlobalOrdinal indexBase = 0;
2416
2417 return rcp (new map_type (numElements, localNumElements, indexBase, comm));
2418}
2419
2420template <class LocalOrdinal, class GlobalOrdinal>
2421Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2423 const size_t localNumElements,
2424 const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2425{
2426 typedef LocalOrdinal LO;
2427 typedef GlobalOrdinal GO;
2428 using NT = typename Tpetra::Map<LO, GO>::node_type;
2429
2430 return Tpetra::createContigMapWithNode<LO, GO, NT> (numElements, localNumElements, comm);
2431}
2432
2433template <class LocalOrdinal, class GlobalOrdinal>
2434Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal> >
2435Tpetra::createNonContigMap(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2436 const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2437{
2438 typedef LocalOrdinal LO;
2439 typedef GlobalOrdinal GO;
2440 using NT = typename Tpetra::Map<LO, GO>::node_type;
2441
2442 return Tpetra::createNonContigMapWithNode<LO, GO, NT> (elementList, comm);
2443}
2444
2445template <class LocalOrdinal, class GlobalOrdinal, class Node>
2446Teuchos::RCP< const Tpetra::Map<LocalOrdinal, GlobalOrdinal, Node> >
2447Tpetra::createNonContigMapWithNode (const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2448 const Teuchos::RCP<const Teuchos::Comm<int> >& comm
2449)
2450{
2451 using Teuchos::rcp;
2453 using GST = Tpetra::global_size_t;
2454 const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2455 // FIXME (mfh 22 Jul 2016) This is what I found here, but maybe this
2456 // shouldn't be zero, given that the index base is supposed to equal
2457 // the globally min global index?
2458 const GlobalOrdinal indexBase = 0;
2459
2460 return rcp (new map_type (INV, elementList, indexBase, comm));
2461}
2462
2463template<class LO, class GO, class NT>
2464Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >
2465Tpetra::createOneToOne (const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& M)
2466{
2467 using Details::verbosePrintArray;
2468 using Teuchos::Array;
2469 using Teuchos::ArrayView;
2470 using Teuchos::as;
2471 using Teuchos::rcp;
2472 using std::cerr;
2473 using std::endl;
2474 using map_type = Tpetra::Map<LO, GO, NT>;
2475 using GST = global_size_t;
2476
2477 const bool verbose = Details::Behavior::verbose("Map");
2478 std::unique_ptr<std::string> prefix;
2479 if (verbose) {
2480 auto comm = M.is_null() ? Teuchos::null : M->getComm();
2481 prefix = Details::createPrefix(
2482 comm.getRawPtr(), "createOneToOne(Map)");
2483 std::ostringstream os;
2484 os << *prefix << "Start" << endl;
2485 cerr << os.str();
2486 }
2487 const size_t maxNumToPrint = verbose ?
2488 Details::Behavior::verbosePrintCountThreshold() : size_t(0);
2489 const GST GINV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2490 const int myRank = M->getComm ()->getRank ();
2491
2492 // Bypasses for special cases where either M is known to be
2493 // one-to-one, or the one-to-one version of M is easy to compute.
2494 // This is why we take M as an RCP, not as a const reference -- so
2495 // that we can return M itself if it is 1-to-1.
2496 if (! M->isDistributed ()) {
2497 // For a locally replicated Map, we assume that users want to push
2498 // all the GIDs to Process 0.
2499
2500 // mfh 05 Nov 2013: getGlobalNumElements() does indeed return what
2501 // you think it should return, in this special case of a locally
2502 // replicated contiguous Map.
2503 const GST numGlobalEntries = M->getGlobalNumElements ();
2504 if (M->isContiguous()) {
2505 const size_t numLocalEntries =
2506 (myRank == 0) ? as<size_t>(numGlobalEntries) : size_t(0);
2507 if (verbose) {
2508 std::ostringstream os;
2509 os << *prefix << "Input is locally replicated & contiguous; "
2510 "numLocalEntries=" << numLocalEntries << endl;
2511 cerr << os.str ();
2512 }
2513 auto retMap =
2514 rcp(new map_type(numGlobalEntries, numLocalEntries,
2515 M->getIndexBase(), M->getComm()));
2516 if (verbose) {
2517 std::ostringstream os;
2518 os << *prefix << "Done" << endl;
2519 cerr << os.str ();
2520 }
2521 return retMap;
2522 }
2523 else {
2524 if (verbose) {
2525 std::ostringstream os;
2526 os << *prefix << "Input is locally replicated & noncontiguous"
2527 << endl;
2528 cerr << os.str ();
2529 }
2530 ArrayView<const GO> myGids =
2531 (myRank == 0) ? M->getLocalElementList() : Teuchos::null;
2532 auto retMap =
2533 rcp(new map_type(GINV, myGids(), M->getIndexBase(),
2534 M->getComm()));
2535 if (verbose) {
2536 std::ostringstream os;
2537 os << *prefix << "Done" << endl;
2538 cerr << os.str ();
2539 }
2540 return retMap;
2541 }
2542 }
2543 else if (M->isContiguous ()) {
2544 if (verbose) {
2545 std::ostringstream os;
2546 os << *prefix << "Input is distributed & contiguous" << endl;
2547 cerr << os.str ();
2548 }
2549 // Contiguous, distributed Maps are one-to-one by construction.
2550 // (Locally replicated Maps can be contiguous.)
2551 return M;
2552 }
2553 else {
2554 if (verbose) {
2555 std::ostringstream os;
2556 os << *prefix << "Input is distributed & noncontiguous" << endl;
2557 cerr << os.str ();
2558 }
2560 const size_t numMyElems = M->getLocalNumElements ();
2561 ArrayView<const GO> myElems = M->getLocalElementList ();
2562 Array<int> owner_procs_vec (numMyElems);
2563
2564 if (verbose) {
2565 std::ostringstream os;
2566 os << *prefix << "Call Directory::getDirectoryEntries: ";
2567 verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2568 os << endl;
2569 cerr << os.str();
2570 }
2571 directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2572 if (verbose) {
2573 std::ostringstream os;
2574 os << *prefix << "getDirectoryEntries result: ";
2575 verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2576 os << endl;
2577 cerr << os.str();
2578 }
2579
2580 Array<GO> myOwned_vec (numMyElems);
2581 size_t numMyOwnedElems = 0;
2582 for (size_t i = 0; i < numMyElems; ++i) {
2583 const GO GID = myElems[i];
2584 const int owner = owner_procs_vec[i];
2585
2586 if (myRank == owner) {
2587 myOwned_vec[numMyOwnedElems++] = GID;
2588 }
2589 }
2590 myOwned_vec.resize (numMyOwnedElems);
2591
2592 if (verbose) {
2593 std::ostringstream os;
2594 os << *prefix << "Create Map: ";
2595 verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2596 os << endl;
2597 cerr << os.str();
2598 }
2599 auto retMap = rcp(new map_type(GINV, myOwned_vec(),
2600 M->getIndexBase(), M->getComm()));
2601 if (verbose) {
2602 std::ostringstream os;
2603 os << *prefix << "Done" << endl;
2604 cerr << os.str();
2605 }
2606 return retMap;
2607 }
2608}
2609
2610template<class LocalOrdinal, class GlobalOrdinal, class Node>
2611Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2614{
2615 using Details::Behavior;
2616 using Details::verbosePrintArray;
2617 using Teuchos::Array;
2618 using Teuchos::ArrayView;
2619 using Teuchos::RCP;
2620 using Teuchos::rcp;
2621 using Teuchos::toString;
2622 using std::cerr;
2623 using std::endl;
2624 using LO = LocalOrdinal;
2625 using GO = GlobalOrdinal;
2626 using map_type = Tpetra::Map<LO, GO, Node>;
2627
2628 const bool verbose = Behavior::verbose("Map");
2629 std::unique_ptr<std::string> prefix;
2630 if (verbose) {
2631 auto comm = M.is_null() ? Teuchos::null : M->getComm();
2632 prefix = Details::createPrefix(
2633 comm.getRawPtr(), "createOneToOne(Map,TieBreak)");
2634 std::ostringstream os;
2635 os << *prefix << "Start" << endl;
2636 cerr << os.str();
2637 }
2638 const size_t maxNumToPrint = verbose ?
2639 Behavior::verbosePrintCountThreshold() : size_t(0);
2640
2641 // FIXME (mfh 20 Feb 2013) We should have a bypass for contiguous
2642 // Maps (which are 1-to-1 by construction).
2643
2645 if (verbose) {
2646 std::ostringstream os;
2647 os << *prefix << "Initialize Directory" << endl;
2648 cerr << os.str ();
2649 }
2650 directory.initialize (*M, tie_break);
2651 if (verbose) {
2652 std::ostringstream os;
2653 os << *prefix << "Done initializing Directory" << endl;
2654 cerr << os.str ();
2655 }
2656 size_t numMyElems = M->getLocalNumElements ();
2657 ArrayView<const GO> myElems = M->getLocalElementList ();
2658 Array<int> owner_procs_vec (numMyElems);
2659 if (verbose) {
2660 std::ostringstream os;
2661 os << *prefix << "Call Directory::getDirectoryEntries: ";
2662 verbosePrintArray(os, myElems, "GIDs", maxNumToPrint);
2663 os << endl;
2664 cerr << os.str();
2665 }
2666 directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2667 if (verbose) {
2668 std::ostringstream os;
2669 os << *prefix << "getDirectoryEntries result: ";
2670 verbosePrintArray(os, owner_procs_vec, "PIDs", maxNumToPrint);
2671 os << endl;
2672 cerr << os.str();
2673 }
2674
2675 const int myRank = M->getComm()->getRank();
2676 Array<GO> myOwned_vec (numMyElems);
2677 size_t numMyOwnedElems = 0;
2678 for (size_t i = 0; i < numMyElems; ++i) {
2679 const GO GID = myElems[i];
2680 const int owner = owner_procs_vec[i];
2681 if (myRank == owner) {
2682 myOwned_vec[numMyOwnedElems++] = GID;
2683 }
2684 }
2685 myOwned_vec.resize (numMyOwnedElems);
2686
2687 // FIXME (mfh 08 May 2014) The above Directory should be perfectly
2688 // valid for the new Map. Why can't we reuse it?
2689 const global_size_t GINV =
2690 Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
2691 if (verbose) {
2692 std::ostringstream os;
2693 os << *prefix << "Create Map: ";
2694 verbosePrintArray(os, myOwned_vec, "GIDs", maxNumToPrint);
2695 os << endl;
2696 cerr << os.str();
2697 }
2698 RCP<const map_type> retMap
2699 (new map_type (GINV, myOwned_vec (), M->getIndexBase (),
2700 M->getComm ()));
2701 if (verbose) {
2702 std::ostringstream os;
2703 os << *prefix << "Done" << endl;
2704 cerr << os.str();
2705 }
2706 return retMap;
2707}
2708
2709//
2710// Explicit instantiation macro
2711//
2712// Must be expanded from within the Tpetra namespace!
2713//
2714
2716
2717#define TPETRA_MAP_INSTANT(LO,GO,NODE) \
2718 \
2719 template class Map< LO , GO , NODE >; \
2720 \
2721 template Teuchos::RCP< const Map<LO,GO,NODE> > \
2722 createLocalMapWithNode<LO,GO,NODE> (const size_t numElements, \
2723 const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2724 \
2725 template Teuchos::RCP< const Map<LO,GO,NODE> > \
2726 createContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2727 const size_t localNumElements, \
2728 const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2729 \
2730 template Teuchos::RCP< const Map<LO,GO,NODE> > \
2731 createNonContigMapWithNode(const Teuchos::ArrayView<const GO> &elementList, \
2732 const Teuchos::RCP<const Teuchos::Comm<int> > &comm); \
2733 \
2734 template Teuchos::RCP< const Map<LO,GO,NODE> > \
2735 createUniformContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2736 const Teuchos::RCP< const Teuchos::Comm< int > >& comm); \
2737 \
2738 template Teuchos::RCP<const Map<LO,GO,NODE> > \
2739 createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M); \
2740 \
2741 template Teuchos::RCP<const Map<LO,GO,NODE> > \
2742 createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M, \
2743 const Tpetra::Details::TieBreak<LO,GO>& tie_break); \
2744
2745
2747#define TPETRA_MAP_INSTANT_DEFAULTNODE(LO,GO) \
2748 template Teuchos::RCP< const Map<LO,GO> > \
2749 createLocalMap<LO,GO>( const size_t, const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2750 \
2751 template Teuchos::RCP< const Map<LO,GO> > \
2752 createContigMap<LO,GO>( global_size_t, size_t, \
2753 const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2754 \
2755 template Teuchos::RCP< const Map<LO,GO> > \
2756 createNonContigMap(const Teuchos::ArrayView<const GO> &, \
2757 const Teuchos::RCP<const Teuchos::Comm<int> > &); \
2758 \
2759 template Teuchos::RCP< const Map<LO,GO> > \
2760 createUniformContigMap<LO,GO>( const global_size_t, \
2761 const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2762
2763#endif // TPETRA_MAP_DEF_HPP
Functions for initializing and finalizing Tpetra.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Declaration of Tpetra::Details::extractMpiCommFromTeuchos.
Declaration of a function that prints strings from each process.
Declaration of Tpetra::Details::initializeKokkos.
Declaration of Tpetra::Details::printOnce.
Stand-alone utility functions and macros.
Description of Tpetra's behavior.
static bool debug()
Whether Tpetra is in debug mode.
static bool verbose()
Whether Tpetra is in verbose mode.
static size_t verbosePrintCountThreshold()
Number of entries below which arrays, lists, etc. will be printed in debug mode.
"Local" part of Map suitable for Kokkos kernels.
Interface for breaking ties in ownership.
Implement mapping from global ID to process ID and local ID.
LookupStatus getDirectoryEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs) const
Given a global ID list, return the list of their owning process IDs.
void initialize(const map_type &map)
Initialize the Directory with its Map.
A parallel distribution of indices over processes.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
bool isOneToOne() const
Whether the Map is one to one.
std::string description() const
Implementation of Teuchos::Describable.
Teuchos::ArrayView< const global_ordinal_type > getLocalElementList() const
Return a NONOWNING view of the global indices owned by this process.
global_ordinal_type getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
global_ordinal_type getGlobalElement(local_ordinal_type localIndex) const
The global index corresponding to the given local index.
Node node_type
Legacy typedef that will go away at some point.
Map()
Default constructor (that does nothing).
GlobalOrdinal global_ordinal_type
The type of global indices.
LookupStatus getRemoteIndexList(const Teuchos::ArrayView< const global_ordinal_type > &GIDList, const Teuchos::ArrayView< int > &nodeIDList, const Teuchos::ArrayView< local_ordinal_type > &LIDList) const
Return the process ranks and corresponding local indices for the given global indices.
bool isNodeLocalElement(local_ordinal_type localIndex) const
Whether the given local index is valid for this Map on the calling process.
bool isUniform() const
Whether the range of global indices is uniform.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
typename device_type::execution_space execution_space
The Kokkos execution space.
LO local_ordinal_type
The type of local indices.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > removeEmptyProcesses() const
Advanced methods.
global_ordinal_type getMaxAllGlobalIndex() const
The maximum global index over all processes in the communicator.
bool isCompatible(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is compatible with this Map.
global_ordinal_type getIndexBase() const
The index base for this Map.
bool locallySameAs(const Map< local_ordinal_type, global_ordinal_type, node_type > &map) const
Is this Map locally the same as the input Map?
bool isLocallyFitted(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is locally fitted to this Map.
virtual ~Map()
Destructor (virtual for memory safety of derived classes).
global_ordinal_type getMinGlobalIndex() const
The minimum global index owned by the calling process.
local_ordinal_type getLocalElement(global_ordinal_type globalIndex) const
The local index corresponding to the given global index.
Teuchos::RCP< const Map< local_ordinal_type, global_ordinal_type, Node > > replaceCommWithSubset(const Teuchos::RCP< const Teuchos::Comm< int > > &newComm) const
Replace this Map's communicator with a subset communicator.
global_ordinal_type getMaxGlobalIndex() const
The maximum global index owned by the calling process.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Describe this object in a human-readable way to the given output stream.
bool isNodeGlobalElement(global_ordinal_type globalIndex) const
Whether the given global index is owned by this Map on the calling process.
bool isSameAs(const Map< local_ordinal_type, global_ordinal_type, Node > &map) const
True if and only if map is identical to this Map.
size_t getLocalNumElements() const
The number of elements belonging to the calling process.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
global_indices_array_type getMyGlobalIndices() const
Return a view of the global indices owned by this process.
typename Node::device_type device_type
This class' Kokkos::Device specialization.
Implementation details of Tpetra.
void verbosePrintArray(std::ostream &out, const ArrayType &x, const char name[], const size_t maxNumToPrint)
Print min(x.size(), maxNumToPrint) entries of x.
void printOnce(std::ostream &out, const std::string &s, const Teuchos::Comm< int > *comm)
Print on one process of the given communicator, or at least try to do so (if MPI is not initialized).
std::unique_ptr< std::string > createPrefix(const int myRank, const char prefix[])
Create string prefix for each line of verbose output.
bool congruent(const Teuchos::Comm< int > &comm1, const Teuchos::Comm< int > &comm2)
Whether the two communicators are congruent.
Definition: Tpetra_Util.cpp:64
void initializeKokkos()
Initialize Kokkos, using command-line arguments (if any) given to Teuchos::GlobalMPISession.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createLocalMap(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with the default Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createUniformContigMap(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with the default Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createNonContigMap(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a non-contiguous Map using the default Kokkos::Device type.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createUniformContigMapWithNode(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with a user-specified Kokkos Node.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createContigMap(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map using the defaul...
size_t global_size_t
Global size_t object.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified,...
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createLocalMapWithNode(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with a specified Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createNonContigMapWithNode(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a noncontiguous Map with a user-specified, possibly nondefault Kokkos Node ...
LocalGlobal
Enum for local versus global allocation of Map entries.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a (potentially) nonuniformly distributed, contiguous Map for a user-specifi...