Tpetra_Details_makeColMap_def.hpp
1#ifndef TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
2#define TPETRA_DETAILS_MAKECOLMAP_DEF_HPP
3
4// @HEADER
5// ***********************************************************************
6//
7// Tpetra: Templated Linear Algebra Services Package
8// Copyright (2008) Sandia Corporation
9//
10// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
11// the U.S. Government retains certain rights in this software.
12//
13// Redistribution and use in source and binary forms, with or without
14// modification, are permitted provided that the following conditions are
15// met:
16//
17// 1. Redistributions of source code must retain the above copyright
18// notice, this list of conditions and the following disclaimer.
19//
20// 2. Redistributions in binary form must reproduce the above copyright
21// notice, this list of conditions and the following disclaimer in the
22// documentation and/or other materials provided with the distribution.
23//
24// 3. Neither the name of the Corporation nor the names of the
25// contributors may be used to endorse or promote products derived from
26// this software without specific prior written permission.
27//
28// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
29// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
32// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
33// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
34// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
35// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
36// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
37// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
38// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39//
40// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
41//
42// ************************************************************************
43// @HEADER
44
55
56#include "Tpetra_RowGraph.hpp"
57#include "Tpetra_Util.hpp"
58#include "Teuchos_Array.hpp"
59#include "Kokkos_Bitset.hpp"
60#include <set>
61#include <vector>
62
63namespace Tpetra {
64namespace Details {
65
66template <class LO, class GO, class NT>
67int
68makeColMapImpl(Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
69 Teuchos::Array<int>& remotePIDs,
70 const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
71 size_t numLocalColGIDs,
72 size_t numRemoteColGIDs,
73 std::set<GO>& RemoteGIDSet,
74 std::vector<GO>& RemoteGIDUnorderedVector,
75 std::vector<bool>& GIDisLocal,
76 const bool sortEachProcsGids,
77 std::ostream* errStrm)
78{
79 using Teuchos::Array;
80 using Teuchos::ArrayView;
81 using Teuchos::rcp;
82 using std::endl;
83 int errCode = 0;
84 const char prefix[] = "Tpetra::Details::makeColMapImpl: ";
85 typedef ::Tpetra::Map<LO, GO, NT> map_type;
86 // Possible short-circuit for serial scenario:
87 //
88 // If all domain GIDs are present as column indices, then set
89 // ColMap=DomainMap. By construction, LocalGIDs is a subset of
90 // DomainGIDs.
91 //
92 // If we have
93 // * Number of remote GIDs is 0, so that ColGIDs == LocalGIDs,
94 // and
95 // * Number of local GIDs is number of domain GIDs
96 // then
97 // * LocalGIDs \subset DomainGIDs && size(LocalGIDs) ==
98 // size(DomainGIDs) => DomainGIDs == LocalGIDs == ColGIDs
99 // on the calling process.
100 //
101 // We will concern ourselves only with the special case of a
102 // serial DomainMap, obviating the need for communication.
103 //
104 // If
105 // * DomainMap has a serial communicator
106 // then we can set the column Map to the domain Map and
107 // return. Benefit: this graph won't need an Import object
108 // later.
109 //
110 // Note that for a serial domain Map, there can be no RemoteGIDs,
111 // because there are no remote processes. If remote GIDs appear
112 // anyway, likely explanations are:
113 // * user submitted erroneous column indices
114 // * user submitted erroneous domain Map
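  // For example (illustrative): if this is the only process, the domain Map
  // owns GIDs 0..9, and every one of those GIDs (and no others) appears as a
  // column index, then numRemoteColGIDs == 0 and numLocalColGIDs == 10 ==
  // getLocalNumElements(), so the domain Map is reused as the column Map.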
115 if (domMap->getComm ()->getSize () == 1) {
116 if (numRemoteColGIDs != 0) {
117 errCode = -2;
118 if (errStrm != NULL) {
119 *errStrm << prefix << "The domain Map only has one process, but "
120 << numRemoteColGIDs << " column "
121 << (numRemoteColGIDs != 1 ? "indices are" : "index is")
122 << " not in the domain Map. Either these indices are "
123 "invalid or the domain Map is invalid. Remember that nonsquare "
124 "matrices, or matrices where the row and range Maps differ, "
125 "require calling the version of fillComplete that takes the "
126 "domain and range Maps as input." << endl;
127 }
128 }
129 if (numLocalColGIDs == domMap->getLocalNumElements ()) {
130 colMap = domMap; // shallow copy
131 return errCode;
132 }
133 }
134
135 // Populate myColumns with a list of all column GIDs. Put
136 // locally owned (in the domain Map) GIDs at the front: they
137 // correspond to "same" and "permuted" entries between the
138 // column Map and the domain Map. Put remote GIDs at the back.
139 Array<GO> myColumns(numLocalColGIDs + numRemoteColGIDs);
140 // get pointers into myColumns for each part
141 ArrayView<GO> LocalColGIDs = myColumns (0, numLocalColGIDs);
142 ArrayView<GO> remoteColGIDs = myColumns (numLocalColGIDs, numRemoteColGIDs);
143
144 // Copy the remote GIDs into myColumns
145 if (sortEachProcsGids) {
146 // The std::set puts GIDs in increasing order.
147 std::copy (RemoteGIDSet.begin(), RemoteGIDSet.end(),
148 remoteColGIDs.begin());
149 }
150 else {
151 // Respect the originally encountered order.
152 std::copy (RemoteGIDUnorderedVector.begin(),
153 RemoteGIDUnorderedVector.end(), remoteColGIDs.begin());
154 }
155
156 // Make a list of process ranks corresponding to the remote GIDs.
157 // remotePIDs is an output argument of getRemoteIndexList below;
158 // its initial contents don't matter.
159 if (static_cast<size_t> (remotePIDs.size ()) != numRemoteColGIDs) {
160 remotePIDs.resize (numRemoteColGIDs);
161 }
162 // Look up the remote processes' ranks in the domain Map.
163 {
164 const LookupStatus stat =
165 domMap->getRemoteIndexList (remoteColGIDs, remotePIDs ());
166
167 // If any process returns IDNotPresent, then at least one of
168 // the remote indices was not present in the domain Map. This
169 // means that the Import object cannot be constructed, because
170 // of incongruity between the column Map and domain Map.
171 // This has two likely causes:
172 // - The user has made a mistake in the column indices
173 // - The user has made a mistake with respect to the domain Map
174 if (stat == IDNotPresent) {
175 if (errStrm != NULL) {
176 *errStrm << prefix << "Some column indices are not in the domain Map. "
177 "Either these column indices are invalid or the domain Map is "
178 "invalid. Likely cause: For a nonsquare matrix, you must give the "
179 "domain and range Maps as input to fillComplete." << endl;
180 }
181 // Don't return yet, because not all processes may have
182 // encountered this error state. This function ends with an
183 // all-reduce, so we have to make sure that everybody gets to
184 // that point. The resulting Map may be wrong, but at least
185 // nothing should crash.
186 errCode = -3;
187 }
188 }
189
190 // Sort incoming remote column indices by their owning process
191 // rank, so that all columns coming from a given remote process
192 // are contiguous. This means the Import's Distributor doesn't
193 // need to reorder data.
194 //
195 // NOTE (mfh 02 Sep 2014) This needs to be a stable sort, so that
196 // it respects either of the possible orderings of GIDs (sorted,
197 // or original order) specified above.
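  // For example (illustrative values): with remotePIDs = {2, 0, 2, 1} and
  // remoteColGIDs = {42, 7, 40, 13}, the sort below yields remotePIDs =
  // {0, 1, 2, 2} and remoteColGIDs = {7, 13, 42, 40}; given the stable
  // ordering the note above requires, the two GIDs owned by process 2 keep
  // their original relative order.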
198 sort2 (remotePIDs.begin (), remotePIDs.end (), remoteColGIDs.begin ());
199
200 // Copy the local GIDs into myColumns. Two cases:
201 // 1. If the number of Local column GIDs is the same as the number
202 // of Local domain GIDs, we can simply read the domain GIDs
203 // into the front part of myColumns (see the logic above in the
204 // serial short-circuit case).
205 // 2. Otherwise, we step through the GIDs of the DomainMap, checking
206 // whether each domain GID is a column GID. We do this to
207 // maintain a consistent ordering of GIDs between the columns
208 // and the domain.
209
210 const size_t numDomainElts = domMap->getLocalNumElements ();
211 if (numLocalColGIDs == numDomainElts) {
212 // If the number of locally owned GIDs is the same as the
213 // number of local domain Map elements, then the local domain
214 // Map elements are the same as the locally owned GIDs.
215 if (domMap->isContiguous ()) {
216 // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case that
217 // the domain Map is contiguous, it's more efficient to avoid
218 // calling getLocalElementList(), since that permanently
219 // constructs and caches the GID list in the contiguous Map.
220 GO curColMapGid = domMap->getMinGlobalIndex ();
221 for (size_t k = 0; k < numLocalColGIDs; ++k, ++curColMapGid) {
222 LocalColGIDs[k] = curColMapGid;
223 }
224 }
225 else {
226 ArrayView<const GO> domainElts = domMap->getLocalElementList ();
227 std::copy (domainElts.begin(), domainElts.end(), LocalColGIDs.begin());
228 }
229 }
230 else {
231 // Count the number of locally owned GIDs, both to keep track of
232 // the current array index, and as a sanity check.
233 size_t numLocalCount = 0;
234 if (domMap->isContiguous ()) {
235 // NOTE (mfh 03 Mar 2013, 02 Sep 2014) In the common case that
236 // the domain Map is contiguous, it's more efficient to avoid
237 // calling getLocalElementList(), since that permanently
238 // constructs and caches the GID list in the contiguous Map.
239 GO curColMapGid = domMap->getMinGlobalIndex ();
240 for (size_t i = 0; i < numDomainElts; ++i, ++curColMapGid) {
241 if (GIDisLocal[i]) {
242 LocalColGIDs[numLocalCount++] = curColMapGid;
243 }
244 }
245 }
246 else {
247 ArrayView<const GO> domainElts = domMap->getLocalElementList ();
248 for (size_t i = 0; i < numDomainElts; ++i) {
249 if (GIDisLocal[i]) {
250 LocalColGIDs[numLocalCount++] = domainElts[i];
251 }
252 }
253 }
254 if (numLocalCount != numLocalColGIDs) {
255 if (errStrm != NULL) {
256 *errStrm << prefix << "numLocalCount = " << numLocalCount
257 << " != numLocalColGIDs = " << numLocalColGIDs
258 << ". This should never happen. "
259 "Please report this bug to the Tpetra developers." << endl;
260 }
261 // Don't return yet, because not all processes may have
262 // encountered this error state. This function ends with an
263 // all-reduce, so we have to make sure that everybody gets to
264 // that point.
265 errCode = -4;
266 }
267 }
268
269 // FIXME (mfh 03 Apr 2013) Now would be a good time to use the
270 // information we collected above to construct the Import. In
271 // particular, building an Import requires:
272 //
273 // 1. numSameIDs (length of initial contiguous sequence of GIDs
274 // on this process that are the same in both Maps; this
275 // equals the number of domain Map elements on this process)
276 //
277 // 2. permuteToLIDs and permuteFromLIDs (both empty in this
278 // case, since there's no permutation going on; the column
279 // Map starts with the domain Map's GIDs, and immediately
280 // after them come the remote GIDs)
281 //
282 // 3. remoteGIDs (exactly those GIDs that we found out above
283 // were not in the domain Map) and remoteLIDs (which we could
284 // have gotten above by using the three-argument version of
285 // getRemoteIndexList() that computes local indices as well
286 // as process ranks, instead of the two-argument version that
287 // was used above)
288 //
289 // 4. remotePIDs (which we have from the getRemoteIndexList() call
290 // above) -- NOTE (mfh 14 Sep 2017) Fix for Trilinos GitHub
291 // Issue #628 (https://github.com/trilinos/Trilinos/issues/628)
292 // addresses this. remotePIDs is now an output argument of
293 // both this function and CrsGraph::makeColMap, and
294 // CrsGraph::makeImportExport now has an option to use that
295 // information, if available (that is, if makeColMap was
296 // actually called, which it would not be if the graph already
297 // had a column Map).
298 //
299 // 5. Apply the permutation from sorting remotePIDs to both
300 // remoteGIDs and remoteLIDs (by calling sort3 above instead of
301 // sort2), instead of just to remoteLIDs alone.
302 //
303 // 6. Everything after the sort3 call in Import::setupExport():
304 // a. Create the Distributor via createFromRecvs(), which
305 // computes exportGIDs and exportPIDs
306 // b. Compute exportLIDs from exportGIDs (by asking the
307 // source Map, in this case the domain Map, to convert
308 // global to local)
309 //
310 // Steps 1-5 come for free, since we must do that work anyway in
311 // order to compute the column Map. In particular, Step 3 is
312 // even more expensive than Step 6a, since it involves both
313 // creating and using a new Distributor object.
314 const global_size_t INV =
315 Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
316 // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
317 // be the same as the Map's min GID? If the first column is empty
318 // (contains no entries), then the column Map's min GID won't
319 // necessarily be the same as the domain Map's index base.
320 const GO indexBase = domMap->getIndexBase ();
321 colMap = rcp (new map_type (INV, myColumns, indexBase, domMap->getComm ()));
322 return errCode;
323}
324
325template <class LO, class GO, class NT>
326int
327makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& colMap,
328 Teuchos::Array<int>& remotePIDs,
329 const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& domMap,
330 const RowGraph<LO, GO, NT>& graph,
331 const bool sortEachProcsGids,
332 std::ostream* errStrm)
333{
334 using Teuchos::Array;
335 using Teuchos::ArrayView;
336 using Teuchos::rcp;
337 using std::endl;
338 typedef ::Tpetra::Map<LO, GO, NT> map_type;
339 const char prefix[] = "Tpetra::Details::makeColMap: ";
340 int errCode = 0;
341
342 // If the input domain Map or its communicator is null on the
343 // calling process, then the calling process does not participate in
344 // the returned column Map. Thus, we can set the returned column
345 // Map to null on those processes, and return immediately. This is
346 // not an error condition, as long as, on processes where domMap and
347 // its communicator are NOT null, the graph's other Maps and
348 // communicator are not null either.
349 if (domMap.is_null () || domMap->getComm ().is_null ()) {
350 colMap = Teuchos::null;
351 return errCode;
352 }
353
354 Array<GO> myColumns;
355 if (graph.isLocallyIndexed ()) {
356 colMap = graph.getColMap ();
357 // If the graph is locally indexed, it had better have a column Map.
358 // The extra check for ! graph.hasColMap() is conservative.
359 if (colMap.is_null () || ! graph.hasColMap ()) {
360 errCode = -1;
361 if (errStrm != NULL) {
362 *errStrm << prefix << "The graph is locally indexed on the calling "
363 "process, but has no column Map (either getColMap() returns null, "
364 "or hasColMap() returns false)." << endl;
365 }
366 // Under this error condition, this process will not fill
367 // myColumns. The resulting new column Map will be incorrect,
368 // but at least we won't hang, and this process will report the
369 // error.
370 }
371 else {
372 // The graph already has a column Map, and is locally indexed on
373 // the calling process. However, it may be globally indexed (or
374 // neither locally nor globally indexed) on other processes.
375 // Assume that we want to recreate the column Map.
376 if (colMap->isContiguous ()) {
377 // The number of indices on each process must fit in LO.
378 const LO numCurGids = static_cast<LO> (colMap->getLocalNumElements ());
379 myColumns.resize (numCurGids);
380 const GO myFirstGblInd = colMap->getMinGlobalIndex ();
381 for (LO k = 0; k < numCurGids; ++k) {
382 myColumns[k] = myFirstGblInd + static_cast<GO> (k);
383 }
384 }
385 else { // the column Map is NOT contiguous
386 ArrayView<const GO> curGids = graph.getColMap ()->getLocalElementList ();
387 // The number of indices on each process must fit in LO.
388 const LO numCurGids = static_cast<LO> (curGids.size ());
389 myColumns.resize (numCurGids);
390 for (LO k = 0; k < numCurGids; ++k) {
391 myColumns[k] = curGids[k];
392 }
393 } // whether the graph's current column Map is contiguous
394 } // does the graph currently have a column Map?
395 }
396 else if (graph.isGloballyIndexed ()) {
397 // Go through all the rows, finding the populated column indices.
398 //
399 // Our final list of indices for the column Map constructor will
400 // have the following properties (all of which are with respect to
401 // the calling process):
402 //
403 // 1. Indices in the domain Map go first.
404 // 2. Indices not in the domain Map follow, ordered first
405 // contiguously by their owning process rank (in the domain
406 // Map), then in increasing order within that.
407 // 3. No duplicate indices.
408 //
409 // This imitates the ordering used by Aztec(OO) and Epetra.
410 // Storing indices owned by the same process (in the domain Map)
411 // contiguously permits the use of contiguous send and receive
412 // buffers.
413 //
414 // We begin by partitioning the column indices into "local" GIDs
415 // (owned by the domain Map) and "remote" GIDs (not owned by the
416 // domain Map). We use the same order for local GIDs as the
417 // domain Map, so we track them in place in their array. We use
418 // an std::set (RemoteGIDSet) to keep track of remote GIDs, so
419 // that we don't have to merge duplicates later.
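    // For example (illustrative): if the domain Map owns {0, 1, 2, 3} on
    // this process and the rows reference column GIDs {2, 9, 0, 7}, where
    // GID 7 is owned by process 1 and GID 9 by process 2 in the domain Map,
    // then the column Map GIDs on this process come out as {0, 2, 7, 9}:
    // locals in domain-Map order, then remotes grouped by owning process.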
420 const LO LINV = Tpetra::Details::OrdinalTraits<LO>::invalid ();
421 size_t numLocalColGIDs = 0;
422 size_t numRemoteColGIDs = 0;
423
424 // GIDisLocal[lid] is true if and only if the domain Map GID with
425 // local index lid appears as a column index on this process.
426 std::vector<bool> GIDisLocal (domMap->getLocalNumElements (), false);
427 std::set<GO> RemoteGIDSet;
428 // This preserves the not-sorted Epetra order of GIDs.
429 // We only use this if sortEachProcsGids is false.
430 std::vector<GO> RemoteGIDUnorderedVector;
431
432 if (! graph.getRowMap ().is_null ()) {
433 const Tpetra::Map<LO, GO, NT>& rowMap = * (graph.getRowMap ());
434 const LO lclNumRows = rowMap.getLocalNumElements ();
435 for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
436 const GO gblRow = rowMap.getGlobalElement (lclRow);
437 typename RowGraph<LO,GO,NT>::global_inds_host_view_type rowGids;
438 graph.getGlobalRowView (gblRow, rowGids);
439
440 const LO numEnt = static_cast<LO> (rowGids.size ());
441 if (numEnt != 0) {
442 for (LO k = 0; k < numEnt; ++k) {
443 const GO gid = rowGids[k];
444 const LO lid = domMap->getLocalElement (gid);
445 if (lid != LINV) {
446 const bool alreadyFound = GIDisLocal[lid];
447 if (! alreadyFound) {
448 GIDisLocal[lid] = true;
449 ++numLocalColGIDs;
450 }
451 }
452 else {
453 const bool notAlreadyFound = RemoteGIDSet.insert (gid).second;
454 if (notAlreadyFound) { // gid did not exist in the set before
455 if (! sortEachProcsGids) {
456 // The user doesn't want to sort remote GIDs (for each
457 // remote process); they want us to keep remote GIDs
458 // in their original order. We do this by stuffing
459 // each remote GID into an array as we encounter it
460 // for the first time. The std::set helpfully tracks
461 // first encounters.
462 RemoteGIDUnorderedVector.push_back (gid);
463 }
464 ++numRemoteColGIDs;
465 }
466 }
467 } // for each entry k in row r
468 } // if row r contains > 0 entries
469 } // for each locally owned row r
470 } // if the graph has a nonnull row Map
471
472 return makeColMapImpl<LO, GO, NT>(
473 colMap, remotePIDs,
474 domMap,
475 numLocalColGIDs, numRemoteColGIDs,
476 RemoteGIDSet, RemoteGIDUnorderedVector, GIDisLocal,
477 sortEachProcsGids, errStrm);
478
479 } // if the graph is globally indexed
480 else {
481 // If we reach this point, the graph is neither locally nor
482 // globally indexed. Thus, the graph is empty on this process
483 // (per the usual legacy Petra convention), so myColumns will be
484 // left empty.
485 ; // do nothing
486 }
487
488 const global_size_t INV =
489 Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
490 // FIXME (mfh 05 Mar 2014) Doesn't the index base of a Map have to
491 // be the same as the Map's min GID? If the first column is empty
492 // (contains no entries), then the column Map's min GID won't
493 // necessarily be the same as the domain Map's index base.
494 const GO indexBase = domMap->getIndexBase ();
495 colMap = rcp (new map_type (INV, myColumns, indexBase, domMap->getComm ()));
496 return errCode;
497}
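// Example (illustrative sketch, not part of the original file): a caller
// such as CrsGraph::makeColMap would use the graph overload roughly as
// follows, assuming a globally indexed graph `G` and its intended domain
// Map `domMap` (both hypothetical names):
//
//   Teuchos::RCP<const Tpetra::Map<LO, GO, NT> > colMap;
//   Teuchos::Array<int> remotePIDs;
//   std::ostringstream errMsg;
//   const int errCode =
//     Tpetra::Details::makeColMap (colMap, remotePIDs, domMap, G,
//                                  true /* sortEachProcsGids */, &errMsg);
//   if (errCode != 0) {
//     // Report errMsg.str (); per the comments above, the returned Map may
//     // be wrong, but nothing should crash.
//   }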
498
499template<typename GOView, typename bitset_t>
500struct GatherPresentEntries
501{
502 using GO = typename GOView::non_const_value_type;
503
504 GatherPresentEntries(GO minGID_, const GOView& gids_, const bitset_t& present_)
505 : minGID(minGID_), gids(gids_), present(present_)
506 {}
507
508 KOKKOS_INLINE_FUNCTION void operator()(const GO i) const
509 {
510 present.set(gids(i) - minGID);
511 }
512
513 GO minGID;
514 GOView gids;
515 bitset_t present;
516};
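// For example (illustrative values): with minGID = 100 and gids = {100, 103,
// 100}, running this functor over i = 0..2 sets bits 0 and 3 of `present`;
// the duplicate entry simply sets bit 0 again.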
517
518template<typename LO, typename GO, typename device_t, typename LocalMapType, typename const_bitset_t, bool doingRemotes>
519struct ListGIDs
520{
521 using mem_space = typename device_t::memory_space;
522 using GOView = Kokkos::View<GO*, mem_space>;
523 using SingleView = Kokkos::View<GO, mem_space>;
524
525 ListGIDs(GO minGID_, GOView& gidList_, SingleView& numElems_, const_bitset_t& present_, const LocalMapType& localDomainMap_)
526 : minGID(minGID_), gidList(gidList_), numElems(numElems_), present(present_), localDomainMap(localDomainMap_)
527 {}
528
529 KOKKOS_INLINE_FUNCTION void operator()(const GO i, GO& lcount, const bool finalPass) const
530 {
531 bool isRemote = localDomainMap.getLocalElement(i + minGID) == ::Tpetra::Details::OrdinalTraits<LO>::invalid();
532 if(present.test(i) && doingRemotes == isRemote)
533 {
534 if(finalPass)
535 {
536 //lcount is the index where this GID should be inserted in gidList.
537 gidList(lcount) = minGID + i;
538 }
539 lcount++;
540 }
541 if((i == static_cast<GO>(present.size() - 1)) && finalPass)
542 {
543 //Set the number of inserted indices in a single-element view
544 numElems() = lcount;
545 }
546 }
547
548 GO minGID;
549 GOView gidList;
550 SingleView numElems;
551 const_bitset_t present;
552 const LocalMapType localDomainMap;
553};
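// For example (illustrative values, continuing the sketch above): if bits 0
// and 3 of `present` are set, minGID = 100, GID 100 is owned locally in the
// domain Map, and GID 103 is not, then the doingRemotes == false scan writes
// gidList = {100} and sets numElems() = 1, while the doingRemotes == true
// scan writes gidList = {103} and sets numElems() = 1.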
554
555template<typename GO, typename mem_space>
556struct MinMaxReduceFunctor
557{
558 using MinMaxValue = typename Kokkos::MinMax<GO>::value_type;
559 using GOView = Kokkos::View<GO*, mem_space>;
560
561 MinMaxReduceFunctor(const GOView& gids_)
562 : gids(gids_)
563 {}
564
565 KOKKOS_INLINE_FUNCTION void operator()(const GO i, MinMaxValue& lminmax) const
566 {
567 GO gid = gids(i);
568 if(gid < lminmax.min_val)
569 lminmax.min_val = gid;
570 if(gid > lminmax.max_val)
571 lminmax.max_val = gid;
572 };
573
574 const GOView gids;
575};
576
577template <class LO, class GO, class NT>
578int
579makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& colMap,
580 const Teuchos::RCP<const Tpetra::Map<LO, GO, NT>>& domMap,
581 Kokkos::View<GO*, typename NT::memory_space> gids,
582 std::ostream* errStrm)
583{
584 using Teuchos::RCP;
585 using Teuchos::Array;
586 using Kokkos::RangePolicy;
587 using device_t = typename NT::device_type;
588 using exec_space = typename device_t::execution_space;
589 using memory_space = typename device_t::memory_space;
590 // Note BMK 5-2021: this is deliberately not just device_t.
591 // Bitset cannot use HIPHostPinnedSpace currently, so this needs to
592 // use the default memory space for HIP (HIPSpace). Using the default mem
593 // space is fine for all other backends too. This bitset type is only used
594 // in this function so it won't cause type mismatches.
595 using bitset_t = Kokkos::Bitset<typename exec_space::memory_space>;
596 using const_bitset_t = Kokkos::ConstBitset<typename exec_space::memory_space>;
597 using GOView = Kokkos::View<GO*, memory_space>;
598 using SingleView = Kokkos::View<GO, memory_space>;
599 using map_type = Tpetra::Map<LO, GO, NT>;
600 using LocalMap = typename map_type::local_map_type;
601 GO nentries = gids.extent(0);
602 GO minGID = Teuchos::OrdinalTraits<GO>::max();
603 GO maxGID = 0;
604 using MinMaxValue = typename Kokkos::MinMax<GO>::value_type;
605 MinMaxValue minMaxGID = {minGID, maxGID};
606 Kokkos::parallel_reduce(RangePolicy<exec_space>(0, nentries),
607 MinMaxReduceFunctor<GO, memory_space>(gids),
608 Kokkos::MinMax<GO>(minMaxGID));
609 minGID = minMaxGID.min_val; maxGID = minMaxGID.max_val;
610 //Now we know the full range of input GIDs.
611 //Determine the set of GIDs in the column map using a dense bitset whose bits correspond to the range [minGID, maxGID].
612 bitset_t presentGIDs(maxGID - minGID + 1);
613 Kokkos::parallel_for(RangePolicy<exec_space>(0, nentries), GatherPresentEntries<GOView, bitset_t>(minGID, gids, presentGIDs));
614 const_bitset_t constPresentGIDs(presentGIDs);
615 //Get the set of local and remote GIDs on device
616 SingleView numLocals("Num local GIDs");
617 SingleView numRemotes("Num remote GIDs");
618 GOView localGIDView(Kokkos::ViewAllocateWithoutInitializing("Local GIDs"), constPresentGIDs.count());
619 GOView remoteGIDView(Kokkos::ViewAllocateWithoutInitializing("Remote GIDs"), constPresentGIDs.count());
620 LocalMap localDomMap = domMap->getLocalMap();
621 //This lists the locally owned GIDs in localGIDView
622 Kokkos::parallel_scan(RangePolicy<exec_space>(0, constPresentGIDs.size()),
623 ListGIDs<LO, GO, device_t, LocalMap, const_bitset_t, false>
624 (minGID, localGIDView, numLocals, constPresentGIDs, localDomMap));
625 //And this lists the remote GIDs in remoteGIDView
626 Kokkos::parallel_scan(RangePolicy<exec_space>(0, constPresentGIDs.size()),
627 ListGIDs<LO, GO, device_t, LocalMap, const_bitset_t, true>
628 (minGID, remoteGIDView, numRemotes, constPresentGIDs, localDomMap));
629 //Pull down the sizes
630 GO numLocalColGIDs = 0;
631 GO numRemoteColGIDs = 0;
632 // DEEP_COPY REVIEW - DEVICE-TO-VALUE
633 Kokkos::deep_copy(exec_space(), numLocalColGIDs, numLocals);
634 // DEEP_COPY REVIEW - DEVICE-TO-VALUE
635 Kokkos::deep_copy(exec_space(), numRemoteColGIDs, numRemotes);
636 //Pull down the local and remote GID lists
637 auto localsHost = Kokkos::create_mirror_view(localGIDView);
638 auto remotesHost = Kokkos::create_mirror_view(remoteGIDView);
639 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
640 Kokkos::deep_copy(exec_space(), localsHost, localGIDView);
641 // DEEP_COPY REVIEW - DEVICE-TO-HOSTMIRROR
642 Kokkos::deep_copy(exec_space(), remotesHost, remoteGIDView);
643 // CAG: This fence was found to be required on Cuda with UVM=on.
644 Kokkos::fence();
645 //Finally, populate the STL structures which hold the index lists
646 std::set<GO> RemoteGIDSet;
647 std::vector<GO> RemoteGIDUnorderedVector;
648 std::vector<bool> GIDisLocal (domMap->getLocalNumElements (), false);
649 for(GO i = 0; i < numLocalColGIDs; i++)
650 {
651 GO gid = localsHost(i);
652 //We already know that gid is locally owned, so this index will never be invalid().
653 //makeColMapImpl uses this and the domain map to get the local GID list.
654 GIDisLocal[domMap->getLocalElement(gid)] = true;
655 }
656 RemoteGIDUnorderedVector.reserve(numRemoteColGIDs);
657 for(GO i = 0; i < numRemoteColGIDs; i++)
658 {
659 GO gid = remotesHost(i);
660 RemoteGIDSet.insert(gid);
661 RemoteGIDUnorderedVector.push_back(gid);
662 }
663 //remotePIDs will be discarded in this version of makeColMap
664 Array<int> remotePIDs;
665 //Build the column Map from the local and remote GID lists gathered above
666 return makeColMapImpl<LO, GO, NT>(
667 colMap,
668 remotePIDs,
669 domMap,
670 static_cast<size_t>(numLocalColGIDs),
671 static_cast<size_t>(numRemoteColGIDs),
672 RemoteGIDSet,
673 RemoteGIDUnorderedVector,
674 GIDisLocal,
675 true, //always sort remotes
676 errStrm);
677}
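// Example (illustrative sketch, not part of the original file): the
// device-view overload takes the set of column GIDs as a Kokkos::View and
// needs no remotePIDs output. Assuming a device view `gids` already filled
// with every global column index that appears on this process, and an entry
// count `numEnt` (both hypothetical names):
//
//   Kokkos::View<GO*, typename NT::memory_space> gids ("column GIDs", numEnt);
//   // ... fill gids on device ...
//   Teuchos::RCP<const Tpetra::Map<LO, GO, NT> > colMap;
//   const int errCode =
//     Tpetra::Details::makeColMap (colMap, domMap, gids, &std::cerr);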
678
679} // namespace Details
680} // namespace Tpetra
681
682//
683// Explicit instantiation macros
684//
685// Must be expanded from within the Tpetra namespace!
686//
687#define TPETRA_DETAILS_MAKECOLMAP_INSTANT(LO,GO,NT) \
688 namespace Details { \
689 template int \
690 makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
691 Teuchos::Array<int>&, \
692 const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
693 const RowGraph<LO, GO, NT>&, \
694 const bool, \
695 std::ostream*); \
696 template int \
697 makeColMap (Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
698 const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >&, \
699 Kokkos::View<GO*, typename NT::memory_space>, \
700 std::ostream*); \
701 }
702
703#endif // TPETRA_DETAILS_MAKECOLMAP_DEF_HPP