Tpetra parallel linear algebra Version of the Day
Tpetra_ComputeGatherMap.hpp
// @HEADER
// ***********************************************************************
//
//          Tpetra: Templated Linear Algebra Services Package
//                 Copyright (2008) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ************************************************************************
// @HEADER

#ifndef __Tpetra_ComputeGatherMap_hpp
#define __Tpetra_ComputeGatherMap_hpp

#include "Tpetra_Map.hpp"
#include <numeric>

// Macro that marks a function as "possibly unused," in order to
// suppress build warnings.
#if ! defined(TRILINOS_UNUSED_FUNCTION)
#  if defined(__GNUC__) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
#    define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#  elif defined(__clang__)
#    if __has_attribute(unused)
#      define TRILINOS_UNUSED_FUNCTION __attribute__((__unused__))
#    else
#      define TRILINOS_UNUSED_FUNCTION
#    endif // Clang has 'unused' attribute
#  elif defined(__IBMCPP__)
// IBM's C++ compiler for Blue Gene/Q (V12.1) implements 'used' but not 'unused'.
//
// http://pic.dhe.ibm.com/infocenter/compbg/v121v141/index.jsp
#    define TRILINOS_UNUSED_FUNCTION
#  else // some other compiler
#    define TRILINOS_UNUSED_FUNCTION
#  endif
#endif // ! defined(TRILINOS_UNUSED_FUNCTION)
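// Illustrative use of the macro (the name exampleHelper is hypothetical and
// not part of this header): prefix a declaration that might never be called,
//
//   TRILINOS_UNUSED_FUNCTION static int exampleHelper (int x) { return x + 1; }
//
// so that compilers which support __attribute__((__unused__)) do not emit
// an unused-function warning for it.  The getMpiDatatype specializations
// below apply the macro in exactly this way.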

namespace Tpetra {
  namespace Details {

    namespace {
#ifdef HAVE_MPI
      // We're communicating integers of type IntType.  Figure
      // out the right MPI_Datatype for IntType.  Usually it
      // is int or long, so these are good enough for now.
      template<class IntType> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype () {
        TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
          "Not implemented for IntType != int, long, or long long");
      }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<int> () { return MPI_INT; }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<long> () { return MPI_LONG; }

      template<> TRILINOS_UNUSED_FUNCTION MPI_Datatype
      getMpiDatatype<long long> () { return MPI_LONG_LONG; }
#endif // HAVE_MPI

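      // Gather sendcnt entries of sendbuf from every process in comm to
      // recvbuf on the root process.  If the communicator wraps MPI, this
      // calls MPI_Gather directly; for a serial (non-MPI) communicator it
      // simply copies the first recvcnt entries of sendbuf into recvbuf.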
      template<class IntType>
      void
      gather (const IntType sendbuf[], const int sendcnt,
              IntType recvbuf[], const int recvcnt,
              const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
      {
        using Teuchos::RCP;
        using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
        using Teuchos::MpiComm;

        // Get the raw MPI communicator, so we don't have to wrap MPI_Gather in Teuchos.
        RCP<const MpiComm<int> > mpiComm = rcp_dynamic_cast<const MpiComm<int> > (comm);
        if (! mpiComm.is_null ()) {
          MPI_Datatype sendtype = getMpiDatatype<IntType> ();
          MPI_Datatype recvtype = sendtype;
          MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
          const int err = MPI_Gather (reinterpret_cast<void*> (const_cast<IntType*> (sendbuf)),
                                      sendcnt,
                                      sendtype,
                                      reinterpret_cast<void*> (recvbuf),
                                      recvcnt,
                                      recvtype,
                                      root,
                                      rawMpiComm);
          TEUCHOS_TEST_FOR_EXCEPTION(
            err != MPI_SUCCESS, std::runtime_error, "MPI_Gather failed");
          return;
        }
#endif // HAVE_MPI

        TEUCHOS_TEST_FOR_EXCEPTION(
          recvcnt > sendcnt, std::invalid_argument,
          "gather: If the input communicator contains only one process, "
          "then you cannot receive more entries than you send.  "
          "You aim to receive " << recvcnt << " entries, "
          "but to send " << sendcnt << ".");
        // Serial communicator case: just copy.  recvcnt is the
        // amount to receive, so it's the amount to copy.
        std::copy (sendbuf, sendbuf + recvcnt, recvbuf);
      }

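      // Variable-count gather to the root process: this process contributes
      // sendcnt entries, and on the root the entries from process i are
      // stored in recvbuf starting at offset displs[i], with recvcnts[i]
      // giving the count expected from process i.  Uses MPI_Gatherv when
      // the communicator wraps MPI; for a serial communicator it copies
      // recvcnts[0] entries of sendbuf to recvbuf + displs[0].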
      template<class IntType>
      void
      gatherv (const IntType sendbuf[], const int sendcnt,
               IntType recvbuf[], const int recvcnts[], const int displs[],
               const int root, const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
      {
        using Teuchos::RCP;
        using Teuchos::rcp_dynamic_cast;
#ifdef HAVE_MPI
        using Teuchos::MpiComm;

        // Get the raw MPI communicator, so we don't have to wrap
        // MPI_Gatherv in Teuchos.
        RCP<const MpiComm<int> > mpiComm =
          rcp_dynamic_cast<const MpiComm<int> > (comm);
        if (! mpiComm.is_null ()) {
          MPI_Datatype sendtype = getMpiDatatype<IntType> ();
          MPI_Datatype recvtype = sendtype;
          MPI_Comm rawMpiComm = * (mpiComm->getRawMpiComm ());
          const int err = MPI_Gatherv (reinterpret_cast<void*> (const_cast<IntType*> (sendbuf)),
                                       sendcnt,
                                       sendtype,
                                       reinterpret_cast<void*> (recvbuf),
                                       const_cast<int*> (recvcnts),
                                       const_cast<int*> (displs),
                                       recvtype,
                                       root,
                                       rawMpiComm);
          TEUCHOS_TEST_FOR_EXCEPTION(
            err != MPI_SUCCESS, std::runtime_error, "MPI_Gatherv failed");
          return;
        }
#endif // HAVE_MPI
        TEUCHOS_TEST_FOR_EXCEPTION(
          recvcnts[0] > sendcnt,
          std::invalid_argument,
          "gatherv: If the input communicator contains only one process, "
          "then you cannot receive more entries than you send.  "
          "You aim to receive " << recvcnts[0] << " entries, "
          "but to send " << sendcnt << ".");
        // Serial communicator case: just copy.  recvcnts[0] is the
        // amount to receive, so it's the amount to copy.  Start
        // writing to recvbuf at the offset displs[0].
        std::copy (sendbuf, sendbuf + recvcnts[0], recvbuf + displs[0]);
      }
    } // namespace (anonymous)


    // Given an arbitrary Map, compute a Map containing all the GIDs
    // in the same order as in (the one-to-one version of) map, but
    // all owned exclusively by Proc 0.
    template<class MapType>
    Teuchos::RCP<const MapType>
    computeGatherMap (Teuchos::RCP<const MapType> map,
                      const Teuchos::RCP<Teuchos::FancyOStream>& err,
                      const bool dbg=false)
    {
      using Teuchos::Array;
      using Teuchos::ArrayRCP;
      using Teuchos::ArrayView;
      using Teuchos::as;
      using Teuchos::Comm;
      using Teuchos::RCP;
      using Teuchos::rcp;
      using std::endl;
      typedef typename MapType::local_ordinal_type LO;
      typedef typename MapType::global_ordinal_type GO;
      typedef typename MapType::node_type NT;

      RCP<const Comm<int> > comm = map->getComm ();
      const int numProcs = comm->getSize ();
      const int myRank = comm->getRank ();

      if (! err.is_null ()) {
        err->pushTab ();
      }
      if (dbg) {
        *err << myRank << ": computeGatherMap:" << endl;
      }
      if (! err.is_null ()) {
        err->pushTab ();
      }

      RCP<const MapType> oneToOneMap;
      if (map->isContiguous ()) {
        oneToOneMap = map; // contiguous Maps are always 1-to-1
      } else {
        if (dbg) {
          *err << myRank << ": computeGatherMap: Calling createOneToOne" << endl;
        }
        // It could be that Map is one-to-one, but the class doesn't
        // give us a way to test this, other than to create the
        // one-to-one Map.
        oneToOneMap = createOneToOne<LO, GO, NT> (map);
      }

      RCP<const MapType> gatherMap;
      if (numProcs == 1) {
        gatherMap = oneToOneMap;
      } else {
        if (dbg) {
          *err << myRank << ": computeGatherMap: Gathering Map counts" << endl;
        }
        // Gather each process' count of Map elements to Proc 0,
        // into the recvCounts array.  This will tell Proc 0 how
        // many GIDs to expect from each process when calling
        // MPI_Gatherv.  Counts and offsets are all int, because
        // that's what MPI uses.  Teuchos::as will at least prevent
        // bad casts to int in a debug build.
        //
        // Yeah, it's not memory scalable.  Guess what: We're going
        // to bring ALL the data to Proc 0, not just the receive
        // counts.  The first MPI_Gather is only the beginning...
        // Seriously, if you want to make this memory scalable, the
        // right thing to do (after the Export to the one-to-one
        // Map) is to go round robin through the processes, having
        // each send a chunk of data (with its GIDs, to get the
        // order of rows right) at a time, and overlapping writing
        // to the file (resp. reading from it) with receiving (resp.
        // sending) the next chunk.
        const int myEltCount = as<int> (oneToOneMap->getLocalNumElements ());
        Array<int> recvCounts (numProcs);
        const int rootProc = 0;
        gather<int> (&myEltCount, 1, recvCounts.getRawPtr (), 1, rootProc, comm);

        ArrayView<const GO> myGlobalElts = oneToOneMap->getLocalElementList ();
        const int numMyGlobalElts = as<int> (myGlobalElts.size ());
        // Only Proc 0 needs to receive and store all the GIDs (from
        // all processes).
        ArrayRCP<GO> allGlobalElts;
        if (myRank == 0) {
          allGlobalElts = Teuchos::arcp<GO> (oneToOneMap->getGlobalNumElements ());
          std::fill (allGlobalElts.begin (), allGlobalElts.end (), 0);
        }

        if (dbg) {
          *err << myRank << ": computeGatherMap: Computing MPI_Gatherv "
            "displacements" << endl;
        }
        // Displacements for gatherv() in this case (where we are
        // gathering into a contiguous array) are an exclusive
        // partial sum (first entry is zero, second starts the
        // partial sum) of recvCounts.
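        // For example (illustrative values only): with three processes
        // and recvCounts = {3, 2, 4}, the exclusive partial sum gives
        // displs = {0, 3, 5}, so Proc 1's GIDs land at offset 3 and
        // Proc 2's at offset 5 of allGlobalElts on Proc 0.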
        Array<int> displs (numProcs, 0);
        std::partial_sum (recvCounts.begin (), recvCounts.end () - 1,
                          displs.begin () + 1);
        if (dbg) {
          *err << myRank << ": computeGatherMap: Calling MPI_Gatherv" << endl;
        }
        gatherv<GO> (myGlobalElts.getRawPtr (), numMyGlobalElts,
                     allGlobalElts.getRawPtr (), recvCounts.getRawPtr (),
                     displs.getRawPtr (), rootProc, comm);

        if (dbg) {
          *err << myRank << ": computeGatherMap: Creating gather Map" << endl;
        }
        // Create a Map with all the GIDs, in the same order as in
        // the one-to-one Map, but owned by Proc 0.
        ArrayView<const GO> allElts (NULL, 0);
        if (myRank == 0) {
          allElts = allGlobalElts ();
        }
        const global_size_t INVALID = Teuchos::OrdinalTraits<global_size_t>::invalid ();
        gatherMap = rcp (new MapType (INVALID, allElts,
                                      oneToOneMap->getIndexBase (),
                                      comm));
      }
      if (! err.is_null ()) {
        err->popTab ();
      }
      if (dbg) {
        *err << myRank << ": computeGatherMap: done" << endl;
      }
      if (! err.is_null ()) {
        err->popTab ();
      }
      return gatherMap;
    }

  } // namespace Details
} // namespace Tpetra

#endif // __Tpetra_ComputeGatherMap_hpp
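
computeGatherMap lives in the Tpetra::Details namespace, so it is an implementation detail (used internally, for example by Tpetra's MatrixMarket writer) rather than a public API. The sketch below shows one plausible way to call it; it is illustrative only and assumes a recent Tpetra build in which this header is installed on the include path and in which Tpetra::ScopeGuard, Tpetra::getDefaultComm, and the default Tpetra::Map<> template parameters are available. The Map size and the printed message are made up for the example.

#include "Tpetra_Core.hpp"
#include "Tpetra_Map.hpp"
#include "Tpetra_ComputeGatherMap.hpp"
#include <iostream>

int main (int argc, char* argv[]) {
  // Initialize MPI and Kokkos (if enabled); finalize them on scope exit.
  Tpetra::ScopeGuard tpetraScope (&argc, &argv);
  {
    using map_type = Tpetra::Map<>;
    auto comm = Tpetra::getDefaultComm ();

    // A contiguous Map with 10 entries per process and index base 0.
    const Tpetra::global_size_t numGlobal = 10 * comm->getSize ();
    Teuchos::RCP<const map_type> map =
      Teuchos::rcp (new map_type (numGlobal, 0, comm));

    // Gather Map: the same GIDs in the same order, but all owned by Proc 0.
    // Passing Teuchos::null as the debug stream disables the optional logging.
    Teuchos::RCP<const map_type> gatherMap =
      Tpetra::Details::computeGatherMap (map, Teuchos::null);

    if (comm->getRank () == 0) {
      std::cout << "Proc 0 owns " << gatherMap->getLocalNumElements ()
                << " of " << gatherMap->getGlobalNumElements ()
                << " entries of the gather Map." << std::endl;
    }
  } // destroy Tpetra objects before ScopeGuard finalizes MPI
  return 0;
}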