Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
TpetraExt_MatrixMatrix_OpenMP.hpp
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ************************************************************************
40// @HEADER
41
42#ifndef TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
43#define TPETRA_MATRIXMATRIX_OPENMP_DEF_HPP
44
45#ifdef HAVE_TPETRA_INST_OPENMP
46namespace Tpetra {
47namespace MMdetails {
48
49/*********************************************************************************************************/
50// MMM KernelWrappers for Partial Specialization to OpenMP
51template<class Scalar,
52 class LocalOrdinal,
53 class GlobalOrdinal, class LocalOrdinalViewType>
54struct KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
55 static inline void mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
56 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
57 const LocalOrdinalViewType & Acol2Brow,
58 const LocalOrdinalViewType & Acol2Irow,
59 const LocalOrdinalViewType & Bcol2Ccol,
60 const LocalOrdinalViewType & Icol2Ccol,
61 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
62 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
63 const std::string& label = std::string(),
64 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
65
66 static inline void mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
67 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
68 const LocalOrdinalViewType & Acol2Brow,
69 const LocalOrdinalViewType & Acol2Irow,
70 const LocalOrdinalViewType & Bcol2Ccol,
71 const LocalOrdinalViewType & Icol2Ccol,
72 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
73 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
74 const std::string& label = std::string(),
75 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
76
77
78};
79
80
81// Jacobi KernelWrappers for Partial Specialization to OpenMP
82template<class Scalar,
83 class LocalOrdinal,
84 class GlobalOrdinal, class LocalOrdinalViewType>
85struct KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
86 static inline void jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
87 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
88 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
89 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
90 const LocalOrdinalViewType & Acol2Brow,
91 const LocalOrdinalViewType & Acol2Irow,
92 const LocalOrdinalViewType & Bcol2Ccol,
93 const LocalOrdinalViewType & Icol2Ccol,
94 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
95 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
96 const std::string& label = std::string(),
97 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
98
99 static inline void jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
100 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
101 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
102 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
103 const LocalOrdinalViewType & Acol2Brow,
104 const LocalOrdinalViewType & Acol2Irow,
105 const LocalOrdinalViewType & Bcol2Ccol,
106 const LocalOrdinalViewType & Icol2Ccol,
107 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
108 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
109 const std::string& label = std::string(),
110 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
111
112 static inline void jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
113 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
114 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
115 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
116 const LocalOrdinalViewType & Acol2Brow,
117 const LocalOrdinalViewType & Acol2Irow,
118 const LocalOrdinalViewType & Bcol2Ccol,
119 const LocalOrdinalViewType & Icol2Ccol,
120 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
121 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
122 const std::string& label = std::string(),
123 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
124
125};
126
127
128// Triple-Product KernelWrappers for Partial Specialization to OpenMP
129template<class Scalar,
130 class LocalOrdinal,
131 class GlobalOrdinal, class LocalOrdinalViewType>
132struct KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType> {
133 static inline void mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
134 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
135 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
136 const LocalOrdinalViewType & Acol2Prow,
137 const LocalOrdinalViewType & Acol2PIrow,
138 const LocalOrdinalViewType & Pcol2Ccol,
139 const LocalOrdinalViewType & PIcol2Ccol,
140 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
141 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
142 const std::string& label = std::string(),
143 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
144
145static inline void mult_R_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
146 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
147 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
148 const LocalOrdinalViewType & Acol2Prow,
149 const LocalOrdinalViewType & Acol2PIrow,
150 const LocalOrdinalViewType & Pcol2Ccol,
151 const LocalOrdinalViewType & PIcol2Ccol,
152 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
153 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
154 const std::string& label = std::string(),
155 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
156
157 static inline void mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
158 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
159 const LocalOrdinalViewType & Acol2Prow,
160 const LocalOrdinalViewType & Acol2PIrow,
161 const LocalOrdinalViewType & Pcol2Ccol,
162 const LocalOrdinalViewType & PIcol2Ccol,
163 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
164 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
165 const std::string& label = std::string(),
166 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
167
168 static inline void mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
169 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
170 const LocalOrdinalViewType & Acol2Prow,
171 const LocalOrdinalViewType & Acol2PIrow,
172 const LocalOrdinalViewType & Pcol2Ccol,
173 const LocalOrdinalViewType & PIcol2Ccol,
174 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
175 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
176 const std::string& label = std::string(),
177 const Teuchos::RCP<Teuchos::ParameterList>& params = Teuchos::null);
178};
179
180
181/*********************************************************************************************************/
182template<class Scalar,
183 class LocalOrdinal,
184 class GlobalOrdinal,
185 class LocalOrdinalViewType>
186void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
187 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
188 const LocalOrdinalViewType & Acol2Brow,
189 const LocalOrdinalViewType & Acol2Irow,
190 const LocalOrdinalViewType & Bcol2Ccol,
191 const LocalOrdinalViewType & Icol2Ccol,
192 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
193 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
194 const std::string& label,
195 const Teuchos::RCP<Teuchos::ParameterList>& params) {
196
197#ifdef HAVE_TPETRA_MMM_TIMINGS
198 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
199 using Teuchos::TimeMonitor;
200 Teuchos::RCP<TimeMonitor> MM;
201#endif
202
203 // Node-specific code
204 std::string nodename("OpenMP");
205
206 // Lots and lots of typedefs
207 using Teuchos::RCP;
209 typedef typename KCRS::device_type device_t;
210 typedef typename KCRS::StaticCrsGraphType graph_t;
211 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
212 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
213 typedef typename KCRS::values_type::non_const_type scalar_view_t;
214
215 // Options
216 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
217 std::string myalg("SPGEMM_KK_MEMORY");
218
219
220 if(!params.is_null()) {
221 if(params->isParameter("openmp: algorithm"))
222 myalg = params->get("openmp: algorithm",myalg);
223 if(params->isParameter("openmp: team work size"))
224 team_work_size = params->get("openmp: team work size",team_work_size);
225 }
226
227 if(myalg == "LTG") {
228 // Use the LTG kernel if requested
229 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_newmatrix_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
230 }
231 else {
232 // Use the Kokkos-Kernels OpenMP Kernel
233#ifdef HAVE_TPETRA_MMM_TIMINGS
234 MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPWrapper"))));
235#endif
236 // KokkosKernelsHandle
237 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
238 typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
239 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space > KernelHandle;
240
241 // Grab the Kokkos::SparseCrsMatrices
242 const KCRS & Ak = Aview.origMatrix->getLocalMatrixDevice();
243 // const KCRS & Bk = Bview.origMatrix->getLocalMatrixDevice();
244
245 // Get the algorithm mode
246 std::string alg = nodename+std::string(" algorithm");
247 // printf("DEBUG: Using kernel: %s\n",myalg.c_str());
248 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
249 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
250
251 // Merge the B and Bimport matrices
252 const KCRS Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
253
254#ifdef HAVE_TPETRA_MMM_TIMINGS
255 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPCore"))));
256#endif
257
258 // Do the multiply on whatever we've got
259 typename KernelHandle::nnz_lno_t AnumRows = Ak.numRows();
260 // typename KernelHandle::nnz_lno_t BnumRows = Bmerged->numRows();
261 // typename KernelHandle::nnz_lno_t BnumCols = Bmerged->numCols();
262 typename KernelHandle::nnz_lno_t BnumRows = Bmerged.numRows();
263 typename KernelHandle::nnz_lno_t BnumCols = Bmerged.numCols();
264
265
266 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
267 lno_nnz_view_t entriesC;
268 scalar_view_t valuesC;
269 KernelHandle kh;
270 kh.create_spgemm_handle(alg_enum);
271 kh.set_team_work_size(team_work_size);
272 // KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged->graph.row_map,Bmerged->graph.entries,false,row_mapC);
273 KokkosSparse::Experimental::spgemm_symbolic(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,false,Bmerged.graph.row_map,Bmerged.graph.entries,false,row_mapC);
274
275 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
276 if (c_nnz_size){
277 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
278 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
279 }
280 // KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged->graph.row_map,Bmerged->graph.entries,Bmerged->values,false,row_mapC,entriesC,valuesC);
281 KokkosSparse::Experimental::spgemm_numeric(&kh,AnumRows,BnumRows,BnumCols,Ak.graph.row_map,Ak.graph.entries,Ak.values,false,Bmerged.graph.row_map,Bmerged.graph.entries,Bmerged.values,false,row_mapC,entriesC,valuesC);
282 kh.destroy_spgemm_handle();
283
284#ifdef HAVE_TPETRA_MMM_TIMINGS
285 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPSort"))));
286#endif
287 // Sort & set values
288 if (params.is_null() || params->get("sort entries",true))
289 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
290 C.setAllValues(row_mapC,entriesC,valuesC);
291
292 }// end OMP KokkosKernels loop
293
294#ifdef HAVE_TPETRA_MMM_TIMINGS
295 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Newmatrix OpenMPESFC"))));
296#endif
297
298 // Final Fillcomplete
299 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
300 labelList->set("Timer Label",label);
301 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
302 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
303 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
304
305#if 0
306 {
307 Teuchos::ArrayRCP< const size_t > Crowptr;
308 Teuchos::ArrayRCP< const LocalOrdinal > Ccolind;
309 Teuchos::ArrayRCP< const Scalar > Cvalues;
310 C.getAllValues(Crowptr,Ccolind,Cvalues);
311
312 // DEBUG
313 int MyPID = C->getComm()->getRank();
314 printf("[%d] Crowptr = ",MyPID);
315 for(size_t i=0; i<(size_t) Crowptr.size(); i++) {
316 printf("%3d ",(int)Crowptr.getConst()[i]);
317 }
318 printf("\n");
319 printf("[%d] Ccolind = ",MyPID);
320 for(size_t i=0; i<(size_t)Ccolind.size(); i++) {
321 printf("%3d ",(int)Ccolind.getConst()[i]);
322 }
323 printf("\n");
324 fflush(stdout);
325 // END DEBUG
326 }
327#endif
328}
329
330
331/*********************************************************************************************************/
332template<class Scalar,
333 class LocalOrdinal,
334 class GlobalOrdinal,
335 class LocalOrdinalViewType>
336void KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_A_B_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
337 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
338 const LocalOrdinalViewType & Acol2Brow,
339 const LocalOrdinalViewType & Acol2Irow,
340 const LocalOrdinalViewType & Bcol2Ccol,
341 const LocalOrdinalViewType & Icol2Ccol,
342 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
343 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
344 const std::string& label,
345 const Teuchos::RCP<Teuchos::ParameterList>& params) {
346#ifdef HAVE_TPETRA_MMM_TIMINGS
347 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
348 using Teuchos::TimeMonitor;
349 Teuchos::RCP<TimeMonitor> MM;
350#endif
351
352 // Lots and lots of typedefs
353 using Teuchos::RCP;
354
355 // Options
356 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
357 std::string myalg("LTG");
358 if(!params.is_null()) {
359 if(params->isParameter("openmp: algorithm"))
360 myalg = params->get("openmp: algorithm",myalg);
361 if(params->isParameter("openmp: team work size"))
362 team_work_size = params->get("openmp: team work size",team_work_size);
363 }
364
365 if(myalg == "LTG") {
366 // Use the LTG kernel if requested
367 ::Tpetra::MatrixMatrix::ExtraKernels::mult_A_B_reuse_LowThreadGustavsonKernel(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
368 }
369 else {
370 throw std::runtime_error("Tpetra::MatrixMatrix::MMM reuse unknown kernel");
371 }
372
373#ifdef HAVE_TPETRA_MMM_TIMINGS
374 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("MMM Reuse OpenMPESFC"))));
375#endif
376 C.fillComplete(C.getDomainMap(), C.getRangeMap());
377}
378
379
380/*********************************************************************************************************/
381template<class Scalar,
382 class LocalOrdinal,
383 class GlobalOrdinal,
384 class LocalOrdinalViewType>
385void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
386 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
387 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
388 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
389 const LocalOrdinalViewType & Acol2Brow,
390 const LocalOrdinalViewType & Acol2Irow,
391 const LocalOrdinalViewType & Bcol2Ccol,
392 const LocalOrdinalViewType & Icol2Ccol,
393 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
394 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
395 const std::string& label,
396 const Teuchos::RCP<Teuchos::ParameterList>& params) {
397
398#ifdef HAVE_TPETRA_MMM_TIMINGS
399 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
400 using Teuchos::TimeMonitor;
401 Teuchos::RCP<TimeMonitor> MM;
402#endif
403
404 // Node-specific code
405 using Teuchos::RCP;
406
407 // Options
408 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
409 std::string myalg("LTG");
410 if(!params.is_null()) {
411 if(params->isParameter("openmp: jacobi algorithm"))
412 myalg = params->get("openmp: jacobi algorithm",myalg);
413 if(params->isParameter("openmp: team work size"))
414 team_work_size = params->get("openmp: team work size",team_work_size);
415 }
416
417 if(myalg == "LTG") {
418 // Use the LTG kernel if requested
419 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
420 }
421 else if(myalg == "MSAK") {
422 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_newmatrix_MultiplyScaleAddKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
423 }
424 else if(myalg == "KK") {
425 jacobi_A_B_newmatrix_KokkosKernels(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
426 }
427 else {
428 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi newmatrix unknown kernel");
429 }
430
431#ifdef HAVE_TPETRA_MMM_TIMINGS
432 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
433#endif
434
435 // Final Fillcomplete
436 RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
437 labelList->set("Timer Label",label);
438 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
439
440 // NOTE: MSAK already fillCompletes, so we have to check here
441 if(!C.isFillComplete()) {
442 RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
443 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
444 }
445
446}
447
448
449
450/*********************************************************************************************************/
451template<class Scalar,
452 class LocalOrdinal,
453 class GlobalOrdinal,
454 class LocalOrdinalViewType>
455void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
456 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
457 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
458 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
459 const LocalOrdinalViewType & Acol2Brow,
460 const LocalOrdinalViewType & Acol2Irow,
461 const LocalOrdinalViewType & Bcol2Ccol,
462 const LocalOrdinalViewType & Icol2Ccol,
463 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
464 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
465 const std::string& label,
466 const Teuchos::RCP<Teuchos::ParameterList>& params) {
467
468#ifdef HAVE_TPETRA_MMM_TIMINGS
469 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
470 using Teuchos::TimeMonitor;
471 Teuchos::RCP<TimeMonitor> MM;
472#endif
473
474 // Lots and lots of typedefs
475 using Teuchos::RCP;
476
477 // Options
478 int team_work_size = 16; // Defaults to 16 as per Deveci 12/7/16 - csiefer
479 std::string myalg("LTG");
480 if(!params.is_null()) {
481 if(params->isParameter("openmp: jacobi algorithm"))
482 myalg = params->get("openmp: jacobi algorithm",myalg);
483 if(params->isParameter("openmp: team work size"))
484 team_work_size = params->get("openmp: team work size",team_work_size);
485 }
486
487 if(myalg == "LTG") {
488 // Use the LTG kernel if requested
489 ::Tpetra::MatrixMatrix::ExtraKernels::jacobi_A_B_reuse_LowThreadGustavsonKernel(omega,Dinv,Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
490 }
491 else {
492 throw std::runtime_error("Tpetra::MatrixMatrix::Jacobi reuse unknown kernel");
493 }
494
495#ifdef HAVE_TPETRA_MMM_TIMINGS
496 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Reuse OpenMPESFC"))));
497#endif
498 C.fillComplete(C.getDomainMap(), C.getRangeMap());
499
500}
501
502/*********************************************************************************************************/
503template<class Scalar,
504 class LocalOrdinal,
505 class GlobalOrdinal,
506 class LocalOrdinalViewType>
507void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::jacobi_A_B_newmatrix_KokkosKernels(Scalar omega,
508 const Vector<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> & Dinv,
509 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
510 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Bview,
511 const LocalOrdinalViewType & Acol2Brow,
512 const LocalOrdinalViewType & Acol2Irow,
513 const LocalOrdinalViewType & Bcol2Ccol,
514 const LocalOrdinalViewType & Icol2Ccol,
515 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
516 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
517 const std::string& label,
518 const Teuchos::RCP<Teuchos::ParameterList>& params) {
519
520#ifdef HAVE_TPETRA_MMM_TIMINGS
521 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
522 using Teuchos::TimeMonitor;
523 Teuchos::RCP<TimeMonitor> MM;
524#endif
525
526 // Check if the diagonal entries exist in debug mode
527 const bool debug = Tpetra::Details::Behavior::debug();
528 if(debug) {
529
530 auto rowMap = Aview.origMatrix->getRowMap();
532 Aview.origMatrix->getLocalDiagCopy(diags);
533 size_t diagLength = rowMap->getLocalNumElements();
534 Teuchos::Array<Scalar> diagonal(diagLength);
535 diags.get1dCopy(diagonal());
536
537 for(size_t i = 0; i < diagLength; ++i) {
538 TEUCHOS_TEST_FOR_EXCEPTION(diagonal[i] == Teuchos::ScalarTraits<Scalar>::zero(),
539 std::runtime_error,
540 "Matrix A has a zero/missing diagonal: " << diagonal[i] << std::endl <<
541 "KokkosKernels Jacobi-fused SpGEMM requires nonzero diagonal entries in A" << std::endl);
542 }
543 }
544
545 // Usings
546 using device_t = typename Kokkos::Compat::KokkosOpenMPWrapperNode::device_type;
548 using graph_t = typename matrix_t::StaticCrsGraphType;
549 using lno_view_t = typename graph_t::row_map_type::non_const_type;
550 using c_lno_view_t = typename graph_t::row_map_type::const_type;
551 using lno_nnz_view_t = typename graph_t::entries_type::non_const_type;
552 using scalar_view_t = typename matrix_t::values_type::non_const_type;
553
554 // KokkosKernels handle
555 using handle_t = typename KokkosKernels::Experimental::KokkosKernelsHandle<
556 typename lno_view_t::const_value_type,typename lno_nnz_view_t::const_value_type, typename scalar_view_t::const_value_type,
557 typename device_t::execution_space, typename device_t::memory_space,typename device_t::memory_space >;
558
559 // Get the rowPtr, colInd and vals of importMatrix
560 c_lno_view_t Irowptr;
561 lno_nnz_view_t Icolind;
562 scalar_view_t Ivals;
563 if(!Bview.importMatrix.is_null()) {
564 auto lclB = Bview.importMatrix->getLocalMatrixDevice();
565 Irowptr = lclB.graph.row_map;
566 Icolind = lclB.graph.entries;
567 Ivals = lclB.values;
568 }
569
570 // Merge the B and Bimport matrices
571 const matrix_t Bmerged = Tpetra::MMdetails::merge_matrices(Aview,Bview,Acol2Brow,Acol2Irow,Bcol2Ccol,Icol2Ccol,C.getColMap()->getLocalNumElements());
572
573 // Get the properties and arrays of input matrices
574 const matrix_t & Amat = Aview.origMatrix->getLocalMatrixDevice();
575 const matrix_t & Bmat = Bview.origMatrix->getLocalMatrixDevice();
576
577 typename handle_t::nnz_lno_t AnumRows = Amat.numRows();
578 typename handle_t::nnz_lno_t BnumRows = Bmerged.numRows();
579 typename handle_t::nnz_lno_t BnumCols = Bmerged.numCols();
580
581 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmerged.graph.row_map;
582 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmerged.graph.entries;
583 const scalar_view_t Avals = Amat.values, Bvals = Bmerged.values;
584
585 // Arrays of the output matrix
586 lno_view_t row_mapC (Kokkos::ViewAllocateWithoutInitializing("non_const_lnow_row"), AnumRows + 1);
587 lno_nnz_view_t entriesC;
588 scalar_view_t valuesC;
589
590 // Options
591 int team_work_size = 16;
592 std::string myalg("SPGEMM_KK_MEMORY");
593 if(!params.is_null()) {
594 if(params->isParameter("cuda: algorithm"))
595 myalg = params->get("cuda: algorithm",myalg);
596 if(params->isParameter("cuda: team work size"))
597 team_work_size = params->get("cuda: team work size",team_work_size);
598 }
599
600 // Get the algorithm mode
601 std::string nodename("OpenMP");
602 std::string alg = nodename + std::string(" algorithm");
603 if(!params.is_null() && params->isParameter(alg)) myalg = params->get(alg,myalg);
604 KokkosSparse::SPGEMMAlgorithm alg_enum = KokkosSparse::StringToSPGEMMAlgorithm(myalg);
605
606
607 // KokkosKernels call
608 handle_t kh;
609 kh.create_spgemm_handle(alg_enum);
610 kh.set_team_work_size(team_work_size);
611
612 KokkosSparse::Experimental::spgemm_symbolic(&kh, AnumRows, BnumRows, BnumCols,
613 Arowptr, Acolind, false,
614 Browptr, Bcolind, false,
615 row_mapC);
616
617 size_t c_nnz_size = kh.get_spgemm_handle()->get_c_nnz();
618 if (c_nnz_size){
619 entriesC = lno_nnz_view_t (Kokkos::ViewAllocateWithoutInitializing("entriesC"), c_nnz_size);
620 valuesC = scalar_view_t (Kokkos::ViewAllocateWithoutInitializing("valuesC"), c_nnz_size);
621 }
622
623 KokkosSparse::Experimental::spgemm_jacobi(&kh, AnumRows, BnumRows, BnumCols,
624 Arowptr, Acolind, Avals, false,
625 Browptr, Bcolind, Bvals, false,
626 row_mapC, entriesC, valuesC,
627 omega, Dinv.getLocalViewDevice(Tpetra::Access::ReadOnly));
628 kh.destroy_spgemm_handle();
629
630#ifdef HAVE_TPETRA_MMM_TIMINGS
631 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPSort"))));
632#endif
633
634 // Sort & set values
635 if (params.is_null() || params->get("sort entries",true))
636 Import_Util::sortCrsEntries(row_mapC, entriesC, valuesC);
637 C.setAllValues(row_mapC,entriesC,valuesC);
638
639#ifdef HAVE_TPETRA_MMM_TIMINGS
640 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("Jacobi Newmatrix OpenMPESFC"))));
641#endif
642
643 // Final Fillcomplete
644 Teuchos::RCP<Teuchos::ParameterList> labelList = rcp(new Teuchos::ParameterList);
645 labelList->set("Timer Label",label);
646 if(!params.is_null()) labelList->set("compute global constants",params->get("compute global constants",true));
647 Teuchos::RCP<const Export<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > dummyExport;
648 C.expertStaticFillComplete(Bview.origMatrix->getDomainMap(), Aview.origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
649}
650
651
652/*********************************************************************************************************/
653template<class Scalar,
654 class LocalOrdinal,
655 class GlobalOrdinal,
656 class LocalOrdinalViewType>
657void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
658 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
659 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
660 const LocalOrdinalViewType & Acol2Prow,
661 const LocalOrdinalViewType & Acol2PIrow,
662 const LocalOrdinalViewType & Pcol2Accol,
663 const LocalOrdinalViewType & PIcol2Accol,
664 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
665 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
666 const std::string& label,
667 const Teuchos::RCP<Teuchos::ParameterList>& params) {
668
669
670
671#ifdef HAVE_TPETRA_MMM_TIMINGS
672 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
673 using Teuchos::TimeMonitor;
674 Teuchos::RCP<TimeMonitor> MM;
675#endif
676
677 // Node-specific code
678 std::string nodename("OpenMP");
679
680 // Options
681 std::string myalg("LTG");
682
683 if(!params.is_null()) {
684 if(params->isParameter("openmp: rap algorithm"))
685 myalg = params->get("openmp: rap algorithm",myalg);
686 }
687
688 if(myalg == "LTG") {
689 // Use the LTG kernel if requested
690 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2PIrow,Pcol2Accol,PIcol2Accol,Ac,Acimport,label,params);
691 }
692 else {
693 throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
694 }
695}
696
697/*********************************************************************************************************/
698template<class Scalar,
699 class LocalOrdinal,
700 class GlobalOrdinal,
701 class LocalOrdinalViewType>
702void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_R_A_P_reuse_kernel_wrapper(
703 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Rview,
704 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
705 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
706
707 const LocalOrdinalViewType & Acol2Prow,
708 const LocalOrdinalViewType & Acol2Irow,
709 const LocalOrdinalViewType & Pcol2Ccol,
710 const LocalOrdinalViewType & Icol2Ccol,
711 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& C,
712 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Cimport,
713 const std::string& label,
714 const Teuchos::RCP<Teuchos::ParameterList>& params) {
715
716#ifdef HAVE_TPETRA_MMM_TIMINGS
717 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
718 using Teuchos::TimeMonitor;
719 Teuchos::RCP<TimeMonitor> MM;
720#endif
721
722 // Lots and lots of typedefs
723 using Teuchos::RCP;
724
725 // Options
726 std::string myalg("LTG");
727 if(!params.is_null()) {
728 if(params->isParameter("openmp: rap algorithm"))
729 myalg = params->get("openmp: rap algorithm",myalg);
730 }
731
732 if(myalg == "LTG") {
733 // Use the LTG kernel if requested
734 ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel(Rview,Aview,Pview,Acol2Prow,Acol2Irow,Pcol2Ccol,Icol2Ccol,C,Cimport,label,params);
735 }
736 else {
737 throw std::runtime_error("Tpetra::MatrixMatrix::R_A_P newmatrix unknown kernel");
738 }
739
740#ifdef HAVE_TPETRA_MMM_TIMINGS
741 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("RAP Reuse OpenMPESFC"))));
742#endif
743 C.fillComplete(C.getDomainMap(), C.getRangeMap());
744
745}
746
747
748
749/*********************************************************************************************************/
750template<class Scalar,
751 class LocalOrdinal,
752 class GlobalOrdinal,
753 class LocalOrdinalViewType>
754void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_newmatrix_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
755
756 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
757 const LocalOrdinalViewType & Acol2Prow,
758 const LocalOrdinalViewType & Acol2PIrow,
759 const LocalOrdinalViewType & Pcol2Accol,
760 const LocalOrdinalViewType & PIcol2Accol,
761 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
762 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
763 const std::string& label,
764 const Teuchos::RCP<Teuchos::ParameterList>& params) {
765
766
767#ifdef HAVE_TPETRA_MMM_TIMINGS
768 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
769 using Teuchos::TimeMonitor;
770 Teuchos::RCP<TimeMonitor> MM;
771#endif
772
773 // Node-specific code
774 std::string nodename("OpenMP");
775
776 // Options
777 std::string myalg("LTG");
778
779 if(!params.is_null()) {
780 if(params->isParameter("openmp: ptap algorithm"))
781 myalg = params->get("openmp: ptap algorithm",myalg);
782 }
783
784 if(myalg == "LTG") {
785#ifdef HAVE_TPETRA_MMM_TIMINGS
786 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
787#endif
788
789 using Teuchos::ParameterList;
790 using Teuchos::RCP;
791 using LO = LocalOrdinal;
792 using GO = GlobalOrdinal;
793 using SC = Scalar;
794
795 // We don't need a kernel-level PTAP, we just transpose here
796 using transposer_type =
797 RowMatrixTransposer<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode>;
798 transposer_type transposer (Pview.origMatrix, label + "XP: ");
799 RCP<ParameterList> transposeParams (new ParameterList);
800 if (! params.is_null ()) {
801 transposeParams->set ("compute global constants",
802 params->get ("compute global constants: temporaries",
803 false));
804 }
805 transposeParams->set ("sort", false);
806 RCP<CrsMatrix<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> > Ptrans =
807 transposer.createTransposeLocal (transposeParams);
808 CrsMatrixStruct<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> Rview;
809 Rview.origMatrix = Ptrans;
810
811 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_newmatrix_LowThreadGustavsonKernel;
812 mult_R_A_P_newmatrix_LowThreadGustavsonKernel
813 (Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
814 PIcol2Accol, Ac, Acimport, label, params);
815 }
816 else {
817 throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P newmatrix unknown kernel");
818 }
819}
820
821/*********************************************************************************************************/
822template<class Scalar,
823 class LocalOrdinal,
824 class GlobalOrdinal,
825 class LocalOrdinalViewType>
826void KernelWrappers3<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode,LocalOrdinalViewType>::mult_PT_A_P_reuse_kernel_wrapper(CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Aview,
827
828 CrsMatrixStruct<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Pview,
829 const LocalOrdinalViewType & Acol2Prow,
830 const LocalOrdinalViewType & Acol2PIrow,
831 const LocalOrdinalViewType & Pcol2Accol,
832 const LocalOrdinalViewType & PIcol2Accol,
833 CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Kokkos::Compat::KokkosOpenMPWrapperNode>& Ac,
834 Teuchos::RCP<const Import<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosOpenMPWrapperNode> > Acimport,
835 const std::string& label,
836 const Teuchos::RCP<Teuchos::ParameterList>& params) {
837
838
839#ifdef HAVE_TPETRA_MMM_TIMINGS
840 std::string prefix_mmm = std::string("TpetraExt ") + label + std::string(": ");
841 using Teuchos::TimeMonitor;
842 Teuchos::RCP<TimeMonitor> MM;
843#endif
844
845 // Node-specific code
846 std::string nodename("OpenMP");
847
848 // Options
849 std::string myalg("LTG");
850
851 if(!params.is_null()) {
852 if(params->isParameter("openmp: ptap algorithm"))
853 myalg = params->get("openmp: ptap algorithm",myalg);
854 }
855
856 if(myalg == "LTG") {
857#ifdef HAVE_TPETRA_MMM_TIMINGS
858 MM = Teuchos::null; MM = rcp(new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string("PTAP local transpose"))));
859#endif
860
861 using Teuchos::ParameterList;
862 using Teuchos::RCP;
863 using LO = LocalOrdinal;
864 using GO = GlobalOrdinal;
865 using SC = Scalar;
866
867 // We don't need a kernel-level PTAP, we just transpose here
868 using transposer_type =
869 RowMatrixTransposer<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode>;
870 transposer_type transposer (Pview.origMatrix, label + "XP: ");
871 RCP<ParameterList> transposeParams (new ParameterList);
872 if (! params.is_null ()) {
873 transposeParams->set ("compute global constants",
874 params->get ("compute global constants: temporaries",
875 false));
876 }
877 transposeParams->set ("sort", false);
878 RCP<CrsMatrix<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> > Ptrans =
879 transposer.createTransposeLocal (transposeParams);
880 CrsMatrixStruct<SC, LO, GO, Kokkos::Compat::KokkosOpenMPWrapperNode> Rview;
881 Rview.origMatrix = Ptrans;
882
883 using ::Tpetra::MatrixMatrix::ExtraKernels::mult_R_A_P_reuse_LowThreadGustavsonKernel;
884 mult_R_A_P_reuse_LowThreadGustavsonKernel
885 (Rview, Aview, Pview, Acol2Prow, Acol2PIrow, Pcol2Accol,
886 PIcol2Accol, Ac, Acimport, label, params);
887 }
888 else {
889 throw std::runtime_error("Tpetra::MatrixMatrix::PT_A_P reuse unknown kernel");
890 }
891 Ac.fillComplete(Ac.getDomainMap(), Ac.getRangeMap());
892}
893
894
895}//MMdetails
896}//Tpetra
897
898#endif//OpenMP
899
900#endif
KokkosSparse::CrsMatrix< impl_scalar_type, local_ordinal_type, device_type, void, typename local_graph_device_type::size_type > local_matrix_device_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
static bool debug()
Whether Tpetra is in debug mode.
A distributed dense vector.
Namespace Tpetra contains the class and methods constituting the Tpetra library.