Tpetra parallel linear algebra  Version of the Day
Tpetra_Experimental_BlockMultiVector_def.hpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_DEF_HPP
43 #define TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_DEF_HPP
44 
45 #include "Tpetra_Experimental_BlockMultiVector_decl.hpp"
46 
47 namespace { // anonymous
48 
60  template<class MultiVectorType>
61  struct RawHostPtrFromMultiVector {
62  typedef typename MultiVectorType::impl_scalar_type impl_scalar_type;
63 
64  static impl_scalar_type* getRawPtr (MultiVectorType& X) {
65  // NOTE (mfh 09 Jun 2016) This does NOT sync to host, or mark
66  // host as modified. This is on purpose, because we don't want
67  // the BlockMultiVector sync'd to host unnecessarily.
68  // Otherwise, all the MultiVector and BlockMultiVector kernels
69  // would run on host instead of device. See Github Issue #428.
70  auto X_view_host = X.template getLocalView<Kokkos::HostSpace> ();
71  impl_scalar_type* X_raw = X_view_host.data ();
72  return X_raw;
73  }
74  };
75 
88  template<class S, class LO, class GO, class N>
90  getRawHostPtrFromMultiVector (Tpetra::MultiVector<S, LO, GO, N>& X) {
92  return RawHostPtrFromMultiVector<MV>::getRawPtr (X);
93  }
94 
95 } // namespace (anonymous)
96 
97 namespace Tpetra {
98 namespace Experimental {
99 namespace Classes {
100 
101 template<class Scalar, class LO, class GO, class Node>
102 typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type
103 BlockMultiVector<Scalar, LO, GO, Node>::
104 getMultiVectorView () const
105 {
106  return mv_;
107 }
108 
109 template<class Scalar, class LO, class GO, class Node>
110 Teuchos::RCP<const BlockMultiVector<Scalar, LO, GO, Node> >
113 {
115  const BMV* src_bmv = dynamic_cast<const BMV*> (&src);
116  TEUCHOS_TEST_FOR_EXCEPTION(
117  src_bmv == NULL, std::invalid_argument, "Tpetra::Experimental::"
118  "BlockMultiVector: The source object of an Import or Export to a "
119  "BlockMultiVector, must also be a BlockMultiVector.");
120  return Teuchos::rcp (src_bmv, false); // nonowning RCP
121 }
122 
123 template<class Scalar, class LO, class GO, class Node>
125 BlockMultiVector (const map_type& meshMap,
126  const LO blockSize,
127  const LO numVecs) :
128  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
129  meshMap_ (meshMap),
130  pointMap_ (makePointMap (meshMap, blockSize)),
131  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs), // nonowning RCP is OK, since pointMap_ won't go away
132  mvData_ (getRawHostPtrFromMultiVector (mv_)),
133  blockSize_ (blockSize)
134 {
135  // Make sure that mv_ has view semantics.
136  mv_.setCopyOrView (Teuchos::View);
137 }
138 
139 template<class Scalar, class LO, class GO, class Node>
141 BlockMultiVector (const map_type& meshMap,
142  const map_type& pointMap,
143  const LO blockSize,
144  const LO numVecs) :
145  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
146  meshMap_ (meshMap),
147  pointMap_ (pointMap),
148  mv_ (Teuchos::rcpFromRef (pointMap_), numVecs),
149  mvData_ (getRawHostPtrFromMultiVector (mv_)),
150  blockSize_ (blockSize)
151 {
152  // Make sure that mv_ has view semantics.
153  mv_.setCopyOrView (Teuchos::View);
154 }
155 
156 template<class Scalar, class LO, class GO, class Node>
159  const map_type& meshMap,
160  const LO blockSize) :
161  dist_object_type (Teuchos::rcp (new map_type (meshMap))), // shallow copy
162  meshMap_ (meshMap),
163  mvData_ (NULL), // just for now
164  blockSize_ (blockSize)
165 {
166  using Teuchos::RCP;
167 
168  if (X_mv.getCopyOrView () == Teuchos::View) {
169  // The input MultiVector has view semantics, so assignment just
170  // does a shallow copy.
171  mv_ = X_mv;
172  }
173  else if (X_mv.getCopyOrView () == Teuchos::Copy) {
174  // The input MultiVector has copy semantics. We can't change
175  // that, but we can make a view of the input MultiVector and
176  // change the view to have view semantics. (That sounds silly;
177  // shouldn't views always have view semantics? but remember that
178  // "view semantics" just governs the default behavior of the copy
179  // constructor and assignment operator.)
180  RCP<const mv_type> X_view_const;
181  const size_t numCols = X_mv.getNumVectors ();
182  if (numCols == 0) {
183  Teuchos::Array<size_t> cols (0); // view will have zero columns
184  X_view_const = X_mv.subView (cols ());
185  } else { // Range1D is an inclusive range
186  X_view_const = X_mv.subView (Teuchos::Range1D (0, numCols-1));
187  }
188  TEUCHOS_TEST_FOR_EXCEPTION(
189  X_view_const.is_null (), std::logic_error, "Tpetra::Experimental::"
190  "BlockMultiVector constructor: X_mv.subView(...) returned null. This "
191  "should never happen. Please report this bug to the Tpetra developers.");
192 
193  // It's perfectly OK to cast away const here. Those view methods
194  // should be marked const anyway, because views can't change the
195  // allocation (just the entries themselves).
196  RCP<mv_type> X_view = Teuchos::rcp_const_cast<mv_type> (X_view_const);
197  X_view->setCopyOrView (Teuchos::View);
198  TEUCHOS_TEST_FOR_EXCEPTION(
199  X_view->getCopyOrView () != Teuchos::View, std::logic_error, "Tpetra::"
200  "Experimental::BlockMultiVector constructor: We just set a MultiVector "
201  "to have view semantics, but it claims that it doesn't have view "
202  "semantics. This should never happen. "
203  "Please report this bug to the Tpetra developers.");
204  mv_ = *X_view; // MultiVector::operator= does a shallow copy here
205  }
206 
207  // At this point, mv_ has been assigned, so we can ignore X_mv.
208  Teuchos::RCP<const map_type> pointMap = mv_.getMap ();
209  if (! pointMap.is_null ()) {
210  pointMap_ = *pointMap; // Map::operator= also does a shallow copy
211  }
212  mvData_ = getRawHostPtrFromMultiVector (mv_);
213 }
214 
215 template<class Scalar, class LO, class GO, class Node>
218  const map_type& newMeshMap,
219  const map_type& newPointMap,
220  const size_t offset) :
221  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
222  meshMap_ (newMeshMap),
223  pointMap_ (newPointMap),
224  mv_ (X.mv_, newPointMap, offset * X.getBlockSize ()), // MV "offset view" constructor
225  mvData_ (getRawHostPtrFromMultiVector (mv_)),
226  blockSize_ (X.getBlockSize ())
227 {
228  // Make sure that mv_ has view semantics.
229  mv_.setCopyOrView (Teuchos::View);
230 }
231 
232 template<class Scalar, class LO, class GO, class Node>
235  const map_type& newMeshMap,
236  const size_t offset) :
237  dist_object_type (Teuchos::rcp (new map_type (newMeshMap))), // shallow copy
238  meshMap_ (newMeshMap),
239  pointMap_ (makePointMap (newMeshMap, X.getBlockSize ())),
240  mv_ (X.mv_, pointMap_, offset * X.getBlockSize ()), // MV "offset view" constructor
241  mvData_ (getRawHostPtrFromMultiVector (mv_)),
242  blockSize_ (X.getBlockSize ())
243 {
244  // Make sure that mv_ has view semantics.
245  mv_.setCopyOrView (Teuchos::View);
246 }
247 
248 template<class Scalar, class LO, class GO, class Node>
251  dist_object_type (Teuchos::null),
252  mvData_ (NULL),
253  blockSize_ (0)
254 {
255  // Make sure that mv_ has view semantics.
256  mv_.setCopyOrView (Teuchos::View);
257 }
258 
259 template<class Scalar, class LO, class GO, class Node>
262 makePointMap (const map_type& meshMap, const LO blockSize)
263 {
264  typedef Tpetra::global_size_t GST;
265  typedef typename Teuchos::ArrayView<const GO>::size_type size_type;
266 
267  const GST gblNumMeshMapInds =
268  static_cast<GST> (meshMap.getGlobalNumElements ());
269  const size_t lclNumMeshMapIndices =
270  static_cast<size_t> (meshMap.getNodeNumElements ());
271  const GST gblNumPointMapInds =
272  gblNumMeshMapInds * static_cast<GST> (blockSize);
273  const size_t lclNumPointMapInds =
274  lclNumMeshMapIndices * static_cast<size_t> (blockSize);
275  const GO indexBase = meshMap.getIndexBase ();
276 
277  if (meshMap.isContiguous ()) {
278  return map_type (gblNumPointMapInds, lclNumPointMapInds, indexBase,
279  meshMap.getComm (), meshMap.getNode ());
280  }
281  else {
282  // "Hilbert's Hotel" trick: multiply each process' GIDs by
283  // blockSize, and fill in. That ensures correctness even if the
284  // mesh Map is overlapping.
285  Teuchos::ArrayView<const GO> lclMeshGblInds = meshMap.getNodeElementList ();
286  const size_type lclNumMeshGblInds = lclMeshGblInds.size ();
287  Teuchos::Array<GO> lclPointGblInds (lclNumPointMapInds);
288  for (size_type g = 0; g < lclNumMeshGblInds; ++g) {
289  const GO meshGid = lclMeshGblInds[g];
290  const GO pointGidStart = indexBase +
291  (meshGid - indexBase) * static_cast<GO> (blockSize);
292  const size_type offset = g * static_cast<size_type> (blockSize);
293  for (LO k = 0; k < blockSize; ++k) {
294  const GO pointGid = pointGidStart + static_cast<GO> (k);
295  lclPointGblInds[offset + static_cast<size_type> (k)] = pointGid;
296  }
297  }
298  return map_type (gblNumPointMapInds, lclPointGblInds (), indexBase,
299  meshMap.getComm (), meshMap.getNode ());
300  }
301 }
302 
303 
304 template<class Scalar, class LO, class GO, class Node>
305 void
307 replaceLocalValuesImpl (const LO localRowIndex,
308  const LO colIndex,
309  const Scalar vals[]) const
310 {
311  auto X_dst = getLocalBlock (localRowIndex, colIndex);
312  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
313  getBlockSize ());
314  Kokkos::deep_copy (X_dst, X_src);
315 }
316 
317 
318 template<class Scalar, class LO, class GO, class Node>
319 bool
321 replaceLocalValues (const LO localRowIndex,
322  const LO colIndex,
323  const Scalar vals[]) const
324 {
325  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
326  return false;
327  } else {
328  replaceLocalValuesImpl (localRowIndex, colIndex, vals);
329  return true;
330  }
331 }
332 
333 template<class Scalar, class LO, class GO, class Node>
334 bool
336 replaceGlobalValues (const GO globalRowIndex,
337  const LO colIndex,
338  const Scalar vals[]) const
339 {
340  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
341  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
342  return false;
343  } else {
344  replaceLocalValuesImpl (localRowIndex, colIndex, vals);
345  return true;
346  }
347 }
348 
349 template<class Scalar, class LO, class GO, class Node>
350 void
352 sumIntoLocalValuesImpl (const LO localRowIndex,
353  const LO colIndex,
354  const Scalar vals[]) const
355 {
356  auto X_dst = getLocalBlock (localRowIndex, colIndex);
357  typename const_little_vec_type::HostMirror::const_type X_src (reinterpret_cast<const impl_scalar_type*> (vals),
358  getBlockSize ());
359  AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
360 }
361 
362 template<class Scalar, class LO, class GO, class Node>
363 bool
365 sumIntoLocalValues (const LO localRowIndex,
366  const LO colIndex,
367  const Scalar vals[]) const
368 {
369  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
370  return false;
371  } else {
372  sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
373  return true;
374  }
375 }
376 
377 template<class Scalar, class LO, class GO, class Node>
378 bool
380 sumIntoGlobalValues (const GO globalRowIndex,
381  const LO colIndex,
382  const Scalar vals[]) const
383 {
384  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
385  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
386  return false;
387  } else {
388  sumIntoLocalValuesImpl (localRowIndex, colIndex, vals);
389  return true;
390  }
391 }
392 
393 template<class Scalar, class LO, class GO, class Node>
394 bool
396 getLocalRowView (const LO localRowIndex, const LO colIndex, Scalar*& vals) const
397 {
398  if (! meshMap_.isNodeLocalElement (localRowIndex)) {
399  return false;
400  } else {
401  auto X_ij = getLocalBlock (localRowIndex, colIndex);
402  vals = reinterpret_cast<Scalar*> (X_ij.data ());
403  return true;
404  }
405 }
406 
407 template<class Scalar, class LO, class GO, class Node>
408 bool
410 getGlobalRowView (const GO globalRowIndex, const LO colIndex, Scalar*& vals) const
411 {
412  const LO localRowIndex = meshMap_.getLocalElement (globalRowIndex);
413  if (localRowIndex == Teuchos::OrdinalTraits<LO>::invalid ()) {
414  return false;
415  } else {
416  auto X_ij = getLocalBlock (localRowIndex, colIndex);
417  vals = reinterpret_cast<Scalar*> (X_ij.data ());
418  return true;
419  }
420 }
421 
422 template<class Scalar, class LO, class GO, class Node>
425 getLocalBlock (const LO localRowIndex,
426  const LO colIndex) const
427 {
428  // NOTE (mfh 07 Jul 2016) It should be correct to add the
429  // commented-out test below. However, I've conservatively commented
430  // it out, since users might not realize that they need to have
431  // things sync'd correctly.
432 
433 // #ifdef HAVE_TPETRA_DEBUG
434 // TEUCHOS_TEST_FOR_EXCEPTION
435 // (mv_.template need_sync<Kokkos::HostSpace> (), std::runtime_error,
436 // "Tpetra::Experimental::BlockMultiVector::getLocalBlock: This method "
437 // "accesses host data, but the object is not in sync on host." );
438 // #endif // HAVE_TPETRA_DEBUG
439 
440  if (! isValidLocalMeshIndex (localRowIndex)) {
441  return typename little_vec_type::HostMirror ();
442  } else {
443  const size_t blockSize = getBlockSize ();
444  const size_t offset = colIndex * this->getStrideY () +
445  localRowIndex * blockSize;
446  impl_scalar_type* blockRaw = this->getRawPtr () + offset;
447  return typename little_vec_type::HostMirror (blockRaw, blockSize);
448  }
449 }
450 
451 template<class Scalar, class LO, class GO, class Node>
452 Teuchos::RCP<const typename BlockMultiVector<Scalar, LO, GO, Node>::mv_type>
455 {
456  using Teuchos::rcp;
457  using Teuchos::rcpFromRef;
458 
459  // The source object of an Import or Export must be a
460  // BlockMultiVector or MultiVector (a Vector is a MultiVector). Try
461  // them in that order; one must succeed. Note that the target of
462  // the Import or Export calls checkSizes in DistObject's doTransfer.
464  const this_type* srcBlkVec = dynamic_cast<const this_type*> (&src);
465  if (srcBlkVec == NULL) {
466  const mv_type* srcMultiVec = dynamic_cast<const mv_type*> (&src);
467  if (srcMultiVec == NULL) {
468  // FIXME (mfh 05 May 2014) Tpetra::MultiVector's operator=
469  // currently does a shallow copy by default. This is why we
470  // return by RCP, rather than by value.
471  return rcp (new mv_type ());
472  } else { // src is a MultiVector
473  return rcp (srcMultiVec, false); // nonowning RCP
474  }
475  } else { // src is a BlockMultiVector
476  return rcpFromRef (srcBlkVec->mv_); // nonowning RCP
477  }
478 }
479 
480 template<class Scalar, class LO, class GO, class Node>
483 {
484  return ! getMultiVectorFromSrcDistObject (src).is_null ();
485 }
486 
487 template<class Scalar, class LO, class GO, class Node>
490  size_t numSameIDs,
491  const Teuchos::ArrayView<const LO>& permuteToLIDs,
492  const Teuchos::ArrayView<const LO>& permuteFromLIDs)
493 {
494  const char tfecfFuncName[] = "copyAndPermute: ";
495  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
496  permuteToLIDs.size() != permuteFromLIDs.size(), std::runtime_error,
497  "permuteToLIDs and permuteFromLIDs must have the same size."
498  << std::endl << "permuteToLIDs.size() = " << permuteToLIDs.size ()
499  << " != permuteFromLIDs.size() = " << permuteFromLIDs.size () << ".");
500 
502  Teuchos::RCP<const BMV> srcAsBmvPtr = getBlockMultiVectorFromSrcDistObject (src);
503  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
504  srcAsBmvPtr.is_null (), std::invalid_argument,
505  "The source of an Import or Export to a BlockMultiVector "
506  "must also be a BlockMultiVector.");
507  const BMV& srcAsBmv = *srcAsBmvPtr;
508 
509  // FIXME (mfh 23 Apr 2014) This implementation is sequential and
510  // assumes UVM.
511 
512  const LO numVecs = getNumVectors ();
513  const LO numSame = static_cast<LO> (numSameIDs);
514  for (LO j = 0; j < numVecs; ++j) {
515  for (LO lclRow = 0; lclRow < numSame; ++lclRow) {
516  deep_copy (getLocalBlock (lclRow, j), srcAsBmv.getLocalBlock (lclRow, j));
517  }
518  }
519 
520  // FIXME (mfh 20 June 2012) For an Export with duplicate GIDs on the
521  // same process, this merges their values by replacement of the last
522  // encountered GID, not by the specified merge rule (such as ADD).
523  const LO numPermuteLIDs = static_cast<LO> (permuteToLIDs.size ());
524  for (LO j = 0; j < numVecs; ++j) {
525  for (LO k = numSame; k < numPermuteLIDs; ++k) {
526  deep_copy (getLocalBlock (permuteToLIDs[k], j), srcAsBmv.getLocalBlock (permuteFromLIDs[k], j));
527  }
528  }
529 }
530 
531 template<class Scalar, class LO, class GO, class Node>
534  const Teuchos::ArrayView<const LO>& exportLIDs,
535  Teuchos::Array<impl_scalar_type>& exports,
536  const Teuchos::ArrayView<size_t>& /* numPacketsPerLID */,
537  size_t& constantNumPackets,
538  Tpetra::Distributor& /* distor */)
539 {
541  typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
542  const char tfecfFuncName[] = "packAndPrepare: ";
543 
544  Teuchos::RCP<const BMV> srcAsBmvPtr = getBlockMultiVectorFromSrcDistObject (src);
545  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
546  srcAsBmvPtr.is_null (), std::invalid_argument,
547  "The source of an Import or Export to a BlockMultiVector "
548  "must also be a BlockMultiVector.");
549  const BMV& srcAsBmv = *srcAsBmvPtr;
550 
551  const LO numVecs = getNumVectors ();
552  const LO blockSize = getBlockSize ();
553 
554  // Number of things to pack per LID is the block size, times the
555  // number of columns. Input LIDs correspond to the mesh points, not
556  // the degrees of freedom (DOFs).
557  constantNumPackets =
558  static_cast<size_t> (blockSize) * static_cast<size_t> (numVecs);
559  const size_type numMeshLIDs = exportLIDs.size ();
560 
561  const size_type requiredExportsSize = numMeshLIDs *
562  static_cast<size_type> (blockSize) * static_cast<size_type> (numVecs);
563  exports.resize (requiredExportsSize);
564 
565  try {
566  size_type curExportPos = 0;
567  for (size_type meshLidIndex = 0; meshLidIndex < numMeshLIDs; ++meshLidIndex) {
568  for (LO j = 0; j < numVecs; ++j, curExportPos += blockSize) {
569  const LO meshLid = exportLIDs[meshLidIndex];
570  impl_scalar_type* const curExportPtr = &exports[curExportPos];
571  typename little_vec_type::HostMirror X_dst (curExportPtr, blockSize);
572  auto X_src = srcAsBmv.getLocalBlock (meshLid, j);
573 
574  Kokkos::deep_copy (X_dst, X_src);
575  }
576  }
577  } catch (std::exception& e) {
578  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
579  true, std::logic_error, "Oh no! packAndPrepare on Process "
580  << meshMap_.getComm ()->getRank () << " raised the following exception: "
581  << e.what ());
582  }
583 }
584 
585 template<class Scalar, class LO, class GO, class Node>
587 unpackAndCombine (const Teuchos::ArrayView<const LO>& importLIDs,
588  const Teuchos::ArrayView<const impl_scalar_type>& imports,
589  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
590  size_t constantNumPackets,
591  Tpetra::Distributor& distor,
593 {
594  typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
595  const char tfecfFuncName[] = "unpackAndCombine: ";
596 
597  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
598  CM != ADD && CM != REPLACE && CM != INSERT && CM != ABSMAX && CM != ZERO,
599  std::invalid_argument, "Invalid CombineMode: " << CM << ". Valid "
600  "CombineMode values are ADD, REPLACE, INSERT, ABSMAX, and ZERO.");
601 
602  if (CM == ZERO) {
603  return; // Combining does nothing, so we don't have to combine anything.
604  }
605 
606  // Number of things to pack per LID is the block size.
607  // Input LIDs correspond to the mesh points, not the DOFs.
608  const size_type numMeshLIDs = importLIDs.size ();
609  const LO blockSize = getBlockSize ();
610  const LO numVecs = getNumVectors ();
611 
612  const size_type requiredImportsSize = numMeshLIDs *
613  static_cast<size_type> (blockSize) * static_cast<size_type> (numVecs);
614  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
615  imports.size () < requiredImportsSize, std::logic_error,
616  ": imports.size () = " << imports.size ()
617  << " < requiredImportsSize = " << requiredImportsSize << ".");
618 
619  size_type curImportPos = 0;
620  for (size_type meshLidIndex = 0; meshLidIndex < numMeshLIDs; ++meshLidIndex) {
621  for (LO j = 0; j < numVecs; ++j, curImportPos += blockSize) {
622  const LO meshLid = importLIDs[meshLidIndex];
623  const impl_scalar_type* const curImportPtr = &imports[curImportPos];
624 
625  typename const_little_vec_type::HostMirror::const_type X_src (curImportPtr, blockSize);
626  auto X_dst = getLocalBlock (meshLid, j);
627 
628  if (CM == INSERT || CM == REPLACE) {
629  deep_copy (X_dst, X_src);
630  } else if (CM == ADD) {
631  AXPY (static_cast<impl_scalar_type> (STS::one ()), X_src, X_dst);
632  } else if (CM == ABSMAX) {
633  Impl::absMax (X_dst, X_src);
634  }
635  }
636  }
637 }
638 
639 template<class Scalar, class LO, class GO, class Node>
641 putScalar (const Scalar& val)
642 {
643  mv_.putScalar (val);
644 }
645 
646 template<class Scalar, class LO, class GO, class Node>
648 scale (const Scalar& val)
649 {
650  mv_.scale (val);
651 }
652 
653 template<class Scalar, class LO, class GO, class Node>
655 update (const Scalar& alpha,
657  const Scalar& beta)
658 {
659  mv_.update (alpha, X.mv_, beta);
660 }
661 
662 namespace Impl {
663 // Y := alpha * D * X
664 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
665 struct BlockWiseMultiply {
666  typedef typename ViewD::size_type Size;
667 
668 private:
669  typedef typename ViewD::device_type Device;
670  typedef typename ViewD::non_const_value_type ImplScalar;
671  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
672 
673  template <typename View>
674  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
675  typename View::device_type, Unmanaged>;
676  typedef UnmanagedView<ViewY> UnMViewY;
677  typedef UnmanagedView<ViewD> UnMViewD;
678  typedef UnmanagedView<ViewX> UnMViewX;
679 
680  const Size block_size_;
681  Scalar alpha_;
682  UnMViewY Y_;
683  UnMViewD D_;
684  UnMViewX X_;
685 
686 public:
687  BlockWiseMultiply (const Size block_size, const Scalar& alpha,
688  const ViewY& Y, const ViewD& D, const ViewX& X)
689  : block_size_(block_size), alpha_(alpha), Y_(Y), D_(D), X_(X)
690  {}
691 
692  KOKKOS_INLINE_FUNCTION
693  void operator() (const Size k) const {
694  const auto zero = Kokkos::Details::ArithTraits<Scalar>::zero();
695  auto D_curBlk = Kokkos::subview(D_, k, Kokkos::ALL (), Kokkos::ALL ());
696  const auto num_vecs = X_.extent(1);
697  for (Size i = 0; i < num_vecs; ++i) {
698  Kokkos::pair<Size, Size> kslice(k*block_size_, (k+1)*block_size_);
699  auto X_curBlk = Kokkos::subview(X_, kslice, i);
700  auto Y_curBlk = Kokkos::subview(Y_, kslice, i);
701  // Y_curBlk := alpha * D_curBlk * X_curBlk.
702  // Recall that GEMV does an update (+=) of the last argument.
703  Tpetra::Experimental::FILL(Y_curBlk, zero);
704  Tpetra::Experimental::GEMV(alpha_, D_curBlk, X_curBlk, Y_curBlk);
705  }
706  }
707 };
708 
709 template <typename Scalar, typename ViewY, typename ViewD, typename ViewX>
710 inline BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>
711 createBlockWiseMultiply (const int block_size, const Scalar& alpha,
712  const ViewY& Y, const ViewD& D, const ViewX& X) {
713  return BlockWiseMultiply<Scalar, ViewY, ViewD, ViewX>(block_size, alpha, Y, D, X);
714 }
715 
716 template <typename ViewY,
717  typename Scalar,
718  typename ViewD,
719  typename ViewZ,
720  typename LO = typename ViewY::size_type>
721 class BlockJacobiUpdate {
722 private:
723  typedef typename ViewD::device_type Device;
724  typedef typename ViewD::non_const_value_type ImplScalar;
725  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
726 
727  template <typename ViewType>
728  using UnmanagedView = Kokkos::View<typename ViewType::data_type,
729  typename ViewType::array_layout,
730  typename ViewType::device_type,
731  Unmanaged>;
732  typedef UnmanagedView<ViewY> UnMViewY;
733  typedef UnmanagedView<ViewD> UnMViewD;
734  typedef UnmanagedView<ViewZ> UnMViewZ;
735 
736  const LO blockSize_;
737  UnMViewY Y_;
738  const Scalar alpha_;
739  UnMViewD D_;
740  UnMViewZ Z_;
741  const Scalar beta_;
742 
743 public:
744  BlockJacobiUpdate (const ViewY& Y,
745  const Scalar& alpha,
746  const ViewD& D,
747  const ViewZ& Z,
748  const Scalar& beta) :
749  blockSize_ (D.extent (1)),
750  // numVecs_ (static_cast<int> (ViewY::rank) == 1 ? static_cast<size_type> (1) : static_cast<size_type> (Y_.extent (1))),
751  Y_ (Y),
752  alpha_ (alpha),
753  D_ (D),
754  Z_ (Z),
755  beta_ (beta)
756  {
757  static_assert (static_cast<int> (ViewY::rank) == 1,
758  "Y must have rank 1.");
759  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
760  static_assert (static_cast<int> (ViewZ::rank) == 1,
761  "Z must have rank 1.");
762  // static_assert (static_cast<int> (ViewZ::rank) ==
763  // static_cast<int> (ViewY::rank),
764  // "Z must have the same rank as Y.");
765  }
766 
767  KOKKOS_INLINE_FUNCTION void
768  operator() (const LO& k) const
769  {
770  using Kokkos::ALL;
771  using Kokkos::subview;
772  typedef Kokkos::pair<LO, LO> range_type;
773  typedef Kokkos::Details::ArithTraits<Scalar> KAT;
774 
775  // We only have to implement the alpha != 0 case.
776 
777  auto D_curBlk = subview (D_, k, ALL (), ALL ());
778  const range_type kslice (k*blockSize_, (k+1)*blockSize_);
779 
780  // Z.update (STS::one (), X, -STS::one ()); // assume this is done
781 
782  auto Z_curBlk = subview (Z_, kslice);
783  auto Y_curBlk = subview (Y_, kslice);
784  // Y_curBlk := beta * Y_curBlk + alpha * D_curBlk * Z_curBlk
785  if (beta_ == KAT::zero ()) {
786  Tpetra::Experimental::FILL (Y_curBlk, KAT::zero ());
787  }
788  else if (beta_ != KAT::one ()) {
789  Tpetra::Experimental::SCAL (beta_, Y_curBlk);
790  }
791  Tpetra::Experimental::GEMV (alpha_, D_curBlk, Z_curBlk, Y_curBlk);
792  }
793 };
794 
795 template<class ViewY,
796  class Scalar,
797  class ViewD,
798  class ViewZ,
799  class LO = typename ViewD::size_type>
800 void
801 blockJacobiUpdate (const ViewY& Y,
802  const Scalar& alpha,
803  const ViewD& D,
804  const ViewZ& Z,
805  const Scalar& beta)
806 {
807  static_assert (Kokkos::Impl::is_view<ViewY>::value, "Y must be a Kokkos::View.");
808  static_assert (Kokkos::Impl::is_view<ViewD>::value, "D must be a Kokkos::View.");
809  static_assert (Kokkos::Impl::is_view<ViewZ>::value, "Z must be a Kokkos::View.");
810  static_assert (static_cast<int> (ViewY::rank) == static_cast<int> (ViewZ::rank),
811  "Y and Z must have the same rank.");
812  static_assert (static_cast<int> (ViewD::rank) == 3, "D must have rank 3.");
813 
814  const auto lclNumMeshRows = D.extent (0);
815 
816 #ifdef HAVE_TPETRA_DEBUG
817  // D.extent(0) is the (local) number of mesh rows.
818  // D.extent(1) is the block size. Thus, their product should be
819  // the local number of point rows, that is, the number of rows in Y.
820  const auto blkSize = D.extent (1);
821  const auto lclNumPtRows = lclNumMeshRows * blkSize;
822  TEUCHOS_TEST_FOR_EXCEPTION
823  (Y.extent (0) != lclNumPtRows, std::invalid_argument,
824  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
825  "D.extent(0)*D.extent(1) = " << lclNumMeshRows << " * " << blkSize
826  << " = " << lclNumPtRows << ".");
827  TEUCHOS_TEST_FOR_EXCEPTION
828  (Y.extent (0) != Z.extent (0), std::invalid_argument,
829  "blockJacobiUpdate: Y.extent(0) = " << Y.extent (0) << " != "
830  "Z.extent(0) = " << Z.extent (0) << ".");
831  TEUCHOS_TEST_FOR_EXCEPTION
832  (Y.extent (1) != Z.extent (1), std::invalid_argument,
833  "blockJacobiUpdate: Y.extent(1) = " << Y.extent (1) << " != "
834  "Z.extent(1) = " << Z.extent (1) << ".");
835 #endif // HAVE_TPETRA_DEBUG
836 
837  BlockJacobiUpdate<ViewY, Scalar, ViewD, ViewZ, LO> functor (Y, alpha, D, Z, beta);
838  typedef Kokkos::RangePolicy<typename ViewY::execution_space, LO> range_type;
839  // lclNumMeshRows must fit in LO, else the Map would not be correct.
840  range_type range (0, static_cast<LO> (lclNumMeshRows));
841  Kokkos::parallel_for (range, functor);
842 }
843 
844 } // namespace Impl
845 
846 template<class Scalar, class LO, class GO, class Node>
848 blockWiseMultiply (const Scalar& alpha,
849  const Kokkos::View<const impl_scalar_type***,
850  device_type, Kokkos::MemoryUnmanaged>& D,
852 {
853  using Kokkos::ALL;
854  typedef typename device_type::execution_space execution_space;
855  typedef typename device_type::memory_space memory_space;
856  const LO lclNumMeshRows = meshMap_.getNodeNumElements ();
857 
858  if (alpha == STS::zero ()) {
859  this->putScalar (STS::zero ());
860  }
861  else { // alpha != 0
862  const LO blockSize = this->getBlockSize ();
863  const impl_scalar_type alphaImpl = static_cast<impl_scalar_type> (alpha);
864  auto X_lcl = X.mv_.template getLocalView<memory_space> ();
865  auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
866  auto bwm = Impl::createBlockWiseMultiply (blockSize, alphaImpl, Y_lcl, D, X_lcl);
867 
868  // Use an explicit RangePolicy with the desired execution space.
869  // Otherwise, if you just use a number, it runs on the default
870  // execution space. For example, if the default execution space
871  // is Cuda but the current execution space is Serial, using just a
872  // number would incorrectly run with Cuda.
873  Kokkos::RangePolicy<execution_space, LO> range (0, lclNumMeshRows);
874  Kokkos::parallel_for (range, bwm);
875  }
876 }
877 
878 template<class Scalar, class LO, class GO, class Node>
880 blockJacobiUpdate (const Scalar& alpha,
881  const Kokkos::View<const impl_scalar_type***,
882  device_type, Kokkos::MemoryUnmanaged>& D,
885  const Scalar& beta)
886 {
887  using Kokkos::ALL;
888  using Kokkos::subview;
889  typedef typename device_type::memory_space memory_space;
890  typedef impl_scalar_type IST;
891 
892  const IST alphaImpl = static_cast<IST> (alpha);
893  const IST betaImpl = static_cast<IST> (beta);
894  const LO numVecs = mv_.getNumVectors ();
895 
896  auto X_lcl = X.mv_.template getLocalView<memory_space> ();
897  auto Y_lcl = this->mv_.template getLocalView<memory_space> ();
898  auto Z_lcl = Z.mv_.template getLocalView<memory_space> ();
899 
900  if (alpha == STS::zero ()) { // Y := beta * Y
901  this->scale (beta);
902  }
903  else { // alpha != 0
904  Z.update (STS::one (), X, -STS::one ());
905  for (LO j = 0; j < numVecs; ++j) {
906  auto X_lcl_j = subview (X_lcl, ALL (), j);
907  auto Y_lcl_j = subview (Y_lcl, ALL (), j);
908  auto Z_lcl_j = subview (Z_lcl, ALL (), j);
909  Impl::blockJacobiUpdate (Y_lcl_j, alphaImpl, D, Z_lcl_j, betaImpl);
910  }
911  }
912 }
913 
914 } // namespace Classes
915 } // namespace Experimental
916 } // namespace Tpetra
917 
918 //
919 // Explicit instantiation macro
920 //
921 // Must be expanded from within the Tpetra namespace!
922 //
923 #define TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_INSTANT(S,LO,GO,NODE) \
924  namespace Experimental { \
925  namespace Classes { \
926  template class BlockMultiVector< S, LO, GO, NODE >; \
927  } \
928  }
929 
930 #endif // TPETRA_EXPERIMENTAL_BLOCKMULTIVECTOR_DEF_HPP
Base class for distributed Tpetra objects that support data redistribution.
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
LO getNumVectors() const
Get the number of columns (vectors) in the BlockMultiVector.
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
impl_scalar_type * getRawPtr() const
Raw pointer to the MultiVector&#39;s data.
Tpetra::Map< LO, GO, Node > map_type
The specialization of Tpetra::Map that this class uses.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
void blockWiseMultiply(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X)
*this := alpha * D * X, where D is a block diagonal matrix.
void putScalar(const Scalar &val)
Fill all entries with the given value val.
bool sumIntoGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[]) const
Sum into all values at the given mesh point, using a global index.
size_t getStrideY() const
Stride between consecutive local entries in the same row.
virtual bool checkSizes(const Tpetra::SrcDistObject &source)
Compare the source and target (this) objects for compatibility.
One or more distributed dense vectors.
MultiVector for multiple degrees of freedom per mesh point.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
bool replaceLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[]) const
Replace all values at the given mesh point, using local row and column indices.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
GlobalOrdinal getIndexBase() const
The index base for this Map.
size_t getNumVectors() const
Number of columns in the multivector.
little_vec_type::HostMirror getLocalBlock(const LO localRowIndex, const LO colIndex) const
Get a host view of the degrees of freedom at the given mesh point.
::Tpetra::Experimental::Classes::BlockMultiVector< SC, LO, GO, NT > BlockMultiVector
Alias for Tpetra::Experimental::Classes::BlockMultiVector.
mv_type::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
void scale(const Scalar &alpha)
Scale in place: this = alpha*this.
bool getGlobalRowView(const GO globalRowIndex, const LO colIndex, Scalar *&vals) const
Get a writeable view of the entries at the given mesh point, using a global index.
Insert new values that don&#39;t currently exist.
Teuchos::RCP< const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > > subView(const Teuchos::Range1D &colRng) const
Return a const MultiVector with const views of selected columns.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
bool getLocalRowView(const LO localRowIndex, const LO colIndex, Scalar *&vals) const
Get a writeable view of the entries at the given mesh point, using a local index. ...
void update(const Scalar &alpha, const MultiVector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta)
Update: this = beta*this + alpha*A.
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
Teuchos::ArrayView< const GlobalOrdinal > getNodeElementList() const
Return a NONOWNING view of the global indices owned by this process.
Replace old value with maximum of magnitudes of old and new values.
Node::device_type device_type
The Kokkos Device type.
bool replaceGlobalValues(const GO globalRowIndex, const LO colIndex, const Scalar vals[]) const
Replace all values at the given mesh point, using a global index.
Abstract base class for objects that can be the source of an Import or Export operation.
Replace existing values with new values.
Replace old values with zero.
void setCopyOrView(const Teuchos::DataAccess copyOrView)
Set whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.
A parallel distribution of indices over processes.
void blockJacobiUpdate(const Scalar &alpha, const Kokkos::View< const impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D, const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Z, const Scalar &beta)
Block Jacobi update .
device_type::execution_space execution_space
The Kokkos execution space.
bool isValidLocalMeshIndex(const LO meshLocalIndex) const
True if and only if meshLocalIndex is a valid local index in the mesh Map.
void update(const Scalar &alpha, const BlockMultiVector< Scalar, LO, GO, Node > &X, const Scalar &beta)
Update: this = beta*this + alpha*X.
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
Teuchos::DataAccess getCopyOrView() const
Get whether this has copy (copyOrView = Teuchos::Copy) or view (copyOrView = Teuchos::View) semantics...
mv_type mv_
The Tpetra::MultiVector used to represent the data.
bool sumIntoLocalValues(const LO localRowIndex, const LO colIndex, const Scalar vals[]) const
Sum into all values at the given mesh point, using a local index.
Teuchos::RCP< Node > getNode() const
Get this Map&#39;s Node object.