42 #ifndef TPETRA_DISTOBJECT_DEF_HPP 43 #define TPETRA_DISTOBJECT_DEF_HPP 53 #include "Tpetra_Distributor.hpp" 61 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Transfer-timer setup; only present when HAVE_TPETRA_TRANSFER_TIMERS is
// defined.  For each of the five named phases (doTransfer, copyAndPermute,
// packAndPrepare, doPostsAndWaits, unpackAndCombine) the code looks up an
// existing counter by name via TimeMonitor::lookupCounter, requests a new one
// via TimeMonitor::getNewCounter when the lookup returned null, and stores
// the result in the matching *_Timer_ member.
// NOTE(review): several lines of this listing were dropped during extraction
// (e.g. the "xTimer =" assignment in front of the first three getNewCounter
// calls and the closing braces) -- confirm against the real header.
66 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 69 using Teuchos::TimeMonitor;
71 RCP<Time> doXferTimer =
72 TimeMonitor::lookupCounter (
"Tpetra::DistObject::doTransfer");
73 if (doXferTimer.is_null ()) {
75 TimeMonitor::getNewCounter (
"Tpetra::DistObject::doTransfer");
77 doXferTimer_ = doXferTimer;
79 RCP<Time> copyAndPermuteTimer =
80 TimeMonitor::lookupCounter (
"Tpetra::DistObject::copyAndPermute");
81 if (copyAndPermuteTimer.is_null ()) {
83 TimeMonitor::getNewCounter (
"Tpetra::DistObject::copyAndPermute");
85 copyAndPermuteTimer_ = copyAndPermuteTimer;
87 RCP<Time> packAndPrepareTimer =
88 TimeMonitor::lookupCounter (
"Tpetra::DistObject::packAndPrepare");
89 if (packAndPrepareTimer.is_null ()) {
91 TimeMonitor::getNewCounter (
"Tpetra::DistObject::packAndPrepare");
93 packAndPrepareTimer_ = packAndPrepareTimer;
95 RCP<Time> doPostsAndWaitsTimer =
96 TimeMonitor::lookupCounter (
"Tpetra::DistObject::doPostsAndWaits");
97 if (doPostsAndWaitsTimer.is_null ()) {
98 doPostsAndWaitsTimer =
99 TimeMonitor::getNewCounter (
"Tpetra::DistObject::doPostsAndWaits");
101 doPostsAndWaitsTimer_ = doPostsAndWaitsTimer;
103 RCP<Time> unpackAndCombineTimer =
104 TimeMonitor::lookupCounter (
"Tpetra::DistObject::unpackAndCombine");
105 if (unpackAndCombineTimer.is_null ()) {
106 unpackAndCombineTimer =
107 TimeMonitor::getNewCounter (
"Tpetra::DistObject::unpackAndCombine");
109 unpackAndCombineTimer_ = unpackAndCombineTimer;
110 #endif // HAVE_TPETRA_TRANSFER_TIMERS 113 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
119 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Builds a one-line, human-readable summary in a std::ostringstream: the
// literal "Tpetra::DistObject" tag followed by the demangled names of the
// Packet, LocalOrdinal, GlobalOrdinal and Node types, plus the object label
// when getObjectLabel() is non-empty.
// NOTE(review): the signature, closing brace of the summary and the return
// statement are not visible in this listing; presumably this is
// DistObject::description() -- confirm.
124 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
129 using Teuchos::TypeNameTraits;
131 std::ostringstream os;
132 os <<
"\"Tpetra::DistObject\": {" 133 <<
"Packet: " << TypeNameTraits<packet_type>::name ()
134 <<
", LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name ()
135 <<
", GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name ()
136 <<
", Node: " << TypeNameTraits<Node>::name ();
// The label is appended only when one has been set.
137 if (this->getObjectLabel () !=
"") {
138 os <<
"Label: \"" << this->getObjectLabel () <<
"\"";
// Writes a multi-line description of this object to `out` at the requested
// verbosity level.
//   verbLevel - Teuchos verbosity; VERB_DEFAULT is treated as VERB_LOW and
//               VERB_NONE prints nothing.
// NOTE(review): the first signature line (with the `out` parameter) and many
// braces are missing from this listing; `out` is used as an output stream
// accepted by Teuchos::OSTab -- confirm its exact type in the real header.
144 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
148 const Teuchos::EVerbosityLevel verbLevel)
const 150 using Teuchos::rcpFromRef;
151 using Teuchos::TypeNameTraits;
// Map the default verbosity onto VERB_LOW.
153 const Teuchos::EVerbosityLevel vl = (verbLevel == Teuchos::VERB_DEFAULT) ?
154 Teuchos::VERB_LOW : verbLevel;
// A null communicator is treated as a single process with rank 0.
155 Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getMap ()->getComm ();
156 const int myRank = comm.is_null () ? 0 : comm->getRank ();
157 const int numProcs = comm.is_null () ? 1 : comm->getSize ();
159 if (vl != Teuchos::VERB_NONE) {
160 Teuchos::OSTab tab0 (out);
162 out <<
"\"Tpetra::DistObject\":" << endl;
164 Teuchos::OSTab tab1 (out);
166 out <<
"Template parameters:" << endl;
168 Teuchos::OSTab tab2 (out);
169 out <<
"Packet: " << TypeNameTraits<packet_type>::name () << endl
170 <<
"LocalOrdinal: " << TypeNameTraits<local_ordinal_type>::name () << endl
171 <<
"GlobalOrdinal: " << TypeNameTraits<global_ordinal_type>::name () << endl
172 <<
"Node: " << TypeNameTraits<node_type>::name () << endl;
// Label line is emitted only when a label has been set.
174 if (this->getObjectLabel () !=
"") {
175 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl;
// Delegate the Map description to the Map itself, at the same verbosity.
182 out <<
"Map:" << endl;
184 Teuchos::OSTab tab2 (out);
185 map_->describe (out, vl);
// Above VERB_LOW, report the communication buffer sizes.
// NOTE(review): the loop prints myRank, not p; original line 191 is missing
// here and presumably holds an `if (myRank == p)` guard -- confirm.
189 if (vl > Teuchos::VERB_LOW) {
190 for (
int p = 0; p < numProcs; ++p) {
192 out <<
"Process " << myRank <<
":" << endl;
193 Teuchos::OSTab tab2 (out);
194 out <<
"Export buffer size (in packets): " 195 << exports_.extent (0)
197 <<
"Import buffer size (in packets): " 198 << imports_.extent (0)
201 if (! comm.is_null ()) {
// Unconditionally throws std::logic_error with a "Not implemented" message
// (the test condition passed to TEUCHOS_TEST_FOR_EXCEPTION is `true`).
// NOTE(review): the signature is not visible in this listing; the name is
// taken from the exception message.
211 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
216 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::logic_error,
217 "Tpetra::DistObject::removeEmptyProcessesInPlace: Not implemented");
// Forward-mode import: forwards to doTransfer(source, importer, modeString,
// DoForward, CM), optionally bracketed by debug messages on std::cerr that
// are prefixed with "(Proc <rank>) ".
// NOTE(review): the signature and the condition guarding the debug output
// are missing from this listing; the name is inferred from modeString.
249 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
257 const char modeString[] =
"doImport (forward mode)";
// Debug prefix; the rank comes from the Map's communicator when both the Map
// and the communicator are non-null.
263 std::unique_ptr<std::string> prefix;
266 auto map = this->getMap ();
267 if (! map.is_null ()) {
268 auto comm = map->getComm ();
269 if (! comm.is_null ()) {
270 myRank = comm->getRank ();
273 prefix = [myRank] () {
274 std::ostringstream os;
275 os <<
"(Proc " << myRank <<
") ";
276 return std::unique_ptr<std::string> (
new std::string (os.str ()));
// The message is assembled in a local stream and written with a single
// insertion into std::cerr.
278 std::ostringstream os;
279 os << *prefix <<
"Tpetra::DistObject::" << modeString <<
":" << endl;
280 std::cerr << os.str ();
// The actual work.
282 this->doTransfer (source, importer, modeString, DoForward, CM);
284 std::ostringstream os;
285 os << *prefix <<
"Tpetra::DistObject::" << modeString <<
": Done!" 287 std::cerr << os.str ();
// Forward-mode export: forwards to doTransfer(source, exporter, modeString,
// DoForward, CM), optionally bracketed by debug messages on std::cerr that
// are prefixed with "(Proc <rank>) ".
// NOTE(review): the signature and the condition guarding the debug output
// are missing from this listing; the name is inferred from modeString.
291 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
299 const char modeString[] =
"doExport (forward mode)";
// Debug prefix; the rank comes from the Map's communicator when both the Map
// and the communicator are non-null.
305 std::unique_ptr<std::string> prefix;
308 auto map = this->getMap ();
309 if (! map.is_null ()) {
310 auto comm = map->getComm ();
311 if (! comm.is_null ()) {
312 myRank = comm->getRank ();
315 prefix = [myRank] () {
316 std::ostringstream os;
317 os <<
"(Proc " << myRank <<
") ";
318 return std::unique_ptr<std::string> (
new std::string (os.str ()));
// The message is assembled in a local stream and written with a single
// insertion into std::cerr.
320 std::ostringstream os;
321 os << *prefix <<
"Tpetra::DistObject::" << modeString <<
":" << endl;
322 std::cerr << os.str ();
// The actual work.
324 this->doTransfer (source, exporter, modeString, DoForward, CM);
326 std::ostringstream os;
327 os << *prefix <<
"Tpetra::DistObject::" << modeString <<
": Done!" 329 std::cerr << os.str ();
// Reverse-mode import: forwards to doTransfer(source, exporter, modeString,
// DoReverse, CM) -- note that the transfer object used here is `exporter` --
// optionally bracketed by debug messages on std::cerr that are prefixed with
// "(Proc <rank>) ".
// NOTE(review): the signature and the condition guarding the debug output
// are missing from this listing; the name is inferred from modeString.
333 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
341 const char modeString[] =
"doImport (reverse mode)";
// Debug prefix; the rank comes from the Map's communicator when both the Map
// and the communicator are non-null.
347 std::unique_ptr<std::string> prefix;
350 auto map = this->getMap ();
351 if (! map.is_null ()) {
352 auto comm = map->getComm ();
353 if (! comm.is_null ()) {
354 myRank = comm->getRank ();
357 prefix = [myRank] () {
358 std::ostringstream os;
359 os <<
"(Proc " << myRank <<
") ";
360 return std::unique_ptr<std::string> (
new std::string (os.str ()));
// The message is assembled in a local stream and written with a single
// insertion into std::cerr.
362 std::ostringstream os;
363 os << *prefix <<
"Tpetra::DistObject::" << modeString <<
":" << endl;
364 std::cerr << os.str ();
// The actual work.
366 this->doTransfer (source, exporter, modeString, DoReverse, CM);
368 std::ostringstream os;
369 os << *prefix <<
"Tpetra::DistObject::" << modeString <<
": Done!" 371 std::cerr << os.str ();
// Reverse-mode export: forwards to doTransfer(source, importer, modeString,
// DoReverse, CM) -- note that the transfer object used here is `importer` --
// optionally bracketed by debug messages on std::cerr that are prefixed with
// "(Proc <rank>) ".
// NOTE(review): the signature and the condition guarding the debug output
// are missing from this listing; the name is inferred from modeString.
375 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
383 const char modeString[] =
"doExport (reverse mode)";
// Debug prefix; the rank comes from the Map's communicator when both the Map
// and the communicator are non-null.
389 std::unique_ptr<std::string> prefix;
392 auto map = this->getMap ();
393 if (! map.is_null ()) {
394 auto comm = map->getComm ();
395 if (! comm.is_null ()) {
396 myRank = comm->getRank ();
399 prefix = [myRank] () {
400 std::ostringstream os;
401 os <<
"(Proc " << myRank <<
") ";
402 return std::unique_ptr<std::string> (
new std::string (os.str ()));
// The message is assembled in a local stream and written with a single
// insertion into std::cerr.
404 std::ostringstream os;
405 os << *prefix <<
"Tpetra::DistObject::" << modeString <<
":" << endl;
406 std::cerr << os.str ();
// The actual work.
408 this->doTransfer (source, importer, modeString, DoReverse, CM);
410 std::ostringstream os;
411 os << *prefix <<
"Tpetra::DistObject::" << modeString <<
": Done!" 413 std::cerr << os.str ();
417 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
424 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Common entry point behind the doImport/doExport wrappers.  Visible steps:
//   1. Validate that this object's Map (and, when the source is itself a
//      DistObject, the source's Map) matches the transfer's target/source
//      Map for the requested direction; mismatches throw
//      std::invalid_argument.
//   2. Pick the permute/export/remote LID lists from `transfer`, swapping
//      the "to"/"from" and "export"/"remote" roles in reverse mode.
//   3. Dispatch to doTransferNew (after copying the LID lists into
//      Kokkos::DualView objects) when useNewInterface() is true, otherwise
//      to doTransferOld.
// Parameters visible here: `transfer` (the Import/Export plan) and
// `modeString` (used only in messages).
// NOTE(review): the remaining parameters, the `else` lines and the guards
// around the debug output are missing from this listing.
431 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
435 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
436 const char modeString[],
442 typedef LocalOrdinal LO;
// Forward mode: this (target) object's Map must match the plan's target Map.
449 if (revOp == DoForward) {
450 const bool myMapSameAsTransferTgtMap =
451 this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
452 TEUCHOS_TEST_FOR_EXCEPTION
453 (! myMapSameAsTransferTgtMap, std::invalid_argument,
454 "Tpetra::DistObject::" << modeString <<
": For forward-mode " 455 "communication, the target DistObject's Map must be the same " 456 "(in the sense of Tpetra::Map::isSameAs) as the input " 457 "Export/Import object's target Map.");
// Reverse mode (per the message text): compare against the plan's source Map.
460 const bool myMapSameAsTransferSrcMap =
461 this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
462 TEUCHOS_TEST_FOR_EXCEPTION
463 (! myMapSameAsTransferSrcMap, std::invalid_argument,
464 "Tpetra::DistObject::" << modeString <<
": For reverse-mode " 465 "communication, the target DistObject's Map must be the same " 466 "(in the sense of Tpetra::Map::isSameAs) as the input " 467 "Export/Import object's source Map.");
// The source-side checks run only when srcDistObj is non-null.
474 if (srcDistObj != NULL) {
475 if (revOp == DoForward) {
476 const bool srcMapSameAsImportSrcMap =
477 srcDistObj->
getMap ()->isSameAs (* (transfer.getSourceMap ()));
478 TEUCHOS_TEST_FOR_EXCEPTION
479 (! srcMapSameAsImportSrcMap, std::invalid_argument,
480 "Tpetra::DistObject::" << modeString <<
": For forward-mode " 481 "communication, the source DistObject's Map must be the same " 482 "as the input Export/Import object's source Map.");
485 const bool srcMapSameAsImportTgtMap =
486 srcDistObj->
getMap ()->isSameAs (* (transfer.getTargetMap ()));
487 TEUCHOS_TEST_FOR_EXCEPTION
488 (! srcMapSameAsImportTgtMap, std::invalid_argument,
489 "Tpetra::DistObject::" << modeString <<
": For reverse-mode " 490 "communication, the source DistObject's Map must be the same " 491 "as the input Export/Import object's target Map.");
// Debug prefix "(Proc <rank>) ", built the same way as in the wrappers.
500 std::unique_ptr<std::string> prefix;
503 auto map = this->getMap ();
504 if (! map.is_null ()) {
505 auto comm = map->getComm ();
506 if (! comm.is_null ()) {
507 myRank = comm->getRank ();
510 prefix = [myRank] () {
511 std::ostringstream os;
512 os <<
"(Proc " << myRank <<
") ";
513 return std::unique_ptr<std::string> (
new std::string (os.str ()));
515 std::ostringstream os;
516 os << *prefix <<
"Tpetra::DistObject::doTransfer:" << endl;
517 std::cerr << os.str ();
// Select the LID lists; reverse mode swaps the roles of each pair.
520 const size_t numSameIDs = transfer.getNumSameIDs ();
521 typedef Teuchos::ArrayView<const LocalOrdinal> view_type;
522 const view_type permuteToLIDs_ = (revOp == DoForward) ?
523 transfer.getPermuteToLIDs () : transfer.getPermuteFromLIDs ();
524 const view_type permuteFromLIDs_ = (revOp == DoForward) ?
525 transfer.getPermuteFromLIDs () : transfer.getPermuteToLIDs ();
526 const view_type exportLIDs_ = (revOp == DoForward) ?
527 transfer.getExportLIDs () : transfer.getRemoteLIDs ();
528 const view_type remoteLIDs_ = (revOp == DoForward) ?
529 transfer.getRemoteLIDs () : transfer.getExportLIDs ();
// New interface: communication buffers stay on the host unless
// Behavior::assumeMpiIsCudaAware() returns true.
532 if (this->useNewInterface ()) {
533 using ::Tpetra::Details::Behavior;
535 const bool commOnHost = ! Behavior::assumeMpiIsCudaAware ();
537 std::ostringstream os;
538 os << *prefix <<
"doTransfer: Use new interface; " 539 "commOnHost=" << (commOnHost ?
"true" :
"false") << endl;
540 std::cerr << os.str ();
// Copy each LID ArrayView into a DualView for the new interface.
551 Kokkos::DualView<LO*, DT> permuteToLIDs =
552 getDualViewCopyFromArrayView<LO, DT> (permuteToLIDs_,
555 Kokkos::DualView<LO*, DT> permuteFromLIDs =
556 getDualViewCopyFromArrayView<LO, DT> (permuteFromLIDs_,
561 Kokkos::DualView<LO*, DT> remoteLIDs =
562 getDualViewCopyFromArrayView<LO, DT> (remoteLIDs_,
565 Kokkos::DualView<LO*, DT> exportLIDs =
566 getDualViewCopyFromArrayView<LO, DT> (exportLIDs_,
569 doTransferNew (src, CM, numSameIDs, permuteToLIDs, permuteFromLIDs,
570 remoteLIDs, exportLIDs, distor, revOp, commOnHost);
// Old interface: pass the ArrayViews through unchanged.
574 std::ostringstream os;
575 os << *prefix <<
"doTransfer: Use old interface" << endl;
576 std::cerr << os.str ();
578 doTransferOld (src, CM, numSameIDs, permuteToLIDs_, permuteFromLIDs_,
579 remoteLIDs_, exportLIDs_, distor, revOp);
583 std::ostringstream os;
584 os << *prefix <<
"Tpetra::DistObject::doTransfer: Done!" << endl;
585 std::cerr << os.str ();
// Resizes imports_ toward `newSize` and emits debug messages on std::cerr
// before and after, each prefixed with "(Proc <rank>) ".
// NOTE(review): the signature, the guard around the debug output, the call
// that initialises `reallocated` and the return statement are missing from
// this listing; the name is inferred from the call sites elsewhere in this
// file (reallocImportsIfNeeded (len, debug/verbose)) -- confirm.
589 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
595 const int myRank = this->getMap ()->getComm ()->getRank ();
596 std::ostringstream os;
597 os <<
"(Proc " << myRank <<
") Reallocate (if needed) imports_ from " 598 << imports_.extent (0) <<
" to " << newSize << std::endl;
599 std::cerr << os.str ();
// Result of the reallocation attempt (initialiser not visible here).
602 const bool reallocated =
605 const int myRank = this->getMap ()->getComm ()->getRank ();
606 std::ostringstream os;
607 os <<
"(Proc " << myRank <<
") Finished reallocating imports_" 609 std::cerr << os.str ();
// (Re)allocates the two per-LID packet-count arrays, labelled
// "numExportPacketsPerLID" and "numImportPacketsPerLID", and returns true
// when either of them was reallocated.
//   numImportLIDs - number of import LIDs (the first parameter is not
//                   visible in this listing).
// NOTE(review): the reallocation calls themselves are missing from this
// listing; only their label arguments and result variables remain.
614 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
618 const size_t numImportLIDs)
// NOTE(review): presumably the shrink threshold passed to the reallocation
// helper -- its use is not visible here; confirm.
627 constexpr
size_t tooBigFactor = 10;
631 const int myRank = this->getMap ()->getComm ()->getRank ();
632 std::ostringstream os;
633 os <<
"(Proc " << myRank <<
") reallocArraysForNumPacketsPerLid before:" 635 <<
"(Proc " << myRank <<
") " 638 <<
"(Proc " << myRank <<
") " 641 std::cerr << os.str ();
645 const bool firstReallocated =
648 "numExportPacketsPerLID",
// A fence is requested before the second allocation only when the first
// array was NOT reallocated.
655 const bool needFenceBeforeNextAlloc = ! firstReallocated;
656 const bool secondReallocated =
659 "numImportPacketsPerLID",
661 needFenceBeforeNextAlloc);
// NOTE(review): this message is emitted after both reallocations but still
// says "before:" -- looks like a copy/paste slip; probably should read
// "after:".
664 const int myRank = this->getMap ()->getComm ()->getRank ();
665 std::ostringstream os;
666 os <<
"(Proc " << myRank <<
") reallocArraysForNumPacketsPerLid before:" 668 <<
"(Proc " << myRank <<
") " 671 <<
"(Proc " << myRank <<
") " 674 std::cerr << os.str ();
677 return firstReallocated || secondReallocated;
// Old-interface (Teuchos::ArrayView based) transfer.  Visible phases:
//   - checkSizes(src); failure throws std::invalid_argument
//   - createViewsNonConst, then copyAndPermute for same/permuted IDs
//   - packAndPrepare into a temporary Teuchos::Array of packets
//   - communication (two variants: variable vs. constant packets per LID,
//     each in reverse and forward mode) unless it is skipped
//   - unpackAndCombine, then releaseViews
// NOTE(review): many lines (signature head, else branches, distributor
// calls, braces) are missing from this listing; the name is taken from the
// debug string below.
680 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
686 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
687 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
688 const Teuchos::ArrayView<const LocalOrdinal>& remoteLIDs,
689 const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
// Compile-time switch for the debug output in this function.
699 constexpr
bool debug =
false;
701 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 702 Teuchos::TimeMonitor doXferMon (*doXferTimer_);
703 #endif // HAVE_TPETRA_TRANSFER_TIMERS 705 TEUCHOS_TEST_FOR_EXCEPTION(
706 ! checkSizes (src), std::invalid_argument,
707 "Tpetra::DistObject::doTransfer(): checkSizes() indicates that the " 708 "destination object is not a legal target for redistribution from the " 709 "source object. This probably means that they do not have the same " 710 "dimensions. For example, MultiVectors must have the same number of " 711 "rows and columns.");
// If every local element will be written (same + permuted + remote IDs
// equals the local element count), the views are opened write-only.
714 const size_t numIDsToWrite = numSameIDs +
715 static_cast<size_t> (permuteToLIDs.size ()) +
716 static_cast<size_t> (remoteLIDs.size ());
717 if (numIDsToWrite == this->getMap ()->getNodeNumElements ()) {
725 rwo = KokkosClassic::WriteOnly;
737 if (srcDistObj != NULL) {
753 this->createViewsNonConst (rwo);
// Local copy/permute is skipped when there is nothing to copy or permute.
755 if (numSameIDs + permuteToLIDs.size()) {
756 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 757 Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
758 #endif // HAVE_TPETRA_TRANSFER_TIMERS 760 copyAndPermute (src, numSameIDs, permuteToLIDs, permuteFromLIDs);
// constantNumPackets == 0 selects the variable-packets-per-LID path, which
// needs the per-LID count arrays.
771 size_t constantNumPackets = this->constantNumberOfPackets ();
778 if (constantNumPackets == 0) {
779 this->reallocArraysForNumPacketsPerLid (exportLIDs.size (),
784 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 785 Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
786 #endif // HAVE_TPETRA_TRANSFER_TIMERS 792 numExportPacketsPerLID_.template modify<Kokkos::HostSpace> ();
793 Teuchos::ArrayView<size_t> numExportPacketsPerLID =
// Pack into a temporary Teuchos::Array, then wrap it in an unmanaged host
// View (presumably to copy into exports_ -- the copy is not visible here).
801 Teuchos::Array<packet_type> exportsOld;
802 packAndPrepare (src, exportLIDs, exportsOld, numExportPacketsPerLID,
803 constantNumPackets, distor);
804 const size_t exportsLen =
static_cast<size_t> (exportsOld.size ());
806 Kokkos::View<
const packet_type*, Kokkos::HostSpace,
807 Kokkos::MemoryUnmanaged> exportsOldK (exportsOld.getRawPtr (),
809 exports_.template modify<Kokkos::HostSpace> ();
819 if (srcDistObj != NULL) {
// Constant packets per LID: the receive buffer length is known up front.
825 if (constantNumPackets != 0) {
830 const size_t rbufLen = remoteLIDs.size() * constantNumPackets;
832 std::ostringstream os;
833 os <<
"*** doTransferOld: Const # packets: imports_.extent(0) = " 834 << imports_.extent (0) <<
", rbufLen = " << rbufLen
836 std::cerr << os.str ();
838 reallocImportsIfNeeded (rbufLen, debug);
// Communication is skipped for a reverse transfer on a non-distributed
// object, and for a forward transfer under a second condition whose
// remainder is not visible in this listing.
842 bool needCommunication =
true;
843 if (revOp == DoReverse && ! isDistributed ()) {
844 needCommunication =
false;
853 else if (revOp == DoForward && srcDistObj != NULL &&
855 needCommunication =
false;
858 if (needCommunication) {
// ---- Reverse mode ----
859 if (revOp == DoReverse) {
860 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 861 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
862 #endif // HAVE_TPETRA_TRANSFER_TIMERS 863 if (constantNumPackets == 0) {
869 numExportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
870 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
877 numImportPacketsPerLID_.template modify<Kokkos::HostSpace> ();
878 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
881 numImportPacketsPerLID);
// Sum the per-LID import counts on the host to size imports_.
882 size_t totalImportPackets = 0;
884 typedef typename Kokkos::DualView<
size_t*,
885 device_type>::t_host::execution_space host_exec_space;
886 typedef Kokkos::RangePolicy<host_exec_space, Array_size_type> range_type;
887 const size_t*
const arrayToSum = numImportPacketsPerLID.getRawPtr ();
888 Kokkos::parallel_reduce (
"Count import packets",
889 range_type (0, numImportPacketsPerLID.size ()),
891 lclSum += arrayToSum[i];
892 }, totalImportPackets);
895 reallocImportsIfNeeded (totalImportPackets, debug);
901 imports_.template modify<Kokkos::HostSpace> ();
902 Teuchos::ArrayView<packet_type> hostImports =
904 exports_.template sync<Kokkos::HostSpace> ();
905 Teuchos::ArrayView<const packet_type> hostExports =
908 numExportPacketsPerLID,
910 numImportPacketsPerLID);
917 imports_.template modify<Kokkos::HostSpace> ();
918 Teuchos::ArrayView<packet_type> hostImports =
920 exports_.template sync<Kokkos::HostSpace> ();
921 Teuchos::ArrayView<const packet_type> hostExports =
// ---- Forward mode (mirrors the reverse-mode code above) ----
929 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 930 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
931 #endif // HAVE_TPETRA_TRANSFER_TIMERS 932 if (constantNumPackets == 0) {
938 numExportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
939 Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
946 numImportPacketsPerLID_.template modify<Kokkos::HostSpace> ();
947 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
950 numImportPacketsPerLID);
951 size_t totalImportPackets = 0;
953 typedef typename Kokkos::DualView<
size_t*,
954 device_type>::t_host::execution_space host_exec_space;
955 typedef Kokkos::RangePolicy<host_exec_space, Array_size_type> range_type;
956 const size_t*
const arrayToSum = numImportPacketsPerLID.getRawPtr ();
957 Kokkos::parallel_reduce (
"Count import packets",
958 range_type (0, numImportPacketsPerLID.size ()),
960 lclSum += arrayToSum[i];
961 }, totalImportPackets);
964 reallocImportsIfNeeded (totalImportPackets, debug);
970 imports_.template modify<Kokkos::HostSpace> ();
971 Teuchos::ArrayView<packet_type> hostImports =
973 exports_.template sync<Kokkos::HostSpace> ();
974 Teuchos::ArrayView<const packet_type> hostExports =
977 numExportPacketsPerLID,
979 numImportPacketsPerLID);
986 imports_.template modify<Kokkos::HostSpace> ();
987 Teuchos::ArrayView<packet_type> hostImports =
989 exports_.template sync<Kokkos::HostSpace> ();
990 Teuchos::ArrayView<const packet_type> hostExports =
// ---- Unpack received data into this object ----
998 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 999 Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1000 #endif // HAVE_TPETRA_TRANSFER_TIMERS 1005 imports_.template modify<Kokkos::HostSpace> ();
1006 Teuchos::ArrayView<packet_type> hostImports =
1011 numImportPacketsPerLID_.template sync<Kokkos::HostSpace> ();
1015 Teuchos::ArrayView<size_t> numImportPacketsPerLID =
1017 unpackAndCombine (remoteLIDs, hostImports, numImportPacketsPerLID,
1018 constantNumPackets, distor, CM);
// Counterpart of createViewsNonConst above.
1023 this->releaseViews ();
// Reduction functor: operator()(i, lclSum) adds element i of the wrapped
// View of size_t to the running sum lclSum.
//   DeviceType - device type of the wrapped Kokkos::View
//   IndexType  - index type passed to operator() (defaults to size_t)
// NOTE(review): the line carrying the struct/class keyword and the closing
// braces are missing from this listing.
1027 template<
class DeviceType,
class IndexType =
size_t>
// Stores the View to be summed.
1029 SumFunctor (
const Kokkos::View<const size_t*, DeviceType>& viewToSum) :
1030 viewToSum_ (viewToSum) {}
// One reduction step: accumulate entry i.
1031 KOKKOS_FUNCTION
void operator() (
const IndexType& i,
size_t& lclSum)
const {
1032 lclSum += viewToSum_(i);
1034 Kokkos::View<const size_t*, DeviceType> viewToSum_;
// Returns the sum of all entries of numImportPacketsPerLID, computed with a
// Kokkos::parallel_reduce over [0, extent(0)) in DeviceType's execution
// space, using SumFunctor as the reduction body.
//   numImportPacketsPerLID - View of per-LID packet counts
// NOTE(review): the return-type line and the braces are missing from this
// listing; the returned variable is a size_t.
1037 template<
class DeviceType,
class IndexType =
size_t>
1039 countTotalImportPackets (
const Kokkos::View<const size_t*, DeviceType>& numImportPacketsPerLID)
1041 using Kokkos::parallel_reduce;
1042 typedef DeviceType DT;
1043 typedef typename DT::execution_space DES;
1044 typedef Kokkos::RangePolicy<DES, IndexType> range_type;
// Number of entries to sum, converted to IndexType.
1046 const IndexType numOut = numImportPacketsPerLID.extent (0);
1047 size_t totalImportPackets = 0;
1048 parallel_reduce (
"Count import packets",
1049 range_type (0, numOut),
1050 SumFunctor<DeviceType, IndexType> (numImportPacketsPerLID),
1051 totalImportPackets);
1052 return totalImportPackets;
// New-interface (Kokkos::DualView based) transfer.  The debug messages
// number the phases:
//   1. checkSizes            2. copyAndPermuteNew
//   3. (re)allocate the per-LID count arrays (variable packets only)
//   4./5. packAndPrepareNew, then sync exports_
//   6. reallocate imports_ (constant packets only)
//   7. communication, reverse or forward, with buffers on host or device
//   8. unpackAndCombineNew   9. done
// Parameters visible here: `numSameIDs` and `commOnHost`.
// NOTE(review): the remaining parameters, the CHMS/CDMS typedefs, the
// conditions selecting between host and device code, the else lines and the
// guards around the debug output are missing from this listing.  CHMS and
// CDMS are presumably the host and device memory spaces of the
// communication buffers -- confirm.
1056 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1061 const size_t numSameIDs,
1072 const bool commOnHost)
1076 using Kokkos::Compat::getArrayView;
1077 using Kokkos::Compat::getConstArrayView;
1078 using Kokkos::Compat::getKokkosViewDeepCopy;
1079 using Kokkos::Compat::create_const_view;
1081 typedef LocalOrdinal LO;
1084 typedef typename Kokkos::DualView<LO*, DT>::t_dev::execution_space DES;
1093 typedef typename Kokkos::DualView<
size_t*,
1095 typedef typename Kokkos::DualView<
size_t*,
// Debug prefix "(Proc <rank>) "; rank 0 is used when the Map or its
// communicator is null.
1104 std::unique_ptr<std::string> prefix;
1106 auto map = this->getMap ();
1107 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1108 const int myRank = comm.is_null () ? 0 : comm->getRank ();
1109 std::ostringstream os;
1110 os <<
"(Proc " << myRank <<
") ";
1111 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
// NOTE(review): the banner below names Tpetra::CrsMatrix although this is
// DistObject code -- looks like a copy/paste slip in the message text.
1115 std::ostringstream os;
1116 os << *prefix <<
"Tpetra::CrsMatrix::doTransferNew: Input arguments:" << endl
1118 << *prefix <<
" numSameIDs: " << numSameIDs << endl
1127 << *prefix <<
" revOp: Do" << (revOp == DoReverse ?
"Reverse" :
"Forward") << endl
1128 << *prefix <<
" commOnHost: " << (commOnHost ?
"true" :
"false") << endl;
1129 std::cerr << os.str ();
// ---- 1. Size check; failure throws std::invalid_argument ----
1132 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 1133 Teuchos::TimeMonitor doXferMon (*doXferTimer_);
1134 #endif // HAVE_TPETRA_TRANSFER_TIMERS 1138 std::ostringstream os;
1139 os << *prefix <<
"1. checkSizes" << endl;
1140 std::cerr << os.str ();
1142 const bool checkSizesResult = this->checkSizes (src);
1143 TEUCHOS_TEST_FOR_EXCEPTION
1144 (! checkSizesResult, std::invalid_argument,
1145 "Tpetra::DistObject::doTransfer: checkSizes() indicates that the " 1146 "destination object is not a legal target for redistribution from the " 1147 "source object. This probably means that they do not have the same " 1148 "dimensions. For example, MultiVectors must have the same number of " 1149 "rows and columns.");
// ---- 2. Local copy/permute, skipped when there is nothing to do ----
1156 if (numSameIDs + permuteToLIDs.extent (0) != 0) {
1159 std::ostringstream os;
1160 os << *prefix <<
"2. copyAndPermuteNew" << endl;
1161 std::cerr << os.str ();
1164 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 1165 Teuchos::TimeMonitor copyAndPermuteMon (*copyAndPermuteTimer_);
1166 #endif // HAVE_TPETRA_TRANSFER_TIMERS 1167 this->copyAndPermuteNew (src, numSameIDs, permuteToLIDs,
1171 std::ostringstream os;
1172 os << *prefix <<
"After copyAndPermuteNew:" << endl
1179 std::cerr << os.str ();
// constantNumPackets == 0 selects the variable-packets-per-LID path.
1191 size_t constantNumPackets = this->constantNumberOfPackets ();
1193 std::ostringstream os;
1194 os << *prefix <<
"constantNumPackets=" << constantNumPackets << endl;
1195 std::cerr << os.str ();
// ---- 3. Per-LID count arrays (variable packets only) ----
1203 if (constantNumPackets == 0) {
1205 std::ostringstream os;
1206 os << *prefix <<
"3. (Re)allocate num{Ex,Im}portPacketsPerLID" 1208 std::cerr << os.str ();
1212 this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
1213 remoteLIDs.extent (0));
// ---- 4. Pack the data to send into exports_ ----
1217 std::ostringstream os;
1218 os << *prefix <<
"4. packAndPrepareNew: before, " 1221 std::cerr << os.str ();
1224 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 1225 Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
1226 #endif // HAVE_TPETRA_TRANSFER_TIMERS 1232 this->packAndPrepareNew (src, exportLIDs, this->exports_,
1233 this->numExportPacketsPerLID_,
1234 constantNumPackets, distor);
// ---- 5. Sync exports_ to the buffer's host or device memory space
// (the condition choosing between the two syncs is not visible here) ----
1239 typedef typename Kokkos::View<char*, buffer_device_type>::HostMirror::device_type
1240 buffer_host_device_type;
1241 typedef typename buffer_host_device_type::memory_space
1242 buffer_host_memory_space;
1243 this->exports_.template sync<buffer_host_memory_space> ();
1246 typedef typename buffer_device_type::memory_space buffer_dev_memory_space;
1247 this->exports_.template sync<buffer_dev_memory_space> ();
1251 std::ostringstream os;
1252 os << *prefix <<
"5.1. After packAndPrepareNew, " 1255 std::cerr << os.str ();
// ---- 6. Constant packets per LID: receive length is known up front ----
1261 if (constantNumPackets != 0) {
1263 std::ostringstream os;
1264 os << *prefix <<
"6. Realloc imports_" << std::endl;
1265 std::cerr << os.str ();
1271 const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1272 reallocImportsIfNeeded (rbufLen, verbose);
// Communication is skipped for a reverse transfer on a non-distributed
// object, and for a forward transfer under a second condition whose
// remainder is not visible in this listing.
1276 bool needCommunication =
true;
1281 if (revOp == DoReverse && ! this->isDistributed ()) {
1282 needCommunication =
false;
1291 else if (revOp == DoForward && srcDistObj != NULL &&
1293 needCommunication =
false;
1297 std::ostringstream os;
1298 os << *prefix <<
"needCommunication=" 1299 << (needCommunication ?
"true" :
"false") << endl;
1300 std::cerr << os.str ();
// ---- 7. Communication ----
1311 if (needCommunication) {
1312 if (revOp == DoReverse) {
1314 std::ostringstream os;
1315 os << *prefix <<
"7.0. Reverse mode" << endl;
1316 std::cerr << os.str ();
1318 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 1319 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1320 #endif // HAVE_TPETRA_TRANSFER_TIMERS 1321 if (constantNumPackets == 0) {
1323 std::ostringstream os;
1324 os << *prefix <<
"7.1. Variable # packets / LID: first comm " 1325 <<
"(commOnHost = " << (commOnHost ?
"true" :
"false") <<
")" 1327 std::cerr << os.str ();
// First communication obtains the per-LID import counts; their sum sizes
// imports_.  CHMS variant first, CDMS variant second.
1329 size_t totalImportPackets = 0;
1331 this->numExportPacketsPerLID_.template sync<CHMS> ();
1332 this->numImportPacketsPerLID_.template sync<CHMS> ();
1333 this->numImportPacketsPerLID_.template modify<CHMS> ();
1334 auto numExp_h = create_const_view (this->numExportPacketsPerLID_.template view<CHMS> ());
1335 auto numImp_h = this->numImportPacketsPerLID_.template view<CHMS> ();
1341 typedef typename decltype (numImp_h)::
device_type the_dev_type;
1342 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
1345 this->numExportPacketsPerLID_.template sync<CDMS> ();
1346 this->numImportPacketsPerLID_.template sync<CDMS> ();
1347 this->numImportPacketsPerLID_.template modify<CDMS> ();
1348 auto numExp_d = create_const_view (this->numExportPacketsPerLID_.template view<CDMS> ());
1349 auto numImp_d = this->numImportPacketsPerLID_.template view<CDMS> ();
1355 typedef typename decltype (numImp_d)::
device_type the_dev_type;
1356 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1360 std::ostringstream os;
1361 os << *prefix <<
"totalImportPackets=" << totalImportPackets
1363 std::cerr << os.str ();
1365 this->reallocImportsIfNeeded (totalImportPackets, verbose);
1367 std::ostringstream os;
1368 os << *prefix <<
"7.3. Second comm" << std::endl;
1369 std::cerr << os.str ();
// Second communication moves the packets themselves.
1376 this->numExportPacketsPerLID_.template sync<CHMS> ();
1377 this->numImportPacketsPerLID_.template sync<CHMS> ();
1383 auto numExportPacketsPerLID_av =
1385 auto numImportPacketsPerLID_av =
// Clear both modified flags on imports_ before marking it modified in the
// space that will receive the data.
1393 this->imports_.modified_device() = 0;
1394 this->imports_.modified_host() = 0;
1397 this->imports_.template modify<CHMS> ();
1399 numExportPacketsPerLID_av,
1400 this->imports_.template view<CHMS> (),
1401 numImportPacketsPerLID_av);
1404 this->imports_.template modify<CDMS> ();
1406 numExportPacketsPerLID_av,
1407 this->imports_.template view<CDMS> (),
1408 numImportPacketsPerLID_av);
// Reverse mode, constant number of packets per LID: a single communication.
1413 std::ostringstream os;
1414 os << *prefix <<
"7.1. Const # packets per LID: " << endl
1421 std::cerr << os.str ();
1429 this->imports_.modified_device() = 0;
1430 this->imports_.modified_host() = 0;
1433 this->imports_.template modify<CHMS> ();
1436 this->imports_.template view<CHMS> ());
1439 this->imports_.template modify<CDMS> ();
1442 this->imports_.template view<CDMS> ());
// ---- Forward mode ----
// NOTE(review): unlike reverse mode, these messages are written straight to
// std::cerr without the "(Proc <rank>) " prefix.
1448 std::cerr <<
">>> 7.0. Forward mode" << std::endl;
1451 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 1452 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1453 #endif // HAVE_TPETRA_TRANSFER_TIMERS 1454 if (constantNumPackets == 0) {
1456 std::cerr <<
">>> 7.1. Variable # packets / LID: first comm" << std::endl;
1459 size_t totalImportPackets = 0;
1461 this->numExportPacketsPerLID_.template sync<CHMS> ();
1462 this->numImportPacketsPerLID_.template sync<CHMS> ();
1463 this->numImportPacketsPerLID_.template modify<CHMS> ();
1464 auto numExp_h = create_const_view (this->numExportPacketsPerLID_.template view<CHMS> ());
1465 auto numImp_h = this->numImportPacketsPerLID_.template view<CHMS> ();
1471 typedef typename decltype (numImp_h)::
device_type the_dev_type;
1472 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
1475 this->numExportPacketsPerLID_.template sync<CDMS> ();
1476 this->numImportPacketsPerLID_.template sync<CDMS> ();
1477 this->numImportPacketsPerLID_.template modify<CDMS> ();
1478 auto numExp_d = create_const_view (this->numExportPacketsPerLID_.template view<CDMS> ());
1479 auto numImp_d = this->numImportPacketsPerLID_.template view<CDMS> ();
1485 typedef typename decltype (numImp_d)::
device_type the_dev_type;
1486 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1489 this->reallocImportsIfNeeded (totalImportPackets, verbose);
1492 std::cerr <<
">>> 7.3. Second comm" << std::endl;
1499 this->numExportPacketsPerLID_.template sync<CHMS> ();
1500 this->numImportPacketsPerLID_.template sync<CHMS> ();
1506 auto numExportPacketsPerLID_av =
1508 auto numImportPacketsPerLID_av =
1516 this->imports_.modified_device() = 0;
1517 this->imports_.modified_host() = 0;
// Host-buffer variant of the second communication.
1520 this->imports_.template modify<CHMS> ();
1521 distor.
doPostsAndWaits (create_const_view (this->exports_.template view<CHMS> ()),
1522 numExportPacketsPerLID_av,
1523 this->imports_.template view<CHMS> (),
1524 numImportPacketsPerLID_av);
// Device-buffer variant of the second communication.
1527 this->imports_.template modify<CDMS> ();
1528 distor.
doPostsAndWaits (create_const_view (this->exports_.template view<CDMS> ()),
1529 numExportPacketsPerLID_av,
1530 this->imports_.template view<CDMS> (),
1531 numImportPacketsPerLID_av);
// Forward mode, constant number of packets per LID: a single communication.
1536 std::ostringstream os;
1537 os << *prefix <<
"7.1. Const # packets per LID: " 1538 <<
"exports_.extent(0)=" << exports_.extent (0)
1539 <<
", imports_.extent(0) = " << imports_.extent (0)
1541 std::cerr << os.str ();
1548 this->imports_.modified_device() = 0;
1549 this->imports_.modified_host() = 0;
1553 std::ostringstream os;
1554 os << *prefix <<
"7.2. Comm buffers on host" << endl;
1555 std::cerr << os.str ();
1557 this->imports_.template modify<CHMS> ();
1558 distor.
doPostsAndWaits (create_const_view (this->exports_.template view<CHMS> ()),
1560 this->imports_.template view<CHMS> ());
1564 std::ostringstream os;
1565 os << *prefix <<
"7.2. Comm buffers on device" << endl;
1566 std::cerr << os.str ();
1568 this->imports_.template modify<CDMS> ();
1569 distor.
doPostsAndWaits (create_const_view (this->exports_.template view<CDMS> ()),
1571 this->imports_.template view<CDMS> ());
// ---- 8. Unpack received data into this object ----
1578 std::ostringstream os;
1579 os << *prefix <<
"8. unpackAndCombineNew" << endl;
1580 std::cerr << os.str ();
1582 #ifdef HAVE_TPETRA_TRANSFER_TIMERS 1583 Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
1584 #endif // HAVE_TPETRA_TRANSFER_TIMERS 1592 this->unpackAndCombineNew (remoteLIDs, this->imports_,
1593 this->numImportPacketsPerLID_,
1594 constantNumPackets, distor, CM);
1600 std::ostringstream os;
1601 os << *prefix <<
"9. Done!" << endl;
1602 std::cerr << os.str ();
// Wraps the stream `os` in a Teuchos::FancyOStream (non-owning, via
// rcpFromRef) and forwards to describe() at Teuchos::VERB_DEFAULT.
// NOTE(review): the signature is missing from this listing; presumably this
// is DistObject::print(std::ostream& os) -- confirm.
1606 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1611 using Teuchos::FancyOStream;
1612 using Teuchos::getFancyOStream;
1614 using Teuchos::rcpFromRef;
1617 RCP<FancyOStream> out = getFancyOStream (rcpFromRef (os));
1618 this->describe (*out, Teuchos::VERB_DEFAULT);
1621 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1627 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1633 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
// Nonmember helper: remove processes which contain no elements in the
// input object's Map, using a caller-supplied replacement Map.
//
// \tparam DistObjectType  Type of the object being modified; must expose
//   local_ordinal_type, global_ordinal_type, and node_type (they are used
//   to spell the Map type of newMap).
// \param input [in/out]  Handle to the object.  Its own
//   removeEmptyProcessesInPlace (newMap) method is invoked, and the handle
//   is then set to Teuchos::null when newMap is null.
// \param newMap [in]  Map passed through to the object's method.
//
// NOTE(review): the listing drops the return type / function name line and
// the line declaring `input`, plus the closing braces (the embedded file
// numbers skip 1642-1643, 1647, and 1651-1653).
1641 template<
class DistObjectType>
1644 const Teuchos::RCP<
const Map<
typename DistObjectType::local_ordinal_type,
1645 typename DistObjectType::global_ordinal_type,
1646 typename DistObjectType::node_type> >& newMap)
1648 input->removeEmptyProcessesInPlace (newMap);
// A null newMap means there is no Map left for the caller to use, so the
// caller's handle to the object is released as well.
1649 if (newMap.is_null ()) {
1650 input = Teuchos::null;
// Convenience overload of removeEmptyProcessesInPlace: ends by forwarding
// to the two-argument removeEmptyProcessesInPlace<DistObjectType>
// (input, newMap).
//
// LO, GO, and NT abbreviate DistObjectType's local ordinal, global ordinal,
// and node types.
//
// NOTE(review): the listing omits the declarator and the statement that
// creates `newMap` (the embedded file numbers jump from 1661 to 1665).
// Presumably newMap is derived from input's current Map -- confirm against
// the full file before relying on this.
1654 template<
class DistObjectType>
1659 typedef typename DistObjectType::local_ordinal_type LO;
1660 typedef typename DistObjectType::global_ordinal_type GO;
1661 typedef typename DistObjectType::node_type NT;
1665 removeEmptyProcessesInPlace<DistObjectType> (input, newMap);
// Explicit-instantiation macros, followed by the end of the include guard.
//
// TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) expands to
//   namespace Classes { template class DistObject< SCALAR, LO, GO, NODE >; }
// TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) is the same with the first
// template argument fixed to char.
//
// NOTE(review): in this listing the two #define directives, their
// line-continuation backslashes, and the closing #endif of
// TPETRA_DISTOBJECT_DEF_HPP have been collapsed onto one physical line with
// the listing's own line numbers (1669, 1670, 1674, 1675, 1679) interleaved.
// The sentence after the guard name is a class description fused onto the
// line by the same extraction; it is not part of the guard comment.
1669 #define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \ 1670 namespace Classes { template class DistObject< SCALAR , LO , GO , NODE >; } 1674 #define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \ 1675 namespace Classes { template class DistObject< char , LO , GO , NODE >; } 1679 #endif // TPETRA_DISTOBJECT_DEF_HPP Base class for distributed Tpetra objects that support data redistribution.
void doPostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the (forward) communication plan.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distribution.
LocalOrdinal local_ordinal_type
The type of local indices.
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
static bool debug()
Whether Tpetra is in debug mode.
Teuchos_Ordinal Array_size_type
Size type for Teuchos Array objects.
void doReversePostsAndWaits(const Teuchos::ArrayView< const Packet > &exports, size_t numPackets, const Teuchos::ArrayView< Packet > &imports)
Execute the reverse communication plan.
::Tpetra::Classes::DistObject< Packet, LocalOrdinal, GlobalOrdinal, Node > DistObject
Alias for Tpetra::Classes::DistObject.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Insert new values that don't currently exist.
bool isDistributed() const
Whether this is a globally distributed object.
Kokkos::DualView< T *, DT > getDualViewCopyFromArrayView(const Teuchos::ArrayView< const T > &x_av, const char label[], const bool leaveOnHost)
Get a 1-D Kokkos::DualView which is a deep copy of the input Teuchos::ArrayView (which views host memory).
virtual void releaseViews() const
Hook for releasing views.
Sets up and executes a communication plan for a Tpetra DistObject.
static bool verbose()
Whether Tpetra is in verbose mode.
CombineMode
Rule for combining data in an Import or Export.
bool reallocDualViewIfNeeded(Kokkos::DualView< ValueType *, DeviceType > &dv, const size_t newSize, const char newLabel[], const size_t tooBigFactor=2, const bool needFenceBeforeRealloc=true)
Reallocate the DualView in/out argument, if needed.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > removeEmptyProcesses() const
Return a new Map with processes with zero elements removed.
Node::device_type device_type
The Kokkos Device type.
Abstract base class for objects that can be the source of an Import or Export operation.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of Tpetra.
Replace existing values with new values.
virtual void createViews() const
Hook for creating a const view.
ReadWriteOption
Read/write options for non-const views.
Replace old values with zero.
std::string combineModeToString(const CombineMode combineMode)
Human-readable string representation of the given CombineMode.
::Kokkos::Details::ArithTraits< GlobalOrdinal >::val_type packet_type
The type of each datum being sent or received in an Import or Export.
std::string dualViewStatusToString(const DualViewType &dv, const char name[])
Return the status of the given Kokkos::DualView, as a human-readable string.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distribution.
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
A parallel distribution of indices over processes.
ReverseOption
Whether the data transfer should be performed in forward or reverse mode.
DistObject(const Teuchos::RCP< const map_type > &map)
Constructor.
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.