42 #ifndef TPETRA_MULTIVECTOR_DEF_HPP
43 #define TPETRA_MULTIVECTOR_DEF_HPP
54 #include "Tpetra_Vector.hpp"
57 #include "Tpetra_Details_checkView.hpp"
59 #include "Tpetra_Details_gathervPrint.hpp"
66 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
67 # include "Teuchos_SerialDenseMatrix.hpp"
68 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
69 #include "Tpetra_KokkosRefactor_Details_MultiVectorDistObjectKernels.hpp"
70 #include "KokkosCompat_View.hpp"
71 #include "KokkosBlas.hpp"
72 #include "KokkosKernels_Utils.hpp"
73 #include "Kokkos_Random.hpp"
74 #include "Kokkos_ArithTraits.hpp"
78 #ifdef HAVE_TPETRA_INST_FLOAT128
81 template<
class Generator>
82 struct rand<Generator, __float128> {
83 static KOKKOS_INLINE_FUNCTION __float128 max ()
85 return static_cast<__float128
> (1.0);
87 static KOKKOS_INLINE_FUNCTION __float128
92 const __float128 scalingFactor =
93 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
94 static_cast<__float128
> (2.0);
95 const __float128 higherOrderTerm =
static_cast<__float128
> (gen.drand ());
96 const __float128 lowerOrderTerm =
97 static_cast<__float128
> (gen.drand ()) * scalingFactor;
98 return higherOrderTerm + lowerOrderTerm;
100 static KOKKOS_INLINE_FUNCTION __float128
101 draw (Generator& gen,
const __float128& range)
104 const __float128 scalingFactor =
105 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
106 static_cast<__float128
> (2.0);
107 const __float128 higherOrderTerm =
108 static_cast<__float128
> (gen.drand (range));
109 const __float128 lowerOrderTerm =
110 static_cast<__float128
> (gen.drand (range)) * scalingFactor;
111 return higherOrderTerm + lowerOrderTerm;
113 static KOKKOS_INLINE_FUNCTION __float128
114 draw (Generator& gen,
const __float128& start,
const __float128& end)
117 const __float128 scalingFactor =
118 static_cast<__float128
> (std::numeric_limits<double>::min ()) /
119 static_cast<__float128
> (2.0);
120 const __float128 higherOrderTerm =
121 static_cast<__float128
> (gen.drand (start, end));
122 const __float128 lowerOrderTerm =
123 static_cast<__float128
> (gen.drand (start, end)) * scalingFactor;
124 return higherOrderTerm + lowerOrderTerm;
128 #endif // HAVE_TPETRA_INST_FLOAT128
146 template<
class ST,
class LO,
class GO,
class NT>
148 allocDualView (
const size_t lclNumRows,
149 const size_t numCols,
150 const bool zeroOut =
true,
151 const bool allowPadding =
false)
153 using ::Tpetra::Details::Behavior;
154 using Kokkos::AllowPadding;
155 using Kokkos::view_alloc;
156 using Kokkos::WithoutInitializing;
158 typedef typename dual_view_type::t_dev dev_view_type;
163 const std::string label (
"MV::DualView");
164 const bool debug = Behavior::debug ();
174 dev_view_type d_view;
177 d_view = dev_view_type (view_alloc (label, AllowPadding),
178 lclNumRows, numCols);
181 d_view = dev_view_type (view_alloc (label),
182 lclNumRows, numCols);
187 d_view = dev_view_type (view_alloc (label,
190 lclNumRows, numCols);
193 d_view = dev_view_type (view_alloc (label, WithoutInitializing),
194 lclNumRows, numCols);
205 const ST nan = Kokkos::Details::ArithTraits<ST>::nan ();
206 KokkosBlas::fill (d_view, nan);
210 TEUCHOS_TEST_FOR_EXCEPTION
211 (
static_cast<size_t> (d_view.extent (0)) != lclNumRows ||
212 static_cast<size_t> (d_view.extent (1)) != numCols, std::logic_error,
213 "allocDualView: d_view's dimensions actual dimensions do not match "
214 "requested dimensions. d_view is " << d_view.extent (0) <<
" x " <<
215 d_view.extent (1) <<
"; requested " << lclNumRows <<
" x " << numCols
216 <<
". Please report this bug to the Tpetra developers.");
219 dual_view_type dv (d_view, Kokkos::create_mirror_view (d_view));
233 template<
class T,
class ExecSpace>
234 struct MakeUnmanagedView {
247 typedef typename Kokkos::Impl::if_c<
248 Kokkos::Impl::VerifyExecutionCanAccessMemorySpace<
249 typename ExecSpace::memory_space,
250 Kokkos::HostSpace>::value,
251 typename ExecSpace::device_type,
252 typename Kokkos::DualView<T*, ExecSpace>::host_mirror_space>::type host_exec_space;
253 typedef Kokkos::LayoutLeft array_layout;
254 typedef Kokkos::View<T*, array_layout, host_exec_space,
255 Kokkos::MemoryUnmanaged> view_type;
257 static view_type getView (
const Teuchos::ArrayView<T>& x_in)
259 const size_t numEnt =
static_cast<size_t> (x_in.size ());
263 return view_type (x_in.getRawPtr (), numEnt);
271 template<
class DualViewType>
273 takeSubview (
const DualViewType& X,
274 const Kokkos::Impl::ALL_t&,
275 const std::pair<size_t, size_t>& colRng)
277 if (X.extent (0) == 0 && X.extent (1) != 0) {
278 return DualViewType (
"MV::DualView", 0, colRng.second - colRng.first);
281 return subview (X, Kokkos::ALL (), colRng);
288 template<
class DualViewType>
290 takeSubview (
const DualViewType& X,
291 const std::pair<size_t, size_t>& rowRng,
292 const std::pair<size_t, size_t>& colRng)
294 if (X.extent (0) == 0 && X.extent (1) != 0) {
295 return DualViewType (
"MV::DualView", 0, colRng.second - colRng.first);
298 return subview (X, rowRng, colRng);
302 template<
class DualViewType>
304 getDualViewStride (
const DualViewType& dv)
308 const size_t LDA = dv.d_view.stride (1);
309 const size_t numRows = dv.extent (0);
312 return (numRows == 0) ? size_t (1) : numRows;
319 template<
class ViewType>
321 getViewStride (
const ViewType& view)
323 const size_t LDA = view.stride (1);
324 const size_t numRows = view.extent (0);
327 return (numRows == 0) ? size_t (1) : numRows;
334 template <
class SC,
class LO,
class GO,
class NT>
336 multiVectorRunNormOnHost (const ::Tpetra::MultiVector<SC, LO, GO, NT>& X)
338 if (! X.need_sync_device ()) {
342 constexpr
size_t localLengthThreshold = 10000;
343 return X.getLocalLength () <= localLengthThreshold;
347 template <
class SC,
class LO,
class GO,
class NT>
349 multiVectorNormImpl (typename ::Tpetra::MultiVector<SC, LO, GO, NT>::mag_type norms[],
355 using val_type =
typename MV::impl_scalar_type;
356 using mag_type =
typename MV::mag_type;
357 using dual_view_type =
typename MV::dual_view_type;
360 auto comm = map.is_null () ? nullptr : map->getComm ().getRawPtr ();
361 auto whichVecs = getMultiVectorWhichVectors (X);
365 const bool runOnHost = multiVectorRunNormOnHost (X);
367 using view_type =
typename dual_view_type::t_host;
368 using array_layout =
typename view_type::array_layout;
369 using device_type =
typename view_type::device_type;
375 normImpl<val_type, array_layout, device_type,
376 mag_type> (norms, X_lcl, whichNorm, whichVecs,
377 isConstantStride, isDistributed, comm);
380 using view_type =
typename dual_view_type::t_dev;
381 using array_layout =
typename view_type::array_layout;
382 using device_type =
typename view_type::device_type;
388 normImpl<val_type, array_layout, device_type,
389 mag_type> (norms, X_lcl, whichNorm, whichVecs,
390 isConstantStride, isDistributed, comm);
398 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
400 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
401 vectorIndexOutOfRange (
const size_t VectorIndex)
const {
402 return (VectorIndex < 1 && VectorIndex != 0) || VectorIndex >= getNumVectors();
405 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
411 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
413 MultiVector (
const Teuchos::RCP<const map_type>& map,
414 const size_t numVecs,
415 const bool zeroOut) :
421 view_ = allocDualView<Scalar, LocalOrdinal, GlobalOrdinal, Node> (lclNumRows, numVecs, zeroOut);
425 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
428 const Teuchos::DataAccess copyOrView) :
430 view_ (source.view_),
431 origView_ (source.origView_),
432 whichVectors_ (source.whichVectors_)
435 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, "
436 "const Teuchos::DataAccess): ";
440 if (copyOrView == Teuchos::Copy) {
444 this->
view_ = cpy.view_;
448 else if (copyOrView == Teuchos::View) {
451 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
452 true, std::invalid_argument,
"Second argument 'copyOrView' has an "
453 "invalid value " << copyOrView <<
". Valid values include "
454 "Teuchos::Copy = " << Teuchos::Copy <<
" and Teuchos::View = "
455 << Teuchos::View <<
".");
459 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
461 MultiVector (
const Teuchos::RCP<const map_type>& map,
467 const char tfecfFuncName[] =
"MultiVector(Map,DualView): ";
468 const size_t lclNumRows_map = map.is_null () ? size_t (0) :
469 map->getNodeNumElements ();
470 const size_t lclNumRows_view = view.extent (0);
471 const size_t LDA = getDualViewStride (view);
473 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
474 (LDA < lclNumRows_map || lclNumRows_map != lclNumRows_view,
475 std::invalid_argument,
"Kokkos::DualView does not match Map. "
476 "map->getNodeNumElements() = " << lclNumRows_map
477 <<
", view.extent(0) = " << lclNumRows_view
478 <<
", and getStride() = " << LDA <<
".");
480 using ::Tpetra::Details::Behavior;
481 const bool debug = Behavior::debug ();
483 using ::Tpetra::Details::checkGlobalDualViewValidity;
484 std::ostringstream gblErrStrm;
485 const bool verbose = Behavior::verbose ();
486 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
487 const bool gblValid =
488 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
490 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
491 (! gblValid, std::runtime_error, gblErrStrm.str ());
496 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
498 MultiVector (
const Teuchos::RCP<const map_type>& map,
499 const typename dual_view_type::t_dev& d_view) :
502 using Teuchos::ArrayRCP;
504 const char tfecfFuncName[] =
"MultiVector(map,d_view): ";
508 const size_t LDA = getViewStride (d_view);
509 const size_t lclNumRows = map->getNodeNumElements ();
510 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
511 (LDA < lclNumRows, std::invalid_argument,
"Map does not match "
512 "Kokkos::View. map->getNodeNumElements() = " << lclNumRows
513 <<
", View's column stride = " << LDA
514 <<
", and View's extent(0) = " << d_view.extent (0) <<
".");
516 auto h_view = Kokkos::create_mirror_view (d_view);
519 using ::Tpetra::Details::Behavior;
520 const bool debug = Behavior::debug ();
522 using ::Tpetra::Details::checkGlobalDualViewValidity;
523 std::ostringstream gblErrStrm;
524 const bool verbose = Behavior::verbose ();
525 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
526 const bool gblValid =
527 checkGlobalDualViewValidity (&gblErrStrm,
view_, verbose,
529 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
530 (! gblValid, std::runtime_error, gblErrStrm.str ());
539 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
541 MultiVector (
const Teuchos::RCP<const map_type>& map,
548 const char tfecfFuncName[] =
"MultiVector(map,view,origView): ";
550 const size_t LDA = getDualViewStride (origView);
552 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
553 LDA < lclNumRows, std::invalid_argument,
"The input Kokkos::DualView's "
554 "column stride LDA = " << LDA <<
" < getLocalLength() = " << lclNumRows
555 <<
". This may also mean that the input origView's first dimension (number "
556 "of rows = " << origView.extent (0) <<
") does not not match the number "
557 "of entries in the Map on the calling process.");
559 using ::Tpetra::Details::Behavior;
560 const bool debug = Behavior::debug ();
562 using ::Tpetra::Details::checkGlobalDualViewValidity;
563 std::ostringstream gblErrStrm;
564 const bool verbose = Behavior::verbose ();
565 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
566 const bool gblValid_0 =
567 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
569 const bool gblValid_1 =
570 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
572 const bool gblValid = gblValid_0 && gblValid_1;
573 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
574 (! gblValid, std::runtime_error, gblErrStrm.str ());
579 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
581 MultiVector (
const Teuchos::RCP<const map_type>& map,
583 const Teuchos::ArrayView<const size_t>& whichVectors) :
587 whichVectors_ (whichVectors.begin (), whichVectors.end ())
590 using Kokkos::subview;
591 const char tfecfFuncName[] =
"MultiVector(map,view,whichVectors): ";
593 using ::Tpetra::Details::Behavior;
594 const bool debug = Behavior::debug ();
596 using ::Tpetra::Details::checkGlobalDualViewValidity;
597 std::ostringstream gblErrStrm;
598 const bool verbose = Behavior::verbose ();
599 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
600 const bool gblValid =
601 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
603 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
604 (! gblValid, std::runtime_error, gblErrStrm.str ());
607 const size_t lclNumRows = map.is_null () ? size_t (0) :
608 map->getNodeNumElements ();
615 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
616 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
617 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
618 <<
" < map->getNodeNumElements() = " << lclNumRows <<
".");
619 if (whichVectors.size () != 0) {
620 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
621 view.extent (1) != 0 && view.extent (1) == 0,
622 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
623 " = " << whichVectors.size () <<
" > 0.");
624 size_t maxColInd = 0;
625 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
626 for (size_type k = 0; k < whichVectors.size (); ++k) {
627 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
628 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
629 std::invalid_argument,
"whichVectors[" << k <<
"] = "
630 "Teuchos::OrdinalTraits<size_t>::invalid().");
631 maxColInd = std::max (maxColInd, whichVectors[k]);
633 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
634 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
635 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
636 <<
" <= max(whichVectors) = " << maxColInd <<
".");
641 const size_t LDA = getDualViewStride (view);
642 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
643 (LDA < lclNumRows, std::invalid_argument,
644 "LDA = " << LDA <<
" < this->getLocalLength() = " << lclNumRows <<
".");
646 if (whichVectors.size () == 1) {
655 const std::pair<size_t, size_t> colRng (whichVectors[0],
656 whichVectors[0] + 1);
657 view_ = takeSubview (view_, ALL (), colRng);
658 origView_ = takeSubview (origView_, ALL (), colRng);
660 whichVectors_.clear ();
664 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
666 MultiVector (
const Teuchos::RCP<const map_type>& map,
669 const Teuchos::ArrayView<const size_t>& whichVectors) :
672 origView_ (origView),
673 whichVectors_ (whichVectors.begin (), whichVectors.end ())
676 using Kokkos::subview;
677 const char tfecfFuncName[] =
"MultiVector(map,view,origView,whichVectors): ";
679 using ::Tpetra::Details::Behavior;
680 const bool debug = Behavior::debug ();
682 using ::Tpetra::Details::checkGlobalDualViewValidity;
683 std::ostringstream gblErrStrm;
684 const bool verbose = Behavior::verbose ();
685 const auto comm = map.is_null () ? Teuchos::null : map->getComm ();
686 const bool gblValid_0 =
687 checkGlobalDualViewValidity (&gblErrStrm, view, verbose,
689 const bool gblValid_1 =
690 checkGlobalDualViewValidity (&gblErrStrm, origView, verbose,
692 const bool gblValid = gblValid_0 && gblValid_1;
693 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
694 (! gblValid, std::runtime_error, gblErrStrm.str ());
704 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
705 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (0)) < lclNumRows,
706 std::invalid_argument,
"view.extent(0) = " << view.extent (0)
707 <<
" < map->getNodeNumElements() = " << lclNumRows <<
".");
708 if (whichVectors.size () != 0) {
709 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
710 view.extent (1) != 0 && view.extent (1) == 0,
711 std::invalid_argument,
"view.extent(1) = 0, but whichVectors.size()"
712 " = " << whichVectors.size () <<
" > 0.");
713 size_t maxColInd = 0;
714 typedef Teuchos::ArrayView<const size_t>::size_type size_type;
715 for (size_type k = 0; k < whichVectors.size (); ++k) {
716 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
717 whichVectors[k] == Teuchos::OrdinalTraits<size_t>::invalid (),
718 std::invalid_argument,
"whichVectors[" << k <<
"] = "
719 "Teuchos::OrdinalTraits<size_t>::invalid().");
720 maxColInd = std::max (maxColInd, whichVectors[k]);
722 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
723 view.extent (1) != 0 &&
static_cast<size_t> (view.extent (1)) <= maxColInd,
724 std::invalid_argument,
"view.extent(1) = " << view.extent (1)
725 <<
" <= max(whichVectors) = " << maxColInd <<
".");
730 const size_t LDA = getDualViewStride (origView);
731 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
732 (LDA < lclNumRows, std::invalid_argument,
"Map and DualView origView "
733 "do not match. LDA = " << LDA <<
" < this->getLocalLength() = " <<
734 lclNumRows <<
". origView.extent(0) = " << origView.extent(0)
735 <<
", origView.stride(1) = " << origView.d_view.stride(1) <<
".");
737 if (whichVectors.size () == 1) {
746 const std::pair<size_t, size_t> colRng (whichVectors[0],
747 whichVectors[0] + 1);
755 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
757 MultiVector (
const Teuchos::RCP<const map_type>& map,
758 const Teuchos::ArrayView<const Scalar>& data,
760 const size_t numVecs) :
763 typedef LocalOrdinal LO;
764 typedef GlobalOrdinal GO;
765 const char tfecfFuncName[] =
"MultiVector(map,data,LDA,numVecs): ";
771 const size_t lclNumRows =
772 map.is_null () ? size_t (0) : map->getNodeNumElements ();
773 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
774 (LDA < lclNumRows, std::invalid_argument,
"LDA = " << LDA <<
" < "
775 "map->getNodeNumElements() = " << lclNumRows <<
".");
777 const size_t minNumEntries = LDA * (numVecs - 1) + lclNumRows;
778 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
779 (
static_cast<size_t> (data.size ()) < minNumEntries,
780 std::invalid_argument,
"Input Teuchos::ArrayView does not have enough "
781 "entries, given the input Map and number of vectors in the MultiVector."
782 " data.size() = " << data.size () <<
" < (LDA*(numVecs-1)) + "
783 "map->getNodeNumElements () = " << minNumEntries <<
".");
786 this->
view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
799 Kokkos::MemoryUnmanaged> X_in_orig (X_in_raw, LDA, numVecs);
800 const Kokkos::pair<size_t, size_t> rowRng (0, lclNumRows);
801 auto X_in = Kokkos::subview (X_in_orig, rowRng, Kokkos::ALL ());
806 const size_t outStride =
807 X_out.extent (1) == 0 ? size_t (1) : X_out.stride (1);
808 if (LDA == outStride) {
814 typedef decltype (Kokkos::subview (X_out, Kokkos::ALL (), 0))
816 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0))
818 for (
size_t j = 0; j < numVecs; ++j) {
819 out_col_view_type X_out_j = Kokkos::subview (X_out, Kokkos::ALL (), j);
820 in_col_view_type X_in_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
826 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
828 MultiVector (
const Teuchos::RCP<const map_type>& map,
829 const Teuchos::ArrayView<
const Teuchos::ArrayView<const Scalar> >& ArrayOfPtrs,
830 const size_t numVecs) :
834 typedef LocalOrdinal LO;
835 typedef GlobalOrdinal GO;
836 const char tfecfFuncName[] =
"MultiVector(map,ArrayOfPtrs,numVecs): ";
839 const size_t lclNumRows =
840 map.is_null () ? size_t (0) : map->getNodeNumElements ();
841 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
842 (numVecs < 1 || numVecs !=
static_cast<size_t> (ArrayOfPtrs.size ()),
843 std::runtime_error,
"Either numVecs (= " << numVecs <<
") < 1, or "
844 "ArrayOfPtrs.size() (= " << ArrayOfPtrs.size () <<
") != numVecs.");
845 for (
size_t j = 0; j < numVecs; ++j) {
846 Teuchos::ArrayView<const Scalar> X_j_av = ArrayOfPtrs[j];
847 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
848 static_cast<size_t> (X_j_av.size ()) < lclNumRows,
849 std::invalid_argument,
"ArrayOfPtrs[" << j <<
"].size() = "
850 << X_j_av.size () <<
" < map->getNodeNumElements() = " << lclNumRows
854 view_ = allocDualView<Scalar, LO, GO, Node> (lclNumRows, numVecs);
860 using array_layout =
typename decltype (X_out)::array_layout;
861 using input_col_view_type =
typename Kokkos::View<
const IST*,
864 Kokkos::MemoryUnmanaged>;
866 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
867 for (
size_t j = 0; j < numVecs; ++j) {
868 Teuchos::ArrayView<const IST> X_j_av =
869 Teuchos::av_reinterpret_cast<const IST> (ArrayOfPtrs[j]);
870 input_col_view_type X_j_in (X_j_av.getRawPtr (), lclNumRows);
871 auto X_j_out = Kokkos::subview (X_out, rowRng, j);
877 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
880 return whichVectors_.empty ();
883 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
888 if (this->getMap ().is_null ()) {
889 return static_cast<size_t> (0);
891 return this->getMap ()->getNodeNumElements ();
895 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
900 if (this->getMap ().is_null ()) {
901 return static_cast<size_t> (0);
903 return this->getMap ()->getGlobalNumElements ();
907 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
912 return isConstantStride () ? getDualViewStride (origView_) : size_t (0);
915 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
925 if (src ==
nullptr) {
935 return src->getNumVectors () == this->getNumVectors ();
939 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
943 return this->getNumVectors ();
946 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
949 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
951 #else // TPETRA_ENABLE_DEPRECATED_CODE
953 #endif // TPETRA_ENABLE_DEPRECATED_CODE
955 const size_t numSameIDs,
956 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
957 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs)
959 using ::Tpetra::Details::Behavior;
961 using ::Tpetra::Details::ProfilingRegion;
963 using KokkosRefactor::Details::permute_array_multi_column;
964 using KokkosRefactor::Details::permute_array_multi_column_variable_stride;
965 using Kokkos::Compat::create_const_view;
967 const char tfecfFuncName[] =
"copyAndPermute: ";
968 ProfilingRegion regionCAP (
"Tpetra::MultiVector::copyAndPermute");
970 const bool verbose = Behavior::verbose ();
971 std::unique_ptr<std::string> prefix;
973 auto map = this->getMap ();
974 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
975 const int myRank = comm.is_null () ? -1 : comm->getRank ();
976 std::ostringstream os;
977 os <<
"Proc " << myRank <<
": MV::copyAndPermute: ";
978 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
979 os <<
"Start" << endl;
980 std::cerr << os.str ();
983 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
984 (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
985 std::logic_error,
"permuteToLIDs.extent(0) = "
986 << permuteToLIDs.extent (0) <<
" != permuteFromLIDs.extent(0) = "
987 << permuteFromLIDs.extent (0) <<
".");
990 const MV& sourceMV =
dynamic_cast<const MV&
> (sourceObj);
991 const size_t numCols = this->getNumVectors ();
995 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
996 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
997 std::logic_error,
"Input MultiVector needs sync to both host "
999 const bool copyOnHost = sourceMV.need_sync_device ();
1001 std::ostringstream os;
1002 os << *prefix <<
"copyOnHost=" << (copyOnHost ?
"true" :
"false") << endl;
1003 std::cerr << os.str ();
1007 if (this->need_sync_host ()) {
1010 this->modify_host ();
1013 if (this->need_sync_device ()) {
1014 this->sync_device ();
1016 this->modify_device ();
1020 std::ostringstream os;
1021 os << *prefix <<
"Copy" << endl;
1022 std::cerr << os.str ();
1047 if (numSameIDs > 0) {
1048 const std::pair<size_t, size_t> rows (0, numSameIDs);
1050 auto tgt_h = this->getLocalViewHost ();
1051 auto src_h = create_const_view (sourceMV.getLocalViewHost ());
1053 for (
size_t j = 0; j < numCols; ++j) {
1054 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1055 const size_t srcCol =
1056 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1058 auto tgt_j = Kokkos::subview (tgt_h, rows, tgtCol);
1059 auto src_j = Kokkos::subview (src_h, rows, srcCol);
1064 auto tgt_d = this->getLocalViewDevice ();
1065 auto src_d = create_const_view (sourceMV.getLocalViewDevice ());
1067 for (
size_t j = 0; j < numCols; ++j) {
1068 const size_t tgtCol = isConstantStride () ? j : whichVectors_[j];
1069 const size_t srcCol =
1070 sourceMV.isConstantStride () ? j : sourceMV.whichVectors_[j];
1072 auto tgt_j = Kokkos::subview (tgt_d, rows, tgtCol);
1073 auto src_j = Kokkos::subview (src_d, rows, srcCol);
1089 if (permuteFromLIDs.extent (0) == 0 ||
1090 permuteToLIDs.extent (0) == 0) {
1092 std::ostringstream os;
1093 os << *prefix <<
"No permutations. Done!" << endl;
1094 std::cerr << os.str ();
1100 std::ostringstream os;
1101 os << *prefix <<
"Permute" << endl;
1102 std::cerr << os.str ();
1107 const bool nonConstStride =
1108 ! this->isConstantStride () || ! sourceMV.isConstantStride ();
1111 std::ostringstream os;
1112 os << *prefix <<
"nonConstStride="
1113 << (nonConstStride ?
"true" :
"false") << endl;
1114 std::cerr << os.str ();
1121 Kokkos::DualView<const size_t*, device_type> srcWhichVecs;
1122 Kokkos::DualView<const size_t*, device_type> tgtWhichVecs;
1123 if (nonConstStride) {
1124 if (this->whichVectors_.size () == 0) {
1125 Kokkos::DualView<size_t*, device_type> tmpTgt (
"tgtWhichVecs", numCols);
1126 tmpTgt.modify_host ();
1127 for (
size_t j = 0; j < numCols; ++j) {
1128 tmpTgt.h_view(j) = j;
1131 tmpTgt.sync_device ();
1133 tgtWhichVecs = tmpTgt;
1136 Teuchos::ArrayView<const size_t> tgtWhichVecsT = this->whichVectors_ ();
1138 getDualViewCopyFromArrayView<size_t, device_type> (tgtWhichVecsT,
1142 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1143 (
static_cast<size_t> (tgtWhichVecs.extent (0)) !=
1144 this->getNumVectors (),
1145 std::logic_error,
"tgtWhichVecs.extent(0) = " <<
1146 tgtWhichVecs.extent (0) <<
" != this->getNumVectors() = " <<
1147 this->getNumVectors () <<
".");
1149 if (sourceMV.whichVectors_.size () == 0) {
1150 Kokkos::DualView<size_t*, device_type> tmpSrc (
"srcWhichVecs", numCols);
1151 tmpSrc.modify_host ();
1152 for (
size_t j = 0; j < numCols; ++j) {
1153 tmpSrc.h_view(j) = j;
1156 tmpSrc.sync_device ();
1158 srcWhichVecs = tmpSrc;
1161 Teuchos::ArrayView<const size_t> srcWhichVecsT =
1162 sourceMV.whichVectors_ ();
1164 getDualViewCopyFromArrayView<size_t, device_type> (srcWhichVecsT,
1168 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1169 (
static_cast<size_t> (srcWhichVecs.extent (0)) !=
1170 sourceMV.getNumVectors (), std::logic_error,
1171 "srcWhichVecs.extent(0) = " << srcWhichVecs.extent (0)
1172 <<
" != sourceMV.getNumVectors() = " << sourceMV.getNumVectors ()
1178 std::ostringstream os;
1179 os << *prefix <<
"Get permute LIDs on host" << std::endl;
1180 std::cerr << os.str ();
1182 auto tgt_h = this->getLocalViewHost ();
1183 auto src_h = create_const_view (sourceMV.getLocalViewHost ());
1185 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_host () );
1186 auto permuteToLIDs_h = create_const_view (permuteToLIDs.view_host ());
1187 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_host () );
1188 auto permuteFromLIDs_h =
1189 create_const_view (permuteFromLIDs.view_host ());
1192 std::ostringstream os;
1193 os << *prefix <<
"Permute on host" << endl;
1194 std::cerr << os.str ();
1196 if (nonConstStride) {
1199 auto tgtWhichVecs_h =
1200 create_const_view (tgtWhichVecs.view_host ());
1201 auto srcWhichVecs_h =
1202 create_const_view (srcWhichVecs.view_host ());
1203 permute_array_multi_column_variable_stride (tgt_h, src_h,
1207 srcWhichVecs_h, numCols);
1210 permute_array_multi_column (tgt_h, src_h, permuteToLIDs_h,
1211 permuteFromLIDs_h, numCols);
1216 std::ostringstream os;
1217 os << *prefix <<
"Get permute LIDs on device" << endl;
1218 std::cerr << os.str ();
1220 auto tgt_d = this->getLocalViewDevice ();
1221 auto src_d = create_const_view (sourceMV.getLocalViewDevice ());
1223 TEUCHOS_ASSERT( ! permuteToLIDs.need_sync_device () );
1224 auto permuteToLIDs_d = create_const_view (permuteToLIDs.view_device ());
1225 TEUCHOS_ASSERT( ! permuteFromLIDs.need_sync_device () );
1226 auto permuteFromLIDs_d =
1227 create_const_view (permuteFromLIDs.view_device ());
1230 std::ostringstream os;
1231 os << *prefix <<
"Permute on device" << endl;
1232 std::cerr << os.str ();
1234 if (nonConstStride) {
1237 auto tgtWhichVecs_d = create_const_view (tgtWhichVecs.view_device ());
1238 auto srcWhichVecs_d = create_const_view (srcWhichVecs.view_device ());
1239 permute_array_multi_column_variable_stride (tgt_d, src_d,
1243 srcWhichVecs_d, numCols);
1246 permute_array_multi_column (tgt_d, src_d, permuteToLIDs_d,
1247 permuteFromLIDs_d, numCols);
1252 std::ostringstream os;
1253 os << *prefix <<
"Done!" << endl;
1254 std::cerr << os.str ();
1258 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1261 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1263 #else // TPETRA_ENABLE_DEPRECATED_CODE
1265 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1267 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1268 Kokkos::DualView<impl_scalar_type*, buffer_device_type>& exports,
1269 Kokkos::DualView<size_t*, buffer_device_type> ,
1270 size_t& constantNumPackets,
1273 using ::Tpetra::Details::Behavior;
1274 using ::Tpetra::Details::ProfilingRegion;
1276 using Kokkos::Compat::create_const_view;
1277 using Kokkos::Compat::getKokkosViewDeepCopy;
1280 const char tfecfFuncName[] =
"packAndPrepare: ";
1281 ProfilingRegion regionPAP (
"Tpetra::MultiVector::packAndPrepare");
1289 const bool debugCheckIndices = Behavior::debug ();
1294 const bool printDebugOutput = Behavior::verbose ();
1296 std::unique_ptr<std::string> prefix;
1297 if (printDebugOutput) {
1298 auto map = this->getMap ();
1299 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1300 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1301 std::ostringstream os;
1302 os <<
"Proc " << myRank <<
": MV::packAndPrepare: ";
1303 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1304 os <<
"Start" << endl;
1305 std::cerr << os.str ();
1309 const MV& sourceMV =
dynamic_cast<const MV&
> (sourceObj);
1311 const size_t numCols = sourceMV.getNumVectors ();
1316 constantNumPackets = numCols;
1320 if (exportLIDs.extent (0) == 0) {
1321 if (printDebugOutput) {
1322 std::ostringstream os;
1323 os << *prefix <<
"No exports on this proc, DONE" << std::endl;
1324 std::cerr << os.str ();
1344 const size_t numExportLIDs = exportLIDs.extent (0);
1345 const size_t newExportsSize = numCols * numExportLIDs;
1346 if (printDebugOutput) {
1347 std::ostringstream os;
1348 os << *prefix <<
"realloc: "
1349 <<
"numExportLIDs: " << numExportLIDs
1350 <<
", exports.extent(0): " << exports.extent (0)
1351 <<
", newExportsSize: " << newExportsSize << std::endl;
1352 std::cerr << os.str ();
1358 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1359 (sourceMV.need_sync_device () && sourceMV.need_sync_host (),
1360 std::logic_error,
"Input MultiVector needs sync to both host "
1362 const bool packOnHost = sourceMV.need_sync_device ();
1363 auto src_dev = sourceMV.getLocalViewHost ();
1364 auto src_host = sourceMV.getLocalViewDevice ();
1365 if (printDebugOutput) {
1366 std::ostringstream os;
1367 os << *prefix <<
"packOnHost=" << (packOnHost ?
"true" :
"false") << endl;
1368 std::cerr << os.str ();
1375 exports.modify_host ();
1378 exports.modify_device ();
1394 if (sourceMV.isConstantStride ()) {
1395 using KokkosRefactor::Details::pack_array_single_column;
1396 if (printDebugOutput) {
1397 std::ostringstream os;
1398 os << *prefix <<
"Pack numCols=1 const stride" << endl;
1399 std::cerr << os.str ();
1402 pack_array_single_column (exports.view_host (),
1403 create_const_view (src_host),
1404 exportLIDs.view_host (),
1409 pack_array_single_column (exports.view_device (),
1410 create_const_view (src_dev),
1411 exportLIDs.view_device (),
1417 using KokkosRefactor::Details::pack_array_single_column;
1418 if (printDebugOutput) {
1419 std::ostringstream os;
1420 os << *prefix <<
"Pack numCols=1 nonconst stride" << endl;
1421 std::cerr << os.str ();
1424 pack_array_single_column (exports.view_host (),
1425 create_const_view (src_host),
1426 exportLIDs.view_host (),
1427 sourceMV.whichVectors_[0],
1431 pack_array_single_column (exports.view_device (),
1432 create_const_view (src_dev),
1433 exportLIDs.view_device (),
1434 sourceMV.whichVectors_[0],
1440 if (sourceMV.isConstantStride ()) {
1441 using KokkosRefactor::Details::pack_array_multi_column;
1442 if (printDebugOutput) {
1443 std::ostringstream os;
1444 os << *prefix <<
"Pack numCols=" << numCols <<
" const stride" << endl;
1445 std::cerr << os.str ();
1448 pack_array_multi_column (exports.view_host (),
1449 create_const_view (src_host),
1450 exportLIDs.view_host (),
1455 pack_array_multi_column (exports.view_device (),
1456 create_const_view (src_dev),
1457 exportLIDs.view_device (),
1463 using KokkosRefactor::Details::pack_array_multi_column_variable_stride;
1464 if (printDebugOutput) {
1465 std::ostringstream os;
1466 os << *prefix <<
"Pack numCols=" << numCols <<
" nonconst stride"
1468 std::cerr << os.str ();
1473 using IST = impl_scalar_type;
1474 using DV = Kokkos::DualView<IST*, device_type>;
1475 using HES =
typename DV::t_host::execution_space;
1476 using DES =
typename DV::t_dev::execution_space;
1477 Teuchos::ArrayView<const size_t> whichVecs = sourceMV.whichVectors_ ();
1479 pack_array_multi_column_variable_stride
1480 (exports.view_host (),
1481 create_const_view (src_host),
1482 exportLIDs.view_host (),
1483 getKokkosViewDeepCopy<HES> (whichVecs),
1488 pack_array_multi_column_variable_stride
1489 (exports.view_device (),
1490 create_const_view (src_dev),
1491 exportLIDs.view_device (),
1492 getKokkosViewDeepCopy<DES> (whichVecs),
1499 if (printDebugOutput) {
1500 std::ostringstream os;
1501 os << *prefix <<
"Done!" << endl;
1502 std::cerr << os.str ();
1507 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1509 MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
1510 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1512 #else // TPETRA_ENABLE_DEPRECATED_CODE
1514 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1515 (
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& importLIDs,
1516 Kokkos::DualView<impl_scalar_type*, buffer_device_type> imports,
1517 Kokkos::DualView<size_t*, buffer_device_type> ,
1518 const size_t constantNumPackets,
1522 using ::Tpetra::Details::Behavior;
1523 using ::Tpetra::Details::ProfilingRegion;
1524 using KokkosRefactor::Details::unpack_array_multi_column;
1525 using KokkosRefactor::Details::unpack_array_multi_column_variable_stride;
1526 using Kokkos::Compat::getKokkosViewDeepCopy;
1529 const char tfecfFuncName[] =
"unpackAndCombine: ";
1530 ProfilingRegion regionUAC (
"Tpetra::MultiVector::unpackAndCombine");
1538 const bool debugCheckIndices = Behavior::debug ();
1540 const bool printDebugOutput = Behavior::verbose ();
1541 std::unique_ptr<std::string> prefix;
1542 if (printDebugOutput) {
1543 auto map = this->getMap ();
1544 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
1545 const int myRank = comm.is_null () ? -1 : comm->getRank ();
1546 std::ostringstream os;
1547 os <<
"Proc " << myRank <<
": MV::packAndPrepare: ";
1548 prefix = std::unique_ptr<std::string> (
new std::string (os.str ()));
1549 os <<
"Start" << endl;
1550 std::cerr << os.str ();
1554 if (importLIDs.extent (0) == 0) {
1555 if (printDebugOutput) {
1556 std::ostringstream os;
1557 os << *prefix <<
"No imports. Done!" << endl;
1562 const size_t numVecs = getNumVectors ();
1563 if (debugCheckIndices) {
1564 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1565 (
static_cast<size_t> (imports.extent (0)) !=
1566 numVecs * importLIDs.extent (0),
1568 "imports.extent(0) = " << imports.extent (0)
1569 <<
" != getNumVectors() * importLIDs.extent(0) = " << numVecs
1570 <<
" * " << importLIDs.extent (0) <<
" = "
1571 << numVecs * importLIDs.extent (0) <<
".");
1573 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1574 (constantNumPackets ==
static_cast<size_t> (0), std::runtime_error,
1575 "constantNumPackets input argument must be nonzero.");
1577 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1578 (
static_cast<size_t> (numVecs) !=
1579 static_cast<size_t> (constantNumPackets),
1580 std::runtime_error,
"constantNumPackets must equal numVecs.");
1586 const bool unpackOnHost = imports.need_sync_device ();
1588 if (printDebugOutput) {
1589 std::ostringstream os;
1590 os << *prefix <<
"unpackOnHost=" << (unpackOnHost ?
"true" :
"false")
1592 std::cerr << os.str ();
1598 if (this->need_sync_host ()) {
1601 this->modify_host ();
1604 if (this->need_sync_device ()) {
1605 this->sync_device ();
1607 this->modify_device ();
1609 auto X_d = this->getLocalViewDevice ();
1610 auto X_h = this->getLocalViewHost ();
1611 auto imports_d = imports.view_device ();
1612 auto imports_h = imports.view_host ();
1613 auto importLIDs_d = importLIDs.view_device ();
1614 auto importLIDs_h = importLIDs.view_host ();
1616 Kokkos::DualView<size_t*, device_type> whichVecs;
1617 if (! isConstantStride ()) {
1618 Kokkos::View<
const size_t*, Kokkos::HostSpace,
1619 Kokkos::MemoryUnmanaged> whichVecsIn (whichVectors_.getRawPtr (),
1621 whichVecs = Kokkos::DualView<size_t*, device_type> (
"whichVecs", numVecs);
1623 whichVecs.modify_host ();
1627 whichVecs.modify_device ();
1631 auto whichVecs_d = whichVecs.view_device ();
1632 auto whichVecs_h = whichVecs.view_host ();
1642 if (numVecs > 0 && importLIDs.extent (0) > 0) {
1643 using dev_exec_space =
typename dual_view_type::t_dev::execution_space;
1644 using host_exec_space =
typename dual_view_type::t_host::execution_space;
1647 const bool use_atomic_updates = unpackOnHost ?
1648 host_exec_space::concurrency () != 1 :
1649 dev_exec_space::concurrency () != 1;
1651 if (printDebugOutput) {
1652 std::ostringstream os;
1654 std::cerr << os.str ();
1661 using op_type = KokkosRefactor::Details::InsertOp<IST>;
1662 if (isConstantStride ()) {
1664 unpack_array_multi_column (host_exec_space (),
1665 X_h, imports_h, importLIDs_h,
1666 op_type (), numVecs,
1672 unpack_array_multi_column (dev_exec_space (),
1673 X_d, imports_d, importLIDs_d,
1674 op_type (), numVecs,
1681 unpack_array_multi_column_variable_stride (host_exec_space (),
1691 unpack_array_multi_column_variable_stride (dev_exec_space (),
1702 else if (CM ==
ADD) {
1703 using op_type = KokkosRefactor::Details::AddOp<IST>;
1704 if (isConstantStride ()) {
1706 unpack_array_multi_column (host_exec_space (),
1707 X_h, imports_h, importLIDs_h,
1708 op_type (), numVecs,
1713 unpack_array_multi_column (dev_exec_space (),
1714 X_d, imports_d, importLIDs_d,
1715 op_type (), numVecs,
1722 unpack_array_multi_column_variable_stride (host_exec_space (),
1732 unpack_array_multi_column_variable_stride (dev_exec_space (),
1744 using op_type = KokkosRefactor::Details::AbsMaxOp<IST>;
1745 if (isConstantStride ()) {
1747 unpack_array_multi_column (host_exec_space (),
1748 X_h, imports_h, importLIDs_h,
1749 op_type (), numVecs,
1754 unpack_array_multi_column (dev_exec_space (),
1755 X_d, imports_d, importLIDs_d,
1756 op_type (), numVecs,
1763 unpack_array_multi_column_variable_stride (host_exec_space (),
1773 unpack_array_multi_column_variable_stride (dev_exec_space (),
1785 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1786 (
true, std::logic_error,
"Invalid CombineMode");
1790 if (printDebugOutput) {
1791 std::ostringstream os;
1792 os << *prefix <<
"Nothing to unpack" << endl;
1793 std::cerr << os.str ();
1797 if (printDebugOutput) {
1798 std::ostringstream os;
1799 os << *prefix <<
"Done!" << endl;
1800 std::cerr << os.str ();
1804 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1809 if (isConstantStride ()) {
1810 return static_cast<size_t> (view_.extent (1));
1812 return static_cast<size_t> (whichVectors_.size ());
1820 gblDotImpl (
const RV& dotsOut,
1821 const Teuchos::RCP<
const Teuchos::Comm<int> >& comm,
1822 const bool distributed)
1824 using Teuchos::REDUCE_MAX;
1825 using Teuchos::REDUCE_SUM;
1826 using Teuchos::reduceAll;
1827 typedef typename RV::non_const_value_type dot_type;
1829 const size_t numVecs = dotsOut.extent (0);
1844 if (distributed && ! comm.is_null ()) {
1847 const int nv =
static_cast<int> (numVecs);
1848 const bool commIsInterComm = ::Tpetra::Details::isInterComm (*comm);
1850 if (commIsInterComm) {
1854 typename RV::non_const_type lclDots (Kokkos::ViewAllocateWithoutInitializing (
"tmp"), numVecs);
1856 const dot_type*
const lclSum = lclDots.data ();
1857 dot_type*
const gblSum = dotsOut.data ();
1858 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, lclSum, gblSum);
1861 dot_type*
const inout = dotsOut.data ();
1862 reduceAll<int, dot_type> (*comm, REDUCE_SUM, nv, inout, inout);
1868 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1872 const Kokkos::View<dot_type*, Kokkos::HostSpace>& dots)
const
1874 using ::Tpetra::Details::Behavior;
1875 using Kokkos::subview;
1876 using Teuchos::Comm;
1877 using Teuchos::null;
1880 typedef Kokkos::View<dot_type*, Kokkos::HostSpace> RV;
1882 typedef typename dual_view_type::t_dev XMV;
1883 const char tfecfFuncName[] =
"Tpetra::MultiVector::dot: ";
1887 const size_t numVecs = this->getNumVectors ();
1891 const size_t lclNumRows = this->getLocalLength ();
1892 const size_t numDots =
static_cast<size_t> (dots.extent (0));
1893 const bool debug = Behavior::debug ();
1896 const bool compat = this->getMap ()->isCompatible (* (A.
getMap ()));
1897 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1898 (! compat, std::invalid_argument,
"'*this' MultiVector is not "
1899 "compatible with the input MultiVector A. We only test for this "
1910 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1912 "MultiVectors do not have the same local length. "
1913 "this->getLocalLength() = " << lclNumRows <<
" != "
1915 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1917 "MultiVectors must have the same number of columns (vectors). "
1918 "this->getNumVectors() = " << numVecs <<
" != "
1920 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1921 numDots != numVecs, std::runtime_error,
1922 "The output array 'dots' must have the same number of entries as the "
1923 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
1924 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
1926 const std::pair<size_t, size_t> colRng (0, numVecs);
1927 RV dotsOut = subview (dots, colRng);
1928 RCP<const Comm<int> > comm = this->getMap ().is_null () ? null :
1929 this->getMap ()->getComm ();
1932 if (this->need_sync_device ()) {
1933 const_cast<MV*
>(
this)->sync_device ();
1936 const_cast<MV&
>(A).sync_device ();
1939 auto thisView = this->getLocalViewDevice ();
1942 ::Tpetra::Details::lclDot<RV, XMV> (dotsOut, thisView, A_view, lclNumRows, numVecs,
1943 this->whichVectors_.getRawPtr (),
1946 gblDotImpl (dotsOut, comm, this->isDistributed ());
1950 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1955 using ::Tpetra::Details::ProfilingRegion;
1957 using dot_type =
typename MV::dot_type;
1958 ProfilingRegion region (
"Tpetra::multiVectorSingleColumnDot");
1961 Teuchos::RCP<const Teuchos::Comm<int> > comm =
1962 map.is_null () ? Teuchos::null : map->getComm ();
1963 if (comm.is_null ()) {
1964 return Kokkos::ArithTraits<dot_type>::zero ();
1967 using LO = LocalOrdinal;
1971 const LO lclNumRows =
static_cast<LO
> (std::min (x.
getLocalLength (),
1973 const Kokkos::pair<LO, LO> rowRng (0, lclNumRows);
1974 dot_type lclDot = Kokkos::ArithTraits<dot_type>::zero ();
1975 dot_type gblDot = Kokkos::ArithTraits<dot_type>::zero ();
1982 const_cast<MV&
>(y).sync_device ();
1987 auto x_1d = Kokkos::subview (x_2d, rowRng, 0);
1989 auto y_1d = Kokkos::subview (y_2d, rowRng, 0);
1990 lclDot = KokkosBlas::dot (x_1d, y_1d);
1993 using Teuchos::outArg;
1994 using Teuchos::REDUCE_SUM;
1995 using Teuchos::reduceAll;
1996 reduceAll<int, dot_type> (*comm, REDUCE_SUM, lclDot, outArg (gblDot));
2008 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2012 const Teuchos::ArrayView<dot_type>& dots)
const
2015 const char tfecfFuncName[] =
"dot: ";
2018 const size_t numVecs = this->getNumVectors ();
2019 const size_t lclNumRows = this->getLocalLength ();
2020 const size_t numDots =
static_cast<size_t> (dots.size ());
2030 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2032 "MultiVectors do not have the same local length. "
2033 "this->getLocalLength() = " << lclNumRows <<
" != "
2035 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2037 "MultiVectors must have the same number of columns (vectors). "
2038 "this->getNumVectors() = " << numVecs <<
" != "
2040 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2041 (numDots != numVecs, std::runtime_error,
2042 "The output array 'dots' must have the same number of entries as the "
2043 "number of columns (vectors) in *this and A. dots.extent(0) = " <<
2044 numDots <<
" != this->getNumVectors() = " << numVecs <<
".");
2047 const dot_type gblDot = multiVectorSingleColumnDot (
const_cast<MV&
> (*
this), A);
2051 this->dot (A, Kokkos::View<dot_type*, Kokkos::HostSpace>(dots.getRawPtr (), numDots));
2055 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2058 norm2 (
const Teuchos::ArrayView<mag_type>& norms)
const
2061 using ::Tpetra::Details::NORM_TWO;
2062 using ::Tpetra::Details::ProfilingRegion;
2063 ProfilingRegion region (
"Tpetra::MV::norm2 (host output)");
2066 MV& X =
const_cast<MV&
> (*this);
2067 multiVectorNormImpl (norms.getRawPtr (), X, NORM_TWO);
2070 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2073 norm2 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2075 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2076 this->norm2 (norms_av);
2079 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
2080 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2081 void TPETRA_DEPRECATED
2084 const Teuchos::ArrayView<mag_type> &norms)
const
2087 using Kokkos::subview;
2088 using Teuchos::Comm;
2089 using Teuchos::null;
2091 using Teuchos::reduceAll;
2092 using Teuchos::REDUCE_SUM;
2093 typedef Kokkos::ArithTraits<impl_scalar_type> ATS;
2094 typedef Kokkos::ArithTraits<mag_type> ATM;
2095 typedef Kokkos::View<mag_type*, Kokkos::HostSpace> norms_view_type;
2097 const char tfecfFuncName[] =
"normWeighted: ";
2099 const size_t numVecs = this->getNumVectors ();
2100 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2101 static_cast<size_t> (norms.size ()) != numVecs, std::runtime_error,
2102 "norms.size() = " << norms.size () <<
" != this->getNumVectors() = "
2106 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2107 ! OneW && weights.
getNumVectors () != numVecs, std::runtime_error,
2108 "The input MultiVector of weights must contain either one column, "
2109 "or must have the same number of columns as *this. "
2111 <<
" and this->getNumVectors() = " << numVecs <<
".");
2116 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2117 (! this->getMap ()->isCompatible (*weights.
getMap ()), std::runtime_error,
2118 "MultiVectors do not have compatible Maps:" << std::endl
2119 <<
"this->getMap(): " << std::endl << *this->getMap()
2120 <<
"weights.getMap(): " << std::endl << *weights.
getMap() << std::endl);
2123 const size_t lclNumRows = this->getLocalLength ();
2124 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2126 "MultiVectors do not have the same local length.");
2129 norms_view_type lclNrms (
"Tpetra::MV::lclNrms", numVecs);
2132 const_cast<MV*
> (
this)->sync_device ();
2133 const_cast<MV&
> (weights).sync_device ();
2135 auto X_lcl = this->getLocalViewDevice ();
2138 if (isConstantStride () && ! OneW) {
2139 KokkosBlas::nrm2w_squared (lclNrms, X_lcl, W_lcl);
2142 for (
size_t j = 0; j < numVecs; ++j) {
2143 const size_t X_col = this->isConstantStride () ? j :
2144 this->whichVectors_[j];
2145 const size_t W_col = OneW ?
static_cast<size_t> (0) :
2147 KokkosBlas::nrm2w_squared (subview (lclNrms, j),
2148 subview (X_lcl, ALL (), X_col),
2149 subview (W_lcl, ALL (), W_col));
2153 const mag_type OneOverN =
2154 ATM::one () /
static_cast<mag_type
> (this->getGlobalLength ());
2155 RCP<const Comm<int> > comm = this->getMap ().is_null () ?
2156 Teuchos::null : this->getMap ()->getComm ();
2158 if (! comm.is_null () && this->isDistributed ()) {
2160 reduceAll<int, mag_type> (*comm, REDUCE_SUM,
static_cast<int> (numVecs),
2161 lclNrms.data (), norms.getRawPtr ());
2162 for (
size_t k = 0; k < numVecs; ++k) {
2163 norms[k] = ATM::sqrt (norms[k] * OneOverN);
2167 for (
size_t k = 0; k < numVecs; ++k) {
2168 norms[k] = ATM::sqrt (ATS::magnitude (lclNrms(k)) * OneOverN);
2172 #endif // TPETRA_ENABLE_DEPRECATED_CODE
2174 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2177 norm1 (
const Teuchos::ArrayView<mag_type>& norms)
const
2180 using ::Tpetra::Details::NORM_ONE;
2181 using ::Tpetra::Details::ProfilingRegion;
2182 ProfilingRegion region (
"Tpetra::MV::norm1 (host output)");
2185 MV& X =
const_cast<MV&
> (*this);
2186 multiVectorNormImpl (norms.data (), X, NORM_ONE);
2189 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2192 norm1 (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2194 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2195 this->norm1 (norms_av);
2198 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2201 normInf (
const Teuchos::ArrayView<mag_type>& norms)
const
2204 using ::Tpetra::Details::NORM_INF;
2205 using ::Tpetra::Details::ProfilingRegion;
2206 ProfilingRegion region (
"Tpetra::MV::normInf (host output)");
2209 MV& X =
const_cast<MV&
> (*this);
2210 multiVectorNormImpl (norms.getRawPtr (), X, NORM_INF);
2213 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2216 normInf (
const Kokkos::View<mag_type*, Kokkos::HostSpace>& norms)
const
2218 Teuchos::ArrayView<mag_type> norms_av (norms.data (), norms.extent (0));
2219 this->normInf (norms_av);
2222 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2225 meanValue (
const Teuchos::ArrayView<impl_scalar_type>& means)
const
2229 using Kokkos::subview;
2230 using Teuchos::Comm;
2232 using Teuchos::reduceAll;
2233 using Teuchos::REDUCE_SUM;
2234 typedef Kokkos::Details::ArithTraits<impl_scalar_type> ATS;
2236 const size_t lclNumRows = this->getLocalLength ();
2237 const size_t numVecs = this->getNumVectors ();
2238 const size_t numMeans =
static_cast<size_t> (means.size ());
2240 TEUCHOS_TEST_FOR_EXCEPTION(
2241 numMeans != numVecs, std::runtime_error,
2242 "Tpetra::MultiVector::meanValue: means.size() = " << numMeans
2243 <<
" != this->getNumVectors() = " << numVecs <<
".");
2245 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2246 const std::pair<size_t, size_t> colRng (0, numVecs);
2251 typedef Kokkos::View<impl_scalar_type*, device_type> local_view_type;
2253 typename local_view_type::HostMirror::array_layout,
2255 Kokkos::MemoryTraits<Kokkos::Unmanaged> > host_local_view_type;
2256 host_local_view_type meansOut (means.getRawPtr (), numMeans);
2258 RCP<const Comm<int> > comm = this->getMap ().is_null () ? Teuchos::null :
2259 this->getMap ()->getComm ();
2262 const bool useHostVersion = this->need_sync_device ();
2263 if (useHostVersion) {
2265 auto X_lcl = subview (getLocalViewHost (),
2266 rowRng, Kokkos::ALL ());
2268 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2269 if (isConstantStride ()) {
2270 KokkosBlas::sum (lclSums, X_lcl);
2273 for (
size_t j = 0; j < numVecs; ++j) {
2274 const size_t col = whichVectors_[j];
2275 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2282 if (! comm.is_null () && this->isDistributed ()) {
2283 reduceAll (*comm, REDUCE_SUM,
static_cast<int> (numVecs),
2284 lclSums.data (), meansOut.data ());
2292 auto X_lcl = subview (this->getLocalViewDevice (),
2293 rowRng, Kokkos::ALL ());
2296 Kokkos::View<impl_scalar_type*, Kokkos::HostSpace> lclSums (
"MV::meanValue tmp", numVecs);
2297 if (isConstantStride ()) {
2298 KokkosBlas::sum (lclSums, X_lcl);
2301 for (
size_t j = 0; j < numVecs; ++j) {
2302 const size_t col = whichVectors_[j];
2303 KokkosBlas::sum (subview (lclSums, j), subview (X_lcl, ALL (), col));
2311 if (! comm.is_null () && this->isDistributed ()) {
2312 reduceAll (*comm, REDUCE_SUM,
static_cast<int> (numVecs),
2313 lclSums.data (), meansOut.data ());
2323 const impl_scalar_type OneOverN =
2324 ATS::one () /
static_cast<mag_type
> (this->getGlobalLength ());
2325 for (
size_t k = 0; k < numMeans; ++k) {
2326 meansOut(k) = meansOut(k) * OneOverN;
2331 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2337 typedef Kokkos::Details::ArithTraits<IST> ATS;
2338 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2339 typedef typename pool_type::generator_type generator_type;
2341 const IST max = Kokkos::rand<generator_type, IST>::max ();
2342 const IST min = ATS::is_signed ? IST (-max) : ATS::zero ();
2344 this->randomize (
static_cast<Scalar
> (min),
static_cast<Scalar
> (max));
2348 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2351 randomize (
const Scalar& minVal,
const Scalar& maxVal)
2354 typedef Kokkos::Random_XorShift64_Pool<typename device_type::execution_space> pool_type;
2365 const uint64_t myRank =
2366 static_cast<uint64_t
> (this->getMap ()->getComm ()->getRank ());
2367 uint64_t seed64 =
static_cast<uint64_t
> (std::rand ()) + myRank + 17311uLL;
2368 unsigned int seed =
static_cast<unsigned int> (seed64&0xffffffff);
2370 pool_type rand_pool (seed);
2371 const IST max =
static_cast<IST
> (maxVal);
2372 const IST min =
static_cast<IST
> (minVal);
2377 this->view_.clear_sync_state();
2379 this->modify_device ();
2380 auto thisView = this->getLocalViewDevice ();
2382 if (isConstantStride ()) {
2383 Kokkos::fill_random (thisView, rand_pool, min, max);
2386 const size_t numVecs = getNumVectors ();
2387 for (
size_t k = 0; k < numVecs; ++k) {
2388 const size_t col = whichVectors_[k];
2389 auto X_k = Kokkos::subview (thisView, Kokkos::ALL (), col);
2390 Kokkos::fill_random (X_k, rand_pool, min, max);
2395 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2400 using ::Tpetra::Details::ProfilingRegion;
2401 using ::Tpetra::Details::Blas::fill;
2402 using DES =
typename dual_view_type::t_dev::execution_space;
2403 using HES =
typename dual_view_type::t_host::execution_space;
2404 using LO = LocalOrdinal;
2405 ProfilingRegion region (
"Tpetra::MultiVector::putScalar");
2410 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
2411 const LO numVecs =
static_cast<LO
> (this->getNumVectors ());
2417 const bool runOnHost = this->need_sync_device ();
2420 this->modify_device ();
2421 auto X = this->getLocalViewDevice ();
2422 if (this->isConstantStride ()) {
2423 fill (DES (), X, theAlpha, lclNumRows, numVecs);
2426 fill (DES (), X, theAlpha, lclNumRows, numVecs,
2427 this->whichVectors_.getRawPtr ());
2431 this->modify_host ();
2432 auto X = this->getLocalViewHost ();
2433 if (this->isConstantStride ()) {
2434 fill (HES (), X, theAlpha, lclNumRows, numVecs);
2437 fill (HES (), X, theAlpha, lclNumRows, numVecs,
2438 this->whichVectors_.getRawPtr ());
2444 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2447 replaceMap (
const Teuchos::RCP<const map_type>& newMap)
2449 using Teuchos::ArrayRCP;
2450 using Teuchos::Comm;
2453 using LO = LocalOrdinal;
2454 using GO = GlobalOrdinal;
2460 TEUCHOS_TEST_FOR_EXCEPTION(
2461 ! this->isConstantStride (), std::logic_error,
2462 "Tpetra::MultiVector::replaceMap: This method does not currently work "
2463 "if the MultiVector is a column view of another MultiVector (that is, if "
2464 "isConstantStride() == false).");
2494 #ifdef HAVE_TEUCHOS_DEBUG
2508 #endif // HAVE_TEUCHOS_DEBUG
2510 if (this->getMap ().is_null ()) {
2515 TEUCHOS_TEST_FOR_EXCEPTION(
2516 newMap.is_null (), std::invalid_argument,
2517 "Tpetra::MultiVector::replaceMap: both current and new Maps are null. "
2518 "This probably means that the input Map is incorrect.");
2522 const size_t newNumRows = newMap->getNodeNumElements ();
2523 const size_t origNumRows = view_.extent (0);
2524 const size_t numCols = this->getNumVectors ();
2526 if (origNumRows != newNumRows || view_.extent (1) != numCols) {
2527 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2530 else if (newMap.is_null ()) {
2533 const size_t newNumRows =
static_cast<size_t> (0);
2534 const size_t numCols = this->getNumVectors ();
2535 view_ = allocDualView<ST, LO, GO, Node> (newNumRows, numCols);
2538 this->map_ = newMap;
2541 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2544 scale (
const Scalar& alpha)
2549 const IST theAlpha =
static_cast<IST
> (alpha);
2550 if (theAlpha == Kokkos::ArithTraits<IST>::one ()) {
2553 const size_t lclNumRows = getLocalLength ();
2554 const size_t numVecs = getNumVectors ();
2555 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2556 const std::pair<size_t, size_t> colRng (0, numVecs);
2564 const bool useHostVersion = need_sync_device ();
2565 if (useHostVersion) {
2566 auto Y_lcl = Kokkos::subview (getLocalViewHost (), rowRng, ALL ());
2567 if (isConstantStride ()) {
2568 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2571 for (
size_t k = 0; k < numVecs; ++k) {
2572 const size_t Y_col = isConstantStride () ? k : whichVectors_[k];
2573 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2574 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2579 auto Y_lcl = Kokkos::subview (getLocalViewDevice (), rowRng, ALL ());
2580 if (isConstantStride ()) {
2581 KokkosBlas::scal (Y_lcl, theAlpha, Y_lcl);
2584 for (
size_t k = 0; k < numVecs; ++k) {
2585 const size_t Y_col = isConstantStride () ? k : whichVectors_[k];
2586 auto Y_k = Kokkos::subview (Y_lcl, ALL (), Y_col);
2587 KokkosBlas::scal (Y_k, theAlpha, Y_k);
2594 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2597 scale (
const Teuchos::ArrayView<const Scalar>& alphas)
2599 const size_t numVecs = this->getNumVectors ();
2600 const size_t numAlphas =
static_cast<size_t> (alphas.size ());
2601 TEUCHOS_TEST_FOR_EXCEPTION(
2602 numAlphas != numVecs, std::invalid_argument,
"Tpetra::MultiVector::"
2603 "scale: alphas.size() = " << numAlphas <<
" != this->getNumVectors() = "
2607 using k_alphas_type = Kokkos::DualView<impl_scalar_type*, device_type>;
2608 k_alphas_type k_alphas (
"alphas::tmp", numAlphas);
2609 k_alphas.modify_host ();
2610 for (
size_t i = 0; i < numAlphas; ++i) {
2613 k_alphas.sync_device ();
2615 this->scale (k_alphas.view_device ());
2618 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2621 scale (
const Kokkos::View<const impl_scalar_type*, device_type>& alphas)
2624 using Kokkos::subview;
2626 const size_t lclNumRows = this->getLocalLength ();
2627 const size_t numVecs = this->getNumVectors ();
2628 TEUCHOS_TEST_FOR_EXCEPTION(
2629 static_cast<size_t> (alphas.extent (0)) != numVecs,
2630 std::invalid_argument,
"Tpetra::MultiVector::scale(alphas): "
2631 "alphas.extent(0) = " << alphas.extent (0)
2632 <<
" != this->getNumVectors () = " << numVecs <<
".");
2633 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2634 const std::pair<size_t, size_t> colRng (0, numVecs);
2644 const bool useHostVersion = this->need_sync_device ();
2645 if (useHostVersion) {
2648 auto alphas_h = Kokkos::create_mirror_view (alphas);
2651 auto Y_lcl = subview (this->getLocalViewHost (), rowRng, ALL ());
2652 if (isConstantStride ()) {
2653 KokkosBlas::scal (Y_lcl, alphas_h, Y_lcl);
2656 for (
size_t k = 0; k < numVecs; ++k) {
2657 const size_t Y_col = this->isConstantStride () ? k :
2658 this->whichVectors_[k];
2659 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2662 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2667 auto Y_lcl = subview (this->getLocalViewDevice (), rowRng, ALL ());
2668 if (isConstantStride ()) {
2669 KokkosBlas::scal (Y_lcl, alphas, Y_lcl);
2676 auto alphas_h = Kokkos::create_mirror_view (alphas);
2679 for (
size_t k = 0; k < numVecs; ++k) {
2680 const size_t Y_col = this->isConstantStride () ? k :
2681 this->whichVectors_[k];
2682 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2683 KokkosBlas::scal (Y_k, alphas_h(k), Y_k);
2689 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2692 scale (
const Scalar& alpha,
2696 using Kokkos::subview;
2698 const char tfecfFuncName[] =
"scale: ";
2700 const size_t lclNumRows = getLocalLength ();
2701 const size_t numVecs = getNumVectors ();
2703 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2705 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2707 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2709 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2713 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2714 const std::pair<size_t, size_t> colRng (0, numVecs);
2717 if (this->need_sync_device ()) {
2718 this->sync_device ();
2721 const_cast<MV&
>(A).sync_device ();
2724 this->modify_device ();
2725 auto Y_lcl_orig = this->getLocalViewDevice ();
2727 auto Y_lcl = subview (Y_lcl_orig, rowRng, ALL ());
2728 auto X_lcl = subview (X_lcl_orig, rowRng, ALL ());
2731 KokkosBlas::scal (Y_lcl, theAlpha, X_lcl);
2735 for (
size_t k = 0; k < numVecs; ++k) {
2736 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2738 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2739 auto X_k = subview (X_lcl, ALL (), X_col);
2741 KokkosBlas::scal (Y_k, theAlpha, X_k);
2748 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2754 const char tfecfFuncName[] =
"reciprocal: ";
2756 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2758 "MultiVectors do not have the same local length. "
2759 "this->getLocalLength() = " << getLocalLength ()
2761 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2762 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
2763 ": MultiVectors do not have the same number of columns (vectors). "
2764 "this->getNumVectors() = " << getNumVectors ()
2765 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
2767 const size_t numVecs = getNumVectors ();
2770 if (this->need_sync_device ()) {
2771 this->sync_device ();
2774 const_cast<MV&
>(A).sync_device ();
2776 this->modify_device ();
2778 auto this_view_dev = this->getLocalViewDevice ();
2782 KokkosBlas::reciprocal (this_view_dev, A_view_dev);
2786 using Kokkos::subview;
2787 for (
size_t k = 0; k < numVecs; ++k) {
2788 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2789 auto vector_k = subview (this_view_dev, ALL (), this_col);
2790 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
2791 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
2792 KokkosBlas::reciprocal (vector_k, vector_Ak);
2797 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2803 const char tfecfFuncName[] =
"abs";
2805 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2807 ": MultiVectors do not have the same local length. "
2808 "this->getLocalLength() = " << getLocalLength ()
2810 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2811 A.
getNumVectors () != this->getNumVectors (), std::runtime_error,
2812 ": MultiVectors do not have the same number of columns (vectors). "
2813 "this->getNumVectors() = " << getNumVectors ()
2814 <<
" != A.getNumVectors() = " << A.
getNumVectors () <<
".");
2815 const size_t numVecs = getNumVectors ();
2818 if (this->need_sync_device ()) {
2819 this->sync_device ();
2822 const_cast<MV&
>(A).sync_device ();
2824 this->modify_device ();
2826 auto this_view_dev = this->getLocalViewDevice ();
2830 KokkosBlas::abs (this_view_dev, A_view_dev);
2834 using Kokkos::subview;
2836 for (
size_t k=0; k < numVecs; ++k) {
2837 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2838 auto vector_k = subview (this_view_dev, ALL (), this_col);
2839 const size_t A_col = isConstantStride () ? k : A.
whichVectors_[k];
2840 auto vector_Ak = subview (A_view_dev, ALL (), A_col);
2841 KokkosBlas::abs (vector_k, vector_Ak);
2846 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2849 update (
const Scalar& alpha,
2853 const char tfecfFuncName[] =
"update: ";
2854 using Kokkos::subview;
2860 const size_t lclNumRows = getLocalLength ();
2861 const size_t numVecs = getNumVectors ();
2863 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2865 "this->getLocalLength() = " << lclNumRows <<
" != A.getLocalLength() = "
2867 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2869 "this->getNumVectors() = " << numVecs <<
" != A.getNumVectors() = "
2873 if (this->need_sync_device ()) {
2874 this->sync_device ();
2877 const_cast<MV&
>(A).sync_device ();
2882 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2883 const std::pair<size_t, size_t> colRng (0, numVecs);
2885 auto Y_lcl_orig = this->getLocalViewDevice ();
2886 auto Y_lcl = subview (Y_lcl_orig, rowRng, Kokkos::ALL ());
2888 auto X_lcl = subview (X_lcl_orig, rowRng, Kokkos::ALL ());
2891 this->modify_device ();
2893 KokkosBlas::axpby (theAlpha, X_lcl, theBeta, Y_lcl);
2897 for (
size_t k = 0; k < numVecs; ++k) {
2898 const size_t Y_col = this->isConstantStride () ? k : this->whichVectors_[k];
2900 auto Y_k = subview (Y_lcl, ALL (), Y_col);
2901 auto X_k = subview (X_lcl, ALL (), X_col);
2903 KokkosBlas::axpby (theAlpha, X_k, theBeta, Y_k);
2908 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2911 update (
const Scalar& alpha,
2915 const Scalar& gamma)
2918 using Kokkos::subview;
2921 const char tfecfFuncName[] =
"update(alpha,A,beta,B,gamma): ";
2925 const size_t lclNumRows = this->getLocalLength ();
2926 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2928 "The input MultiVector A has " << A.
getLocalLength () <<
" local "
2929 "row(s), but this MultiVector has " << lclNumRows <<
" local "
2931 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2933 "The input MultiVector B has " << B.
getLocalLength () <<
" local "
2934 "row(s), but this MultiVector has " << lclNumRows <<
" local "
2936 const size_t numVecs = getNumVectors ();
2937 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2939 "The input MultiVector A has " << A.
getNumVectors () <<
" column(s), "
2940 "but this MultiVector has " << numVecs <<
" column(s).");
2941 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2943 "The input MultiVector B has " << B.
getNumVectors () <<
" column(s), "
2944 "but this MultiVector has " << numVecs <<
" column(s).");
2951 if (this->need_sync_device ()) this->sync_device ();
2956 this->modify_device ();
2958 const std::pair<size_t, size_t> rowRng (0, lclNumRows);
2959 const std::pair<size_t, size_t> colRng (0, numVecs);
2964 auto C_lcl = subview (this->getLocalViewDevice (), rowRng, ALL ());
2969 KokkosBlas::update (theAlpha, A_lcl, theBeta, B_lcl, theGamma, C_lcl);
2974 for (
size_t k = 0; k < numVecs; ++k) {
2975 const size_t this_col = isConstantStride () ? k : whichVectors_[k];
2978 KokkosBlas::update (theAlpha, subview (A_lcl, rowRng, A_col),
2979 theBeta, subview (B_lcl, rowRng, B_col),
2980 theGamma, subview (C_lcl, rowRng, this_col));
2985 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
2986 Teuchos::ArrayRCP<const Scalar>
2993 const char tfecfFuncName[] =
"getData: ";
2999 const_cast<MV*
> (
this)->sync_host ();
3001 auto hostView = getLocalViewHost ();
3002 const size_t col = isConstantStride () ? j : whichVectors_[j];
3003 auto hostView_j = Kokkos::subview (hostView, ALL (), col);
3004 Teuchos::ArrayRCP<const IST> dataAsArcp =
3005 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3007 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3008 (
static_cast<size_t> (hostView_j.extent (0)) <
3009 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3010 "hostView_j.extent(0) = " << hostView_j.extent (0)
3011 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3012 "Please report this bug to the Tpetra developers.");
3014 return Teuchos::arcp_reinterpret_cast<const Scalar> (dataAsArcp);
3017 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3018 Teuchos::ArrayRCP<Scalar>
3023 using Kokkos::subview;
3026 const char tfecfFuncName[] =
"getDataNonConst: ";
3032 const_cast<MV*
> (
this)->sync_host ();
3037 auto hostView = getLocalViewHost ();
3038 const size_t col = isConstantStride () ? j : whichVectors_[j];
3039 auto hostView_j = subview (hostView, ALL (), col);
3040 Teuchos::ArrayRCP<IST> dataAsArcp =
3041 Kokkos::Compat::persistingView (hostView_j, 0, getLocalLength ());
3043 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3044 (
static_cast<size_t> (hostView_j.extent (0)) <
3045 static_cast<size_t> (dataAsArcp.size ()), std::logic_error,
3046 "hostView_j.extent(0) = " << hostView_j.extent (0)
3047 <<
" < dataAsArcp.size() = " << dataAsArcp.size () <<
". "
3048 "Please report this bug to the Tpetra developers.");
3050 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3053 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3054 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3056 subCopy (
const Teuchos::ArrayView<const size_t>& cols)
const
3063 bool contiguous =
true;
3064 const size_t numCopyVecs =
static_cast<size_t> (cols.size ());
3065 for (
size_t j = 1; j < numCopyVecs; ++j) {
3066 if (cols[j] != cols[j-1] +
static_cast<size_t> (1)) {
3071 if (contiguous && numCopyVecs > 0) {
3072 return this->subCopy (Teuchos::Range1D (cols[0], cols[numCopyVecs-1]));
3075 RCP<const MV> X_sub = this->subView (cols);
3076 RCP<MV> Y (
new MV (this->getMap (), numCopyVecs,
false));
3082 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3083 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3085 subCopy (
const Teuchos::Range1D &colRng)
const
3090 RCP<const MV> X_sub = this->subView (colRng);
3091 RCP<MV> Y (
new MV (this->getMap (),
static_cast<size_t> (colRng.size ()),
false));
3096 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3100 return origView_.extent (0);
3103 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3107 return origView_.extent (1);
3110 template <
class Scalar,
class LO,
class GO,
class Node>
3113 const Teuchos::RCP<const map_type>& subMap,
3114 const local_ordinal_type rowOffset) :
3118 using Kokkos::subview;
3119 using Teuchos::outArg;
3122 using Teuchos::reduceAll;
3123 using Teuchos::REDUCE_MIN;
3126 const char prefix[] =
"Tpetra::MultiVector constructor (offsetView): ";
3127 const char suffix[] =
"Please report this bug to the Tpetra developers.";
3130 std::unique_ptr<std::ostringstream> errStrm;
3137 const auto comm = subMap->getComm ();
3138 TEUCHOS_ASSERT( ! comm.is_null () );
3139 const int myRank = comm->getRank ();
3141 const LO lclNumRowsBefore =
static_cast<LO
> (X.
getLocalLength ());
3143 const LO newNumRows =
static_cast<LO
> (subMap->getNodeNumElements ());
3145 std::ostringstream os;
3146 os <<
"Proc " << myRank <<
": " << prefix
3147 <<
"X: {lclNumRows: " << lclNumRowsBefore
3149 <<
", numCols: " << numCols <<
"}, "
3150 <<
"subMap: {lclNumRows: " << newNumRows <<
"}" << endl;
3151 std::cerr << os.str ();
3156 const bool tooManyElts =
3159 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3160 *errStrm <<
" Proc " << myRank <<
": subMap->getNodeNumElements() (="
3161 << newNumRows <<
") + rowOffset (=" << rowOffset
3165 TEUCHOS_TEST_FOR_EXCEPTION
3166 (! debug && tooManyElts, std::invalid_argument,
3167 prefix << errStrm->str () << suffix);
3171 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3173 std::ostringstream gblErrStrm;
3174 const std::string myErrStr =
3175 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3176 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3177 TEUCHOS_TEST_FOR_EXCEPTION
3178 (
true, std::invalid_argument, gblErrStrm.str ());
3182 using range_type = std::pair<LO, LO>;
3183 const range_type origRowRng
3184 (rowOffset,
static_cast<LO
> (X.
origView_.extent (0)));
3185 const range_type rowRng
3186 (rowOffset, rowOffset + newNumRows);
3200 subview (rowOffset == 0 ? X.
origView_ : X.
view_, rowRng, ALL ());
3207 if (newOrigView.extent (0) == 0 &&
3208 newOrigView.extent (1) != X.
origView_.extent (1)) {
3210 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3212 if (newView.extent (0) == 0 &&
3213 newView.extent (1) != X.
view_.extent (1)) {
3215 allocDualView<Scalar, LO, GO, Node> (0, X.
getNumVectors ());
3219 MV (subMap, newView, newOrigView) :
3223 const LO lclNumRowsRet =
static_cast<LO
> (subViewMV.getLocalLength ());
3224 const LO numColsRet =
static_cast<LO
> (subViewMV.getNumVectors ());
3225 if (newNumRows != lclNumRowsRet || numCols != numColsRet) {
3227 if (errStrm.get () ==
nullptr) {
3228 errStrm = std::unique_ptr<std::ostringstream> (
new std::ostringstream);
3230 *errStrm <<
" Proc " << myRank <<
3231 ": subMap.getNodeNumElements(): " << newNumRows <<
3232 ", subViewMV.getLocalLength(): " << lclNumRowsRet <<
3233 ", X.getNumVectors(): " << numCols <<
3234 ", subViewMV.getNumVectors(): " << numColsRet << endl;
3236 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3238 std::ostringstream gblErrStrm;
3240 gblErrStrm << prefix <<
"Returned MultiVector has the wrong local "
3241 "dimensions on one or more processes:" << endl;
3243 const std::string myErrStr =
3244 errStrm.get () !=
nullptr ? errStrm->str () : std::string (
"");
3245 ::Tpetra::Details::gathervPrint (gblErrStrm, myErrStr, *comm);
3246 gblErrStrm << suffix << endl;
3247 TEUCHOS_TEST_FOR_EXCEPTION
3248 (
true, std::invalid_argument, gblErrStrm.str ());
3253 std::ostringstream os;
3254 os <<
"Proc " << myRank <<
": " << prefix <<
"Call op=" << endl;
3255 std::cerr << os.str ();
3261 std::ostringstream os;
3262 os <<
"Proc " << myRank <<
": " << prefix <<
"Done!" << endl;
3263 std::cerr << os.str ();
3267 template <
class Scalar,
class LO,
class GO,
class Node>
3269 MultiVector (
const MultiVector<Scalar, LO, GO, Node>& X,
3270 const map_type& subMap,
3271 const size_t rowOffset) :
3272 MultiVector (X, Teuchos::RCP<const map_type> (new map_type (subMap)),
3276 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3277 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3279 offsetView (
const Teuchos::RCP<const map_type>& subMap,
3280 const size_t offset)
const
3283 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3286 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3287 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3290 const size_t offset)
3293 return Teuchos::rcp (
new MV (*
this, *subMap, offset));
3296 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3297 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3299 subView (
const Teuchos::ArrayView<const size_t>& cols)
const
3301 using Teuchos::Array;
3305 const size_t numViewCols =
static_cast<size_t> (cols.size ());
3306 TEUCHOS_TEST_FOR_EXCEPTION(
3307 numViewCols < 1, std::runtime_error,
"Tpetra::MultiVector::subView"
3308 "(const Teuchos::ArrayView<const size_t>&): The input array cols must "
3309 "contain at least one entry, but cols.size() = " << cols.size ()
3314 bool contiguous =
true;
3315 for (
size_t j = 1; j < numViewCols; ++j) {
3316 if (cols[j] != cols[j-1] +
static_cast<size_t> (1)) {
3322 if (numViewCols == 0) {
3324 return rcp (
new MV (this->getMap (), numViewCols));
3327 return this->subView (Teuchos::Range1D (cols[0], cols[numViewCols-1]));
3331 if (isConstantStride ()) {
3332 return rcp (
new MV (this->getMap (), view_, origView_, cols));
3335 Array<size_t> newcols (cols.size ());
3336 for (
size_t j = 0; j < numViewCols; ++j) {
3337 newcols[j] = whichVectors_[cols[j]];
3339 return rcp (
new MV (this->getMap (), view_, origView_, newcols ()));
3344 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3345 Teuchos::RCP<const MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3347 subView (
const Teuchos::Range1D& colRng)
const
3349 using ::Tpetra::Details::Behavior;
3351 using Kokkos::subview;
3352 using Teuchos::Array;
3356 const char tfecfFuncName[] =
"subView(Range1D): ";
3358 const size_t lclNumRows = this->getLocalLength ();
3359 const size_t numVecs = this->getNumVectors ();
3363 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3364 static_cast<size_t> (colRng.size ()) > numVecs, std::runtime_error,
3365 "colRng.size() = " << colRng.size () <<
" > this->getNumVectors() = "
3367 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3368 numVecs != 0 && colRng.size () != 0 &&
3369 (colRng.lbound () <
static_cast<Teuchos::Ordinal
> (0) ||
3370 static_cast<size_t> (colRng.ubound ()) >= numVecs),
3371 std::invalid_argument,
"Nonempty input range [" << colRng.lbound () <<
3372 "," << colRng.ubound () <<
"] exceeds the valid range of column indices "
3373 "[0, " << numVecs <<
"].");
3375 RCP<const MV> X_ret;
3385 const std::pair<size_t, size_t> rows (0, lclNumRows);
3386 if (colRng.size () == 0) {
3387 const std::pair<size_t, size_t> cols (0, 0);
3389 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3393 if (isConstantStride ()) {
3394 const std::pair<size_t, size_t> cols (colRng.lbound (),
3395 colRng.ubound () + 1);
3397 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3400 if (
static_cast<size_t> (colRng.size ()) ==
static_cast<size_t> (1)) {
3403 const std::pair<size_t, size_t> col (whichVectors_[0] + colRng.lbound (),
3404 whichVectors_[0] + colRng.ubound () + 1);
3406 X_ret = rcp (
new MV (this->getMap (), X_sub, origView_));
3409 Array<size_t> which (whichVectors_.begin () + colRng.lbound (),
3410 whichVectors_.begin () + colRng.ubound () + 1);
3411 X_ret = rcp (
new MV (this->getMap (), view_, origView_, which));
3416 const bool debug = Behavior::debug ();
3418 using Teuchos::Comm;
3419 using Teuchos::outArg;
3420 using Teuchos::REDUCE_MIN;
3421 using Teuchos::reduceAll;
3423 RCP<const Comm<int> > comm = this->getMap ().is_null () ?
3424 Teuchos::null : this->getMap ()->getComm ();
3425 if (! comm.is_null ()) {
3429 if (X_ret.is_null ()) {
3432 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
3433 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3434 (lclSuccess != 1, std::logic_error,
"X_ret (the subview of this "
3435 "MultiVector; the return value of this method) is null on some MPI "
3436 "process in this MultiVector's communicator. This should never "
3437 "happen. Please report this bug to the Tpetra developers.");
3438 if (! X_ret.is_null () &&
3439 X_ret->getNumVectors () !=
static_cast<size_t> (colRng.size ())) {
3442 reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
3443 outArg (gblSuccess));
3444 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3445 (lclSuccess != 1, std::logic_error,
"X_ret->getNumVectors() != "
3446 "colRng.size(), on at least one MPI process in this MultiVector's "
3447 "communicator. This should never happen. "
3448 "Please report this bug to the Tpetra developers.");
3455 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3456 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3461 return Teuchos::rcp_const_cast<MV> (this->subView (cols));
3465 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3466 Teuchos::RCP<MultiVector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3471 return Teuchos::rcp_const_cast<MV> (this->subView (colRng));
3475 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3481 using Kokkos::subview;
3482 typedef std::pair<size_t, size_t> range_type;
3483 const char tfecfFuncName[] =
"MultiVector(const MultiVector&, const size_t): ";
3486 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3487 (j >= numCols, std::invalid_argument,
"Input index j (== " << j
3488 <<
") exceeds valid column index range [0, " << numCols <<
" - 1].");
3490 static_cast<size_t> (j) :
3492 this->
view_ = takeSubview (X.
view_, Kokkos::ALL (), range_type (jj, jj+1));
3504 const size_t newSize = X.
imports_.extent (0) / numCols;
3506 newImports.d_view = subview (X.
imports_.d_view, range_type (0, newSize));
3507 newImports.h_view = subview (X.
imports_.h_view, range_type (0, newSize));
3510 const size_t newSize = X.
exports_.extent (0) / numCols;
3512 newExports.d_view = subview (X.
exports_.d_view, range_type (0, newSize));
3513 newExports.h_view = subview (X.
exports_.h_view, range_type (0, newSize));
3523 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3524 Teuchos::RCP<const Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3529 return Teuchos::rcp (
new V (*
this, j));
3533 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3534 Teuchos::RCP<Vector<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
3539 return Teuchos::rcp (
new V (*
this, j));
3543 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3546 get1dCopy (
const Teuchos::ArrayView<Scalar>& A,
const size_t LDA)
const
3548 using dev_view_type =
typename dual_view_type::t_dev;
3549 using host_view_type =
typename dual_view_type::t_host;
3551 using input_view_type = Kokkos::View<IST**, Kokkos::LayoutLeft,
3553 Kokkos::MemoryUnmanaged>;
3554 const char tfecfFuncName[] =
"get1dCopy: ";
3556 const size_t numRows = this->getLocalLength ();
3557 const size_t numCols = this->getNumVectors ();
3559 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3560 (LDA < numRows, std::runtime_error,
3561 "LDA = " << LDA <<
" < numRows = " << numRows <<
".");
3562 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3563 (numRows >
size_t (0) && numCols >
size_t (0) &&
3564 size_t (A.size ()) < LDA * (numCols - 1) + numRows,
3566 "A.size() = " << A.size () <<
", but its size must be at least "
3567 << (LDA * (numCols - 1) + numRows) <<
" to hold all the entries.");
3569 const std::pair<size_t, size_t> rowRange (0, numRows);
3570 const std::pair<size_t, size_t> colRange (0, numCols);
3572 input_view_type A_view_orig (
reinterpret_cast<IST*
> (A.getRawPtr ()),
3574 auto A_view = Kokkos::subview (A_view_orig, rowRange, colRange);
3581 const bool useHostVersion = this->need_sync_device ();
3583 dev_view_type srcView_dev;
3584 host_view_type srcView_host;
3585 if (useHostVersion) {
3586 srcView_host = this->getLocalViewHost ();
3589 srcView_dev = this->getLocalViewDevice ();
3592 if (this->isConstantStride ()) {
3593 if (useHostVersion) {
3601 for (
size_t j = 0; j < numCols; ++j) {
3602 const size_t srcCol = this->whichVectors_[j];
3603 auto dstColView = Kokkos::subview (A_view, rowRange, j);
3605 if (useHostVersion) {
3606 auto srcColView_host = Kokkos::subview (srcView_host, rowRange, srcCol);
3610 auto srcColView_dev = Kokkos::subview (srcView_dev, rowRange, srcCol);
3618 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3621 get2dCopy (
const Teuchos::ArrayView<
const Teuchos::ArrayView<Scalar> >& ArrayOfPtrs)
const
3624 const char tfecfFuncName[] =
"get2dCopy: ";
3625 const size_t numRows = this->getLocalLength ();
3626 const size_t numCols = this->getNumVectors ();
3628 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3629 static_cast<size_t> (ArrayOfPtrs.size ()) != numCols,
3630 std::runtime_error,
"Input array of pointers must contain as many "
3631 "entries (arrays) as the MultiVector has columns. ArrayOfPtrs.size() = "
3632 << ArrayOfPtrs.size () <<
" != getNumVectors() = " << numCols <<
".");
3634 if (numRows != 0 && numCols != 0) {
3636 for (
size_t j = 0; j < numCols; ++j) {
3637 const size_t dstLen =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3638 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3639 dstLen < numRows, std::invalid_argument,
"Array j = " << j <<
" of "
3640 "the input array of arrays is not long enough to fit all entries in "
3641 "that column of the MultiVector. ArrayOfPtrs[j].size() = " << dstLen
3642 <<
" < getLocalLength() = " << numRows <<
".");
3646 for (
size_t j = 0; j < numCols; ++j) {
3647 Teuchos::RCP<const V> X_j = this->getVector (j);
3648 const size_t LDA =
static_cast<size_t> (ArrayOfPtrs[j].size ());
3649 X_j->get1dCopy (ArrayOfPtrs[j], LDA);
3655 template <
class SC,
class LO,
class GO,
class NT>
3658 const bool markModified)
3675 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3676 Teuchos::ArrayRCP<const Scalar>
3680 if (getLocalLength () == 0 || getNumVectors () == 0) {
3681 return Teuchos::null;
3683 TEUCHOS_TEST_FOR_EXCEPTION(
3684 ! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3685 "get1dView: This MultiVector does not have constant stride, so it is "
3686 "not possible to view its data as a single array. You may check "
3687 "whether a MultiVector has constant stride by calling "
3688 "isConstantStride().");
3692 constexpr
bool markModified =
false;
3694 auto X_lcl = syncMVToHostIfNeededAndGetHostView (
const_cast<MV&
> (*
this),
3696 Teuchos::ArrayRCP<const impl_scalar_type> dataAsArcp =
3697 Kokkos::Compat::persistingView (X_lcl);
3698 return Teuchos::arcp_reinterpret_cast<const Scalar> (dataAsArcp);
3702 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3703 Teuchos::ArrayRCP<Scalar>
3707 if (this->getLocalLength () == 0 || this->getNumVectors () == 0) {
3708 return Teuchos::null;
3711 TEUCHOS_TEST_FOR_EXCEPTION
3712 (! isConstantStride (), std::runtime_error,
"Tpetra::MultiVector::"
3713 "get1dViewNonConst: This MultiVector does not have constant stride, "
3714 "so it is not possible to view its data as a single array. You may "
3715 "check whether a MultiVector has constant stride by calling "
3716 "isConstantStride().");
3717 constexpr
bool markModified =
true;
3718 auto X_lcl = syncMVToHostIfNeededAndGetHostView (*
this, markModified);
3719 Teuchos::ArrayRCP<impl_scalar_type> dataAsArcp =
3720 Kokkos::Compat::persistingView (X_lcl);
3721 return Teuchos::arcp_reinterpret_cast<Scalar> (dataAsArcp);
3725 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3726 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> >
3730 constexpr
bool markModified =
true;
3731 auto X_lcl = syncMVToHostIfNeededAndGetHostView (*
this, markModified);
3737 const size_t myNumRows = this->getLocalLength ();
3738 const size_t numCols = this->getNumVectors ();
3739 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3741 Teuchos::ArrayRCP<Teuchos::ArrayRCP<Scalar> > views (numCols);
3742 for (
size_t j = 0; j < numCols; ++j) {
3743 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3744 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3745 Teuchos::ArrayRCP<impl_scalar_type> X_lcl_j_arcp =
3746 Kokkos::Compat::persistingView (X_lcl_j);
3747 views[j] = Teuchos::arcp_reinterpret_cast<Scalar> (X_lcl_j_arcp);
3752 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3753 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> >
3760 constexpr
bool markModified =
false;
3762 auto X_lcl = syncMVToHostIfNeededAndGetHostView (
const_cast<MV&
> (*
this),
3768 const size_t myNumRows = this->getLocalLength ();
3769 const size_t numCols = this->getNumVectors ();
3770 const Kokkos::pair<size_t, size_t> rowRange (0, myNumRows);
3772 Teuchos::ArrayRCP<Teuchos::ArrayRCP<const Scalar> > views (numCols);
3773 for (
size_t j = 0; j < numCols; ++j) {
3774 const size_t col = this->isConstantStride () ? j : this->whichVectors_[j];
3775 auto X_lcl_j = Kokkos::subview (X_lcl, rowRange, col);
3776 Teuchos::ArrayRCP<const impl_scalar_type> X_lcl_j_arcp =
3777 Kokkos::Compat::persistingView (X_lcl_j);
3778 views[j] = Teuchos::arcp_reinterpret_cast<const Scalar> (X_lcl_j_arcp);
3783 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
3787 Teuchos::ETransp transB,
3788 const Scalar& alpha,
3793 using ::Tpetra::Details::ProfilingRegion;
3794 using Teuchos::CONJ_TRANS;
3795 using Teuchos::NO_TRANS;
3796 using Teuchos::TRANS;
3799 using Teuchos::rcpFromRef;
3801 using ATS = Kokkos::ArithTraits<impl_scalar_type>;
3803 using STS = Teuchos::ScalarTraits<Scalar>;
3805 const char tfecfFuncName[] =
"multiply: ";
3806 ProfilingRegion region (
"Tpetra::MV::multiply");
3838 const bool C_is_local = ! this->isDistributed ();
3848 auto myMap = this->getMap ();
3849 if (! myMap.is_null () && ! myMap->getComm ().is_null ()) {
3850 using Teuchos::REDUCE_MIN;
3851 using Teuchos::reduceAll;
3852 using Teuchos::outArg;
3854 auto comm = myMap->getComm ();
3855 const size_t A_nrows =
3857 const size_t A_ncols =
3859 const size_t B_nrows =
3861 const size_t B_ncols =
3864 const bool lclBad = this->getLocalLength () != A_nrows ||
3865 this->getNumVectors () != B_ncols || A_ncols != B_nrows;
3867 const int myRank = comm->getRank ();
3868 std::ostringstream errStrm;
3869 if (this->getLocalLength () != A_nrows) {
3870 errStrm <<
"Proc " << myRank <<
": this->getLocalLength()="
3871 << this->getLocalLength () <<
" != A_nrows=" << A_nrows
3872 <<
"." << std::endl;
3874 if (this->getNumVectors () != B_ncols) {
3875 errStrm <<
"Proc " << myRank <<
": this->getNumVectors()="
3876 << this->getNumVectors () <<
" != B_ncols=" << B_ncols
3877 <<
"." << std::endl;
3879 if (A_ncols != B_nrows) {
3880 errStrm <<
"Proc " << myRank <<
": A_ncols="
3881 << A_ncols <<
" != B_nrows=" << B_nrows
3882 <<
"." << std::endl;
3885 const int lclGood = lclBad ? 0 : 1;
3887 reduceAll<int, int> (*comm, REDUCE_MIN, lclGood, outArg (gblGood));
3889 std::ostringstream os;
3890 ::Tpetra::Details::gathervPrint (os, errStrm.str (), *comm);
3892 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3893 (
true, std::runtime_error,
"Inconsistent local dimensions on at "
3894 "least one process in this object's communicator." << std::endl
3896 <<
"C(" << (C_is_local ?
"local" :
"distr") <<
") = "
3898 << (transA == Teuchos::TRANS ?
"^T" :
3899 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
3900 <<
"(" << (A_is_local ?
"local" :
"distr") <<
") * "
3902 << (transA == Teuchos::TRANS ?
"^T" :
3903 (transA == Teuchos::CONJ_TRANS ?
"^H" :
""))
3904 <<
"(" << (B_is_local ?
"local" :
"distr") <<
")." << std::endl
3905 <<
"Global dimensions: C(" << this->getGlobalLength () <<
", "
3915 const bool Case1 = C_is_local && A_is_local && B_is_local;
3917 const bool Case2 = C_is_local && ! A_is_local && ! B_is_local &&
3918 transA != NO_TRANS &&
3921 const bool Case3 = ! C_is_local && ! A_is_local && B_is_local &&
3925 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3926 (! Case1 && ! Case2 && ! Case3, std::runtime_error,
3927 "Multiplication of op(A) and op(B) into *this is not a "
3928 "supported use case.");
3930 if (beta != STS::zero () && Case2) {
3936 const int myRank = this->getMap ()->getComm ()->getRank ();
3938 beta_local = ATS::zero ();
3947 if (! isConstantStride ()) {
3948 C_tmp = rcp (
new MV (*
this, Teuchos::Copy));
3950 C_tmp = rcp (
this,
false);
3953 RCP<const MV> A_tmp;
3955 A_tmp = rcp (
new MV (A, Teuchos::Copy));
3957 A_tmp = rcpFromRef (A);
3960 RCP<const MV> B_tmp;
3962 B_tmp = rcp (
new MV (B, Teuchos::Copy));
3964 B_tmp = rcpFromRef (B);
3967 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3968 (! C_tmp->isConstantStride () || ! B_tmp->isConstantStride () ||
3969 ! A_tmp->isConstantStride (), std::logic_error,
3970 "Failed to make temporary constant-stride copies of MultiVectors.");
3973 if (A_tmp->need_sync_device ()) {
3974 const_cast<MV&
> (*A_tmp).sync_device ();
3976 const LO A_lclNumRows = A_tmp->getLocalLength ();
3977 const LO A_numVecs = A_tmp->getNumVectors ();
3978 auto A_lcl = A_tmp->getLocalViewDevice ();
3979 auto A_sub = Kokkos::subview (A_lcl,
3980 std::make_pair (LO (0), A_lclNumRows),
3981 std::make_pair (LO (0), A_numVecs));
3983 if (B_tmp->need_sync_device ()) {
3984 const_cast<MV&
> (*B_tmp).sync_device ();
3986 const LO B_lclNumRows = B_tmp->getLocalLength ();
3987 const LO B_numVecs = B_tmp->getNumVectors ();
3988 auto B_lcl = B_tmp->getLocalViewDevice ();
3989 auto B_sub = Kokkos::subview (B_lcl,
3990 std::make_pair (LO (0), B_lclNumRows),
3991 std::make_pair (LO (0), B_numVecs));
3993 if (C_tmp->need_sync_device ()) {
3994 const_cast<MV&
> (*C_tmp).sync_device ();
3996 const LO C_lclNumRows = C_tmp->getLocalLength ();
3997 const LO C_numVecs = C_tmp->getNumVectors ();
3998 auto C_lcl = C_tmp->getLocalViewDevice ();
3999 auto C_sub = Kokkos::subview (C_lcl,
4000 std::make_pair (LO (0), C_lclNumRows),
4001 std::make_pair (LO (0), C_numVecs));
4002 const char ctransA = (transA == Teuchos::NO_TRANS ?
'N' :
4003 (transA == Teuchos::TRANS ?
'T' :
'C'));
4004 const char ctransB = (transB == Teuchos::NO_TRANS ?
'N' :
4005 (transB == Teuchos::TRANS ?
'T' :
'C'));
4008 ProfilingRegion regionGemm (
"Tpetra::MV::multiply-call-gemm");
4009 KokkosBlas::gemm (&ctransA, &ctransB, alpha_IST, A_sub, B_sub,
4013 if (! isConstantStride ()) {
4018 A_tmp = Teuchos::null;
4019 B_tmp = Teuchos::null;
4027 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4036 using Kokkos::subview;
4039 const char tfecfFuncName[] =
"elementWiseMultiply: ";
4041 const size_t lclNumRows = this->getLocalLength ();
4042 const size_t numVecs = this->getNumVectors ();
4044 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4046 std::runtime_error,
"MultiVectors do not have the same local length.");
4047 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4048 numVecs != B.
getNumVectors (), std::runtime_error,
"this->getNumVectors"
4049 "() = " << numVecs <<
" != B.getNumVectors() = " << B.
getNumVectors ()
4053 if (this->need_sync_device ()) {
4054 this->sync_device ();
4057 const_cast<V&
>(A).sync_device ();
4060 const_cast<MV&
>(B).sync_device ();
4062 this->modify_device ();
4064 auto this_view = this->getLocalViewDevice ();
4074 KokkosBlas::mult (scalarThis,
4077 subview (A_view, ALL (), 0),
4081 for (
size_t j = 0; j < numVecs; ++j) {
4082 const size_t C_col = isConstantStride () ? j : whichVectors_[j];
4084 KokkosBlas::mult (scalarThis,
4085 subview (this_view, ALL (), C_col),
4087 subview (A_view, ALL (), 0),
4088 subview (B_view, ALL (), B_col));
4093 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4099 using ::Tpetra::Details::ProfilingRegion;
4100 ProfilingRegion region (
"Tpetra::MV::reduce");
4102 const auto map = this->getMap ();
4103 if (map.get () ==
nullptr) {
4106 const auto comm = map->getComm ();
4107 if (comm.get () ==
nullptr) {
4113 const bool changed_on_device = this->need_sync_host ();
4114 if (changed_on_device) {
4118 this->modify_device ();
4119 auto X_lcl = this->getLocalViewDevice ();
4123 this->modify_host ();
4124 auto X_lcl = this->getLocalViewHost ();
4129 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4136 #ifdef HAVE_TPETRA_DEBUG
4137 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4138 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4139 TEUCHOS_TEST_FOR_EXCEPTION(
4140 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4142 "Tpetra::MultiVector::replaceLocalValue: row index " << lclRow
4143 <<
" is invalid. The range of valid row indices on this process "
4144 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4145 <<
", " << maxLocalIndex <<
"].");
4146 TEUCHOS_TEST_FOR_EXCEPTION(
4147 vectorIndexOutOfRange(col),
4149 "Tpetra::MultiVector::replaceLocalValue: vector index " << col
4150 <<
" of the multivector is invalid.");
4152 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4153 view_.h_view (lclRow, colInd) = ScalarValue;
4157 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4163 const bool atomic)
const
4165 #ifdef HAVE_TPETRA_DEBUG
4166 const LocalOrdinal minLocalIndex = this->getMap()->getMinLocalIndex();
4167 const LocalOrdinal maxLocalIndex = this->getMap()->getMaxLocalIndex();
4168 TEUCHOS_TEST_FOR_EXCEPTION(
4169 lclRow < minLocalIndex || lclRow > maxLocalIndex,
4171 "Tpetra::MultiVector::sumIntoLocalValue: row index " << lclRow
4172 <<
" is invalid. The range of valid row indices on this process "
4173 << this->getMap()->getComm()->getRank() <<
" is [" << minLocalIndex
4174 <<
", " << maxLocalIndex <<
"].");
4175 TEUCHOS_TEST_FOR_EXCEPTION(
4176 vectorIndexOutOfRange(col),
4178 "Tpetra::MultiVector::sumIntoLocalValue: vector index " << col
4179 <<
" of the multivector is invalid.");
4181 const size_t colInd = isConstantStride () ? col : whichVectors_[col];
4183 Kokkos::atomic_add (& (view_.h_view(lclRow, colInd)), value);
4186 view_.h_view (lclRow, colInd) += value;
4191 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4200 const LocalOrdinal lclRow = this->map_->getLocalElement (gblRow);
4201 #ifdef HAVE_TPETRA_DEBUG
4202 const char tfecfFuncName[] =
"replaceGlobalValue: ";
4203 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4204 (lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4206 "Global row index " << gblRow <<
" is not present on this process "
4207 << this->getMap ()->getComm ()->getRank () <<
".");
4208 TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4209 (this->vectorIndexOutOfRange (col), std::runtime_error,
4210 "Vector index " << col <<
" of the MultiVector is invalid.");
4211 #endif // HAVE_TPETRA_DEBUG
4212 this->replaceLocalValue (lclRow, col, ScalarValue);
4215 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4221 const bool atomic)
const
4225 const LocalOrdinal lclRow = this->map_->getLocalElement (globalRow);
4226 #ifdef HAVE_TEUCHOS_DEBUG
4227 TEUCHOS_TEST_FOR_EXCEPTION(
4228 lclRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid (),
4230 "Tpetra::MultiVector::sumIntoGlobalValue: Global row index " << globalRow
4231 <<
" is not present on this process "
4232 << this->getMap ()->getComm ()->getRank () <<
".");
4233 TEUCHOS_TEST_FOR_EXCEPTION(
4234 vectorIndexOutOfRange(col),
4236 "Tpetra::MultiVector::sumIntoGlobalValue: Vector index " << col
4237 <<
" of the multivector is invalid.");
4239 this->sumIntoLocalValue (lclRow, col, value, atomic);
4242 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4244 Teuchos::ArrayRCP<T>
4250 typename dual_view_type::array_layout,
4252 const size_t col = isConstantStride () ? j : whichVectors_[j];
4253 col_dual_view_type X_col =
4254 Kokkos::subview (view_, Kokkos::ALL (), col);
4255 return Kokkos::Compat::persistingView (X_col.d_view);
4258 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
4259 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4266 #endif // TPETRA_ENABLE_DEPRECATED_CODE
4268 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4272 view_.clear_sync_state ();
4275 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4282 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4286 view_.sync_device ();
4289 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4293 return view_.need_sync_host ();
4296 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4300 return view_.need_sync_device ();
4303 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4307 view_.modify_device ();
4310 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4314 view_.modify_host ();
4317 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4321 return view_.view_device ();
4324 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4328 return view_.view_host ();
4331 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4336 using Teuchos::TypeNameTraits;
4338 std::ostringstream out;
4339 out <<
"\"" << className <<
"\": {";
4340 out <<
"Template parameters: {Scalar: " << TypeNameTraits<Scalar>::name ()
4341 <<
", LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name ()
4342 <<
", GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name ()
4343 <<
", Node" << Node::name ()
4345 if (this->getObjectLabel () !=
"") {
4346 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4348 out <<
", numRows: " << this->getGlobalLength ();
4349 if (className !=
"Tpetra::Vector") {
4350 out <<
", numCols: " << this->getNumVectors ()
4351 <<
", isConstantStride: " << this->isConstantStride ();
4353 if (this->isConstantStride ()) {
4354 out <<
", columnStride: " << this->getStride ();
4361 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4366 return this->descriptionImpl (
"Tpetra::MultiVector");
4369 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4374 typedef LocalOrdinal LO;
4377 if (vl <= Teuchos::VERB_LOW) {
4378 return std::string ();
4380 auto map = this->getMap ();
4381 if (map.is_null ()) {
4382 return std::string ();
4384 auto outStringP = Teuchos::rcp (
new std::ostringstream ());
4385 auto outp = Teuchos::getFancyOStream (outStringP);
4386 Teuchos::FancyOStream& out = *outp;
4387 auto comm = map->getComm ();
4388 const int myRank = comm->getRank ();
4389 const int numProcs = comm->getSize ();
4391 out <<
"Process " << myRank <<
" of " << numProcs <<
":" << endl;
4392 Teuchos::OSTab tab1 (out);
4395 const LO lclNumRows =
static_cast<LO
> (this->getLocalLength ());
4396 out <<
"Local number of rows: " << lclNumRows << endl;
4398 if (vl > Teuchos::VERB_MEDIUM) {
4401 if (this->getNumVectors () !=
static_cast<size_t> (1)) {
4402 out <<
"isConstantStride: " << this->isConstantStride () << endl;
4404 if (this->isConstantStride ()) {
4405 out <<
"Column stride: " << this->getStride () << endl;
4408 if (vl > Teuchos::VERB_HIGH && lclNumRows > 0) {
4412 typename dual_view_type::t_host X_host;
4413 if (need_sync_host ()) {
4419 auto X_dev = getLocalViewDevice ();
4420 auto X_host_copy = Kokkos::create_mirror_view (X_dev);
4422 X_host = X_host_copy;
4428 X_host = getLocalViewHost ();
4432 out <<
"Values: " << endl
4434 const LO numCols =
static_cast<LO
> (this->getNumVectors ());
4436 for (LO i = 0; i < lclNumRows; ++i) {
4438 if (i + 1 < lclNumRows) {
4444 for (LO i = 0; i < lclNumRows; ++i) {
4445 for (LO j = 0; j < numCols; ++j) {
4447 if (j + 1 < numCols) {
4451 if (i + 1 < lclNumRows) {
4461 return outStringP->str ();
4464 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4468 const std::string& className,
4469 const Teuchos::EVerbosityLevel verbLevel)
const
4471 using Teuchos::TypeNameTraits;
4472 using Teuchos::VERB_DEFAULT;
4473 using Teuchos::VERB_NONE;
4474 using Teuchos::VERB_LOW;
4476 const Teuchos::EVerbosityLevel vl =
4477 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
4479 if (vl == VERB_NONE) {
4486 auto map = this->getMap ();
4487 if (map.is_null ()) {
4490 auto comm = map->getComm ();
4491 if (comm.is_null ()) {
4495 const int myRank = comm->getRank ();
4504 Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
4508 tab0 = Teuchos::rcp (
new Teuchos::OSTab (out));
4509 out <<
"\"" << className <<
"\":" << endl;
4510 tab1 = Teuchos::rcp (
new Teuchos::OSTab (out));
4512 out <<
"Template parameters:" << endl;
4513 Teuchos::OSTab tab2 (out);
4514 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
4515 <<
"LocalOrdinal: " << TypeNameTraits<LocalOrdinal>::name () << endl
4516 <<
"GlobalOrdinal: " << TypeNameTraits<GlobalOrdinal>::name () << endl
4517 <<
"Node: " << Node::name () << endl;
4519 if (this->getObjectLabel () !=
"") {
4520 out <<
"Label: \"" << this->getObjectLabel () <<
"\", ";
4522 out <<
"Global number of rows: " << this->getGlobalLength () << endl;
4523 if (className !=
"Tpetra::Vector") {
4524 out <<
"Number of columns: " << this->getNumVectors () << endl;
4531 if (vl > VERB_LOW) {
4532 const std::string lclStr = this->localDescribeToString (vl);
4533 ::Tpetra::Details::gathervPrint (out, lclStr, *comm);
4537 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4540 describe (Teuchos::FancyOStream &out,
4541 const Teuchos::EVerbosityLevel verbLevel)
const
4543 this->describeImpl (out,
"Tpetra::MultiVector", verbLevel);
4546 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4551 replaceMap (newMap);
4554 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4559 using ::Tpetra::Details::localDeepCopy;
4560 const char prefix[] =
"Tpetra::MultiVector::assign: ";
4562 TEUCHOS_TEST_FOR_EXCEPTION
4564 this->getNumVectors () != src.
getNumVectors (), std::invalid_argument,
4565 prefix <<
"Global dimensions of the two Tpetra::MultiVector "
4566 "objects do not match. src has dimensions [" << src.
getGlobalLength ()
4567 <<
"," << src.
getNumVectors () <<
"], and *this has dimensions ["
4568 << this->getGlobalLength () <<
"," << this->getNumVectors () <<
"].");
4570 TEUCHOS_TEST_FOR_EXCEPTION
4571 (this->getLocalLength () != src.
getLocalLength (), std::invalid_argument,
4572 prefix <<
"The local row counts of the two Tpetra::MultiVector "
4573 "objects do not match. src has " << src.
getLocalLength () <<
" row(s) "
4574 <<
" and *this has " << this->getLocalLength () <<
" row(s).");
4580 this->clear_sync_state();
4581 this->modify_device ();
4586 if (src_last_updated_on_host) {
4587 localDeepCopy (this->getLocalViewDevice (),
4589 this->isConstantStride (),
4591 this->whichVectors_ (),
4595 localDeepCopy (this->getLocalViewDevice (),
4597 this->isConstantStride (),
4599 this->whichVectors_ (),
4604 template <
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
4608 using ::Tpetra::Details::PackTraits;
4611 typename Kokkos::View<int*, device_type>::HostMirror::execution_space;
4613 const size_t l1 = this->getLocalLength();
4615 if ((l1!=l2) || (this->getNumVectors() != vec.
getNumVectors())) {
4622 auto v1 = this->getLocalViewHost ();
4624 if (PackTraits<ST, HES>::packValueCount (v1(0,0)) !=
4625 PackTraits<ST, HES>::packValueCount (v2(0,0))) {
4632 template <
class ST,
class LO,
class GO,
class NT>
4635 std::swap(mv.
map_, this->map_);
4636 std::swap(mv.
view_, this->view_);
4637 std::swap(mv.
origView_, this->origView_);
4641 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4642 template <
class ST,
class LO,
class GO,
class NT>
4645 const Teuchos::SerialDenseMatrix<int, ST>& src)
4647 using ::Tpetra::Details::localDeepCopy;
4649 using IST =
typename MV::impl_scalar_type;
4650 using input_view_type =
4651 Kokkos::View<
const IST**, Kokkos::LayoutLeft,
4652 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4653 using pair_type = std::pair<LO, LO>;
4655 const LO numRows =
static_cast<LO
> (src.numRows ());
4656 const LO numCols =
static_cast<LO
> (src.numCols ());
4658 TEUCHOS_TEST_FOR_EXCEPTION
4661 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4662 << dst.
getMap ()->getComm ()->getRank () <<
", dst is "
4664 <<
", but src is " << numRows <<
" x " << numCols <<
".");
4666 const IST* src_raw =
reinterpret_cast<const IST*
> (src.values ());
4667 input_view_type src_orig (src_raw, src.stride (), numCols);
4668 input_view_type src_view =
4669 Kokkos::subview (src_orig, pair_type (0, numRows), Kokkos::ALL ());
4673 constexpr
bool src_isConstantStride =
true;
4674 Teuchos::ArrayView<const size_t> srcWhichVectors (
nullptr, 0);
4678 src_isConstantStride,
4679 getMultiVectorWhichVectors (dst),
4683 template <
class ST,
class LO,
class GO,
class NT>
4685 deep_copy (Teuchos::SerialDenseMatrix<int, ST>& dst,
4686 const MultiVector<ST, LO, GO, NT>& src)
4688 using ::Tpetra::Details::localDeepCopy;
4689 using MV = MultiVector<ST, LO, GO, NT>;
4690 using IST =
typename MV::impl_scalar_type;
4691 using output_view_type =
4692 Kokkos::View<IST**, Kokkos::LayoutLeft,
4693 Kokkos::HostSpace, Kokkos::MemoryUnmanaged>;
4694 using pair_type = std::pair<LO, LO>;
4696 const LO numRows =
static_cast<LO
> (dst.numRows ());
4697 const LO numCols =
static_cast<LO
> (dst.numCols ());
4699 TEUCHOS_TEST_FOR_EXCEPTION
4700 (numRows !=
static_cast<LO
> (src.getLocalLength ()) ||
4701 numCols !=
static_cast<LO
> (src.getNumVectors ()),
4702 std::invalid_argument,
"Tpetra::deep_copy: On Process "
4703 << src.getMap ()->getComm ()->getRank () <<
", src is "
4704 << src.getLocalLength () <<
" x " << src.getNumVectors ()
4705 <<
", but dst is " << numRows <<
" x " << numCols <<
".");
4707 IST* dst_raw =
reinterpret_cast<IST*
> (dst.values ());
4708 output_view_type dst_orig (dst_raw, dst.stride (), numCols);
4710 Kokkos::subview (dst_orig, pair_type (0, numRows), Kokkos::ALL ());
4712 constexpr
bool dst_isConstantStride =
true;
4713 Teuchos::ArrayView<const size_t> dstWhichVectors (
nullptr, 0);
4716 if (src.need_sync_host ()) {
4717 localDeepCopy (dst_view,
4718 src.getLocalViewDevice (),
4719 dst_isConstantStride,
4720 src.isConstantStride (),
4722 getMultiVectorWhichVectors (src));
4726 src.getLocalViewHost (),
4727 dst_isConstantStride,
4728 src.isConstantStride (),
4730 getMultiVectorWhichVectors (src));
4733 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
4735 template <
class Scalar,
class LO,
class GO,
class NT>
4736 Teuchos::RCP<MultiVector<Scalar, LO, GO, NT> >
4740 typedef MultiVector<Scalar, LO, GO, NT> MV;
4741 return Teuchos::rcp (
new MV (map, numVectors));
4744 template <
class ST,
class LO,
class GO,
class NT>
4745 MultiVector<ST, LO, GO, NT>
4762 #ifdef HAVE_TPETRACORE_TEUCHOSNUMERICS
4763 # define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4764 template class MultiVector< SCALAR , LO , GO , NODE >; \
4765 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src); \
4766 template Teuchos::RCP<MultiVector< SCALAR , LO , GO , NODE > > createMultiVector (const Teuchos::RCP<const Map<LO, GO, NODE> >& map, size_t numVectors); \
4767 template void deep_copy (MultiVector<SCALAR, LO, GO, NODE>& dst, const Teuchos::SerialDenseMatrix<int, SCALAR>& src); \
4768 template void deep_copy (Teuchos::SerialDenseMatrix<int, SCALAR>& dst, const MultiVector<SCALAR, LO, GO, NODE>& src);
4771 # define TPETRA_MULTIVECTOR_INSTANT(SCALAR,LO,GO,NODE) \
4772 template class MultiVector< SCALAR , LO , GO , NODE >; \
4773 template MultiVector< SCALAR , LO , GO , NODE > createCopy( const MultiVector< SCALAR , LO , GO , NODE >& src);
4775 #endif // HAVE_TPETRACORE_TEUCHOSNUMERICS
4777 #endif // TPETRA_MULTIVECTOR_DEF_HPP