42 #ifndef TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP 43 #define TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP 50 #include "Teuchos_TimeMonitor.hpp" 51 #ifdef HAVE_TPETRA_DEBUG 53 #endif // HAVE_TPETRA_DEBUG 67 #if defined(__CUDACC__) 69 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 70 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 71 # endif // defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 73 #elif defined(__GNUC__) 75 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 76 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 77 # endif // defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 79 #else // some other compiler 82 # if ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 83 # define TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 1 84 # endif // ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA) 85 #endif // defined(__CUDACC__), defined(__GNUC__) 89 namespace Experimental {
95 template<
class AlphaCoeffType,
97 class MatrixValuesType,
101 class BcrsApplyNoTrans1VecTeamFunctor {
103 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
104 "MatrixValuesType must be a Kokkos::View.");
105 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
106 "OutVecType must be a Kokkos::View.");
107 static_assert (Kokkos::Impl::is_view<InVecType>::value,
108 "InVecType must be a Kokkos::View.");
109 static_assert (std::is_same<MatrixValuesType,
110 typename MatrixValuesType::const_type>::value,
111 "MatrixValuesType must be a const Kokkos::View.");
112 static_assert (std::is_same<OutVecType,
113 typename OutVecType::non_const_type>::value,
114 "OutVecType must be a nonconst Kokkos::View.");
115 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
116 "InVecType must be a const Kokkos::View.");
117 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
118 "MatrixValuesType must be a rank-1 Kokkos::View.");
119 static_assert (static_cast<int> (InVecType::rank) == 1,
120 "InVecType must be a rank-1 Kokkos::View.");
121 static_assert (static_cast<int> (OutVecType::rank) == 1,
122 "OutVecType must be a rank-1 Kokkos::View.");
123 typedef typename MatrixValuesType::non_const_value_type scalar_type;
124 typedef typename GraphType::device_type device_type;
125 typedef typename device_type::execution_space execution_space;
126 typedef typename execution_space::scratch_memory_space shmem_space;
130 typedef typename std::remove_const<typename GraphType::data_type>::type
133 typedef Kokkos::TeamPolicy<Kokkos::Schedule<Kokkos::Dynamic>,
135 local_ordinal_type> policy_type;
142 void setX (
const InVecType& X) { X_ = X; }
150 void setY (
const OutVecType& Y) { Y_ = Y; }
155 KOKKOS_INLINE_FUNCTION local_ordinal_type
156 getScratchSizePerTeam ()
const 160 typedef typename std::decay<decltype (Y_(0))>::type y_value_type;
161 return blockSize_ *
sizeof (y_value_type);
167 KOKKOS_INLINE_FUNCTION local_ordinal_type
168 getScratchSizePerThread ()
const 172 typedef typename std::decay<decltype (Y_(0))>::type y_value_type;
173 return blockSize_ *
sizeof (y_value_type);
177 KOKKOS_INLINE_FUNCTION local_ordinal_type
178 getNumLclMeshRows ()
const 180 return ptr_.extent (0) == 0 ?
181 static_cast<local_ordinal_type
> (0) :
182 static_cast<local_ordinal_type> (ptr_.extent (0) - 1);
185 static constexpr local_ordinal_type defaultRowsPerTeam = 20;
189 local_ordinal_type getNumTeams ()
const {
196 BcrsApplyNoTrans1VecTeamFunctor (
const typename std::decay<AlphaCoeffType>::type& alpha,
197 const GraphType& graph,
198 const MatrixValuesType& val,
199 const local_ordinal_type blockSize,
201 const typename std::decay<BetaCoeffType>::type& beta,
203 const local_ordinal_type rowsPerTeam = defaultRowsPerTeam) :
205 ptr_ (graph.row_map),
206 ind_ (graph.entries),
208 blockSize_ (blockSize),
212 rowsPerTeam_ (rowsPerTeam)
219 KOKKOS_INLINE_FUNCTION
void 220 operator () (
const typename policy_type::member_type& member)
const 226 using Kokkos::Details::ArithTraits;
232 using Kokkos::parallel_for;
233 using Kokkos::subview;
234 typedef local_ordinal_type LO;
235 typedef typename decltype (ptr_)::non_const_value_type offset_type;
236 typedef Kokkos::View<
typename OutVecType::non_const_value_type*,
237 shmem_space, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
239 typedef Kokkos::View<
typename OutVecType::non_const_value_type*,
242 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
244 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
247 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
250 const LO leagueRank = member.league_rank();
255 shared_array_type threadLocalMem =
256 shared_array_type (member.thread_scratch (1), blockSize_ * rowsPerTeam_);
261 const LO numLclMeshRows = getNumLclMeshRows ();
262 const LO rowBeg = leagueRank * rowsPerTeam_;
263 const LO rowTmp = rowBeg + rowsPerTeam_;
264 const LO rowEnd = rowTmp < numLclMeshRows ? rowTmp : numLclMeshRows;
275 parallel_for (Kokkos::TeamThreadRange (member, rowBeg, rowEnd),
276 [&] (
const LO& lclRow) {
278 out_little_vec_type Y_tlm (threadLocalMem.data () + blockSize_ * member.team_rank (), blockSize_);
280 const offset_type Y_ptBeg = lclRow * blockSize_;
281 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
283 subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
284 if (beta_ == ArithTraits<BetaCoeffType>::zero ()) {
285 FILL (Y_tlm, ArithTraits<BetaCoeffType>::zero ());
287 else if (beta_ == ArithTraits<BetaCoeffType>::one ()) {
295 if (alpha_ != ArithTraits<AlphaCoeffType>::zero ()) {
296 const offset_type blkBeg = ptr_[lclRow];
297 const offset_type blkEnd = ptr_[lclRow+1];
299 const offset_type bs2 = blockSize_ * blockSize_;
300 for (offset_type absBlkOff = blkBeg; absBlkOff < blkEnd;
302 little_block_type A_cur (val_.data () + absBlkOff * bs2,
303 blockSize_, blockSize_);
304 const offset_type X_blkCol = ind_[absBlkOff];
305 const offset_type X_ptBeg = X_blkCol * blockSize_;
306 const offset_type X_ptEnd = X_ptBeg + blockSize_;
308 subview (X_, ::Kokkos::make_pair (X_ptBeg, X_ptEnd));
310 GEMV (alpha_, A_cur, X_cur, Y_tlm);
318 typename std::decay<AlphaCoeffType>::type alpha_;
319 typename GraphType::row_map_type::const_type ptr_;
320 typename GraphType::entries_type::const_type ind_;
321 MatrixValuesType val_;
322 local_ordinal_type blockSize_;
324 typename std::decay<BetaCoeffType>::type beta_;
326 local_ordinal_type rowsPerTeam_;
330 template<
class AlphaCoeffType,
332 class MatrixValuesType,
336 class BcrsApplyNoTrans1VecFunctor {
338 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
339 "MatrixValuesType must be a Kokkos::View.");
340 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
341 "OutVecType must be a Kokkos::View.");
342 static_assert (Kokkos::Impl::is_view<InVecType>::value,
343 "InVecType must be a Kokkos::View.");
344 static_assert (std::is_same<MatrixValuesType,
345 typename MatrixValuesType::const_type>::value,
346 "MatrixValuesType must be a const Kokkos::View.");
347 static_assert (std::is_same<OutVecType,
348 typename OutVecType::non_const_type>::value,
349 "OutVecType must be a nonconst Kokkos::View.");
350 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
351 "InVecType must be a const Kokkos::View.");
352 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
353 "MatrixValuesType must be a rank-1 Kokkos::View.");
354 static_assert (static_cast<int> (InVecType::rank) == 1,
355 "InVecType must be a rank-1 Kokkos::View.");
356 static_assert (static_cast<int> (OutVecType::rank) == 1,
357 "OutVecType must be a rank-1 Kokkos::View.");
358 typedef typename MatrixValuesType::non_const_value_type scalar_type;
361 typedef typename GraphType::device_type device_type;
364 typedef typename std::remove_const<typename GraphType::data_type>::type
367 typedef Kokkos::RangePolicy<Kokkos::Schedule<Kokkos::Dynamic>,
368 typename device_type::execution_space,
369 local_ordinal_type> policy_type;
376 void setX (
const InVecType& X) { X_ = X; }
384 void setY (
const OutVecType& Y) { Y_ = Y; }
386 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_coeff_type;
387 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_coeff_type;
390 BcrsApplyNoTrans1VecFunctor (
const alpha_coeff_type& alpha,
391 const GraphType& graph,
392 const MatrixValuesType& val,
393 const local_ordinal_type blockSize,
395 const beta_coeff_type& beta,
396 const OutVecType& Y) :
398 ptr_ (graph.row_map),
399 ind_ (graph.entries),
401 blockSize_ (blockSize),
407 KOKKOS_INLINE_FUNCTION
void 408 operator () (
const local_ordinal_type& lclRow)
const 414 using Kokkos::Details::ArithTraits;
420 using Kokkos::parallel_for;
421 using Kokkos::subview;
422 typedef typename decltype (ptr_)::non_const_value_type offset_type;
423 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
426 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
429 const offset_type Y_ptBeg = lclRow * blockSize_;
430 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
431 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
435 if (beta_ == ArithTraits<beta_coeff_type>::zero ()) {
436 FILL (Y_cur, ArithTraits<beta_coeff_type>::zero ());
438 else if (beta_ != ArithTraits<beta_coeff_type>::one ()) {
442 if (alpha_ != ArithTraits<alpha_coeff_type>::zero ()) {
443 const offset_type blkBeg = ptr_[lclRow];
444 const offset_type blkEnd = ptr_[lclRow+1];
446 const offset_type bs2 = blockSize_ * blockSize_;
447 for (offset_type absBlkOff = blkBeg; absBlkOff < blkEnd;
449 little_block_type A_cur (val_.data () + absBlkOff * bs2,
450 blockSize_, blockSize_);
451 const offset_type X_blkCol = ind_[absBlkOff];
452 const offset_type X_ptBeg = X_blkCol * blockSize_;
453 const offset_type X_ptEnd = X_ptBeg + blockSize_;
454 auto X_cur = subview (X_, ::Kokkos::make_pair (X_ptBeg, X_ptEnd));
456 GEMV (alpha_, A_cur, X_cur, Y_cur);
462 alpha_coeff_type alpha_;
463 typename GraphType::row_map_type::const_type ptr_;
464 typename GraphType::entries_type::const_type ind_;
465 MatrixValuesType val_;
466 local_ordinal_type blockSize_;
468 beta_coeff_type beta_;
472 template<
class AlphaCoeffType,
474 class MatrixValuesType,
475 class InMultiVecType,
477 class OutMultiVecType>
479 bcrsLocalApplyNoTrans (
const AlphaCoeffType& alpha,
480 const GraphType& graph,
481 const MatrixValuesType& val,
482 const typename std::remove_const<typename GraphType::data_type>::type blockSize,
483 const InMultiVecType& X,
484 const BetaCoeffType& beta,
485 const OutMultiVecType& Y
487 ,
const typename std::remove_const<typename GraphType::data_type>::type rowsPerTeam = 20
491 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
492 "MatrixValuesType must be a Kokkos::View.");
493 static_assert (Kokkos::Impl::is_view<OutMultiVecType>::value,
494 "OutMultiVecType must be a Kokkos::View.");
495 static_assert (Kokkos::Impl::is_view<InMultiVecType>::value,
496 "InMultiVecType must be a Kokkos::View.");
497 static_assert (static_cast<int> (MatrixValuesType::rank) == 1,
498 "MatrixValuesType must be a rank-1 Kokkos::View.");
499 static_assert (static_cast<int> (OutMultiVecType::rank) == 2,
500 "OutMultiVecType must be a rank-2 Kokkos::View.");
501 static_assert (static_cast<int> (InMultiVecType::rank) == 2,
502 "InMultiVecType must be a rank-2 Kokkos::View.");
504 typedef typename MatrixValuesType::const_type matrix_values_type;
505 typedef typename OutMultiVecType::non_const_type out_multivec_type;
506 typedef typename InMultiVecType::const_type in_multivec_type;
507 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_type;
508 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_type;
509 typedef typename std::remove_const<typename GraphType::data_type>::type LO;
511 const LO numLocalMeshRows = graph.row_map.extent (0) == 0 ?
512 static_cast<LO
> (0) :
513 static_cast<LO> (graph.row_map.extent (0) - 1);
514 const LO numVecs = Y.extent (1);
515 if (numLocalMeshRows == 0 || numVecs == 0) {
522 in_multivec_type X_in = X;
523 out_multivec_type Y_out = Y;
528 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0)) in_vec_type;
529 typedef decltype (Kokkos::subview (Y_out, Kokkos::ALL (), 0)) out_vec_type;
531 typedef BcrsApplyNoTrans1VecTeamFunctor<alpha_type, GraphType,
532 matrix_values_type, in_vec_type, beta_type, out_vec_type> functor_type;
534 typedef BcrsApplyNoTrans1VecFunctor<alpha_type, GraphType,
535 matrix_values_type, in_vec_type, beta_type, out_vec_type> functor_type;
537 typedef typename functor_type::policy_type policy_type;
539 auto X_0 = Kokkos::subview (X_in, Kokkos::ALL (), 0);
540 auto Y_0 = Kokkos::subview (Y_out, Kokkos::ALL (), 0);
542 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0, rowsPerTeam);
543 const LO numTeams = functor.getNumTeams ();
544 policy_type policy (numTeams, Kokkos::AUTO ());
550 const LO scratchSizePerTeam = functor.getScratchSizePerTeam ();
551 const LO scratchSizePerThread = functor.getScratchSizePerThread ();
553 policy.set_scratch_size (level,
554 Kokkos::PerTeam (scratchSizePerTeam),
555 Kokkos::PerThread (scratchSizePerThread));
558 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
559 policy_type policy (0, numLocalMeshRows);
563 Kokkos::parallel_for (policy, functor);
566 for (LO j = 1; j < numVecs; ++j) {
567 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
568 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
571 Kokkos::parallel_for (policy, functor);
581 template<
class Scalar,
class LO,
class GO,
class Node>
582 class GetLocalDiagCopy {
584 typedef typename Node::device_type device_type;
585 typedef size_t diag_offset_type;
586 typedef Kokkos::View<
const size_t*, device_type,
587 Kokkos::MemoryUnmanaged> diag_offsets_type;
588 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
589 typedef typename global_graph_type::local_graph_type local_graph_type;
590 typedef typename local_graph_type::row_map_type row_offsets_type;
591 typedef typename ::Tpetra::Experimental::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
592 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
593 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
596 GetLocalDiagCopy (
const diag_type& diag,
597 const values_type& val,
598 const diag_offsets_type& diagOffsets,
599 const row_offsets_type& ptr,
600 const LO blockSize) :
602 diagOffsets_ (diagOffsets),
604 blockSize_ (blockSize),
605 offsetPerBlock_ (blockSize_*blockSize_),
609 KOKKOS_INLINE_FUNCTION
void 610 operator() (
const LO& lclRowInd)
const 615 const size_t absOffset = ptr_[lclRowInd];
618 const size_t relOffset = diagOffsets_[lclRowInd];
621 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
625 typedef Kokkos::View<
const IST**, Kokkos::LayoutRight,
626 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
627 const_little_block_type;
628 const_little_block_type D_in (val_.data () + pointOffset,
629 blockSize_, blockSize_);
630 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
636 diag_offsets_type diagOffsets_;
637 row_offsets_type ptr_;
644 template<
class Scalar,
class LO,
class GO,
class Node>
646 BlockCrsMatrix<Scalar, LO, GO, Node>::
647 markLocalErrorAndGetStream ()
649 * (this->localError_) =
true;
650 if ((*errs_).is_null ()) {
651 *errs_ = Teuchos::rcp (
new std::ostringstream ());
656 template<
class Scalar,
class LO,
class GO,
class Node>
661 blockSize_ (static_cast<LO> (0)),
666 localError_ (new bool (false)),
667 errs_ (new
Teuchos::RCP<std::ostringstream> ())
671 template<
class Scalar,
class LO,
class GO,
class Node>
674 const LO blockSize) :
678 blockSize_ (blockSize),
682 offsetPerBlock_ (blockSize * blockSize),
683 localError_ (new bool (false)),
684 errs_ (new
Teuchos::RCP<std::ostringstream> ())
686 TEUCHOS_TEST_FOR_EXCEPTION(
687 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::Experimental::" 688 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted " 689 "rows (isSorted() is false). This class assumes sorted rows.");
691 graphRCP_ = Teuchos::rcpFromRef(graph_);
697 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
698 TEUCHOS_TEST_FOR_EXCEPTION(
699 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::Experimental::" 700 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
701 " <= 0. The block size must be positive.");
707 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
708 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
711 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
716 typedef typename crs_graph_type::local_graph_type::entries_type entries_type;
717 typedef typename entries_type::HostMirror::non_const_type nc_host_entries_type;
720 nc_host_entries_type ind_h_nc = Kokkos::create_mirror_view (ind_d);
726 val_ = decltype (val_) (
"val", numValEnt);
729 template<
class Scalar,
class LO,
class GO,
class Node>
734 const LO blockSize) :
738 domainPointMap_ (domainPointMap),
739 rangePointMap_ (rangePointMap),
740 blockSize_ (blockSize),
744 offsetPerBlock_ (blockSize * blockSize),
745 localError_ (new bool (false)),
746 errs_ (new
Teuchos::RCP<std::ostringstream> ())
748 TEUCHOS_TEST_FOR_EXCEPTION(
749 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::Experimental::" 750 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted " 751 "rows (isSorted() is false). This class assumes sorted rows.");
753 graphRCP_ = Teuchos::rcpFromRef(graph_);
759 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
760 TEUCHOS_TEST_FOR_EXCEPTION(
761 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::Experimental::" 762 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
763 " <= 0. The block size must be positive.");
766 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
767 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
770 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
775 typedef typename crs_graph_type::local_graph_type::entries_type entries_type;
776 typedef typename entries_type::HostMirror::non_const_type nc_host_entries_type;
779 nc_host_entries_type ind_h_nc = Kokkos::create_mirror_view (ind_d);
785 val_ = decltype (val_) (
"val", numValEnt);
788 template<
class Scalar,
class LO,
class GO,
class Node>
789 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
794 return Teuchos::rcp (
new map_type (domainPointMap_));
797 template<
class Scalar,
class LO,
class GO,
class Node>
798 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
803 return Teuchos::rcp (
new map_type (rangePointMap_));
806 template<
class Scalar,
class LO,
class GO,
class Node>
807 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
814 template<
class Scalar,
class LO,
class GO,
class Node>
815 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
822 template<
class Scalar,
class LO,
class GO,
class Node>
830 template<
class Scalar,
class LO,
class GO,
class Node>
838 template<
class Scalar,
class LO,
class GO,
class Node>
846 template<
class Scalar,
class LO,
class GO,
class Node>
851 Teuchos::ETransp mode,
856 TEUCHOS_TEST_FOR_EXCEPTION(
857 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS,
858 std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::apply: " 859 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, " 860 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
869 catch (std::invalid_argument& e) {
870 TEUCHOS_TEST_FOR_EXCEPTION(
871 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 872 "apply: Either the input MultiVector X or the output MultiVector Y " 873 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's " 874 "graph. BlockMultiVector's constructor threw the following exception: " 883 const_cast<this_type*
> (
this)->
applyBlock (X_view, Y_view, mode, alpha, beta);
884 }
catch (std::invalid_argument& e) {
885 TEUCHOS_TEST_FOR_EXCEPTION(
886 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 887 "apply: The implementation method applyBlock complained about having " 888 "an invalid argument. It reported the following: " << e.what ());
889 }
catch (std::logic_error& e) {
890 TEUCHOS_TEST_FOR_EXCEPTION(
891 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 892 "apply: The implementation method applyBlock complained about a " 893 "possible bug in its implementation. It reported the following: " 895 }
catch (std::exception& e) {
896 TEUCHOS_TEST_FOR_EXCEPTION(
897 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 898 "apply: The implementation method applyBlock threw an exception which " 899 "is neither std::invalid_argument nor std::logic_error, but is a " 900 "subclass of std::exception. It reported the following: " 903 TEUCHOS_TEST_FOR_EXCEPTION(
904 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 905 "apply: The implementation method applyBlock threw an exception which " 906 "is not an instance of a subclass of std::exception. This probably " 907 "indicates a bug. Please report this to the Tpetra developers.");
911 template<
class Scalar,
class LO,
class GO,
class Node>
916 Teuchos::ETransp mode,
920 TEUCHOS_TEST_FOR_EXCEPTION(
922 "Tpetra::Experimental::BlockCrsMatrix::applyBlock: " 923 "X and Y have different block sizes. X.getBlockSize() = " 927 if (mode == Teuchos::NO_TRANS) {
928 applyBlockNoTrans (X, Y, alpha, beta);
929 }
else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
930 applyBlockTrans (X, Y, mode, alpha, beta);
932 TEUCHOS_TEST_FOR_EXCEPTION(
933 true, std::invalid_argument,
"Tpetra::Experimental::BlockCrsMatrix::" 934 "applyBlock: Invalid 'mode' argument. Valid values are " 935 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
939 template<
class Scalar,
class LO,
class GO,
class Node>
944 #ifdef HAVE_TPETRA_DEBUG 945 const char prefix[] =
"Tpetra::Experimental::BlockCrsMatrix::setAllToScalar: ";
946 #endif // HAVE_TPETRA_DEBUG 948 if (this->
template need_sync<device_type> ()) {
952 #ifdef HAVE_TPETRA_DEBUG 953 TEUCHOS_TEST_FOR_EXCEPTION
954 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
955 prefix <<
"The matrix's values need sync on both device and host.");
956 #endif // HAVE_TPETRA_DEBUG 957 this->
template modify<Kokkos::HostSpace> ();
960 else if (this->
template need_sync<Kokkos::HostSpace> ()) {
964 #ifdef HAVE_TPETRA_DEBUG 965 TEUCHOS_TEST_FOR_EXCEPTION
966 (this->
template need_sync<device_type> (), std::runtime_error,
967 prefix <<
"The matrix's values need sync on both host and device.");
968 #endif // HAVE_TPETRA_DEBUG 969 this->
template modify<device_type> ();
973 this->
template modify<device_type> ();
978 template<
class Scalar,
class LO,
class GO,
class Node>
984 const LO numColInds)
const 986 #ifdef HAVE_TPETRA_DEBUG 987 const char prefix[] =
988 "Tpetra::Experimental::BlockCrsMatrix::replaceLocalValues: ";
989 #endif // HAVE_TPETRA_DEBUG 996 return static_cast<LO
> (0);
1000 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1001 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1002 const LO perBlockSize = this->offsetPerBlock ();
1007 #ifdef HAVE_TPETRA_DEBUG 1008 TEUCHOS_TEST_FOR_EXCEPTION
1009 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
1010 prefix <<
"The matrix's data were last modified on device, but have " 1011 "not been sync'd to host. Please sync to host (by calling " 1012 "sync<Kokkos::HostSpace>() on this matrix) before calling this " 1014 #endif // HAVE_TPETRA_DEBUG 1020 auto vals_host_out =
1021 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
1024 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1025 const LO relBlockOffset =
1026 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1027 if (relBlockOffset != LINV) {
1036 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1040 vals_host_out_raw + absBlockOffset * perBlockSize;
1045 for (LO i = 0; i < perBlockSize; ++i) {
1046 A_old[i] = A_new[i];
1048 hint = relBlockOffset + 1;
1055 template <
class Scalar,
class LO,
class GO,
class Node>
1059 Kokkos::MemoryUnmanaged>& offsets)
const 1064 template <
class Scalar,
class LO,
class GO,
class Node>
1065 void TPETRA_DEPRECATED
1075 if (static_cast<size_t> (offsets.size ()) < lclNumRows) {
1076 offsets.resize (lclNumRows);
1082 typedef typename device_type::memory_space
memory_space;
1083 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
1088 Kokkos::MemoryUnmanaged> output_type;
1089 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
1093 Kokkos::View<size_t*, device_type> offsetsTmp (
"diagOffsets", lclNumRows);
1095 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
1096 Kokkos::MemoryUnmanaged> output_type;
1097 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
1102 template <
class Scalar,
class LO,
class GO,
class Node>
1108 Kokkos::MemoryUnmanaged>& D_inv,
1109 const Scalar& omega,
1114 Kokkos::Details::ArithTraits<impl_scalar_type>::zero ();
1116 Kokkos::Details::ArithTraits<impl_scalar_type>::one ();
1117 const LO numLocalMeshRows =
1126 Teuchos::Array<impl_scalar_type> localMem (blockSize);
1127 Teuchos::Array<impl_scalar_type> localMat (blockSize*blockSize);
1131 LO rowBegin = 0, rowEnd = 0, rowStride = 0;
1132 if (direction == Forward) {
1134 rowEnd = numLocalMeshRows+1;
1137 else if (direction == Backward) {
1138 rowBegin = numLocalMeshRows;
1142 else if (direction == Symmetric) {
1148 const Scalar one_minus_omega = Teuchos::ScalarTraits<Scalar>::one()-omega;
1149 const Scalar minus_omega = -omega;
1152 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
1153 const LO actlRow = lclRow - 1;
1156 COPY (B_cur, X_lcl);
1157 SCAL (static_cast<impl_scalar_type> (omega), X_lcl);
1159 const size_t meshBeg = ptrHost_[actlRow];
1160 const size_t meshEnd = ptrHost_[actlRow+1];
1161 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1162 const LO meshCol = indHost_[absBlkOff];
1163 const_little_block_type A_cur =
1164 getConstLocalBlockFromAbsOffset (absBlkOff);
1168 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
1170 GEMV (static_cast<impl_scalar_type> (alpha), A_cur, X_cur, X_lcl);
1176 auto D_lcl = Kokkos::subview (D_inv, actlRow, ALL (), ALL ());
1178 FILL (X_update, zero);
1179 GEMV (one, D_lcl, X_lcl, X_update);
1183 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
1184 for (LO j = 0; j < numVecs; ++j) {
1185 LO actlRow = lclRow-1;
1188 COPY (B_cur, X_lcl);
1189 SCAL (static_cast<impl_scalar_type> (omega), X_lcl);
1191 const size_t meshBeg = ptrHost_[actlRow];
1192 const size_t meshEnd = ptrHost_[actlRow+1];
1193 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1194 const LO meshCol = indHost_[absBlkOff];
1195 const_little_block_type A_cur =
1196 getConstLocalBlockFromAbsOffset (absBlkOff);
1200 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
1201 GEMV (static_cast<impl_scalar_type> (alpha), A_cur, X_cur, X_lcl);
1204 auto D_lcl = Kokkos::subview (D_inv, actlRow, ALL (), ALL ());
1206 FILL (X_update, zero);
1207 GEMV (one, D_lcl, X_lcl, X_update);
1213 template <
class Scalar,
class LO,
class GO,
class Node>
1219 const Scalar& dampingFactor,
1221 const int numSweeps,
1222 const bool zeroInitialGuess)
const 1226 TEUCHOS_TEST_FOR_EXCEPTION(
1227 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1228 "gaussSeidelCopy: Not implemented.");
1231 template <
class Scalar,
class LO,
class GO,
class Node>
1237 const Teuchos::ArrayView<LO>& rowIndices,
1238 const Scalar& dampingFactor,
1240 const int numSweeps,
1241 const bool zeroInitialGuess)
const 1245 TEUCHOS_TEST_FOR_EXCEPTION(
1246 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::" 1247 "reorderedGaussSeidelCopy: Not implemented.");
1250 template <
class Scalar,
class LO,
class GO,
class Node>
1251 void TPETRA_DEPRECATED
1254 const Teuchos::ArrayView<const size_t>& offsets)
const 1256 using Teuchos::ArrayRCP;
1257 using Teuchos::ArrayView;
1258 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1261 const LO* columnIndices;
1264 Teuchos::Array<LO> cols(1);
1267 Teuchos::Array<Scalar> zeroMat (blockSize_*blockSize_, ZERO);
1268 for (
size_t i = 0; i < myNumRows; ++i) {
1270 if (offsets[i] == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1275 diag.
replaceLocalValues (i, cols.getRawPtr(), &vals[offsets[i]*blockSize_*blockSize_], 1);
1281 template <
class Scalar,
class LO,
class GO,
class Node>
1285 Kokkos::MemoryUnmanaged>& diag,
1286 const Kokkos::View<
const size_t*, device_type,
1287 Kokkos::MemoryUnmanaged>& offsets)
const 1289 using Kokkos::parallel_for;
1291 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
1295 TEUCHOS_TEST_FOR_EXCEPTION
1296 (static_cast<LO> (diag.extent (0)) < lclNumMeshRows ||
1297 static_cast<LO> (diag.extent (1)) < blockSize ||
1298 static_cast<LO> (diag.extent (2)) < blockSize,
1299 std::invalid_argument, prefix <<
1300 "The input Kokkos::View is not big enough to hold all the data.");
1301 TEUCHOS_TEST_FOR_EXCEPTION
1302 (static_cast<LO> (offsets.size ()) < lclNumMeshRows, std::invalid_argument,
1303 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of " 1304 "diagonal blocks " << lclNumMeshRows <<
".");
1306 #ifdef HAVE_TPETRA_DEBUG 1307 TEUCHOS_TEST_FOR_EXCEPTION
1308 (this->
template need_sync<device_type> (), std::runtime_error,
1309 prefix <<
"The matrix's data were last modified on host, but have " 1310 "not been sync'd to device. Please sync to device (by calling " 1311 "sync<device_type>() on this matrix) before calling this method.");
1312 #endif // HAVE_TPETRA_DEBUG 1314 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
1315 typedef GetLocalDiagCopy<Scalar, LO, GO, Node> functor_type;
1323 const_cast<this_type*
> (
this)->
template getValues<device_type> ();
1325 parallel_for (policy_type (0, lclNumMeshRows),
1326 functor_type (diag, vals_dev, offsets,
1331 template <
class Scalar,
class LO,
class GO,
class Node>
1335 Kokkos::MemoryUnmanaged>& diag,
1336 const Teuchos::ArrayView<const size_t>& offsets)
const 1339 using Kokkos::parallel_for;
1341 Kokkos::MemoryUnmanaged>::HostMirror::execution_space host_exec_space;
1345 TEUCHOS_TEST_FOR_EXCEPTION
1346 (static_cast<LO> (diag.extent (0)) < lclNumMeshRows ||
1347 static_cast<LO> (diag.extent (1)) < blockSize ||
1348 static_cast<LO> (diag.extent (2)) < blockSize,
1349 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: " 1350 "The input Kokkos::View is not big enough to hold all the data.");
1351 TEUCHOS_TEST_FOR_EXCEPTION
1352 (static_cast<LO> (offsets.size ()) < lclNumMeshRows,
1353 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: " 1354 "offsets.size() = " << offsets.size () <<
" < local number of diagonal " 1355 "blocks " << lclNumMeshRows <<
".");
1359 typedef Kokkos::RangePolicy<host_exec_space, LO> policy_type;
1360 parallel_for (policy_type (0, lclNumMeshRows), [=] (
const LO& lclMeshRow) {
1361 auto D_in = this->getConstLocalBlockFromRelOffset (lclMeshRow, offsets[lclMeshRow]);
1362 auto D_out = Kokkos::subview (diag, lclMeshRow, ALL (), ALL ());
1368 template<
class Scalar,
class LO,
class GO,
class Node>
1373 const Scalar vals[],
1374 const LO numColInds)
const 1381 return static_cast<LO
> (0);
1385 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1386 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1387 const LO perBlockSize = this->offsetPerBlock ();
1392 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1393 const LO relBlockOffset =
1394 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1395 if (relBlockOffset != LINV) {
1396 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1397 little_block_type A_old =
1398 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1399 const_little_block_type A_new =
1400 getConstLocalBlockFromInput (vIn, pointOffset);
1402 ::Tpetra::Experimental::Impl::absMax (A_old, A_new);
1403 hint = relBlockOffset + 1;
1411 template<
class Scalar,
class LO,
class GO,
class Node>
1416 const Scalar vals[],
1417 const LO numColInds)
const 1419 #ifdef HAVE_TPETRA_DEBUG 1420 const char prefix[] =
1421 "Tpetra::Experimental::BlockCrsMatrix::sumIntoLocalValues: ";
1422 #endif // HAVE_TPETRA_DEBUG 1429 return static_cast<LO
> (0);
1434 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1435 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1436 const LO perBlockSize = this->offsetPerBlock ();
1441 #ifdef HAVE_TPETRA_DEBUG 1442 TEUCHOS_TEST_FOR_EXCEPTION
1443 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
1444 prefix <<
"The matrix's data were last modified on device, but have not " 1445 "been sync'd to host. Please sync to host (by calling " 1446 "sync<Kokkos::HostSpace>() on this matrix) before calling this method.");
1447 #endif // HAVE_TPETRA_DEBUG 1453 auto vals_host_out =
1454 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
1457 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1458 const LO relBlockOffset =
1459 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1460 if (relBlockOffset != LINV) {
1469 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1473 vals_host_out_raw + absBlockOffset * perBlockSize;
1478 for (LO i = 0; i < perBlockSize; ++i) {
1479 A_old[i] += A_new[i];
1481 hint = relBlockOffset + 1;
1488 template<
class Scalar,
class LO,
class GO,
class Node>
1496 #ifdef HAVE_TPETRA_DEBUG 1497 const char prefix[] =
1498 "Tpetra::Experimental::BlockCrsMatrix::getLocalRowView: ";
1499 #endif // HAVE_TPETRA_DEBUG 1505 return Teuchos::OrdinalTraits<LO>::invalid ();
1508 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
1509 colInds = indHost_.data () + absBlockOffsetStart;
1511 #ifdef HAVE_TPETRA_DEBUG 1512 TEUCHOS_TEST_FOR_EXCEPTION
1513 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
1514 prefix <<
"The matrix's data were last modified on device, but have " 1515 "not been sync'd to host. Please sync to host (by calling " 1516 "sync<Kokkos::HostSpace>() on this matrix) before calling this " 1518 #endif // HAVE_TPETRA_DEBUG 1524 auto vals_host_out =
1525 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
1528 absBlockOffsetStart * offsetPerBlock ();
1529 vals =
reinterpret_cast<Scalar*
> (vOut);
1531 numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
1536 template<
class Scalar,
class LO,
class GO,
class Node>
1540 const Teuchos::ArrayView<LO>& Indices,
1541 const Teuchos::ArrayView<Scalar>& Values,
1542 size_t &NumEntries)
const 1548 if (numInds > Indices.size() || numInds*blockSize_*blockSize_ > Values.size()) {
1549 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
1550 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold " 1551 << numInds <<
" row entries");
1553 for (LO i=0; i<numInds; ++i) {
1554 Indices[i] = colInds[i];
1556 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
1557 Values[i] = vals[i];
1559 NumEntries = numInds;
1562 template<
class Scalar,
class LO,
class GO,
class Node>
1566 ptrdiff_t offsets[],
1568 const LO numColInds)
const 1575 return static_cast<LO
> (0);
1578 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1582 for (LO k = 0; k < numColInds; ++k) {
1583 const LO relBlockOffset =
1584 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1585 if (relBlockOffset != LINV) {
1586 offsets[k] =
static_cast<ptrdiff_t
> (relBlockOffset);
1587 hint = relBlockOffset + 1;
1595 template<
class Scalar,
class LO,
class GO,
class Node>
1599 const ptrdiff_t offsets[],
1600 const Scalar vals[],
1601 const LO numOffsets)
const 1608 return static_cast<LO
> (0);
1612 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1613 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1614 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1615 size_t pointOffset = 0;
1618 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1619 const size_t relBlockOffset = offsets[k];
1620 if (relBlockOffset != STINV) {
1621 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1622 little_block_type A_old =
1623 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1624 const_little_block_type A_new =
1625 getConstLocalBlockFromInput (vIn, pointOffset);
1626 COPY (A_new, A_old);
1634 template<
class Scalar,
class LO,
class GO,
class Node>
1638 const ptrdiff_t offsets[],
1639 const Scalar vals[],
1640 const LO numOffsets)
const 1647 return static_cast<LO
> (0);
1651 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1652 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1653 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1654 size_t pointOffset = 0;
1657 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1658 const size_t relBlockOffset = offsets[k];
1659 if (relBlockOffset != STINV) {
1660 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1661 little_block_type A_old =
1662 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1663 const_little_block_type A_new =
1664 getConstLocalBlockFromInput (vIn, pointOffset);
1665 ::Tpetra::Experimental::Impl::absMax (A_old, A_new);
1673 template<
class Scalar,
class LO,
class GO,
class Node>
1677 const ptrdiff_t offsets[],
1678 const Scalar vals[],
1679 const LO numOffsets)
const 1686 return static_cast<LO
> (0);
1691 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1692 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1693 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1694 size_t pointOffset = 0;
1697 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1698 const size_t relBlockOffset = offsets[k];
1699 if (relBlockOffset != STINV) {
1700 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1701 little_block_type A_old =
1702 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1703 const_little_block_type A_new =
1704 getConstLocalBlockFromInput (vIn, pointOffset);
1706 AXPY (ONE, A_new, A_old);
1714 template<
class Scalar,
class LO,
class GO,
class Node>
1720 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1721 return static_cast<LO
> (0);
1723 return static_cast<LO
> (numEntInGraph);
1727 template<
class Scalar,
class LO,
class GO,
class Node>
1732 const Teuchos::ETransp mode,
1742 TEUCHOS_TEST_FOR_EXCEPTION(
1743 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::apply: " 1744 "transpose and conjugate transpose modes are not yet implemented.");
1747 template<
class Scalar,
class LO,
class GO,
class Node>
1757 typedef ::Tpetra::Import<LO, GO, Node> import_type;
1758 typedef ::Tpetra::Export<LO, GO, Node> export_type;
1759 const Scalar zero = STS::zero ();
1760 const Scalar one = STS::one ();
1761 RCP<const import_type>
import = graph_.
getImporter ();
1763 RCP<const export_type> theExport = graph_.
getExporter ();
1764 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
1766 if (alpha == zero) {
1770 else if (beta != one) {
1775 const BMV* X_colMap = NULL;
1776 if (
import.is_null ()) {
1780 catch (std::exception& e) {
1781 TEUCHOS_TEST_FOR_EXCEPTION
1782 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::" 1783 "operator= threw an exception: " << std::endl << e.what ());
1793 if ((*X_colMap_).is_null () ||
1799 #ifdef HAVE_TPETRA_BCRS_DO_POINT_IMPORT 1800 if (pointImporter_->is_null ()) {
1803 const auto domainPointMap = rcp (
new typename BMV::map_type (domainPointMap_));
1808 domainPointMap, colPointMap));
1817 X_colMap = &(**X_colMap_);
1819 catch (std::exception& e) {
1820 TEUCHOS_TEST_FOR_EXCEPTION
1821 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::" 1822 "operator= threw an exception: " << std::endl << e.what ());
1826 BMV* Y_rowMap = NULL;
1827 if (theExport.is_null ()) {
1830 else if ((*Y_rowMap_).is_null () ||
1836 Y_rowMap = &(**Y_rowMap_);
1838 catch (std::exception& e) {
1839 TEUCHOS_TEST_FOR_EXCEPTION(
1840 true, std::logic_error, prefix <<
"Tpetra::MultiVector::" 1841 "operator= threw an exception: " << std::endl << e.what ());
1846 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1848 catch (std::exception& e) {
1849 TEUCHOS_TEST_FOR_EXCEPTION
1850 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw " 1851 "an exception: " << e.what ());
1854 TEUCHOS_TEST_FOR_EXCEPTION
1855 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw " 1856 "an exception not a subclass of std::exception.");
1859 if (! theExport.is_null ()) {
1865 template<
class Scalar,
class LO,
class GO,
class Node>
1873 using ::Tpetra::Experimental::Classes::Impl::bcrsLocalApplyNoTrans;
1882 Y_mv.template modify<device_type> ();
1884 auto X_lcl = X_mv.template getLocalView<device_type> ();
1885 auto Y_lcl = Y_mv.template getLocalView<device_type> ();
1886 auto val = this->val_.template view<device_type> ();
1888 bcrsLocalApplyNoTrans (alpha_impl, graph, val, blockSize, X_lcl,
1892 template<
class Scalar,
class LO,
class GO,
class Node>
1896 const LO colIndexToFind,
1897 const LO hint)
const 1899 const size_t absStartOffset = ptrHost_[localRowIndex];
1900 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1901 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1903 const LO*
const curInd = indHost_.data () + absStartOffset;
1906 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1913 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1918 const LO maxNumEntriesForLinearSearch = 32;
1919 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1924 const LO* beg = curInd;
1925 const LO* end = curInd + numEntriesInRow;
1926 std::pair<const LO*, const LO*> p =
1927 std::equal_range (beg, end, colIndexToFind);
1928 if (p.first != p.second) {
1930 relOffset =
static_cast<LO
> (p.first - beg);
1934 for (LO k = 0; k < numEntriesInRow; ++k) {
1935 if (colIndexToFind == curInd[k]) {
1945 template<
class Scalar,
class LO,
class GO,
class Node>
1950 return offsetPerBlock_;
1953 template<
class Scalar,
class LO,
class GO,
class Node>
1957 const size_t pointOffset)
const 1960 const LO rowStride = blockSize_;
1964 template<
class Scalar,
class LO,
class GO,
class Node>
1968 const size_t pointOffset)
const 1971 const LO rowStride = blockSize_;
1975 template<
class Scalar,
class LO,
class GO,
class Node>
1980 #ifdef HAVE_TPETRA_DEBUG 1981 const char prefix[] =
1982 "Tpetra::Experimental::BlockCrsMatrix::getConstLocalBlockFromAbsOffset: ";
1983 #endif // HAVE_TPETRA_DEBUG 1992 #ifdef HAVE_TPETRA_DEBUG 1993 TEUCHOS_TEST_FOR_EXCEPTION
1994 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
1995 prefix <<
"The matrix's data were last modified on device, but have " 1996 "not been sync'd to host. Please sync to host (by calling " 1997 "sync<Kokkos::HostSpace>() on this matrix) before calling this " 1999 #endif // HAVE_TPETRA_DEBUG 2000 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
2007 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
2010 return getConstLocalBlockFromInput (vals_host_raw, absPointOffset);
2014 template<
class Scalar,
class LO,
class GO,
class Node>
2018 const size_t relMeshOffset)
const 2022 const LO* lclColInds = NULL;
2023 Scalar* lclVals = NULL;
2026 LO err = this->
getLocalRowView (lclMeshRow, lclColInds, lclVals, numEnt);
2034 const size_t relPointOffset = relMeshOffset * this->offsetPerBlock ();
2035 IST* lclValsImpl =
reinterpret_cast<IST*
> (lclVals);
2036 return this->getConstLocalBlockFromInput (const_cast<const IST*> (lclValsImpl),
2041 template<
class Scalar,
class LO,
class GO,
class Node>
2046 #ifdef HAVE_TPETRA_DEBUG 2047 const char prefix[] =
2048 "Tpetra::Experimental::BlockCrsMatrix::getNonConstLocalBlockFromAbsOffset: ";
2049 #endif // HAVE_TPETRA_DEBUG 2058 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
2059 #ifdef HAVE_TPETRA_DEBUG 2060 TEUCHOS_TEST_FOR_EXCEPTION
2061 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
2062 prefix <<
"The matrix's data were last modified on device, but have " 2063 "not been sync'd to host. Please sync to host (by calling " 2064 "sync<Kokkos::HostSpace>() on this matrix) before calling this " 2066 #endif // HAVE_TPETRA_DEBUG 2072 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
2074 return getNonConstLocalBlockFromInput (vals_host_raw, absPointOffset);
2078 template<
class Scalar,
class LO,
class GO,
class Node>
2081 getLocalBlock (
const LO localRowInd,
const LO localColInd)
const 2083 const size_t absRowBlockOffset = ptrHost_[localRowInd];
2084 const LO relBlockOffset =
2085 this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
2087 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
2088 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
2089 return getNonConstLocalBlockFromAbsOffset (absBlockOffset);
2106 template<
class Scalar,
class LO,
class GO,
class Node>
2109 checkSizes (const ::Tpetra::SrcDistObject& source)
2113 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2116 std::ostream& err = markLocalErrorAndGetStream ();
2117 err <<
"checkSizes: The source object of the Import or Export " 2118 "must be a BlockCrsMatrix with the same template parameters as the " 2119 "target object." << endl;
2125 std::ostream& err = markLocalErrorAndGetStream ();
2126 err <<
"checkSizes: The source and target objects of the Import or " 2127 <<
"Export must have the same block sizes. The source's block " 2128 <<
"size = " << src->getBlockSize () <<
" != the target's block " 2131 if (! src->graph_.isFillComplete ()) {
2132 std::ostream& err = markLocalErrorAndGetStream ();
2133 err <<
"checkSizes: The source object of the Import or Export is " 2134 "not fill complete. Both source and target objects must be fill " 2135 "complete." << endl;
2138 std::ostream& err = markLocalErrorAndGetStream ();
2139 err <<
"checkSizes: The target object of the Import or Export is " 2140 "not fill complete. Both source and target objects must be fill " 2141 "complete." << endl;
2143 if (src->graph_.getColMap ().is_null ()) {
2144 std::ostream& err = markLocalErrorAndGetStream ();
2145 err <<
"checkSizes: The source object of the Import or Export does " 2146 "not have a column Map. Both source and target objects must have " 2147 "column Maps." << endl;
2149 if (this->graph_.
getColMap ().is_null ()) {
2150 std::ostream& err = markLocalErrorAndGetStream ();
2151 err <<
"checkSizes: The target object of the Import or Export does " 2152 "not have a column Map. Both source and target objects must have " 2153 "column Maps." << endl;
2157 return ! (* (this->localError_));
2160 template<
class Scalar,
class LO,
class GO,
class Node>
2165 const Teuchos::ArrayView<const LO>& permuteToLIDs,
2166 const Teuchos::ArrayView<const LO>& permuteFromLIDs)
2170 const bool debug =
false;
2173 std::ostringstream os;
2174 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2175 os <<
"Proc " << myRank <<
": copyAndPermute: " 2176 <<
"numSameIDs: " << numSameIDs
2177 <<
", permuteToLIDs.size(): " << permuteToLIDs.size ()
2178 <<
", permuteFromLIDs.size(): " << permuteFromLIDs.size ()
2180 std::cerr << os.str ();
2186 if (* (this->localError_)) {
2187 std::ostream& err = this->markLocalErrorAndGetStream ();
2188 err <<
"copyAndPermute: The target object of the Import or Export is " 2189 "already in an error state." << endl;
2192 if (permuteToLIDs.size () != permuteFromLIDs.size ()) {
2193 std::ostream& err = this->markLocalErrorAndGetStream ();
2194 err <<
"copyAndPermute: permuteToLIDs.size() = " << permuteToLIDs.size ()
2195 <<
" != permuteFromLIDs.size() = " << permuteFromLIDs.size () <<
"." 2200 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2202 std::ostream& err = this->markLocalErrorAndGetStream ();
2203 err <<
"copyAndPermute: The source object of the Import or Export is " 2204 "either not a BlockCrsMatrix, or does not have the right template " 2205 "parameters. checkSizes() should have caught this. " 2206 "Please report this bug to the Tpetra developers." << endl;
2209 if (* (src->localError_)) {
2210 std::ostream& err = this->markLocalErrorAndGetStream ();
2211 err <<
"copyAndPermute: The source object of the Import or Export is " 2212 "already in an error state." << endl;
2216 bool lclErr =
false;
2217 #ifdef HAVE_TPETRA_DEBUG 2218 std::set<LO> invalidSrcCopyRows;
2219 std::set<LO> invalidDstCopyRows;
2220 std::set<LO> invalidDstCopyCols;
2221 std::set<LO> invalidDstPermuteCols;
2222 std::set<LO> invalidPermuteFromRows;
2223 #endif // HAVE_TPETRA_DEBUG 2232 #ifdef HAVE_TPETRA_DEBUG 2233 const map_type& srcRowMap = * (src->graph_.getRowMap ());
2234 #endif // HAVE_TPETRA_DEBUG 2236 const map_type& srcColMap = * (src->graph_.getColMap ());
2238 const bool canUseLocalColumnIndices = srcColMap.
locallySameAs (dstColMap);
2239 const size_t numPermute =
static_cast<size_t> (permuteFromLIDs.size ());
2242 std::ostringstream os;
2243 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2244 os <<
"Proc " << myRank <<
": copyAndPermute: " 2245 <<
"canUseLocalColumnIndices: " 2246 << (canUseLocalColumnIndices ?
"true" :
"false")
2247 <<
", numPermute: " << numPermute
2249 std::cerr << os.str ();
2252 if (canUseLocalColumnIndices) {
2254 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
2255 #ifdef HAVE_TPETRA_DEBUG 2258 invalidSrcCopyRows.insert (localRow);
2261 #endif // HAVE_TPETRA_DEBUG 2263 const LO* lclSrcCols;
2268 LO err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
2271 #ifdef HAVE_TPETRA_DEBUG 2272 (void) invalidSrcCopyRows.insert (localRow);
2273 #endif // HAVE_TPETRA_DEBUG 2277 if (err != numEntries) {
2280 #ifdef HAVE_TPETRA_DEBUG 2281 invalidDstCopyRows.insert (localRow);
2282 #endif // HAVE_TPETRA_DEBUG 2290 for (LO k = 0; k < numEntries; ++k) {
2293 #ifdef HAVE_TPETRA_DEBUG 2294 (void) invalidDstCopyCols.insert (lclSrcCols[k]);
2295 #endif // HAVE_TPETRA_DEBUG 2304 for (
size_t k = 0; k < numPermute; ++k) {
2305 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDs[k]);
2306 const LO dstLclRow =
static_cast<LO
> (permuteToLIDs[k]);
2308 const LO* lclSrcCols;
2311 LO err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
2314 #ifdef HAVE_TPETRA_DEBUG 2315 invalidPermuteFromRows.insert (srcLclRow);
2316 #endif // HAVE_TPETRA_DEBUG 2320 if (err != numEntries) {
2322 #ifdef HAVE_TPETRA_DEBUG 2323 for (LO c = 0; c < numEntries; ++c) {
2325 invalidDstPermuteCols.insert (lclSrcCols[c]);
2328 #endif // HAVE_TPETRA_DEBUG 2335 const size_t maxNumEnt = src->graph_.getNodeMaxNumRowEntries ();
2336 Teuchos::Array<LO> lclDstCols (maxNumEnt);
2339 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
2340 const LO* lclSrcCols;
2347 err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
2348 }
catch (std::exception& e) {
2350 std::ostringstream os;
2351 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2352 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow " 2353 << localRow <<
", src->getLocalRowView() threw an exception: " 2355 std::cerr << os.str ();
2362 #ifdef HAVE_TPETRA_DEBUG 2363 invalidSrcCopyRows.insert (localRow);
2364 #endif // HAVE_TPETRA_DEBUG 2367 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
2370 std::ostringstream os;
2371 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2372 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow " 2373 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = " 2374 << maxNumEnt << endl;
2375 std::cerr << os.str ();
2381 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2382 for (LO j = 0; j < numEntries; ++j) {
2384 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2386 #ifdef HAVE_TPETRA_DEBUG 2387 invalidDstCopyCols.insert (lclDstColsView[j]);
2388 #endif // HAVE_TPETRA_DEBUG 2392 err = this->
replaceLocalValues (localRow, lclDstColsView.getRawPtr (), vals, numEntries);
2393 }
catch (std::exception& e) {
2395 std::ostringstream os;
2396 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2397 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow " 2398 << localRow <<
", this->replaceLocalValues() threw an exception: " 2400 std::cerr << os.str ();
2404 if (err != numEntries) {
2407 std::ostringstream os;
2408 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2409 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" " 2410 "localRow " << localRow <<
", this->replaceLocalValues " 2411 "returned " << err <<
" instead of numEntries = " 2412 << numEntries << endl;
2413 std::cerr << os.str ();
2421 for (
size_t k = 0; k < numPermute; ++k) {
2422 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDs[k]);
2423 const LO dstLclRow =
static_cast<LO
> (permuteToLIDs[k]);
2425 const LO* lclSrcCols;
2430 err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
2431 }
catch (std::exception& e) {
2433 std::ostringstream os;
2434 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2435 os <<
"Proc " << myRank <<
": copyAndPermute: At \"permute\" " 2436 "srcLclRow " << srcLclRow <<
" and dstLclRow " << dstLclRow
2437 <<
", src->getLocalRowView() threw an exception: " << e.what ();
2438 std::cerr << os.str ();
2445 #ifdef HAVE_TPETRA_DEBUG 2446 invalidPermuteFromRows.insert (srcLclRow);
2447 #endif // HAVE_TPETRA_DEBUG 2450 if (static_cast<size_t> (numEntries) > static_cast<size_t> (lclDstCols.size ())) {
2456 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2457 for (LO j = 0; j < numEntries; ++j) {
2459 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2461 #ifdef HAVE_TPETRA_DEBUG 2462 invalidDstPermuteCols.insert (lclDstColsView[j]);
2463 #endif // HAVE_TPETRA_DEBUG 2466 err = this->
replaceLocalValues (dstLclRow, lclDstColsView.getRawPtr (), vals, numEntries);
2467 if (err != numEntries) {
2476 std::ostream& err = this->markLocalErrorAndGetStream ();
2477 #ifdef HAVE_TPETRA_DEBUG 2478 err <<
"copyAndPermute: The graph structure of the source of the " 2479 "Import or Export must be a subset of the graph structure of the " 2481 err <<
"invalidSrcCopyRows = [";
2482 for (
typename std::set<LO>::const_iterator it = invalidSrcCopyRows.begin ();
2483 it != invalidSrcCopyRows.end (); ++it) {
2485 typename std::set<LO>::const_iterator itp1 = it;
2487 if (itp1 != invalidSrcCopyRows.end ()) {
2491 err <<
"], invalidDstCopyRows = [";
2492 for (
typename std::set<LO>::const_iterator it = invalidDstCopyRows.begin ();
2493 it != invalidDstCopyRows.end (); ++it) {
2495 typename std::set<LO>::const_iterator itp1 = it;
2497 if (itp1 != invalidDstCopyRows.end ()) {
2501 err <<
"], invalidDstCopyCols = [";
2502 for (
typename std::set<LO>::const_iterator it = invalidDstCopyCols.begin ();
2503 it != invalidDstCopyCols.end (); ++it) {
2505 typename std::set<LO>::const_iterator itp1 = it;
2507 if (itp1 != invalidDstCopyCols.end ()) {
2511 err <<
"], invalidDstPermuteCols = [";
2512 for (
typename std::set<LO>::const_iterator it = invalidDstPermuteCols.begin ();
2513 it != invalidDstPermuteCols.end (); ++it) {
2515 typename std::set<LO>::const_iterator itp1 = it;
2517 if (itp1 != invalidDstPermuteCols.end ()) {
2521 err <<
"], invalidPermuteFromRows = [";
2522 for (
typename std::set<LO>::const_iterator it = invalidPermuteFromRows.begin ();
2523 it != invalidPermuteFromRows.end (); ++it) {
2525 typename std::set<LO>::const_iterator itp1 = it;
2527 if (itp1 != invalidPermuteFromRows.end ()) {
2531 err <<
"]" << std::endl;
2533 err <<
"copyAndPermute: The graph structure of the source of the " 2534 "Import or Export must be a subset of the graph structure of the " 2535 "target." << std::endl;
2536 #endif // HAVE_TPETRA_DEBUG 2540 std::ostringstream os;
2541 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2542 const bool lclSuccess = ! (* (this->localError_));
2543 os <<
"*** Proc " << myRank <<
": copyAndPermute " 2544 << (lclSuccess ?
"succeeded" :
"FAILED");
2550 std::cerr << os.str ();
2574 template<
class LO,
class GO,
class D>
2576 packRowCount (
const size_t numEnt,
2577 const size_t numBytesPerValue,
2578 const size_t blkSize)
2580 using ::Tpetra::Details::PackTraits;
2590 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
2591 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2592 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
2593 return numEntLen + gidsLen + valsLen;
2607 template<
class ST,
class LO,
class GO,
class D>
2609 unpackRowCount (
const typename ::Tpetra::Details::PackTraits<LO, D>::input_buffer_type& imports,
2610 const size_t offset,
2611 const size_t numBytes,
2612 const size_t numBytesPerValue)
2614 using Kokkos::subview;
2615 using ::Tpetra::Details::PackTraits;
2617 if (numBytes == 0) {
2619 return static_cast<size_t> (0);
2623 #ifdef HAVE_TPETRA_DEBUG 2624 const size_t theNumBytes = PackTraits<LO, D>::packValueCount (numEntLO);
2625 TEUCHOS_TEST_FOR_EXCEPTION(
2626 theNumBytes > numBytes, std::logic_error,
"unpackRowCount: " 2627 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
2629 #endif // HAVE_TPETRA_DEBUG 2630 const char*
const inBuf = imports.data () + offset;
2631 const size_t actualNumBytes = PackTraits<LO, D>::unpackValue (numEntLO, inBuf);
2632 #ifdef HAVE_TPETRA_DEBUG 2633 TEUCHOS_TEST_FOR_EXCEPTION(
2634 actualNumBytes > numBytes, std::logic_error,
"unpackRowCount: " 2635 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
2638 (void) actualNumBytes;
2639 #endif // HAVE_TPETRA_DEBUG 2640 return static_cast<size_t> (numEntLO);
2651 template<
class ST,
class LO,
class GO,
class D>
2653 packRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<LO, D>::output_buffer_type& exports,
2654 const size_t offset,
2655 const size_t numEnt,
2656 const typename ::Tpetra::Details::PackTraits<GO, D>::input_array_type& gidsIn,
2657 const typename ::Tpetra::Details::PackTraits<ST, D>::input_array_type& valsIn,
2658 const size_t numBytesPerValue,
2659 const size_t blockSize)
2661 using Kokkos::subview;
2662 using ::Tpetra::Details::PackTraits;
2668 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2671 const LO numEntLO =
static_cast<size_t> (numEnt);
2673 const size_t numEntBeg = offset;
2674 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
2675 const size_t gidsBeg = numEntBeg + numEntLen;
2676 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2677 const size_t valsBeg = gidsBeg + gidsLen;
2678 const size_t valsLen = numScalarEnt * numBytesPerValue;
2680 char*
const numEntOut = exports.data () + numEntBeg;
2681 char*
const gidsOut = exports.data () + gidsBeg;
2682 char*
const valsOut = exports.data () + valsBeg;
2684 size_t numBytesOut = 0;
2686 numBytesOut += PackTraits<LO, D>::packValue (numEntOut, numEntLO);
2689 Kokkos::pair<int, size_t> p;
2690 p = PackTraits<GO, D>::packArray (gidsOut, gidsIn.data (), numEnt);
2691 errorCode += p.first;
2692 numBytesOut += p.second;
2694 p = PackTraits<ST, D>::packArray (valsOut, valsIn.data (), numScalarEnt);
2695 errorCode += p.first;
2696 numBytesOut += p.second;
2699 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2700 TEUCHOS_TEST_FOR_EXCEPTION(
2701 numBytesOut != expectedNumBytes, std::logic_error,
"packRow: " 2702 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = " 2703 << expectedNumBytes <<
".");
2705 TEUCHOS_TEST_FOR_EXCEPTION(
2706 errorCode != 0, std::runtime_error,
"packRow: " 2707 "PackTraits::packArray returned a nonzero error code");
2713 template<
class ST,
class LO,
class GO,
class D>
2715 unpackRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<GO, D>::output_array_type& gidsOut,
2716 const typename ::Tpetra::Details::PackTraits<ST, D>::output_array_type& valsOut,
2717 const typename ::Tpetra::Details::PackTraits<int, D>::input_buffer_type& imports,
2718 const size_t offset,
2719 const size_t numBytes,
2720 const size_t numEnt,
2721 const size_t numBytesPerValue,
2722 const size_t blockSize)
2724 using ::Tpetra::Details::PackTraits;
2726 if (numBytes == 0) {
2730 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2731 TEUCHOS_TEST_FOR_EXCEPTION(
2732 static_cast<size_t> (imports.extent (0)) <= offset,
2733 std::logic_error,
"unpackRow: imports.extent(0) = " 2734 << imports.extent (0) <<
" <= offset = " << offset <<
".");
2735 TEUCHOS_TEST_FOR_EXCEPTION(
2736 static_cast<size_t> (imports.extent (0)) < offset + numBytes,
2737 std::logic_error,
"unpackRow: imports.extent(0) = " 2738 << imports.extent (0) <<
" < offset + numBytes = " 2739 << (offset + numBytes) <<
".");
2744 const size_t numEntBeg = offset;
2745 const size_t numEntLen = PackTraits<LO, D>::packValueCount (lid);
2746 const size_t gidsBeg = numEntBeg + numEntLen;
2747 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2748 const size_t valsBeg = gidsBeg + gidsLen;
2749 const size_t valsLen = numScalarEnt * numBytesPerValue;
2751 const char*
const numEntIn = imports.data () + numEntBeg;
2752 const char*
const gidsIn = imports.data () + gidsBeg;
2753 const char*
const valsIn = imports.data () + valsBeg;
2755 size_t numBytesOut = 0;
2758 numBytesOut += PackTraits<LO, D>::unpackValue (numEntOut, numEntIn);
2759 TEUCHOS_TEST_FOR_EXCEPTION(
2760 static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
2761 "unpackRow: Expected number of entries " << numEnt
2762 <<
" != actual number of entries " << numEntOut <<
".");
2765 Kokkos::pair<int, size_t> p;
2766 p = PackTraits<GO, D>::unpackArray (gidsOut.data (), gidsIn, numEnt);
2767 errorCode += p.first;
2768 numBytesOut += p.second;
2770 p = PackTraits<ST, D>::unpackArray (valsOut.data (), valsIn, numScalarEnt);
2771 errorCode += p.first;
2772 numBytesOut += p.second;
2775 TEUCHOS_TEST_FOR_EXCEPTION(
2776 numBytesOut != numBytes, std::logic_error,
"unpackRow: numBytesOut = " 2777 << numBytesOut <<
" != numBytes = " << numBytes <<
".");
2779 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2780 TEUCHOS_TEST_FOR_EXCEPTION(
2781 numBytesOut != expectedNumBytes, std::logic_error,
"unpackRow: " 2782 "numBytesOut = " << numBytesOut <<
" != expectedNumBytes = " 2783 << expectedNumBytes <<
".");
2785 TEUCHOS_TEST_FOR_EXCEPTION(
2786 errorCode != 0, std::runtime_error,
"unpackRow: " 2787 "PackTraits::unpackArray returned a nonzero error code");
2793 template<
class Scalar,
class LO,
class GO,
class Node>
2797 const Teuchos::ArrayView<const LO>& exportLIDs,
2798 Teuchos::Array<packet_type>& exports,
2799 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
2800 size_t& constantNumPackets,
2804 using ::Tpetra::Details::PackTraits;
2805 using Kokkos::MemoryUnmanaged;
2806 using Kokkos::subview;
2808 typedef typename ::Tpetra::MultiVector<Scalar, LO, GO, Node>::impl_scalar_type ST;
2809 typedef typename View<int*, device_type>::HostMirror::execution_space HES;
2811 typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
2812 const bool debug =
false;
2815 std::ostringstream os;
2816 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
2817 os <<
"Proc " << myRank <<
": packAndPrepare: exportLIDs.size() = " 2818 << exportLIDs.size () <<
", numPacketsPerLID.size() = " 2819 << numPacketsPerLID.size () << endl;
2820 std::cerr << os.str ();
2823 if (* (this->localError_)) {
2824 std::ostream& err = this->markLocalErrorAndGetStream ();
2825 err <<
"packAndPrepare: The target object of the Import or Export is " 2826 "already in an error state." << endl;
2830 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2833 std::ostream& err = this->markLocalErrorAndGetStream ();
2834 err <<
"packAndPrepare: The source (input) object of the Import or " 2835 "Export is either not a BlockCrsMatrix, or does not have the right " 2836 "template parameters. checkSizes() should have caught this. " 2837 "Please report this bug to the Tpetra developers." << endl;
2842 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2843 const size_type numExportLIDs = exportLIDs.size ();
2845 if (numExportLIDs != numPacketsPerLID.size ()) {
2846 std::ostream& err = this->markLocalErrorAndGetStream ();
2847 err <<
"packAndPrepare: exportLIDs.size() = " << numExportLIDs
2848 <<
" != numPacketsPerLID.size() = " << numPacketsPerLID.size ()
2861 constantNumPackets = 0;
2866 size_t totalNumBytes = 0;
2867 size_t totalNumEntries = 0;
2868 size_t maxRowLength = 0;
2869 for (size_type i = 0; i < numExportLIDs; ++i) {
2870 const LO lclRow = exportLIDs[i];
2878 if (numEnt == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2881 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2885 size_t numBytesPerValue = 0;
2891 Scalar* valsRaw = NULL;
2892 const LO* lidsRaw = NULL;
2893 LO actualNumEnt = 0;
2895 src->getLocalRowView (lclRow, lidsRaw, valsRaw, actualNumEnt);
2897 if (numEnt != static_cast<size_t> (actualNumEnt)) {
2898 std::ostream& err = this->markLocalErrorAndGetStream ();
2899 err <<
"packAndPrepare: Local row " << i <<
" claims to have " <<
2900 numEnt <<
"entry/ies, but the View returned by getLocalRowView() " 2901 "has " << actualNumEnt <<
" entry/ies. This should never happen. " 2902 "Please report this bug to the Tpetra developers." << endl;
2905 if (errCode == Teuchos::OrdinalTraits<LO>::invalid ()) {
2906 std::ostream& err = this->markLocalErrorAndGetStream ();
2907 err <<
"packAndPrepare: Local row " << i <<
" is not in the row Map " 2908 "of the source object on the calling process." << endl;
2912 const ST* valsRawST =
2913 const_cast<const ST*
> (
reinterpret_cast<ST*
> (valsRaw));
2914 View<const ST*, HES, MemoryUnmanaged> vals (valsRawST, numScalarEnt);
2919 numBytesPerValue = PackTraits<ST, HES>::packValueCount (vals(0));
2922 const size_t numBytes =
2923 packRowCount<LO, GO, HES> (numEnt, numBytesPerValue, blockSize);
2924 numPacketsPerLID[i] = numBytes;
2925 totalNumBytes += numBytes;
2926 totalNumEntries += numEnt;
2927 maxRowLength = std::max (maxRowLength, numEnt);
2931 const int myRank = graph_.
getComm ()->getRank ();
2932 std::ostringstream os;
2933 os <<
"Proc " << myRank <<
": packAndPrepare: totalNumBytes = " 2934 << totalNumBytes << endl;
2935 std::cerr << os.str ();
2941 exports.resize (totalNumBytes);
2942 if (totalNumEntries > 0) {
2943 View<char*, HES, MemoryUnmanaged> exportsK (exports.getRawPtr (),
2958 View<GO*, HES> gblColInds;
2961 gblColInds = PackTraits<GO, HES>::allocateArray (gid, maxRowLength,
"gids");
2965 for (size_type i = 0; i < numExportLIDs; ++i) {
2966 const LO lclRowInd = exportLIDs[i];
2967 const LO* lclColIndsRaw;
2973 (void) src->getLocalRowView (lclRowInd, lclColIndsRaw, valsRaw, numEntLO);
2974 const size_t numEnt =
static_cast<size_t> (numEntLO);
2975 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2976 View<const LO*, HES, MemoryUnmanaged> lclColInds (lclColIndsRaw, numEnt);
2977 const ST* valsRawST =
const_cast<const ST*
> (
reinterpret_cast<ST*
> (valsRaw));
2978 View<const ST*, HES, MemoryUnmanaged> vals (valsRawST, numScalarEnt);
2983 const size_t numBytesPerValue = numEnt == 0 ?
2984 static_cast<size_t> (0) :
2985 PackTraits<ST, HES>::packValueCount (vals(0));
2988 for (
size_t j = 0; j < numEnt; ++j) {
2993 const size_t numBytes =
2994 packRowForBlockCrs<ST, LO, GO, HES> (exportsK, offset, numEnt, gblColInds,
2995 vals, numBytesPerValue, blockSize);
3000 if (offset != totalNumBytes) {
3001 std::ostream& err = this->markLocalErrorAndGetStream ();
3002 err <<
"packAndPreapre: At end of method, the final offset (in bytes) " 3003 << offset <<
" does not equal the total number of bytes packed " 3004 << totalNumBytes <<
". " 3005 <<
"Please report this bug to the Tpetra developers." << endl;
3011 std::ostringstream os;
3012 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
3013 const bool lclSuccess = ! (* (this->localError_));
3014 os <<
"*** Proc " << myRank <<
": packAndPrepare " 3015 << (lclSuccess ?
"succeeded" :
"FAILED")
3016 <<
" (totalNumEntries = " << totalNumEntries <<
") ***" << endl;
3017 std::cerr << os.str ();
3022 template<
class Scalar,
class LO,
class GO,
class Node>
3026 const Teuchos::ArrayView<const packet_type>& imports,
3027 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
3033 using ::Tpetra::Details::PackTraits;
3034 using Kokkos::MemoryUnmanaged;
3035 using Kokkos::subview;
3037 typedef typename ::Tpetra::MultiVector<Scalar, LO, GO, Node>::impl_scalar_type ST;
3038 typedef typename Teuchos::ArrayView<const LO>::size_type size_type;
3039 typedef typename View<int*, device_type>::HostMirror::execution_space HES;
3040 typedef std::pair<typename View<int*, HES>::size_type,
3041 typename View<int*, HES>::size_type> pair_type;
3042 typedef View<GO*, HES, MemoryUnmanaged> gids_out_type;
3043 typedef View<LO*, HES, MemoryUnmanaged> lids_out_type;
3044 typedef View<ST*, HES, MemoryUnmanaged> vals_out_type;
3045 typedef typename PackTraits<GO, HES>::input_buffer_type input_buffer_type;
3046 const char prefix[] =
"Tpetra::Experimental::BlockCrsMatrix::unpackAndCombine: ";
3047 const bool debug =
false;
3050 std::ostringstream os;
3051 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
3052 os <<
"Proc " << myRank <<
": unpackAndCombine" << endl;
3053 std::cerr << os.str ();
3059 if (* (this->localError_)) {
3060 std::ostream& err = this->markLocalErrorAndGetStream ();
3061 err << prefix <<
"The target object of the Import or Export is " 3062 "already in an error state." << endl;
3065 if (importLIDs.size () != numPacketsPerLID.size ()) {
3066 std::ostream& err = this->markLocalErrorAndGetStream ();
3067 err << prefix <<
"importLIDs.size() = " << importLIDs.size () <<
" != " 3068 "numPacketsPerLID.size() = " << numPacketsPerLID.size () <<
"." << endl;
3072 std::ostream& err = this->markLocalErrorAndGetStream ();
3073 err << prefix <<
"Invalid CombineMode value " << CM <<
". Valid " 3074 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO." 3084 const size_type numImportLIDs = importLIDs.size ();
3085 if (CM ==
ZERO || numImportLIDs == 0) {
3087 std::ostringstream os;
3088 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
3089 os <<
"Proc " << myRank <<
": unpackAndCombine: Nothing to do" << endl;
3090 std::cerr << os.str ();
3096 std::ostringstream os;
3097 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
3098 os <<
"Proc " << myRank <<
": unpackAndCombine: Getting ready" << endl;
3099 std::cerr << os.str ();
3102 input_buffer_type importsK (imports.getRawPtr (), imports.size ());
3105 const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize;
3108 size_t numBytesPerValue;
3115 if (this->val_.h_view.extent (0) != 0) {
3116 const ST& val = this->val_.h_view[0];
3117 numBytesPerValue = PackTraits<ST, HES>::packValueCount (val);
3127 numBytesPerValue = PackTraits<ST, HES>::packValueCount (val);
3134 View<GO*, HES> gblColInds;
3135 View<LO*, HES> lclColInds;
3136 View<ST*, HES> vals;
3150 gblColInds = PackTraits<GO, HES>::allocateArray (gid, maxRowNumEnt,
"gids");
3151 lclColInds = PackTraits<LO, HES>::allocateArray (lid, maxRowNumEnt,
"lids");
3152 vals = PackTraits<ST, HES>::allocateArray (val, maxRowNumScalarEnt,
"vals");
3156 bool errorDuringUnpack =
false;
3157 for (size_type i = 0; i < numImportLIDs; ++i) {
3158 const size_t numBytes = numPacketsPerLID[i];
3159 if (numBytes == 0) {
3162 const size_t numEnt =
3163 unpackRowCount<ST, LO, GO, HES> (importsK, offset, numBytes,
3165 if (numEnt > maxRowNumEnt) {
3166 errorDuringUnpack =
true;
3167 #ifdef HAVE_TPETRA_DEBUG 3168 std::ostream& err = this->markLocalErrorAndGetStream ();
3169 err << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
3170 <<
" > maxRowNumEnt = " << maxRowNumEnt << endl;
3171 #endif // HAVE_TPETRA_DEBUG 3175 const size_t numScalarEnt = numEnt * blockSize * blockSize;
3176 const LO lclRow = importLIDs[i];
3178 gids_out_type gidsOut = subview (gblColInds, pair_type (0, numEnt));
3179 vals_out_type valsOut = subview (vals, pair_type (0, numScalarEnt));
3181 const size_t numBytesOut =
3182 unpackRowForBlockCrs<ST, LO, GO, HES> (gidsOut, valsOut, importsK,
3183 offset, numBytes, numEnt,
3184 numBytesPerValue, blockSize);
3185 if (numBytes != numBytesOut) {
3186 errorDuringUnpack =
true;
3187 #ifdef HAVE_TPETRA_DEBUG 3188 std::ostream& err = this->markLocalErrorAndGetStream ();
3189 err << prefix <<
"At i = " << i <<
", numBytes = " << numBytes
3190 <<
" != numBytesOut = " << numBytesOut <<
".";
3191 #endif // HAVE_TPETRA_DEBUG 3196 lids_out_type lidsOut = subview (lclColInds, pair_type (0, numEnt));
3197 for (
size_t k = 0; k < numEnt; ++k) {
3199 if (lidsOut(k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
3200 errorDuringUnpack =
true;
3201 #ifdef HAVE_TPETRA_DEBUG 3202 std::ostream& err = this->markLocalErrorAndGetStream ();
3203 err << prefix <<
"At i = " << i <<
", GID " << gidsOut(k)
3204 <<
" is not owned by the calling process.";
3205 #endif // HAVE_TPETRA_DEBUG 3212 const LO*
const lidsRaw =
const_cast<const LO*
> (lidsOut.data ());
3213 const Scalar*
const valsRaw =
3214 reinterpret_cast<const Scalar*
> (
const_cast<const ST*
> (valsOut.data ()));
3219 }
else if (CM ==
ABSMAX) {
3223 if (static_cast<LO> (numEnt) != numCombd) {
3224 errorDuringUnpack =
true;
3225 #ifdef HAVE_TPETRA_DEBUG 3226 std::ostream& err = this->markLocalErrorAndGetStream ();
3227 err << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
3228 <<
" != numCombd = " << numCombd <<
".";
3229 #endif // HAVE_TPETRA_DEBUG 3237 if (errorDuringUnpack) {
3238 std::ostream& err = this->markLocalErrorAndGetStream ();
3239 err << prefix <<
"Unpacking failed.";
3240 #ifndef HAVE_TPETRA_DEBUG 3241 err <<
" Please run again with a debug build to get more verbose " 3242 "diagnostic output.";
3243 #endif // ! HAVE_TPETRA_DEBUG 3248 std::ostringstream os;
3249 const int myRank = this->graph_.
getRowMap ()->getComm ()->getRank ();
3250 const bool lclSuccess = ! (* (this->localError_));
3251 os <<
"*** Proc " << myRank <<
": unpackAndCombine " 3252 << (lclSuccess ?
"succeeded" :
"FAILED")
3254 std::cerr << os.str ();
3259 template<
class Scalar,
class LO,
class GO,
class Node>
3263 using Teuchos::TypeNameTraits;
3264 std::ostringstream os;
3265 os <<
"\"Tpetra::BlockCrsMatrix\": { " 3266 <<
"Template parameters: { " 3267 <<
"Scalar: " << TypeNameTraits<Scalar>::name ()
3268 <<
"LO: " << TypeNameTraits<LO>::name ()
3269 <<
"GO: " << TypeNameTraits<GO>::name ()
3270 <<
"Node: " << TypeNameTraits<Node>::name ()
3272 <<
", Label: \"" << this->getObjectLabel () <<
"\"" 3273 <<
", Global dimensions: [" 3274 << graph_.
getDomainMap ()->getGlobalNumElements () <<
", " 3275 << graph_.
getRangeMap ()->getGlobalNumElements () <<
"]" 3282 template<
class Scalar,
class LO,
class GO,
class Node>
3286 const Teuchos::EVerbosityLevel verbLevel)
const 3288 using Teuchos::ArrayRCP;
3289 using Teuchos::CommRequest;
3290 using Teuchos::FancyOStream;
3291 using Teuchos::getFancyOStream;
3292 using Teuchos::ireceive;
3293 using Teuchos::isend;
3294 using Teuchos::outArg;
3295 using Teuchos::TypeNameTraits;
3296 using Teuchos::VERB_DEFAULT;
3297 using Teuchos::VERB_NONE;
3298 using Teuchos::VERB_LOW;
3299 using Teuchos::VERB_MEDIUM;
3301 using Teuchos::VERB_EXTREME;
3303 using Teuchos::wait;
3305 #ifdef HAVE_TPETRA_DEBUG 3306 const char prefix[] =
"Tpetra::Experimental::BlockCrsMatrix::describe: ";
3307 #endif // HAVE_TPETRA_DEBUG 3310 const Teuchos::EVerbosityLevel vl =
3311 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
3313 if (vl == VERB_NONE) {
3318 Teuchos::OSTab tab0 (out);
3320 out <<
"\"Tpetra::BlockCrsMatrix\":" << endl;
3321 Teuchos::OSTab tab1 (out);
3323 out <<
"Template parameters:" << endl;
3325 Teuchos::OSTab tab2 (out);
3326 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
3327 <<
"LO: " << TypeNameTraits<LO>::name () << endl
3328 <<
"GO: " << TypeNameTraits<GO>::name () << endl
3329 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
3331 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl
3332 <<
"Global dimensions: [" 3333 << graph_.
getDomainMap ()->getGlobalNumElements () <<
", " 3334 << graph_.
getRangeMap ()->getGlobalNumElements () <<
"]" << endl;
3337 out <<
"Block size: " << blockSize << endl;
3340 if (vl >= VERB_MEDIUM) {
3341 const Teuchos::Comm<int>& comm = * (graph_.
getMap ()->getComm ());
3342 const int myRank = comm.getRank ();
3344 out <<
"Row Map:" << endl;
3351 out <<
"Column Map: same as row Map" << endl;
3356 out <<
"Column Map:" << endl;
3364 out <<
"Domain Map: same as row Map" << endl;
3369 out <<
"Domain Map: same as column Map" << endl;
3374 out <<
"Domain Map:" << endl;
3382 out <<
"Range Map: same as domain Map" << endl;
3387 out <<
"Range Map: same as row Map" << endl;
3392 out <<
"Range Map: " << endl;
3399 if (vl >= VERB_EXTREME) {
3405 const_cast<this_type*
> (
this)->
template sync<Kokkos::HostSpace> ();
3407 #ifdef HAVE_TPETRA_DEBUG 3408 TEUCHOS_TEST_FOR_EXCEPTION
3409 (this->
template need_sync<Kokkos::HostSpace> (), std::logic_error,
3410 prefix <<
"Right after sync to host, the matrix claims that it needs " 3411 "sync to host. Please report this bug to the Tpetra developers.");
3412 #endif // HAVE_TPETRA_DEBUG 3414 const Teuchos::Comm<int>& comm = * (graph_.
getMap ()->getComm ());
3415 const int myRank = comm.getRank ();
3416 const int numProcs = comm.getSize ();
3419 RCP<std::ostringstream> lclOutStrPtr (
new std::ostringstream ());
3420 RCP<FancyOStream> osPtr = getFancyOStream (lclOutStrPtr);
3421 FancyOStream& os = *osPtr;
3422 os <<
"Process " << myRank <<
":" << endl;
3423 Teuchos::OSTab tab2 (os);
3431 os <<
"Row " << meshGblRow <<
": {";
3433 const LO* lclColInds = NULL;
3434 Scalar* vals = NULL;
3438 for (LO k = 0; k < numInds; ++k) {
3441 os <<
"Col " << gblCol <<
": [";
3442 for (LO i = 0; i < blockSize; ++i) {
3443 for (LO j = 0; j < blockSize; ++j) {
3444 os << vals[blockSize*blockSize*k + i*blockSize + j];
3445 if (j + 1 < blockSize) {
3449 if (i + 1 < blockSize) {
3454 if (k + 1 < numInds) {
3464 out << lclOutStrPtr->str ();
3465 lclOutStrPtr = Teuchos::null;
3468 const int sizeTag = 1337;
3469 const int dataTag = 1338;
3471 ArrayRCP<char> recvDataBuf;
3475 for (
int p = 1; p < numProcs; ++p) {
3478 ArrayRCP<size_t> recvSize (1);
3480 RCP<CommRequest<int> > recvSizeReq =
3481 ireceive<int, size_t> (recvSize, p, sizeTag, comm);
3482 wait<int> (comm, outArg (recvSizeReq));
3483 const size_t numCharsToRecv = recvSize[0];
3490 if (static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) {
3491 recvDataBuf.resize (numCharsToRecv + 1);
3493 ArrayRCP<char> recvData = recvDataBuf.persistingView (0, numCharsToRecv);
3495 RCP<CommRequest<int> > recvDataReq =
3496 ireceive<int, char> (recvData, p, dataTag, comm);
3497 wait<int> (comm, outArg (recvDataReq));
3502 recvDataBuf[numCharsToRecv] =
'\0';
3503 out << recvDataBuf.getRawPtr ();
3505 else if (myRank == p) {
3509 const std::string stringToSend = lclOutStrPtr->str ();
3510 lclOutStrPtr = Teuchos::null;
3513 const size_t numCharsToSend = stringToSend.size ();
3514 ArrayRCP<size_t> sendSize (1);
3515 sendSize[0] = numCharsToSend;
3516 RCP<CommRequest<int> > sendSizeReq =
3517 isend<int, size_t> (sendSize, 0, sizeTag, comm);
3518 wait<int> (comm, outArg (sendSizeReq));
3526 ArrayRCP<const char> sendData (&stringToSend[0], 0, numCharsToSend,
false);
3527 RCP<CommRequest<int> > sendDataReq =
3528 isend<int, char> (sendData, 0, dataTag, comm);
3529 wait<int> (comm, outArg (sendDataReq));
3535 template<
class Scalar,
class LO,
class GO,
class Node>
3536 Teuchos::RCP<const Teuchos::Comm<int> >
3543 template<
class Scalar,
class LO,
class GO,
class Node>
3552 template<
class Scalar,
class LO,
class GO,
class Node>
3560 template<
class Scalar,
class LO,
class GO,
class Node>
3568 template<
class Scalar,
class LO,
class GO,
class Node>
3576 template<
class Scalar,
class LO,
class GO,
class Node>
3584 template<
class Scalar,
class LO,
class GO,
class Node>
3592 template<
class Scalar,
class LO,
class GO,
class Node>
3600 template<
class Scalar,
class LO,
class GO,
class Node>
3606 return dynamic_cast<const HDM&
> (this->graph_).getGlobalNumDiagsImpl ();
3609 template<
class Scalar,
class LO,
class GO,
class Node>
3615 return dynamic_cast<const HDM&
> (this->graph_).getNodeNumDiagsImpl ();
3618 template<
class Scalar,
class LO,
class GO,
class Node>
3626 template<
class Scalar,
class LO,
class GO,
class Node>
3634 template<
class Scalar,
class LO,
class GO,
class Node>
3640 return dynamic_cast<const HDM&
> (this->graph_).isLowerTriangularImpl ();
3643 template<
class Scalar,
class LO,
class GO,
class Node>
3649 return dynamic_cast<const HDM&
> (this->graph_).isUpperTriangularImpl ();
3652 template<
class Scalar,
class LO,
class GO,
class Node>
3660 template<
class Scalar,
class LO,
class GO,
class Node>
3668 template<
class Scalar,
class LO,
class GO,
class Node>
3676 template<
class Scalar,
class LO,
class GO,
class Node>
3685 template<
class Scalar,
class LO,
class GO,
class Node>
3689 const Teuchos::ArrayView<GO> &Indices,
3690 const Teuchos::ArrayView<Scalar> &Values,
3691 size_t &NumEntries)
const 3693 TEUCHOS_TEST_FOR_EXCEPTION(
3694 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getGlobalRowCopy: " 3695 "This class doesn't support global matrix indexing.");
3699 template<
class Scalar,
class LO,
class GO,
class Node>
3703 Teuchos::ArrayView<const GO> &indices,
3704 Teuchos::ArrayView<const Scalar> &values)
const 3706 TEUCHOS_TEST_FOR_EXCEPTION(
3707 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getGlobalRowView: " 3708 "This class doesn't support global matrix indexing.");
3712 template<
class Scalar,
class LO,
class GO,
class Node>
3716 Teuchos::ArrayView<const LO>& indices,
3717 Teuchos::ArrayView<const Scalar>& values)
const 3719 TEUCHOS_TEST_FOR_EXCEPTION(
3720 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getLocalRowView: " 3721 "This class doesn't support local matrix indexing.");
3724 template<
class Scalar,
class LO,
class GO,
class Node>
3729 #ifdef HAVE_TPETRA_DEBUG 3730 const char prefix[] =
3731 "Tpetra::Experimental::BlockCrsMatrix::getLocalDiagCopy: ";
3732 #endif // HAVE_TPETRA_DEBUG 3736 Kokkos::View<size_t*, device_type> diagOffsets (
"diagOffsets", lclNumMeshRows);
3740 auto diagOffsetsHost = Kokkos::create_mirror_view (diagOffsets);
3743 diag.template modify<typename decltype (diagOffsetsHost)::memory_space> ();
3745 #ifdef HAVE_TPETRA_DEBUG 3746 TEUCHOS_TEST_FOR_EXCEPTION
3747 (this->
template need_sync<Kokkos::HostSpace> (), std::runtime_error,
3748 prefix <<
"The matrix's data were last modified on device, but have " 3749 "not been sync'd to host. Please sync to host (by calling " 3750 "sync<Kokkos::HostSpace>() on this matrix) before calling this " 3752 #endif // HAVE_TPETRA_DEBUG 3758 auto vals_host_out =
3759 const_cast<this_type*
> (
this)->
template getValues<Kokkos::HostSpace> ();
3760 Scalar* vals_host_out_raw =
3761 reinterpret_cast<Scalar*
> (vals_host_out.data ());
3764 size_t rowOffset = 0;
3770 offset = rowOffset + diagOffsetsHost(r)*bs*bs;
3771 for(
int b=0; b<bs; b++)
3779 diag.template sync<memory_space> ();
3782 template<
class Scalar,
class LO,
class GO,
class Node>
3787 TEUCHOS_TEST_FOR_EXCEPTION(
3788 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::leftScale: " 3789 "not implemented.");
3793 template<
class Scalar,
class LO,
class GO,
class Node>
3798 TEUCHOS_TEST_FOR_EXCEPTION(
3799 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::rightScale: " 3800 "not implemented.");
3804 template<
class Scalar,
class LO,
class GO,
class Node>
3805 Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
3812 template<
class Scalar,
class LO,
class GO,
class Node>
3813 typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
3817 TEUCHOS_TEST_FOR_EXCEPTION(
3818 true, std::logic_error,
"Tpetra::Experimental::BlockCrsMatrix::getFrobeniusNorm: " 3819 "not implemented.");
3831 #define TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \ 3832 namespace Experimental { \ 3833 namespace Classes { \ 3834 template class BlockCrsMatrix< S, LO, GO, NODE >; \ 3838 #endif // TPETRA_EXPERIMENTAL_BLOCKCRSMATRIX_DEF_HPP Base class for distributed Tpetra objects that support data redistribution.
virtual void rightScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x)
Scale the RowMatrix on the right with the given Vector x.
virtual size_t getNumEntriesInGlobalRow(GO globalRow) const
The current number of entries on the calling process in the specified global row. ...
global_size_t getGlobalNumCols() const override
Returns the number of global columns in the graph.
size_t getNodeMaxNumRowEntries() const
Maximum number of entries in any row of the matrix, on this process.
mv_type getMultiVectorView() const
Get a Tpetra::MultiVector that views this BlockMultiVector's data.
void setAllToScalar(const Scalar &alpha)
Set all matrix entries equal to alpha.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
virtual size_t TPETRA_DEPRECATED getNodeNumDiags() const
Number of diagonal entries in the matrix's graph, on the calling process.
LO getNumVectors() const
Get the number of columns (vectors) in the BlockMultiVector.
virtual Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which this matrix is distributed.
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, CombineMode CM)
Export data into this object using an Export object ("forward mode").
KOKKOS_INLINE_FUNCTION void AXPY(const CoefficientType &alpha, const ViewType1 &x, const ViewType2 &y)
y := y + alpha * x (dense vector or matrix update)
LocalOrdinal getMinLocalIndex() const
The minimum local index.
KOKKOS_INLINE_FUNCTION void GEMV(const CoeffType &alpha, const BlkType &A, const VecType1 &x, const VecType2 &y)
y := y + alpha * A * x (dense matrix-vector multiply)
Teuchos::RCP< const map_type > getColMap() const
get the (mesh) map for the columns of this block matrix.
LO absMaxLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValuesByOffsets, but for the ABSMAX combine mode.
KOKKOS_INLINE_FUNCTION void SCAL(const CoefficientType &alpha, const ViewType &x)
x := alpha*x, where x is either rank 1 (a vector) or rank 2 (a matrix).
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the matrix.
void replaceLocalValue(const LocalOrdinal myRow, const Scalar &value) const
Replace current value at the specified location with specified values.
device_type::execution_space execution_space
The Kokkos execution space that this class uses.
virtual bool supportsRowViews() const
Whether this object implements getLocalRowView() and getGlobalRowView().
size_t getNodeNumRows() const override
Returns the number of graph rows owned on the calling node.
local_graph_type getLocalGraph() const
Get the local graph.
A distributed dense vector.
void getLocalDiagCopy(const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &diag, const Kokkos::View< const size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Variant of getLocalDiagCopy() that uses precomputed offsets and puts diagonal blocks in a 3-D Kokkos:...
void putScalar(const Scalar &val)
Fill all entries with the given value val.
void reorderedGaussSeidelCopy(::Tpetra::MultiVector< Scalar, LO, GO, Node > &X, const ::Tpetra::MultiVector< Scalar, LO, GO, Node > &B, const ::Tpetra::MultiVector< Scalar, LO, GO, Node > &D, const Teuchos::ArrayView< LO > &rowIndices, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of reorderedGaussSeidel(), with fewer requirements on X.
Teuchos::RCP< const map_type > getRangeMap() const override
Returns the Map associated with the domain of this graph.
LO sumIntoLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like sumIntoLocalValues, but avoids computing row offsets.
Teuchos::RCP< const map_type > getDomainMap() const override
Returns the Map associated with the domain of this graph.
LocalOrdinal getMaxLocalIndex() const
The maximum local index on the calling process.
LO sumIntoLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Sum into values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
LO getLocalRowOffsets(const LO localRowInd, ptrdiff_t offsets[], const LO colInds[], const LO numColInds) const
Get relative offsets corresponding to the given rows, given by local row index.
std::string description() const
One-line description of this object.
One or more distributed dense vectors.
void localGaussSeidel(const BlockMultiVector< Scalar, LO, GO, Node > &Residual, BlockMultiVector< Scalar, LO, GO, Node > &Solution, const Kokkos::View< impl_scalar_type ***, device_type, Kokkos::MemoryUnmanaged > &D_inv, const Scalar &omega, const ESweepDirection direction) const
Local Gauss-Seidel solve, given a factorized diagonal.
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
virtual global_size_t getGlobalNumCols() const
The global number of columns of this matrix.
size_t getNodeNumEntries() const override
The local number of entries in the graph.
virtual bool isGloballyIndexed() const
Whether matrix indices are globally indexed.
MultiVector for multiple degrees of freedom per mesh point.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
size_t getNodeNumRows() const
get the local number of block rows
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
Sparse matrix whose entries are small dense square blocks, all of the same dimensions.
KOKKOS_INLINE_FUNCTION void FILL(const ViewType &x, const InputType &val)
Set every entry of x to val.
virtual bool isLocallyIndexed() const
Whether matrix indices are locally indexed.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
size_t getNumEntriesInGlobalRow(GlobalOrdinal globalRow) const override
Returns the current number of entries on this node in the specified global row.
std::string errorMessages() const
The current stream of error messages.
int local_ordinal_type
Default value of Scalar template parameter.
virtual bool TPETRA_DEPRECATED isUpperTriangular() const
Whether the matrix's graph is locally upper triangular.
virtual void leftScale(const ::Tpetra::Vector< Scalar, LO, GO, Node > &x)
Scale the RowMatrix on the left with the given Vector x.
little_vec_type::HostMirror getLocalBlock(const LO localRowIndex, const LO colIndex) const
Get a host view of the degrees of freedom at the given mesh point.
Teuchos::RCP< const map_type > getRangeMap() const
Get the (point) range Map of this matrix.
bool hasColMap() const override
Whether the graph has a column Map.
global_size_t getGlobalNumRows() const
get the global number of block rows
size_t global_size_t
Global size_t object.
size_t getGlobalMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, over all processes in the graph's communicator...
GlobalOrdinal getIndexBase() const override
Returns the index base for global indices for this graph.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
LO getBlockSize() const
Get the number of degrees of freedom per mesh point.
virtual size_t getNodeNumEntries() const
The local number of stored (structurally nonzero) entries.
virtual bool hasColMap() const
Whether this matrix has a well-defined column map.
size_t getNodeNumCols() const override
Returns the number of columns connected to the locally owned rows of this graph.
LO replaceLocalValuesByOffsets(const LO localRowInd, const ptrdiff_t offsets[], const Scalar vals[], const LO numOffsets) const
Like replaceLocalValues, but avoids computing row offsets.
Insert new values that don't currently exist.
KOKKOS_INLINE_FUNCTION void COPY(const ViewType1 &x, const ViewType2 &y)
Deep copy x into y, where x and y are either rank 1 (vectors) or rank 2 (matrices) with the same dime...
ESweepDirection
Sweep direction for Gauss-Seidel or Successive Over-Relaxation (SOR).
Kokkos::View< impl_scalar_type *, Kokkos::LayoutRight, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_vec_type
The type used to access nonconst vector blocks.
global_size_t getGlobalNumEntries() const override
Returns the global number of entries in the graph.
bool isSorted() const
Whether graph indices in all rows are known to be sorted.
static map_type makePointMap(const map_type &meshMap, const LO blockSize)
Create and return the point Map corresponding to the given mesh Map and block size.
void getLocalDiagOffsets(const Kokkos::View< size_t *, device_type, Kokkos::MemoryUnmanaged > &offsets) const
Get offsets of the diagonal entries in the graph.
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
virtual global_size_t TPETRA_DEPRECATED getGlobalNumDiags() const
Number of diagonal entries in the matrix's graph, over all processes in the matrix's communicator...
Sum new values into existing values.
bool locallySameAs(const Map< LocalOrdinal, GlobalOrdinal, node_type > &map) const
Is this Map locally the same as the input Map?
bool isGloballyIndexed() const override
If graph indices are in the global range, this function returns true. Otherwise, this function return...
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
void getLocalRowCopy(LO LocalRow, const Teuchos::ArrayView< LO > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Not implemented.
virtual bool isFillComplete() const
Whether fillComplete() has been called.
Replace old value with maximum of magnitudes of old and new values.
virtual size_t getGlobalMaxNumRowEntries() const
The maximum number of entries across all rows/columns on all nodes.
virtual Teuchos::RCP< const ::Tpetra::RowGraph< LO, GO, Node > > getGraph() const
Get the (mesh) graph.
Replace existing values with new values.
Replace old values with zero.
Teuchos::RCP< const map_type > getRowMap() const
get the (mesh) map for the rows of this block matrix.
LO getLocalRowView(const LO localRowInd, const LO *&colInds, Scalar *&vals, LO &numInds) const
Get a view of the (mesh, i.e., block) row, using local (mesh, i.e., block) indices.
virtual bool TPETRA_DEPRECATED isLowerTriangular() const
Whether the matrix's graph is locally lower triangular.
virtual void getGlobalRowView(GO GlobalRow, Teuchos::ArrayView< const GO > &indices, Teuchos::ArrayView< const Scalar > &values) const
Get a constant, nonpersisting, globally indexed view of the given row of the matrix.
Node::device_type device_type
The Kokkos::Device specialization that this class uses.
void apply(const mv_type &X, mv_type &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), Scalar beta=Teuchos::ScalarTraits< Scalar >::zero()) const
For this matrix A, compute Y := beta * Y + alpha * Op(A) * X.
LO getBlockSize() const
The number of degrees of freedom per mesh point.
Teuchos::RCP< node_type > getNode() const override
Returns the underlying node.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
size_t getNumEntriesInLocalRow(LocalOrdinal localRow) const override
Get the number of entries in the given row (local index).
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
virtual GO getIndexBase() const
The index base for global indices in this matrix.
virtual global_size_t getGlobalNumEntries() const
The global number of stored (structurally nonzero) entries.
LO replaceLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Replace values at the given (mesh, i.e., block) column indices, in the given (mesh, i.e., block) row.
bool isLocallyIndexed() const override
If graph indices are in the local range, this function returns true. Otherwise, this function returns...
BlockCrsMatrix()
Default constructor: Makes an empty block matrix.
bool isFillComplete() const override
Returns true if fillComplete() has been called and the graph is in compute mode.
global_size_t getGlobalNumRows() const override
Returns the number of global rows in the graph.
device_type::memory_space memory_space
The Kokkos memory space that this class uses.
Teuchos::RCP< const map_type > getDomainMap() const
Get the (point) domain Map of this matrix.
A parallel distribution of indices over processes.
virtual typename ::Tpetra::RowMatrix< Scalar, LO, GO, Node >::mag_type getFrobeniusNorm() const
The Frobenius norm of the matrix.
size_t getNumEntriesInLocalRow(const LO localRowInd) const
Return the number of entries in the given row on the calling process.
virtual Teuchos::RCP< Node > getNode() const
The Kokkos Node instance.
Teuchos::RCP< const export_type > getExporter() const override
Returns the exporter associated with this graph.
Teuchos::RCP< const import_type > getImporter() const override
Returns the importer associated with this graph.
void applyBlock(const BlockMultiVector< Scalar, LO, GO, Node > &X, BlockMultiVector< Scalar, LO, GO, Node > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, const Scalar alpha=Teuchos::ScalarTraits< Scalar >::one(), const Scalar beta=Teuchos::ScalarTraits< Scalar >::zero())
Version of apply() that takes BlockMultiVector input and output.
size_t getNodeMaxNumRowEntries() const override
Maximum number of entries in any row of the graph, on this process.
Declaration of Tpetra::Experimental::BlockCrsMatrix.
Teuchos::RCP< const map_type > getRowMap() const override
Returns the Map that describes the row distribution in this graph.
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel) const
Print a description of this object to the given output stream.
virtual size_t getNodeNumCols() const
The number of columns needed to apply the forward operator on this node.
Mix-in to avoid spurious deprecation warnings due to #2630.
void gaussSeidelCopy(MultiVector< Scalar, LO, GO, Node > &X, const ::Tpetra::MultiVector< Scalar, LO, GO, Node > &B, const ::Tpetra::MultiVector< Scalar, LO, GO, Node > &D, const Scalar &dampingFactor, const ESweepDirection direction, const int numSweeps, const bool zeroInitialGuess) const
Version of gaussSeidel(), with fewer requirements on X.
Kokkos::View< impl_scalar_type **, Kokkos::LayoutRight, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > little_block_type
The type used to access nonconst matrix blocks.
void scale(const Scalar &val)
Multiply all entries in place by the given value val.
virtual void getGlobalRowCopy(GO GlobalRow, const Teuchos::ArrayView< GO > &Indices, const Teuchos::ArrayView< Scalar > &Values, size_t &NumEntries) const
Get a copy of the given global row's entries.
LO absMaxLocalValues(const LO localRowInd, const LO colInds[], const Scalar vals[], const LO numColInds) const
Like sumIntoLocalValues, but for the ABSMAX combine mode.
Kokkos::View< const impl_scalar_type **, Kokkos::LayoutRight, device_type, Kokkos::MemoryTraits< Kokkos::Unmanaged > > const_little_block_type
The type used to access const matrix blocks.
typename BMV::impl_scalar_type impl_scalar_type
The implementation type of entries in the matrix.