42 #ifndef TPETRA_BLOCKCRSMATRIX_DEF_HPP
43 #define TPETRA_BLOCKCRSMATRIX_DEF_HPP
51 #include "Tpetra_BlockMultiVector.hpp"
54 #include "Teuchos_TimeMonitor.hpp"
55 #ifdef HAVE_TPETRA_DEBUG
57 #endif // HAVE_TPETRA_DEBUG
71 #if defined(__CUDACC__)
73 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
74 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA
75 # endif // defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
77 #elif defined(__GNUC__)
79 # if defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
80 # undef TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA
81 # endif // defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
83 #else // some other compiler
86 # if ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
87 # define TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA 1
88 # endif // ! defined(TPETRA_BLOCKCRSMATRIX_APPLY_USE_LAMBDA)
89 #endif // defined(__CUDACC__), defined(__GNUC__)
97 struct BlockCrsRowStruct {
98 T totalNumEntries, totalNumBytes, maxRowLength;
99 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct() =
default;
100 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const BlockCrsRowStruct &b) =
default;
101 KOKKOS_INLINE_FUNCTION BlockCrsRowStruct(
const T& numEnt,
const T& numBytes,
const T& rowLength)
102 : totalNumEntries(numEnt), totalNumBytes(numBytes), maxRowLength(rowLength) {}
107 KOKKOS_INLINE_FUNCTION
108 void operator+=(
volatile BlockCrsRowStruct<T> &a,
109 volatile const BlockCrsRowStruct<T> &b) {
110 a.totalNumEntries += b.totalNumEntries;
111 a.totalNumBytes += b.totalNumBytes;
112 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
117 KOKKOS_INLINE_FUNCTION
118 void operator+=(BlockCrsRowStruct<T> &a,
const BlockCrsRowStruct<T> &b) {
119 a.totalNumEntries += b.totalNumEntries;
120 a.totalNumBytes += b.totalNumBytes;
121 a.maxRowLength = a.maxRowLength > b.maxRowLength ? a.maxRowLength : b.maxRowLength;
124 template<
typename T,
typename ExecSpace>
125 struct BlockCrsReducer {
126 typedef BlockCrsReducer reducer;
127 typedef T value_type;
128 typedef Kokkos::View<value_type,ExecSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged> > result_view_type;
131 KOKKOS_INLINE_FUNCTION
132 BlockCrsReducer(value_type &val) : value(&val) {}
134 KOKKOS_INLINE_FUNCTION
void join(value_type &dst, value_type &src)
const { dst += src; }
135 KOKKOS_INLINE_FUNCTION
void join(
volatile value_type &dst,
const volatile value_type &src)
const { dst += src; }
136 KOKKOS_INLINE_FUNCTION
void init(value_type &val)
const { val = value_type(); }
137 KOKKOS_INLINE_FUNCTION value_type& reference() {
return *value; }
138 KOKKOS_INLINE_FUNCTION result_view_type view()
const {
return result_view_type(value); }
141 template<
class AlphaCoeffType,
143 class MatrixValuesType,
148 class BcrsApplyNoTransFunctor {
150 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
151 "MatrixValuesType must be a Kokkos::View.");
152 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
153 "OutVecType must be a Kokkos::View.");
154 static_assert (Kokkos::Impl::is_view<InVecType>::value,
155 "InVecType must be a Kokkos::View.");
156 static_assert (std::is_same<MatrixValuesType,
157 typename MatrixValuesType::const_type>::value,
158 "MatrixValuesType must be a const Kokkos::View.");
159 static_assert (std::is_same<OutVecType,
160 typename OutVecType::non_const_type>::value,
161 "OutVecType must be a nonconst Kokkos::View.");
162 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
163 "InVecType must be a const Kokkos::View.");
164 static_assert (
static_cast<int> (MatrixValuesType::rank) == 1,
165 "MatrixValuesType must be a rank-1 Kokkos::View.");
166 static_assert (
static_cast<int> (InVecType::rank) == 1,
167 "InVecType must be a rank-1 Kokkos::View.");
168 static_assert (
static_cast<int> (OutVecType::rank) == 1,
169 "OutVecType must be a rank-1 Kokkos::View.");
170 typedef typename MatrixValuesType::non_const_value_type scalar_type;
173 typedef typename GraphType::device_type device_type;
176 typedef typename std::remove_const<typename GraphType::data_type>::type
185 void setX (
const InVecType& X) { X_ = X; }
193 void setY (
const OutVecType& Y) { Y_ = Y; }
195 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_coeff_type;
196 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_coeff_type;
199 BcrsApplyNoTransFunctor (
const alpha_coeff_type& alpha,
200 const GraphType& graph,
201 const MatrixValuesType& val,
202 const local_ordinal_type blockSize,
204 const beta_coeff_type& beta,
205 const OutVecType& Y) :
207 ptr_ (graph.row_map),
208 ind_ (graph.entries),
210 blockSize_ (blockSize),
217 KOKKOS_INLINE_FUNCTION
void
218 operator () (
const typename Kokkos::TeamPolicy<typename device_type::execution_space>::member_type & member)
const
220 Kokkos::abort(
"Tpetra::BcrsApplyNoTransFunctor:: this should not be called");
224 KOKKOS_INLINE_FUNCTION
void
225 operator () (
const local_ordinal_type& lclRow)
const
231 using Kokkos::Details::ArithTraits;
237 using Kokkos::parallel_for;
238 using Kokkos::subview;
239 typedef typename decltype (ptr_)::non_const_value_type offset_type;
240 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
243 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
246 const offset_type Y_ptBeg = lclRow * blockSize_;
247 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
248 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
252 if (beta_ == ArithTraits<beta_coeff_type>::zero ()) {
253 FILL (Y_cur, ArithTraits<beta_coeff_type>::zero ());
255 else if (beta_ != ArithTraits<beta_coeff_type>::one ()) {
259 if (alpha_ != ArithTraits<alpha_coeff_type>::zero ()) {
260 const offset_type blkBeg = ptr_[lclRow];
261 const offset_type blkEnd = ptr_[lclRow+1];
263 const offset_type bs2 = blockSize_ * blockSize_;
264 for (offset_type absBlkOff = blkBeg; absBlkOff < blkEnd;
266 little_block_type A_cur (val_.data () + absBlkOff * bs2,
267 blockSize_, blockSize_);
268 const offset_type X_blkCol = ind_[absBlkOff];
269 const offset_type X_ptBeg = X_blkCol * blockSize_;
270 const offset_type X_ptEnd = X_ptBeg + blockSize_;
271 auto X_cur = subview (X_, ::Kokkos::make_pair (X_ptBeg, X_ptEnd));
273 GEMV (alpha_, A_cur, X_cur, Y_cur);
279 alpha_coeff_type alpha_;
280 typename GraphType::row_map_type::const_type ptr_;
281 typename GraphType::entries_type::const_type ind_;
282 MatrixValuesType val_;
285 beta_coeff_type beta_;
289 template<
class AlphaCoeffType,
291 class MatrixValuesType,
295 class BcrsApplyNoTransFunctor<AlphaCoeffType,
303 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
304 "MatrixValuesType must be a Kokkos::View.");
305 static_assert (Kokkos::Impl::is_view<OutVecType>::value,
306 "OutVecType must be a Kokkos::View.");
307 static_assert (Kokkos::Impl::is_view<InVecType>::value,
308 "InVecType must be a Kokkos::View.");
309 static_assert (std::is_same<MatrixValuesType,
310 typename MatrixValuesType::const_type>::value,
311 "MatrixValuesType must be a const Kokkos::View.");
312 static_assert (std::is_same<OutVecType,
313 typename OutVecType::non_const_type>::value,
314 "OutVecType must be a nonconst Kokkos::View.");
315 static_assert (std::is_same<InVecType, typename InVecType::const_type>::value,
316 "InVecType must be a const Kokkos::View.");
317 static_assert (
static_cast<int> (MatrixValuesType::rank) == 1,
318 "MatrixValuesType must be a rank-1 Kokkos::View.");
319 static_assert (
static_cast<int> (InVecType::rank) == 1,
320 "InVecType must be a rank-1 Kokkos::View.");
321 static_assert (
static_cast<int> (OutVecType::rank) == 1,
322 "OutVecType must be a rank-1 Kokkos::View.");
323 typedef typename MatrixValuesType::non_const_value_type scalar_type;
326 typedef typename GraphType::device_type device_type;
329 typedef typename std::remove_const<typename GraphType::data_type>::type
338 void setX (
const InVecType& X) { X_ = X; }
346 void setY (
const OutVecType& Y) { Y_ = Y; }
348 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_coeff_type;
349 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_coeff_type;
352 BcrsApplyNoTransFunctor (
const alpha_coeff_type& alpha,
353 const GraphType& graph,
354 const MatrixValuesType& val,
355 const local_ordinal_type blockSize,
357 const beta_coeff_type& beta,
358 const OutVecType& Y) :
360 ptr_ (graph.row_map),
361 ind_ (graph.entries),
363 blockSize_ (blockSize),
370 KOKKOS_INLINE_FUNCTION
void
371 operator () (
const local_ordinal_type& lclRow)
const
373 Kokkos::abort(
"Tpetra::BcrsApplyNoTransFunctor:: this should not be called");
377 KOKKOS_INLINE_FUNCTION
void
378 operator () (
const typename Kokkos::TeamPolicy<typename device_type::execution_space>::member_type & member)
const
382 using Kokkos::Details::ArithTraits;
388 using Kokkos::parallel_for;
389 using Kokkos::subview;
390 typedef typename decltype (ptr_)::non_const_value_type offset_type;
391 typedef Kokkos::View<
typename MatrixValuesType::const_value_type**,
394 Kokkos::MemoryTraits<Kokkos::Unmanaged> >
397 const offset_type Y_ptBeg = lclRow * blockSize_;
398 const offset_type Y_ptEnd = Y_ptBeg + blockSize_;
399 auto Y_cur = subview (Y_, ::Kokkos::make_pair (Y_ptBeg, Y_ptEnd));
403 if (beta_ == ArithTraits<beta_coeff_type>::zero ()) {
404 Kokkos::parallel_for(Kokkos::TeamVectorRange(member, blockSize_),
405 [&](
const local_ordinal_type &i) {
406 Y_cur(i) = ArithTraits<beta_coeff_type>::zero ();
409 else if (beta_ != ArithTraits<beta_coeff_type>::one ()) {
410 Kokkos::parallel_for(Kokkos::TeamVectorRange(member, blockSize_),
411 [&](
const local_ordinal_type &i) {
415 member.team_barrier();
417 if (alpha_ != ArithTraits<alpha_coeff_type>::zero ()) {
418 const offset_type blkBeg = ptr_[lclRow];
419 const offset_type blkEnd = ptr_[lclRow+1];
421 const offset_type bs2 = blockSize_ * blockSize_;
422 little_block_type A_cur (val_.data (), blockSize_, blockSize_);
423 auto X_cur = subview (X_, ::Kokkos::make_pair (0, blockSize_));
425 (Kokkos::TeamThreadRange(member, blkBeg, blkEnd),
426 [&](
const local_ordinal_type &absBlkOff) {
427 A_cur.assign_data(val_.data () + absBlkOff * bs2);
428 const offset_type X_blkCol = ind_[absBlkOff];
429 const offset_type X_ptBeg = X_blkCol * blockSize_;
430 X_cur.assign_data(&X_(X_ptBeg));
433 (Kokkos::ThreadVectorRange(member, blockSize_),
434 [&](
const local_ordinal_type &k0) {
436 for (local_ordinal_type k1=0;k1<blockSize_;++k1)
437 val += A_cur(k0,k1)*X_cur(k1);
438 #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
440 Y_cur(k0) += alpha_*val;
446 Kokkos::atomic_add(&Y_cur(k0), alpha_*val);
454 alpha_coeff_type alpha_;
455 typename GraphType::row_map_type::const_type ptr_;
456 typename GraphType::entries_type::const_type ind_;
457 MatrixValuesType val_;
460 beta_coeff_type beta_;
465 template<
class AlphaCoeffType,
467 class MatrixValuesType,
468 class InMultiVecType,
470 class OutMultiVecType>
472 bcrsLocalApplyNoTrans (
const AlphaCoeffType& alpha,
473 const GraphType& graph,
474 const MatrixValuesType& val,
475 const typename std::remove_const<typename GraphType::data_type>::type blockSize,
476 const InMultiVecType& X,
477 const BetaCoeffType& beta,
478 const OutMultiVecType& Y
481 static_assert (Kokkos::Impl::is_view<MatrixValuesType>::value,
482 "MatrixValuesType must be a Kokkos::View.");
483 static_assert (Kokkos::Impl::is_view<OutMultiVecType>::value,
484 "OutMultiVecType must be a Kokkos::View.");
485 static_assert (Kokkos::Impl::is_view<InMultiVecType>::value,
486 "InMultiVecType must be a Kokkos::View.");
487 static_assert (
static_cast<int> (MatrixValuesType::rank) == 1,
488 "MatrixValuesType must be a rank-1 Kokkos::View.");
489 static_assert (
static_cast<int> (OutMultiVecType::rank) == 2,
490 "OutMultiVecType must be a rank-2 Kokkos::View.");
491 static_assert (
static_cast<int> (InMultiVecType::rank) == 2,
492 "InMultiVecType must be a rank-2 Kokkos::View.");
494 typedef typename GraphType::device_type::execution_space execution_space;
495 typedef typename MatrixValuesType::const_type matrix_values_type;
496 typedef typename OutMultiVecType::non_const_type out_multivec_type;
497 typedef typename InMultiVecType::const_type in_multivec_type;
498 typedef typename Kokkos::ArithTraits<typename std::decay<AlphaCoeffType>::type>::val_type alpha_type;
499 typedef typename Kokkos::ArithTraits<typename std::decay<BetaCoeffType>::type>::val_type beta_type;
500 typedef typename std::remove_const<typename GraphType::data_type>::type LO;
502 constexpr
bool is_builtin_type_enabled = std::is_arithmetic<typename InMultiVecType::non_const_value_type>::value;
504 const LO numLocalMeshRows = graph.row_map.extent (0) == 0 ?
505 static_cast<LO
> (0) :
506 static_cast<LO
> (graph.row_map.extent (0) - 1);
507 const LO numVecs = Y.extent (1);
508 if (numLocalMeshRows == 0 || numVecs == 0) {
515 in_multivec_type X_in = X;
516 out_multivec_type Y_out = Y;
521 typedef decltype (Kokkos::subview (X_in, Kokkos::ALL (), 0)) in_vec_type;
522 typedef decltype (Kokkos::subview (Y_out, Kokkos::ALL (), 0)) out_vec_type;
523 typedef BcrsApplyNoTransFunctor<alpha_type, GraphType,
524 matrix_values_type, in_vec_type, beta_type, out_vec_type,
525 is_builtin_type_enabled> functor_type;
527 auto X_0 = Kokkos::subview (X_in, Kokkos::ALL (), 0);
528 auto Y_0 = Kokkos::subview (Y_out, Kokkos::ALL (), 0);
531 if (is_builtin_type_enabled) {
532 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
534 typedef Kokkos::TeamPolicy<execution_space> policy_type;
535 policy_type policy(1,1);
536 #if defined(KOKKOS_ENABLE_CUDA)
537 constexpr
bool is_cuda = std::is_same<execution_space, Kokkos::Cuda>::value;
539 constexpr
bool is_cuda =
false;
540 #endif // defined(KOKKOS_ENABLE_CUDA)
542 LO team_size = 8, vector_size = 1;
543 if (blockSize <= 5) vector_size = 4;
544 else if (blockSize <= 9) vector_size = 8;
545 else if (blockSize <= 12) vector_size = 12;
546 else if (blockSize <= 20) vector_size = 20;
547 else vector_size = 20;
548 policy = policy_type(numLocalMeshRows, team_size, vector_size);
550 policy = policy_type(numLocalMeshRows, 1, 1);
552 Kokkos::parallel_for (policy, functor);
555 for (LO j = 1; j < numVecs; ++j) {
556 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
557 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
560 Kokkos::parallel_for (policy, functor);
563 functor_type functor (alpha, graph, val, blockSize, X_0, beta, Y_0);
564 Kokkos::RangePolicy<execution_space,LO> policy(0, numLocalMeshRows);
565 Kokkos::parallel_for (policy, functor);
566 for (LO j = 1; j < numVecs; ++j) {
567 auto X_j = Kokkos::subview (X_in, Kokkos::ALL (), j);
568 auto Y_j = Kokkos::subview (Y_out, Kokkos::ALL (), j);
571 Kokkos::parallel_for (policy, functor);
581 template<
class Scalar,
class LO,
class GO,
class Node>
582 class GetLocalDiagCopy {
584 typedef typename Node::device_type device_type;
585 typedef size_t diag_offset_type;
586 typedef Kokkos::View<
const size_t*, device_type,
587 Kokkos::MemoryUnmanaged> diag_offsets_type;
588 typedef typename ::Tpetra::CrsGraph<LO, GO, Node> global_graph_type;
590 typedef typename local_graph_type::row_map_type row_offsets_type;
591 typedef typename ::Tpetra::BlockMultiVector<Scalar, LO, GO, Node>::impl_scalar_type IST;
592 typedef Kokkos::View<IST***, device_type, Kokkos::MemoryUnmanaged> diag_type;
593 typedef Kokkos::View<const IST*, device_type, Kokkos::MemoryUnmanaged> values_type;
596 GetLocalDiagCopy (
const diag_type& diag,
597 const values_type& val,
598 const diag_offsets_type& diagOffsets,
599 const row_offsets_type& ptr,
600 const LO blockSize) :
602 diagOffsets_ (diagOffsets),
604 blockSize_ (blockSize),
605 offsetPerBlock_ (blockSize_*blockSize_),
609 KOKKOS_INLINE_FUNCTION
void
610 operator() (
const LO& lclRowInd)
const
615 const size_t absOffset = ptr_[lclRowInd];
618 const size_t relOffset = diagOffsets_[lclRowInd];
621 const size_t pointOffset = (absOffset+relOffset)*offsetPerBlock_;
625 typedef Kokkos::View<
const IST**, Kokkos::LayoutRight,
626 device_type, Kokkos::MemoryTraits<Kokkos::Unmanaged> >
627 const_little_block_type;
628 const_little_block_type D_in (val_.data () + pointOffset,
629 blockSize_, blockSize_);
630 auto D_out = Kokkos::subview (diag_, lclRowInd, ALL (), ALL ());
636 diag_offsets_type diagOffsets_;
637 row_offsets_type ptr_;
644 template<
class Scalar,
class LO,
class GO,
class Node>
646 BlockCrsMatrix<Scalar, LO, GO, Node>::
647 markLocalErrorAndGetStream ()
649 * (this->localError_) =
true;
650 if ((*errs_).is_null ()) {
651 *errs_ = Teuchos::rcp (
new std::ostringstream ());
656 template<
class Scalar,
class LO,
class GO,
class Node>
660 graph_ (Teuchos::rcp (new
map_type ()), 0),
661 blockSize_ (static_cast<LO> (0)),
662 X_colMap_ (new Teuchos::RCP<
BMV> ()),
663 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
664 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
666 localError_ (new bool (false)),
667 errs_ (new Teuchos::RCP<std::ostringstream> ())
671 template<
class Scalar,
class LO,
class GO,
class Node>
674 const LO blockSize) :
677 rowMeshMap_ (* (graph.getRowMap ())),
678 blockSize_ (blockSize),
679 X_colMap_ (new Teuchos::RCP<
BMV> ()),
680 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
681 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
682 offsetPerBlock_ (blockSize * blockSize),
683 localError_ (new bool (false)),
684 errs_ (new Teuchos::RCP<std::ostringstream> ())
686 TEUCHOS_TEST_FOR_EXCEPTION(
687 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
688 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
689 "rows (isSorted() is false). This class assumes sorted rows.");
691 graphRCP_ = Teuchos::rcpFromRef(graph_);
697 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
698 TEUCHOS_TEST_FOR_EXCEPTION(
699 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
700 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
701 " <= 0. The block size must be positive.");
709 const auto colPointMap = Teuchos::rcp
711 *pointImporter_ = Teuchos::rcp
715 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
716 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
719 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
724 typedef typename crs_graph_type::local_graph_type::entries_type entries_type;
725 typedef typename entries_type::HostMirror::non_const_type nc_host_entries_type;
728 nc_host_entries_type ind_h_nc = Kokkos::create_mirror_view (ind_d);
734 val_ = decltype (val_) (
"val", numValEnt);
737 template<
class Scalar,
class LO,
class GO,
class Node>
742 const LO blockSize) :
745 rowMeshMap_ (* (graph.getRowMap ())),
746 domainPointMap_ (domainPointMap),
747 rangePointMap_ (rangePointMap),
748 blockSize_ (blockSize),
749 X_colMap_ (new Teuchos::RCP<
BMV> ()),
750 Y_rowMap_ (new Teuchos::RCP<
BMV> ()),
751 pointImporter_ (new Teuchos::RCP<typename
crs_graph_type::import_type> ()),
752 offsetPerBlock_ (blockSize * blockSize),
753 localError_ (new bool (false)),
754 errs_ (new Teuchos::RCP<std::ostringstream> ())
756 TEUCHOS_TEST_FOR_EXCEPTION(
757 ! graph_.
isSorted (), std::invalid_argument,
"Tpetra::"
758 "BlockCrsMatrix constructor: The input CrsGraph does not have sorted "
759 "rows (isSorted() is false). This class assumes sorted rows.");
761 graphRCP_ = Teuchos::rcpFromRef(graph_);
767 const bool blockSizeIsNonpositive = (blockSize + 1 <= 1);
768 TEUCHOS_TEST_FOR_EXCEPTION(
769 blockSizeIsNonpositive, std::invalid_argument,
"Tpetra::"
770 "BlockCrsMatrix constructor: The input blockSize = " << blockSize <<
771 " <= 0. The block size must be positive.");
775 const auto colPointMap = Teuchos::rcp
777 *pointImporter_ = Teuchos::rcp
781 typedef typename crs_graph_type::local_graph_type::row_map_type row_map_type;
782 typedef typename row_map_type::HostMirror::non_const_type nc_host_row_map_type;
785 nc_host_row_map_type ptr_h_nc = Kokkos::create_mirror_view (ptr_d);
790 typedef typename crs_graph_type::local_graph_type::entries_type entries_type;
791 typedef typename entries_type::HostMirror::non_const_type nc_host_entries_type;
794 nc_host_entries_type ind_h_nc = Kokkos::create_mirror_view (ind_d);
800 val_ = decltype (val_) (
"val", numValEnt);
803 template<
class Scalar,
class LO,
class GO,
class Node>
804 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
809 return Teuchos::rcp (
new map_type (domainPointMap_));
812 template<
class Scalar,
class LO,
class GO,
class Node>
813 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
818 return Teuchos::rcp (
new map_type (rangePointMap_));
821 template<
class Scalar,
class LO,
class GO,
class Node>
822 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
826 return graph_.getRowMap();
829 template<
class Scalar,
class LO,
class GO,
class Node>
830 Teuchos::RCP<const typename BlockCrsMatrix<Scalar, LO, GO, Node>::map_type>
834 return graph_.getColMap();
837 template<
class Scalar,
class LO,
class GO,
class Node>
842 return graph_.getGlobalNumRows();
845 template<
class Scalar,
class LO,
class GO,
class Node>
850 return graph_.getNodeNumRows();
853 template<
class Scalar,
class LO,
class GO,
class Node>
858 return graph_.getNodeMaxNumRowEntries();
861 template<
class Scalar,
class LO,
class GO,
class Node>
866 Teuchos::ETransp mode,
871 TEUCHOS_TEST_FOR_EXCEPTION(
872 mode != Teuchos::NO_TRANS && mode != Teuchos::TRANS && mode != Teuchos::CONJ_TRANS,
873 std::invalid_argument,
"Tpetra::BlockCrsMatrix::apply: "
874 "Invalid 'mode' argument. Valid values are Teuchos::NO_TRANS, "
875 "Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
879 const LO blockSize = getBlockSize ();
881 X_view =
BMV (X, * (graph_.getDomainMap ()), blockSize);
882 Y_view =
BMV (Y, * (graph_.getRangeMap ()), blockSize);
884 catch (std::invalid_argument& e) {
885 TEUCHOS_TEST_FOR_EXCEPTION(
886 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
887 "apply: Either the input MultiVector X or the output MultiVector Y "
888 "cannot be viewed as a BlockMultiVector, given this BlockCrsMatrix's "
889 "graph. BlockMultiVector's constructor threw the following exception: "
898 const_cast<this_type*
> (
this)->applyBlock (X_view, Y_view, mode, alpha, beta);
899 }
catch (std::invalid_argument& e) {
900 TEUCHOS_TEST_FOR_EXCEPTION(
901 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
902 "apply: The implementation method applyBlock complained about having "
903 "an invalid argument. It reported the following: " << e.what ());
904 }
catch (std::logic_error& e) {
905 TEUCHOS_TEST_FOR_EXCEPTION(
906 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
907 "apply: The implementation method applyBlock complained about a "
908 "possible bug in its implementation. It reported the following: "
910 }
catch (std::exception& e) {
911 TEUCHOS_TEST_FOR_EXCEPTION(
912 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
913 "apply: The implementation method applyBlock threw an exception which "
914 "is neither std::invalid_argument nor std::logic_error, but is a "
915 "subclass of std::exception. It reported the following: "
918 TEUCHOS_TEST_FOR_EXCEPTION(
919 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
920 "apply: The implementation method applyBlock threw an exception which "
921 "is not an instance of a subclass of std::exception. This probably "
922 "indicates a bug. Please report this to the Tpetra developers.");
926 template<
class Scalar,
class LO,
class GO,
class Node>
931 Teuchos::ETransp mode,
935 TEUCHOS_TEST_FOR_EXCEPTION(
937 "Tpetra::BlockCrsMatrix::applyBlock: "
938 "X and Y have different block sizes. X.getBlockSize() = "
942 if (mode == Teuchos::NO_TRANS) {
943 applyBlockNoTrans (X, Y, alpha, beta);
944 }
else if (mode == Teuchos::TRANS || mode == Teuchos::CONJ_TRANS) {
945 applyBlockTrans (X, Y, mode, alpha, beta);
947 TEUCHOS_TEST_FOR_EXCEPTION(
948 true, std::invalid_argument,
"Tpetra::BlockCrsMatrix::"
949 "applyBlock: Invalid 'mode' argument. Valid values are "
950 "Teuchos::NO_TRANS, Teuchos::TRANS, and Teuchos::CONJ_TRANS.");
954 template<
class Scalar,
class LO,
class GO,
class Node>
959 #ifdef HAVE_TPETRA_DEBUG
960 const char prefix[] =
"Tpetra::BlockCrsMatrix::setAllToScalar: ";
961 #endif // HAVE_TPETRA_DEBUG
963 if (this->need_sync_device ()) {
967 #ifdef HAVE_TPETRA_DEBUG
968 TEUCHOS_TEST_FOR_EXCEPTION
969 (this->need_sync_host (), std::runtime_error,
970 prefix <<
"The matrix's values need sync on both device and host.");
971 #endif // HAVE_TPETRA_DEBUG
972 this->modify_host ();
980 this->modify_device ();
985 template<
class Scalar,
class LO,
class GO,
class Node>
991 const LO numColInds)
const
993 #ifdef HAVE_TPETRA_DEBUG
994 const char prefix[] =
995 "Tpetra::BlockCrsMatrix::replaceLocalValues: ";
996 #endif // HAVE_TPETRA_DEBUG
998 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1003 return static_cast<LO
> (0);
1007 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1008 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1009 const LO perBlockSize = this->offsetPerBlock ();
1014 #ifdef HAVE_TPETRA_DEBUG
1015 TEUCHOS_TEST_FOR_EXCEPTION
1016 (this->need_sync_host (), std::runtime_error,
1017 prefix <<
"The matrix's data were last modified on device, but have "
1018 "not been sync'd to host. Please sync to host (by calling "
1019 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
1021 #endif // HAVE_TPETRA_DEBUG
1023 auto vals_host_out = getValuesHost ();
1026 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1027 const LO relBlockOffset =
1028 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1029 if (relBlockOffset != LINV) {
1038 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1042 vals_host_out_raw + absBlockOffset * perBlockSize;
1047 for (LO i = 0; i < perBlockSize; ++i) {
1048 A_old[i] = A_new[i];
1050 hint = relBlockOffset + 1;
1057 template <
class Scalar,
class LO,
class GO,
class Node>
1061 Kokkos::MemoryUnmanaged>& offsets)
const
1063 graph_.getLocalDiagOffsets (offsets);
1066 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1067 template <
class Scalar,
class LO,
class GO,
class Node>
1068 void TPETRA_DEPRECATED
1077 const size_t lclNumRows = graph_.getNodeNumRows ();
1078 if (
static_cast<size_t> (offsets.size ()) < lclNumRows) {
1079 offsets.resize (lclNumRows);
1085 typedef typename device_type::memory_space memory_space;
1086 if (std::is_same<memory_space, Kokkos::HostSpace>::value) {
1090 typedef Kokkos::View<
size_t*, device_type,
1091 Kokkos::MemoryUnmanaged> output_type;
1092 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
1093 graph_.getLocalDiagOffsets (offsetsOut);
1096 Kokkos::View<size_t*, device_type> offsetsTmp (
"diagOffsets", lclNumRows);
1097 graph_.getLocalDiagOffsets (offsetsTmp);
1098 typedef Kokkos::View<
size_t*, Kokkos::HostSpace,
1099 Kokkos::MemoryUnmanaged> output_type;
1100 output_type offsetsOut (offsets.getRawPtr (), lclNumRows);
1104 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1106 template <
class Scalar,
class LO,
class GO,
class Node>
1112 Kokkos::MemoryUnmanaged>& D_inv,
1113 const Scalar& omega,
1118 Kokkos::Details::ArithTraits<impl_scalar_type>::zero ();
1120 Kokkos::Details::ArithTraits<impl_scalar_type>::one ();
1121 const LO numLocalMeshRows =
1122 static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
1129 const LO blockSize = getBlockSize ();
1130 Teuchos::Array<impl_scalar_type> localMem (blockSize);
1131 Teuchos::Array<impl_scalar_type> localMat (blockSize*blockSize);
1135 LO rowBegin = 0, rowEnd = 0, rowStride = 0;
1136 if (direction == Forward) {
1138 rowEnd = numLocalMeshRows+1;
1141 else if (direction == Backward) {
1142 rowBegin = numLocalMeshRows;
1146 else if (direction == Symmetric) {
1147 this->localGaussSeidel (B, X, D_inv, omega, Forward);
1148 this->localGaussSeidel (B, X, D_inv, omega, Backward);
1152 const Scalar one_minus_omega = Teuchos::ScalarTraits<Scalar>::one()-omega;
1153 const Scalar minus_omega = -omega;
1156 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
1157 const LO actlRow = lclRow - 1;
1160 COPY (B_cur, X_lcl);
1163 const size_t meshBeg = ptrHost_[actlRow];
1164 const size_t meshEnd = ptrHost_[actlRow+1];
1165 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1166 const LO meshCol = indHost_[absBlkOff];
1168 getConstLocalBlockFromAbsOffset (absBlkOff);
1172 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
1180 auto D_lcl = Kokkos::subview (D_inv, actlRow, ALL (), ALL ());
1182 FILL (X_update, zero);
1183 GEMV (one, D_lcl, X_lcl, X_update);
1187 for (LO lclRow = rowBegin; lclRow != rowEnd; lclRow += rowStride) {
1188 for (LO j = 0; j < numVecs; ++j) {
1189 LO actlRow = lclRow-1;
1192 COPY (B_cur, X_lcl);
1195 const size_t meshBeg = ptrHost_[actlRow];
1196 const size_t meshEnd = ptrHost_[actlRow+1];
1197 for (
size_t absBlkOff = meshBeg; absBlkOff < meshEnd; ++absBlkOff) {
1198 const LO meshCol = indHost_[absBlkOff];
1200 getConstLocalBlockFromAbsOffset (absBlkOff);
1204 const Scalar alpha = meshCol == actlRow ? one_minus_omega : minus_omega;
1208 auto D_lcl = Kokkos::subview (D_inv, actlRow, ALL (), ALL ());
1210 FILL (X_update, zero);
1211 GEMV (one, D_lcl, X_lcl, X_update);
1217 template <
class Scalar,
class LO,
class GO,
class Node>
1230 TEUCHOS_TEST_FOR_EXCEPTION(
1231 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
1232 "gaussSeidelCopy: Not implemented.");
1235 template <
class Scalar,
class LO,
class GO,
class Node>
1241 const Teuchos::ArrayView<LO>& ,
1249 TEUCHOS_TEST_FOR_EXCEPTION(
1250 true, std::logic_error,
"Tpetra::BlockCrsMatrix::"
1251 "reorderedGaussSeidelCopy: Not implemented.");
1254 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1255 template <
class Scalar,
class LO,
class GO,
class Node>
1256 void TPETRA_DEPRECATED
1259 const Teuchos::ArrayView<const size_t>& offsets)
const
1261 using Teuchos::ArrayRCP;
1262 using Teuchos::ArrayView;
1263 const Scalar
ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1265 const size_t myNumRows = rowMeshMap_.getNodeNumElements();
1266 const LO* columnIndices;
1269 Teuchos::Array<LO> cols(1);
1272 Teuchos::Array<Scalar> zeroMat (blockSize_*blockSize_,
ZERO);
1273 for (
size_t i = 0; i < myNumRows; ++i) {
1275 if (offsets[i] == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1279 getLocalRowView (i, columnIndices, vals, numColumns);
1280 diag.
replaceLocalValues (i, cols.getRawPtr(), &vals[offsets[i]*blockSize_*blockSize_], 1);
1284 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1286 template <
class Scalar,
class LO,
class GO,
class Node>
1290 Kokkos::MemoryUnmanaged>& diag,
1292 Kokkos::MemoryUnmanaged>& offsets)
const
1294 using Kokkos::parallel_for;
1296 const char prefix[] =
"Tpetra::BlockCrsMatrix::getLocalDiagCopy (2-arg): ";
1298 const LO lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
1299 const LO blockSize = this->getBlockSize ();
1300 TEUCHOS_TEST_FOR_EXCEPTION
1301 (
static_cast<LO
> (diag.extent (0)) < lclNumMeshRows ||
1302 static_cast<LO
> (diag.extent (1)) < blockSize ||
1303 static_cast<LO
> (diag.extent (2)) < blockSize,
1304 std::invalid_argument, prefix <<
1305 "The input Kokkos::View is not big enough to hold all the data.");
1306 TEUCHOS_TEST_FOR_EXCEPTION
1307 (
static_cast<LO
> (offsets.size ()) < lclNumMeshRows, std::invalid_argument,
1308 prefix <<
"offsets.size() = " << offsets.size () <<
" < local number of "
1309 "diagonal blocks " << lclNumMeshRows <<
".");
1311 #ifdef HAVE_TPETRA_DEBUG
1312 TEUCHOS_TEST_FOR_EXCEPTION
1313 (this->
template need_sync<device_type> (), std::runtime_error,
1314 prefix <<
"The matrix's data were last modified on host, but have "
1315 "not been sync'd to device. Please sync to device (by calling "
1316 "sync<device_type>() on this matrix) before calling this method.");
1317 #endif // HAVE_TPETRA_DEBUG
1319 typedef Kokkos::RangePolicy<execution_space, LO> policy_type;
1320 typedef GetLocalDiagCopy<Scalar, LO, GO, Node> functor_type;
1328 const_cast<this_type*
> (
this)->
template getValues<device_type> ();
1330 parallel_for (policy_type (0, lclNumMeshRows),
1331 functor_type (diag, vals_dev, offsets,
1332 graph_.getLocalGraph ().row_map, blockSize_));
1336 template <
class Scalar,
class LO,
class GO,
class Node>
1340 Kokkos::MemoryUnmanaged>& diag,
1341 const Teuchos::ArrayView<const size_t>& offsets)
const
1344 using Kokkos::parallel_for;
1346 Kokkos::MemoryUnmanaged>::HostMirror::execution_space host_exec_space;
1348 const LO lclNumMeshRows =
static_cast<LO
> (rowMeshMap_.getNodeNumElements ());
1349 const LO blockSize = this->getBlockSize ();
1350 TEUCHOS_TEST_FOR_EXCEPTION
1351 (
static_cast<LO
> (diag.extent (0)) < lclNumMeshRows ||
1352 static_cast<LO
> (diag.extent (1)) < blockSize ||
1353 static_cast<LO
> (diag.extent (2)) < blockSize,
1354 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: "
1355 "The input Kokkos::View is not big enough to hold all the data.");
1356 TEUCHOS_TEST_FOR_EXCEPTION
1357 (
static_cast<LO
> (offsets.size ()) < lclNumMeshRows,
1358 std::invalid_argument,
"Tpetra::BlockCrsMatrix::getLocalDiagCopy: "
1359 "offsets.size() = " << offsets.size () <<
" < local number of diagonal "
1360 "blocks " << lclNumMeshRows <<
".");
1364 typedef Kokkos::RangePolicy<host_exec_space, LO> policy_type;
1365 parallel_for (policy_type (0, lclNumMeshRows), [=] (
const LO& lclMeshRow) {
1366 auto D_in = this->getConstLocalBlockFromRelOffset (lclMeshRow, offsets[lclMeshRow]);
1367 auto D_out = Kokkos::subview (diag, lclMeshRow, ALL (), ALL ());
1373 template<
class Scalar,
class LO,
class GO,
class Node>
1378 const Scalar vals[],
1379 const LO numColInds)
const
1381 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1386 return static_cast<LO
> (0);
1390 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1391 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1392 const LO perBlockSize = this->offsetPerBlock ();
1397 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1398 const LO relBlockOffset =
1399 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1400 if (relBlockOffset != LINV) {
1401 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1403 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1405 getConstLocalBlockFromInput (vIn, pointOffset);
1408 hint = relBlockOffset + 1;
1416 template<
class Scalar,
class LO,
class GO,
class Node>
1421 const Scalar vals[],
1422 const LO numColInds)
const
1424 #ifdef HAVE_TPETRA_DEBUG
1425 const char prefix[] =
1426 "Tpetra::BlockCrsMatrix::sumIntoLocalValues: ";
1427 #endif // HAVE_TPETRA_DEBUG
1429 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1434 return static_cast<LO
> (0);
1439 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1440 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1441 const LO perBlockSize = this->offsetPerBlock ();
1446 #ifdef HAVE_TPETRA_DEBUG
1447 TEUCHOS_TEST_FOR_EXCEPTION
1448 (this->need_sync_host (), std::runtime_error,
1449 prefix <<
"The matrix's data were last modified on device, but have not "
1450 "been sync'd to host. Please sync to host (by calling "
1451 "sync<Kokkos::HostSpace>() on this matrix) before calling this method.");
1452 #endif // HAVE_TPETRA_DEBUG
1454 auto vals_host_out =
1458 for (LO k = 0; k < numColInds; ++k, pointOffset += perBlockSize) {
1459 const LO relBlockOffset =
1460 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1461 if (relBlockOffset != LINV) {
1470 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1474 vals_host_out_raw + absBlockOffset * perBlockSize;
1479 for (LO i = 0; i < perBlockSize; ++i) {
1480 A_old[i] += A_new[i];
1482 hint = relBlockOffset + 1;
1489 template<
class Scalar,
class LO,
class GO,
class Node>
1497 #ifdef HAVE_TPETRA_DEBUG
1498 const char prefix[] =
1499 "Tpetra::BlockCrsMatrix::getLocalRowView: ";
1500 #endif // HAVE_TPETRA_DEBUG
1502 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1506 return Teuchos::OrdinalTraits<LO>::invalid ();
1509 const size_t absBlockOffsetStart = ptrHost_[localRowInd];
1510 colInds = indHost_.data () + absBlockOffsetStart;
1512 #ifdef HAVE_TPETRA_DEBUG
1513 TEUCHOS_TEST_FOR_EXCEPTION
1514 (this->need_sync_host (), std::runtime_error,
1515 prefix <<
"The matrix's data were last modified on device, but have "
1516 "not been sync'd to host. Please sync to host (by calling "
1517 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
1519 #endif // HAVE_TPETRA_DEBUG
1521 auto vals_host_out = getValuesHost ();
1524 absBlockOffsetStart * offsetPerBlock ();
1525 vals =
reinterpret_cast<Scalar*
> (vOut);
1527 numInds = ptrHost_[localRowInd + 1] - absBlockOffsetStart;
1532 template<
class Scalar,
class LO,
class GO,
class Node>
1536 const Teuchos::ArrayView<LO>& Indices,
1537 const Teuchos::ArrayView<Scalar>& Values,
1538 size_t &NumEntries)
const
1543 getLocalRowView(LocalRow,colInds,vals,numInds);
1544 if (numInds > Indices.size() || numInds*blockSize_*blockSize_ > Values.size()) {
1545 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
1546 "Tpetra::BlockCrsMatrix::getLocalRowCopy : Column and/or values array is not large enough to hold "
1547 << numInds <<
" row entries");
1549 for (LO i=0; i<numInds; ++i) {
1550 Indices[i] = colInds[i];
1552 for (LO i=0; i<numInds*blockSize_*blockSize_; ++i) {
1553 Values[i] = vals[i];
1555 NumEntries = numInds;
1558 template<
class Scalar,
class LO,
class GO,
class Node>
1562 ptrdiff_t offsets[],
1564 const LO numColInds)
const
1566 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1571 return static_cast<LO
> (0);
1574 const LO LINV = Teuchos::OrdinalTraits<LO>::invalid ();
1578 for (LO k = 0; k < numColInds; ++k) {
1579 const LO relBlockOffset =
1580 this->findRelOffsetOfColumnIndex (localRowInd, colInds[k], hint);
1581 if (relBlockOffset != LINV) {
1582 offsets[k] =
static_cast<ptrdiff_t
> (relBlockOffset);
1583 hint = relBlockOffset + 1;
1591 template<
class Scalar,
class LO,
class GO,
class Node>
1595 const ptrdiff_t offsets[],
1596 const Scalar vals[],
1597 const LO numOffsets)
const
1599 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1604 return static_cast<LO
> (0);
1608 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1609 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1610 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1611 size_t pointOffset = 0;
1614 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1615 const size_t relBlockOffset = offsets[k];
1616 if (relBlockOffset != STINV) {
1617 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1619 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1621 getConstLocalBlockFromInput (vIn, pointOffset);
1622 COPY (A_new, A_old);
1630 template<
class Scalar,
class LO,
class GO,
class Node>
1634 const ptrdiff_t offsets[],
1635 const Scalar vals[],
1636 const LO numOffsets)
const
1638 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1643 return static_cast<LO
> (0);
1647 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1648 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1649 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1650 size_t pointOffset = 0;
1653 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1654 const size_t relBlockOffset = offsets[k];
1655 if (relBlockOffset != STINV) {
1656 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1658 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1660 getConstLocalBlockFromInput (vIn, pointOffset);
1669 template<
class Scalar,
class LO,
class GO,
class Node>
1673 const ptrdiff_t offsets[],
1674 const Scalar vals[],
1675 const LO numOffsets)
const
1677 if (! rowMeshMap_.isNodeLocalElement (localRowInd)) {
1682 return static_cast<LO
> (0);
1687 const size_t absRowBlockOffset = ptrHost_[localRowInd];
1688 const size_t perBlockSize =
static_cast<LO
> (offsetPerBlock ());
1689 const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1690 size_t pointOffset = 0;
1693 for (LO k = 0; k < numOffsets; ++k, pointOffset += perBlockSize) {
1694 const size_t relBlockOffset = offsets[k];
1695 if (relBlockOffset != STINV) {
1696 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
1698 getNonConstLocalBlockFromAbsOffset (absBlockOffset);
1700 getConstLocalBlockFromInput (vIn, pointOffset);
1702 AXPY (ONE, A_new, A_old);
1710 template<
class Scalar,
class LO,
class GO,
class Node>
1715 const size_t numEntInGraph = graph_.getNumEntriesInLocalRow (localRowInd);
1716 if (numEntInGraph == Teuchos::OrdinalTraits<size_t>::invalid ()) {
1717 return static_cast<LO
> (0);
1719 return static_cast<LO
> (numEntInGraph);
1723 template<
class Scalar,
class LO,
class GO,
class Node>
1728 const Teuchos::ETransp mode,
1738 TEUCHOS_TEST_FOR_EXCEPTION(
1739 true, std::logic_error,
"Tpetra::BlockCrsMatrix::apply: "
1740 "transpose and conjugate transpose modes are not yet implemented.");
1743 template<
class Scalar,
class LO,
class GO,
class Node>
1745 BlockCrsMatrix<Scalar, LO, GO, Node>::
1746 applyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1747 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1753 typedef ::Tpetra::Import<LO, GO, Node> import_type;
1754 typedef ::Tpetra::Export<LO, GO, Node> export_type;
1755 const Scalar zero = STS::zero ();
1756 const Scalar one = STS::one ();
1757 RCP<const import_type>
import = graph_.getImporter ();
1759 RCP<const export_type> theExport = graph_.getExporter ();
1760 const char prefix[] =
"Tpetra::BlockCrsMatrix::applyBlockNoTrans: ";
1762 if (alpha == zero) {
1766 else if (beta != one) {
1771 const BMV* X_colMap = NULL;
1772 if (
import.is_null ()) {
1776 catch (std::exception& e) {
1777 TEUCHOS_TEST_FOR_EXCEPTION
1778 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1779 "operator= threw an exception: " << std::endl << e.what ());
1789 if ((*X_colMap_).is_null () ||
1790 (**X_colMap_).getNumVectors () != X.getNumVectors () ||
1791 (**X_colMap_).getBlockSize () != X.getBlockSize ()) {
1792 *X_colMap_ = rcp (
new BMV (* (graph_.getColMap ()), getBlockSize (),
1793 static_cast<LO
> (X.getNumVectors ())));
1795 (*X_colMap_)->getMultiVectorView().doImport (X.getMultiVectorView (),
1799 X_colMap = &(**X_colMap_);
1801 catch (std::exception& e) {
1802 TEUCHOS_TEST_FOR_EXCEPTION
1803 (
true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1804 "operator= threw an exception: " << std::endl << e.what ());
1808 BMV* Y_rowMap = NULL;
1809 if (theExport.is_null ()) {
1812 else if ((*Y_rowMap_).is_null () ||
1813 (**Y_rowMap_).getNumVectors () != Y.getNumVectors () ||
1814 (**Y_rowMap_).getBlockSize () != Y.getBlockSize ()) {
1815 *Y_rowMap_ = rcp (
new BMV (* (graph_.getRowMap ()), getBlockSize (),
1816 static_cast<LO
> (X.getNumVectors ())));
1818 Y_rowMap = &(**Y_rowMap_);
1820 catch (std::exception& e) {
1821 TEUCHOS_TEST_FOR_EXCEPTION(
1822 true, std::logic_error, prefix <<
"Tpetra::MultiVector::"
1823 "operator= threw an exception: " << std::endl << e.what ());
1828 localApplyBlockNoTrans (*X_colMap, *Y_rowMap, alpha, beta);
1830 catch (std::exception& e) {
1831 TEUCHOS_TEST_FOR_EXCEPTION
1832 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1833 "an exception: " << e.what ());
1836 TEUCHOS_TEST_FOR_EXCEPTION
1837 (
true, std::runtime_error, prefix <<
"localApplyBlockNoTrans threw "
1838 "an exception not a subclass of std::exception.");
1841 if (! theExport.is_null ()) {
1847 template<
class Scalar,
class LO,
class GO,
class Node>
1849 BlockCrsMatrix<Scalar, LO, GO, Node>::
1850 localApplyBlockNoTrans (
const BlockMultiVector<Scalar, LO, GO, Node>& X,
1851 BlockMultiVector<Scalar, LO, GO, Node>& Y,
1855 using ::Tpetra::Impl::bcrsLocalApplyNoTrans;
1857 const impl_scalar_type alpha_impl = alpha;
1858 const auto graph = this->graph_.getLocalGraph ();
1859 const impl_scalar_type beta_impl = beta;
1860 const LO blockSize = this->getBlockSize ();
1862 auto X_mv = X.getMultiVectorView ();
1863 auto Y_mv = Y.getMultiVectorView ();
1864 Y_mv.template modify<device_type> ();
1866 auto X_lcl = X_mv.template getLocalView<device_type> ();
1867 auto Y_lcl = Y_mv.template getLocalView<device_type> ();
1868 auto val = this->val_.template view<device_type> ();
1870 bcrsLocalApplyNoTrans (alpha_impl, graph, val, blockSize, X_lcl,
1874 template<
class Scalar,
class LO,
class GO,
class Node>
1876 BlockCrsMatrix<Scalar, LO, GO, Node>::
1877 findRelOffsetOfColumnIndex (
const LO localRowIndex,
1878 const LO colIndexToFind,
1879 const LO hint)
const
1881 const size_t absStartOffset = ptrHost_[localRowIndex];
1882 const size_t absEndOffset = ptrHost_[localRowIndex+1];
1883 const LO numEntriesInRow =
static_cast<LO
> (absEndOffset - absStartOffset);
1885 const LO*
const curInd = indHost_.data () + absStartOffset;
1888 if (hint < numEntriesInRow && curInd[hint] == colIndexToFind) {
1895 LO relOffset = Teuchos::OrdinalTraits<LO>::invalid ();
1900 const LO maxNumEntriesForLinearSearch = 32;
1901 if (numEntriesInRow > maxNumEntriesForLinearSearch) {
1906 const LO* beg = curInd;
1907 const LO* end = curInd + numEntriesInRow;
1908 std::pair<const LO*, const LO*> p =
1909 std::equal_range (beg, end, colIndexToFind);
1910 if (p.first != p.second) {
1912 relOffset =
static_cast<LO
> (p.first - beg);
1916 for (LO k = 0; k < numEntriesInRow; ++k) {
1917 if (colIndexToFind == curInd[k]) {
1927 template<
class Scalar,
class LO,
class GO,
class Node>
1929 BlockCrsMatrix<Scalar, LO, GO, Node>::
1930 offsetPerBlock ()
const
1932 return offsetPerBlock_;
1935 template<
class Scalar,
class LO,
class GO,
class Node>
1937 BlockCrsMatrix<Scalar, LO, GO, Node>::
1938 getConstLocalBlockFromInput (
const impl_scalar_type* val,
1939 const size_t pointOffset)
const
1942 const LO rowStride = blockSize_;
1943 return const_little_block_type (val + pointOffset, blockSize_, rowStride);
1946 template<
class Scalar,
class LO,
class GO,
class Node>
1948 BlockCrsMatrix<Scalar, LO, GO, Node>::
1949 getNonConstLocalBlockFromInput (impl_scalar_type* val,
1950 const size_t pointOffset)
const
1953 const LO rowStride = blockSize_;
1954 return little_block_type (val + pointOffset, blockSize_, rowStride);
1957 template<
class Scalar,
class LO,
class GO,
class Node>
1959 BlockCrsMatrix<Scalar, LO, GO, Node>::
1960 getConstLocalBlockFromAbsOffset (
const size_t absBlockOffset)
const
1962 #ifdef HAVE_TPETRA_DEBUG
1963 const char prefix[] =
1964 "Tpetra::BlockCrsMatrix::getConstLocalBlockFromAbsOffset: ";
1965 #endif // HAVE_TPETRA_DEBUG
1967 if (absBlockOffset >= ptrHost_[rowMeshMap_.getNodeNumElements ()]) {
1971 return const_little_block_type ();
1974 #ifdef HAVE_TPETRA_DEBUG
1975 TEUCHOS_TEST_FOR_EXCEPTION
1976 (this->need_sync_host (), std::runtime_error,
1977 prefix <<
"The matrix's data were last modified on device, but have "
1978 "not been sync'd to host. Please sync to host (by calling "
1979 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
1981 #endif // HAVE_TPETRA_DEBUG
1982 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
1984 auto vals_host = getValuesHost ();
1985 const impl_scalar_type* vals_host_raw = vals_host.data ();
1987 return getConstLocalBlockFromInput (vals_host_raw, absPointOffset);
1991 template<
class Scalar,
class LO,
class GO,
class Node>
1993 BlockCrsMatrix<Scalar, LO, GO, Node>::
1994 getConstLocalBlockFromRelOffset (
const LO lclMeshRow,
1995 const size_t relMeshOffset)
const
1997 typedef impl_scalar_type IST;
1999 const LO* lclColInds = NULL;
2000 Scalar* lclVals = NULL;
2003 LO err = this->getLocalRowView (lclMeshRow, lclColInds, lclVals, numEnt);
2008 return const_little_block_type ();
2011 const size_t relPointOffset = relMeshOffset * this->offsetPerBlock ();
2012 IST* lclValsImpl =
reinterpret_cast<IST*
> (lclVals);
2013 return this->getConstLocalBlockFromInput (
const_cast<const IST*
> (lclValsImpl),
2018 template<
class Scalar,
class LO,
class GO,
class Node>
2020 BlockCrsMatrix<Scalar, LO, GO, Node>::
2021 getNonConstLocalBlockFromAbsOffset (
const size_t absBlockOffset)
const
2023 #ifdef HAVE_TPETRA_DEBUG
2024 const char prefix[] =
2025 "Tpetra::BlockCrsMatrix::getNonConstLocalBlockFromAbsOffset: ";
2026 #endif // HAVE_TPETRA_DEBUG
2028 if (absBlockOffset >= ptrHost_[rowMeshMap_.getNodeNumElements ()]) {
2032 return little_block_type ();
2035 const size_t absPointOffset = absBlockOffset * offsetPerBlock ();
2036 #ifdef HAVE_TPETRA_DEBUG
2037 TEUCHOS_TEST_FOR_EXCEPTION
2038 (this->need_sync_host (), std::runtime_error,
2039 prefix <<
"The matrix's data were last modified on device, but have "
2040 "not been sync'd to host. Please sync to host (by calling "
2041 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
2043 #endif // HAVE_TPETRA_DEBUG
2044 auto vals_host = getValuesHost();
2045 impl_scalar_type* vals_host_raw = vals_host.data ();
2046 return getNonConstLocalBlockFromInput (vals_host_raw, absPointOffset);
2050 template<
class Scalar,
class LO,
class GO,
class Node>
2052 BlockCrsMatrix<Scalar, LO, GO, Node>::
2053 getLocalBlock (
const LO localRowInd,
const LO localColInd)
const
2055 const size_t absRowBlockOffset = ptrHost_[localRowInd];
2056 const LO relBlockOffset =
2057 this->findRelOffsetOfColumnIndex (localRowInd, localColInd);
2059 if (relBlockOffset != Teuchos::OrdinalTraits<LO>::invalid ()) {
2060 const size_t absBlockOffset = absRowBlockOffset + relBlockOffset;
2061 return getNonConstLocalBlockFromAbsOffset (absBlockOffset);
2064 return little_block_type ();
2078 template<
class Scalar,
class LO,
class GO,
class Node>
2080 BlockCrsMatrix<Scalar, LO, GO, Node>::
2081 checkSizes (const ::Tpetra::SrcDistObject& source)
2084 typedef BlockCrsMatrix<Scalar, LO, GO, Node> this_type;
2085 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2088 std::ostream& err = markLocalErrorAndGetStream ();
2089 err <<
"checkSizes: The source object of the Import or Export "
2090 "must be a BlockCrsMatrix with the same template parameters as the "
2091 "target object." << endl;
2096 if (src->getBlockSize () != this->getBlockSize ()) {
2097 std::ostream& err = markLocalErrorAndGetStream ();
2098 err <<
"checkSizes: The source and target objects of the Import or "
2099 <<
"Export must have the same block sizes. The source's block "
2100 <<
"size = " << src->getBlockSize () <<
" != the target's block "
2101 <<
"size = " << this->getBlockSize () <<
"." << endl;
2103 if (! src->graph_.isFillComplete ()) {
2104 std::ostream& err = markLocalErrorAndGetStream ();
2105 err <<
"checkSizes: The source object of the Import or Export is "
2106 "not fill complete. Both source and target objects must be fill "
2107 "complete." << endl;
2109 if (! this->graph_.isFillComplete ()) {
2110 std::ostream& err = markLocalErrorAndGetStream ();
2111 err <<
"checkSizes: The target object of the Import or Export is "
2112 "not fill complete. Both source and target objects must be fill "
2113 "complete." << endl;
2115 if (src->graph_.getColMap ().is_null ()) {
2116 std::ostream& err = markLocalErrorAndGetStream ();
2117 err <<
"checkSizes: The source object of the Import or Export does "
2118 "not have a column Map. Both source and target objects must have "
2119 "column Maps." << endl;
2121 if (this->graph_.getColMap ().is_null ()) {
2122 std::ostream& err = markLocalErrorAndGetStream ();
2123 err <<
"checkSizes: The target object of the Import or Export does "
2124 "not have a column Map. Both source and target objects must have "
2125 "column Maps." << endl;
2129 return ! (* (this->localError_));
2132 template<
class Scalar,
class LO,
class GO,
class Node>
2134 BlockCrsMatrix<Scalar, LO, GO, Node>::
2135 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
2137 #else // TPETRA_ENABLE_DEPRECATED_CODE
2139 #endif // TPETRA_ENABLE_DEPRECATED_CODE
2140 (const ::Tpetra::SrcDistObject& source,
2141 const size_t numSameIDs,
2142 const Kokkos::DualView<
const local_ordinal_type*,
2143 buffer_device_type>& permuteToLIDs,
2144 const Kokkos::DualView<
const local_ordinal_type*,
2145 buffer_device_type>& permuteFromLIDs)
2147 using ::Tpetra::Details::Behavior;
2149 using ::Tpetra::Details::ProfilingRegion;
2153 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::copyAndPermute");
2154 const bool debug = Behavior::debug();
2155 const bool verbose = Behavior::verbose();
2160 std::ostringstream os;
2161 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2162 os <<
"Proc " << myRank <<
": BlockCrsMatrix::copyAndPermute : " << endl;
2167 if (* (this->localError_)) {
2168 std::ostream& err = this->markLocalErrorAndGetStream ();
2170 <<
"The target object of the Import or Export is already in an error state."
2179 std::ostringstream os;
2180 os << prefix << endl
2183 std::cerr << os.str ();
2189 if (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0)) {
2190 std::ostream& err = this->markLocalErrorAndGetStream ();
2192 <<
"permuteToLIDs.extent(0) = " << permuteToLIDs.extent (0)
2193 <<
" != permuteFromLIDs.extent(0) = " << permuteFromLIDs.extent(0)
2197 if (permuteToLIDs.need_sync_host () || permuteFromLIDs.need_sync_host ()) {
2198 std::ostream& err = this->markLocalErrorAndGetStream ();
2200 <<
"Both permuteToLIDs and permuteFromLIDs must be sync'd to host."
2205 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2207 std::ostream& err = this->markLocalErrorAndGetStream ();
2209 <<
"The source (input) object of the Import or "
2210 "Export is either not a BlockCrsMatrix, or does not have the right "
2211 "template parameters. checkSizes() should have caught this. "
2212 "Please report this bug to the Tpetra developers." << endl;
2221 const_cast<this_type*
>(src)->sync_host();
2225 bool lclErr =
false;
2226 #ifdef HAVE_TPETRA_DEBUG
2227 std::set<LO> invalidSrcCopyRows;
2228 std::set<LO> invalidDstCopyRows;
2229 std::set<LO> invalidDstCopyCols;
2230 std::set<LO> invalidDstPermuteCols;
2231 std::set<LO> invalidPermuteFromRows;
2232 #endif // HAVE_TPETRA_DEBUG
2241 #ifdef HAVE_TPETRA_DEBUG
2242 const map_type& srcRowMap = * (src->graph_.getRowMap ());
2243 #endif // HAVE_TPETRA_DEBUG
2244 const map_type& dstRowMap = * (this->graph_.getRowMap ());
2245 const map_type& srcColMap = * (src->graph_.getColMap ());
2246 const map_type& dstColMap = * (this->graph_.getColMap ());
2247 const bool canUseLocalColumnIndices = srcColMap.locallySameAs (dstColMap);
2249 const size_t numPermute =
static_cast<size_t> (permuteFromLIDs.extent(0));
2251 std::ostringstream os;
2253 <<
"canUseLocalColumnIndices: "
2254 << (canUseLocalColumnIndices ?
"true" :
"false")
2255 <<
", numPermute: " << numPermute
2257 std::cerr << os.str ();
2260 const auto permuteToLIDsHost = permuteToLIDs.view_host();
2261 const auto permuteFromLIDsHost = permuteFromLIDs.view_host();
2263 if (canUseLocalColumnIndices) {
2265 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
2266 #ifdef HAVE_TPETRA_DEBUG
2267 if (! srcRowMap.isNodeLocalElement (localRow)) {
2269 invalidSrcCopyRows.insert (localRow);
2272 #endif // HAVE_TPETRA_DEBUG
2274 const LO* lclSrcCols;
2279 LO err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
2282 #ifdef HAVE_TPETRA_DEBUG
2283 (void) invalidSrcCopyRows.insert (localRow);
2284 #endif // HAVE_TPETRA_DEBUG
2287 err = this->replaceLocalValues (localRow, lclSrcCols, vals, numEntries);
2288 if (err != numEntries) {
2290 if (! dstRowMap.isNodeLocalElement (localRow)) {
2291 #ifdef HAVE_TPETRA_DEBUG
2292 invalidDstCopyRows.insert (localRow);
2293 #endif // HAVE_TPETRA_DEBUG
2301 for (LO k = 0; k < numEntries; ++k) {
2302 if (! dstColMap.isNodeLocalElement (lclSrcCols[k])) {
2304 #ifdef HAVE_TPETRA_DEBUG
2305 (void) invalidDstCopyCols.insert (lclSrcCols[k]);
2306 #endif // HAVE_TPETRA_DEBUG
2315 for (
size_t k = 0; k < numPermute; ++k) {
2316 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
2317 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
2319 const LO* lclSrcCols;
2322 LO err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
2325 #ifdef HAVE_TPETRA_DEBUG
2326 invalidPermuteFromRows.insert (srcLclRow);
2327 #endif // HAVE_TPETRA_DEBUG
2330 err = this->replaceLocalValues (dstLclRow, lclSrcCols, vals, numEntries);
2331 if (err != numEntries) {
2333 #ifdef HAVE_TPETRA_DEBUG
2334 for (LO c = 0; c < numEntries; ++c) {
2335 if (! dstColMap.isNodeLocalElement (lclSrcCols[c])) {
2336 invalidDstPermuteCols.insert (lclSrcCols[c]);
2339 #endif // HAVE_TPETRA_DEBUG
2346 const size_t maxNumEnt = src->graph_.getNodeMaxNumRowEntries ();
2347 Teuchos::Array<LO> lclDstCols (maxNumEnt);
2350 for (LO localRow = 0; localRow < static_cast<LO> (numSameIDs); ++localRow) {
2351 const LO* lclSrcCols;
2358 err = src->getLocalRowView (localRow, lclSrcCols, vals, numEntries);
2359 }
catch (std::exception& e) {
2361 std::ostringstream os;
2362 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2363 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
2364 << localRow <<
", src->getLocalRowView() threw an exception: "
2366 std::cerr << os.str ();
2373 #ifdef HAVE_TPETRA_DEBUG
2374 invalidSrcCopyRows.insert (localRow);
2375 #endif // HAVE_TPETRA_DEBUG
2378 if (
static_cast<size_t> (numEntries) >
static_cast<size_t> (lclDstCols.size ())) {
2381 std::ostringstream os;
2382 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2383 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
2384 << localRow <<
", numEntries = " << numEntries <<
" > maxNumEnt = "
2385 << maxNumEnt << endl;
2386 std::cerr << os.str ();
2392 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2393 for (LO j = 0; j < numEntries; ++j) {
2394 lclDstColsView[j] = dstColMap.getLocalElement (srcColMap.getGlobalElement (lclSrcCols[j]));
2395 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2397 #ifdef HAVE_TPETRA_DEBUG
2398 invalidDstCopyCols.insert (lclDstColsView[j]);
2399 #endif // HAVE_TPETRA_DEBUG
2403 err = this->replaceLocalValues (localRow, lclDstColsView.getRawPtr (), vals, numEntries);
2404 }
catch (std::exception& e) {
2406 std::ostringstream os;
2407 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2408 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" localRow "
2409 << localRow <<
", this->replaceLocalValues() threw an exception: "
2411 std::cerr << os.str ();
2415 if (err != numEntries) {
2418 std::ostringstream os;
2419 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2420 os <<
"Proc " << myRank <<
": copyAndPermute: At \"same\" "
2421 "localRow " << localRow <<
", this->replaceLocalValues "
2422 "returned " << err <<
" instead of numEntries = "
2423 << numEntries << endl;
2424 std::cerr << os.str ();
2432 for (
size_t k = 0; k < numPermute; ++k) {
2433 const LO srcLclRow =
static_cast<LO
> (permuteFromLIDsHost(k));
2434 const LO dstLclRow =
static_cast<LO
> (permuteToLIDsHost(k));
2436 const LO* lclSrcCols;
2441 err = src->getLocalRowView (srcLclRow, lclSrcCols, vals, numEntries);
2442 }
catch (std::exception& e) {
2444 std::ostringstream os;
2445 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2446 os <<
"Proc " << myRank <<
": copyAndPermute: At \"permute\" "
2447 "srcLclRow " << srcLclRow <<
" and dstLclRow " << dstLclRow
2448 <<
", src->getLocalRowView() threw an exception: " << e.what ();
2449 std::cerr << os.str ();
2456 #ifdef HAVE_TPETRA_DEBUG
2457 invalidPermuteFromRows.insert (srcLclRow);
2458 #endif // HAVE_TPETRA_DEBUG
2461 if (
static_cast<size_t> (numEntries) >
static_cast<size_t> (lclDstCols.size ())) {
2467 Teuchos::ArrayView<LO> lclDstColsView = lclDstCols.view (0, numEntries);
2468 for (LO j = 0; j < numEntries; ++j) {
2469 lclDstColsView[j] = dstColMap.getLocalElement (srcColMap.getGlobalElement (lclSrcCols[j]));
2470 if (lclDstColsView[j] == Teuchos::OrdinalTraits<LO>::invalid ()) {
2472 #ifdef HAVE_TPETRA_DEBUG
2473 invalidDstPermuteCols.insert (lclDstColsView[j]);
2474 #endif // HAVE_TPETRA_DEBUG
2477 err = this->replaceLocalValues (dstLclRow, lclDstColsView.getRawPtr (), vals, numEntries);
2478 if (err != numEntries) {
2487 std::ostream& err = this->markLocalErrorAndGetStream ();
2488 #ifdef HAVE_TPETRA_DEBUG
2489 err <<
"copyAndPermute: The graph structure of the source of the "
2490 "Import or Export must be a subset of the graph structure of the "
2492 err <<
"invalidSrcCopyRows = [";
2493 for (
typename std::set<LO>::const_iterator it = invalidSrcCopyRows.begin ();
2494 it != invalidSrcCopyRows.end (); ++it) {
2496 typename std::set<LO>::const_iterator itp1 = it;
2498 if (itp1 != invalidSrcCopyRows.end ()) {
2502 err <<
"], invalidDstCopyRows = [";
2503 for (
typename std::set<LO>::const_iterator it = invalidDstCopyRows.begin ();
2504 it != invalidDstCopyRows.end (); ++it) {
2506 typename std::set<LO>::const_iterator itp1 = it;
2508 if (itp1 != invalidDstCopyRows.end ()) {
2512 err <<
"], invalidDstCopyCols = [";
2513 for (
typename std::set<LO>::const_iterator it = invalidDstCopyCols.begin ();
2514 it != invalidDstCopyCols.end (); ++it) {
2516 typename std::set<LO>::const_iterator itp1 = it;
2518 if (itp1 != invalidDstCopyCols.end ()) {
2522 err <<
"], invalidDstPermuteCols = [";
2523 for (
typename std::set<LO>::const_iterator it = invalidDstPermuteCols.begin ();
2524 it != invalidDstPermuteCols.end (); ++it) {
2526 typename std::set<LO>::const_iterator itp1 = it;
2528 if (itp1 != invalidDstPermuteCols.end ()) {
2532 err <<
"], invalidPermuteFromRows = [";
2533 for (
typename std::set<LO>::const_iterator it = invalidPermuteFromRows.begin ();
2534 it != invalidPermuteFromRows.end (); ++it) {
2536 typename std::set<LO>::const_iterator itp1 = it;
2538 if (itp1 != invalidPermuteFromRows.end ()) {
2544 err <<
"copyAndPermute: The graph structure of the source of the "
2545 "Import or Export must be a subset of the graph structure of the "
2547 #endif // HAVE_TPETRA_DEBUG
2551 std::ostringstream os;
2552 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2553 const bool lclSuccess = ! (* (this->localError_));
2554 os <<
"*** Proc " << myRank <<
": copyAndPermute "
2555 << (lclSuccess ?
"succeeded" :
"FAILED");
2559 os <<
": error messages: " << this->errorMessages ();
2561 std::cerr << os.str ();
2585 template<
class LO,
class GO,
class D>
2587 packRowCount (
const size_t numEnt,
2588 const size_t numBytesPerValue,
2589 const size_t blkSize)
2591 using ::Tpetra::Details::PackTraits;
2601 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
2602 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2603 const size_t valsLen = numEnt * numBytesPerValue * blkSize * blkSize;
2604 return numEntLen + gidsLen + valsLen;
2618 template<
class ST,
class LO,
class GO,
class D>
2620 unpackRowCount (
const typename ::Tpetra::Details::PackTraits<LO, D>::input_buffer_type& imports,
2621 const size_t offset,
2622 const size_t numBytes,
2625 using Kokkos::subview;
2626 using ::Tpetra::Details::PackTraits;
2628 if (numBytes == 0) {
2630 return static_cast<size_t> (0);
2634 const size_t theNumBytes = PackTraits<LO, D>::packValueCount (numEntLO);
2635 TEUCHOS_TEST_FOR_EXCEPTION
2636 (theNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
2637 "theNumBytes = " << theNumBytes <<
" < numBytes = " << numBytes
2639 const char*
const inBuf = imports.data () + offset;
2640 const size_t actualNumBytes = PackTraits<LO, D>::unpackValue (numEntLO, inBuf);
2641 TEUCHOS_TEST_FOR_EXCEPTION
2642 (actualNumBytes > numBytes, std::logic_error,
"unpackRowCount: "
2643 "actualNumBytes = " << actualNumBytes <<
" < numBytes = " << numBytes
2645 return static_cast<size_t> (numEntLO);
2652 template<
class ST,
class LO,
class GO,
class D>
2654 packRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<LO, D>::output_buffer_type exports,
2655 const size_t offset,
2656 const size_t numEnt,
2657 const typename ::Tpetra::Details::PackTraits<GO, D>::input_array_type& gidsIn,
2658 const typename ::Tpetra::Details::PackTraits<ST, D>::input_array_type& valsIn,
2659 const size_t numBytesPerValue,
2660 const size_t blockSize)
2662 using Kokkos::subview;
2663 using ::Tpetra::Details::PackTraits;
2669 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2672 const LO numEntLO =
static_cast<size_t> (numEnt);
2674 const size_t numEntBeg = offset;
2675 const size_t numEntLen = PackTraits<LO, D>::packValueCount (numEntLO);
2676 const size_t gidsBeg = numEntBeg + numEntLen;
2677 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2678 const size_t valsBeg = gidsBeg + gidsLen;
2679 const size_t valsLen = numScalarEnt * numBytesPerValue;
2681 char*
const numEntOut = exports.data () + numEntBeg;
2682 char*
const gidsOut = exports.data () + gidsBeg;
2683 char*
const valsOut = exports.data () + valsBeg;
2685 size_t numBytesOut = 0;
2687 numBytesOut += PackTraits<LO, D>::packValue (numEntOut, numEntLO);
2690 Kokkos::pair<int, size_t> p;
2691 p = PackTraits<GO, D>::packArray (gidsOut, gidsIn.data (), numEnt);
2692 errorCode += p.first;
2693 numBytesOut += p.second;
2695 p = PackTraits<ST, D>::packArray (valsOut, valsIn.data (), numScalarEnt);
2696 errorCode += p.first;
2697 numBytesOut += p.second;
2700 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2701 TEUCHOS_TEST_FOR_EXCEPTION
2702 (numBytesOut != expectedNumBytes, std::logic_error,
2703 "packRowForBlockCrs: numBytesOut = " << numBytesOut
2704 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
2706 TEUCHOS_TEST_FOR_EXCEPTION
2707 (errorCode != 0, std::runtime_error,
"packRowForBlockCrs: "
2708 "PackTraits::packArray returned a nonzero error code");
2714 template<
class ST,
class LO,
class GO,
class D>
2716 unpackRowForBlockCrs (
const typename ::Tpetra::Details::PackTraits<GO, D>::output_array_type& gidsOut,
2717 const typename ::Tpetra::Details::PackTraits<ST, D>::output_array_type& valsOut,
2718 const typename ::Tpetra::Details::PackTraits<int, D>::input_buffer_type& imports,
2719 const size_t offset,
2720 const size_t numBytes,
2721 const size_t numEnt,
2722 const size_t numBytesPerValue,
2723 const size_t blockSize)
2725 using ::Tpetra::Details::PackTraits;
2727 if (numBytes == 0) {
2731 const size_t numScalarEnt = numEnt * blockSize * blockSize;
2732 TEUCHOS_TEST_FOR_EXCEPTION
2733 (
static_cast<size_t> (imports.extent (0)) <= offset,
2734 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
2735 << imports.extent (0) <<
" <= offset = " << offset <<
".");
2736 TEUCHOS_TEST_FOR_EXCEPTION
2737 (
static_cast<size_t> (imports.extent (0)) < offset + numBytes,
2738 std::logic_error,
"unpackRowForBlockCrs: imports.extent(0) = "
2739 << imports.extent (0) <<
" < offset + numBytes = "
2740 << (offset + numBytes) <<
".");
2745 const size_t numEntBeg = offset;
2746 const size_t numEntLen = PackTraits<LO, D>::packValueCount (lid);
2747 const size_t gidsBeg = numEntBeg + numEntLen;
2748 const size_t gidsLen = numEnt * PackTraits<GO, D>::packValueCount (gid);
2749 const size_t valsBeg = gidsBeg + gidsLen;
2750 const size_t valsLen = numScalarEnt * numBytesPerValue;
2752 const char*
const numEntIn = imports.data () + numEntBeg;
2753 const char*
const gidsIn = imports.data () + gidsBeg;
2754 const char*
const valsIn = imports.data () + valsBeg;
2756 size_t numBytesOut = 0;
2759 numBytesOut += PackTraits<LO, D>::unpackValue (numEntOut, numEntIn);
2760 TEUCHOS_TEST_FOR_EXCEPTION
2761 (
static_cast<size_t> (numEntOut) != numEnt, std::logic_error,
2762 "unpackRowForBlockCrs: Expected number of entries " << numEnt
2763 <<
" != actual number of entries " << numEntOut <<
".");
2766 Kokkos::pair<int, size_t> p;
2767 p = PackTraits<GO, D>::unpackArray (gidsOut.data (), gidsIn, numEnt);
2768 errorCode += p.first;
2769 numBytesOut += p.second;
2771 p = PackTraits<ST, D>::unpackArray (valsOut.data (), valsIn, numScalarEnt);
2772 errorCode += p.first;
2773 numBytesOut += p.second;
2776 TEUCHOS_TEST_FOR_EXCEPTION
2777 (numBytesOut != numBytes, std::logic_error,
2778 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
2779 <<
" != numBytes = " << numBytes <<
".");
2781 const size_t expectedNumBytes = numEntLen + gidsLen + valsLen;
2782 TEUCHOS_TEST_FOR_EXCEPTION
2783 (numBytesOut != expectedNumBytes, std::logic_error,
2784 "unpackRowForBlockCrs: numBytesOut = " << numBytesOut
2785 <<
" != expectedNumBytes = " << expectedNumBytes <<
".");
2787 TEUCHOS_TEST_FOR_EXCEPTION
2788 (errorCode != 0, std::runtime_error,
"unpackRowForBlockCrs: "
2789 "PackTraits::unpackArray returned a nonzero error code");
2795 template<
class Scalar,
class LO,
class GO,
class Node>
2797 BlockCrsMatrix<Scalar, LO, GO, Node>::
2798 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
2800 #else // TPETRA_ENABLE_DEPRECATED_CODE
2802 #endif // TPETRA_ENABLE_DEPRECATED_CODE
2803 (const ::Tpetra::SrcDistObject& source,
2804 const Kokkos::DualView<
const local_ordinal_type*,
2805 buffer_device_type>& exportLIDs,
2806 Kokkos::DualView<packet_type*,
2807 buffer_device_type>& exports,
2808 Kokkos::DualView<
size_t*,
2809 buffer_device_type> numPacketsPerLID,
2810 size_t& constantNumPackets,
2813 using ::Tpetra::Details::Behavior;
2815 using ::Tpetra::Details::ProfilingRegion;
2816 using ::Tpetra::Details::PackTraits;
2818 typedef typename Kokkos::View<int*, device_type>::HostMirror::execution_space host_exec;
2822 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::packAndPrepare");
2824 const bool debug = Behavior::debug();
2825 const bool verbose = Behavior::verbose();
2830 std::ostringstream os;
2831 const int myRank = this->graph_.getRowMap ()->getComm ()->getRank ();
2832 os <<
"Proc " << myRank <<
": BlockCrsMatrix::packAndPrepare: " << std::endl;
2837 if (* (this->localError_)) {
2838 std::ostream& err = this->markLocalErrorAndGetStream ();
2840 <<
"The target object of the Import or Export is already in an error state."
2849 std::ostringstream os;
2850 os << prefix << std::endl
2854 std::cerr << os.str ();
2860 if (exportLIDs.extent (0) != numPacketsPerLID.extent (0)) {
2861 std::ostream& err = this->markLocalErrorAndGetStream ();
2863 <<
"exportLIDs.extent(0) = " << exportLIDs.extent (0)
2864 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
2865 <<
"." << std::endl;
2868 if (exportLIDs.need_sync_host ()) {
2869 std::ostream& err = this->markLocalErrorAndGetStream ();
2870 err << prefix <<
"exportLIDs be sync'd to host." << std::endl;
2874 const this_type* src =
dynamic_cast<const this_type*
> (&source);
2876 std::ostream& err = this->markLocalErrorAndGetStream ();
2878 <<
"The source (input) object of the Import or "
2879 "Export is either not a BlockCrsMatrix, or does not have the right "
2880 "template parameters. checkSizes() should have caught this. "
2881 "Please report this bug to the Tpetra developers." << std::endl;
2893 constantNumPackets = 0;
2896 const crs_graph_type& srcGraph = src->graph_;
2897 const size_t blockSize =
static_cast<size_t> (src->getBlockSize ());
2898 const size_t numExportLIDs = exportLIDs.extent (0);
2899 const size_t numBytesPerValue =
2900 PackTraits<impl_scalar_type, host_exec>
2901 ::packValueCount(this->val_.extent(0) ? this->val_.view_host()(0) : impl_scalar_type());
2907 Impl::BlockCrsRowStruct<size_t> rowReducerStruct;
2910 auto exportLIDsHost = exportLIDs.view_host();
2911 auto numPacketsPerLIDHost = numPacketsPerLID.view_host();
2912 numPacketsPerLID.modify_host ();
2914 using reducer_type = Impl::BlockCrsReducer<Impl::BlockCrsRowStruct<size_t>,host_exec>;
2915 const auto policy = Kokkos::RangePolicy<host_exec>(
size_t(0), numExportLIDs);
2916 Kokkos::parallel_reduce
2918 [=](
const size_t &i,
typename reducer_type::value_type &update) {
2919 const LO lclRow = exportLIDsHost(i);
2920 size_t numEnt = srcGraph.getNumEntriesInLocalRow (lclRow);
2921 numEnt = (numEnt == Teuchos::OrdinalTraits<size_t>::invalid () ? 0 : numEnt);
2923 const size_t numBytes =
2924 packRowCount<LO, GO, host_exec> (numEnt, numBytesPerValue, blockSize);
2925 numPacketsPerLIDHost(i) = numBytes;
2926 update +=
typename reducer_type::value_type(numEnt, numBytes, numEnt);
2927 }, rowReducerStruct);
2933 const size_t totalNumBytes = rowReducerStruct.totalNumBytes;
2934 const size_t totalNumEntries = rowReducerStruct.totalNumEntries;
2935 const size_t maxRowLength = rowReducerStruct.maxRowLength;
2938 std::ostringstream os;
2940 <<
"totalNumBytes = " << totalNumBytes <<
", totalNumEntries = " << totalNumEntries
2942 std::cerr << os.str ();
2948 if(exports.extent(0) != totalNumBytes)
2950 const std::string oldLabel = exports.d_view.label ();
2951 const std::string newLabel = (oldLabel ==
"") ?
"exports" : oldLabel;
2952 exports = Kokkos::DualView<packet_type*, buffer_device_type>(newLabel, totalNumBytes);
2954 if (totalNumEntries > 0) {
2956 Kokkos::View<size_t*, host_exec> offset(
"offset", numExportLIDs+1);
2958 const auto policy = Kokkos::RangePolicy<host_exec>(
size_t(0), numExportLIDs+1);
2959 Kokkos::parallel_scan
2961 [=](
const size_t &i,
size_t &update,
const bool &
final) {
2962 if (
final) offset(i) = update;
2963 update += (i == numExportLIDs ? 0 : numPacketsPerLIDHost(i));
2966 if (offset(numExportLIDs) != totalNumBytes) {
2967 std::ostream& err = this->markLocalErrorAndGetStream ();
2969 <<
"At end of method, the final offset (in bytes) "
2970 << offset(numExportLIDs) <<
" does not equal the total number of bytes packed "
2971 << totalNumBytes <<
". "
2972 <<
"Please report this bug to the Tpetra developers." << std::endl;
2982 const map_type& srcColMap = * (srcGraph.getColMap ());
2986 exports.modify_host();
2988 typedef Kokkos::TeamPolicy<host_exec> policy_type;
2990 policy_type(numExportLIDs, 1, 1)
2991 .set_scratch_size(0, Kokkos::PerTeam(
sizeof(GO)*maxRowLength));
2992 Kokkos::parallel_for
2994 [=](
const typename policy_type::member_type &member) {
2995 const size_t i = member.league_rank();
2996 Kokkos::View<GO*, typename host_exec::scratch_memory_space>
2997 gblColInds(member.team_scratch(0), maxRowLength);
2999 const LO lclRowInd = exportLIDsHost(i);
3000 const LO* lclColIndsRaw;
3006 (void) src->getLocalRowView (lclRowInd, lclColIndsRaw, valsRaw, numEntLO);
3008 const size_t numEnt =
static_cast<size_t> (numEntLO);
3009 Kokkos::View<const LO*,host_exec> lclColInds (lclColIndsRaw, numEnt);
3012 for (
size_t j = 0; j < numEnt; ++j)
3013 gblColInds(j) = srcColMap.getGlobalElement (lclColInds(j));
3019 const size_t numBytes = packRowForBlockCrs<impl_scalar_type,LO,GO,host_exec>
3020 (exports.view_host(),
3023 Kokkos::View<const GO*,host_exec>(gblColInds.data(), maxRowLength),
3024 Kokkos::View<const impl_scalar_type*,host_exec>(
reinterpret_cast<const impl_scalar_type*
>(valsRaw), numEnt*blockSize*blockSize),
3030 const size_t offsetDiff = offset(i+1) - offset(i);
3031 if (numBytes != offsetDiff) {
3032 std::ostringstream os;
3034 <<
"numBytes computed from packRowForBlockCrs is different from "
3035 <<
"precomputed offset values, LID = " << i << std::endl;
3036 std::cerr << os.str ();
3044 std::ostringstream os;
3045 const bool lclSuccess = ! (* (this->localError_));
3047 << (lclSuccess ?
"succeeded" :
"FAILED")
3049 std::cerr << os.str ();
3053 template<
class Scalar,
class LO,
class GO,
class Node>
3055 BlockCrsMatrix<Scalar, LO, GO, Node>::
3056 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
3058 #else // TPETRA_ENABLE_DEPRECATED_CODE
3060 #endif // TPETRA_ENABLE_DEPRECATED_CODE
3061 (
const Kokkos::DualView<
const local_ordinal_type*,
3062 buffer_device_type>& importLIDs,
3063 Kokkos::DualView<packet_type*,
3064 buffer_device_type> imports,
3065 Kokkos::DualView<
size_t*,
3066 buffer_device_type> numPacketsPerLID,
3071 using ::Tpetra::Details::Behavior;
3073 using ::Tpetra::Details::ProfilingRegion;
3074 using ::Tpetra::Details::PackTraits;
3077 typename Kokkos::View<int*, device_type>::HostMirror::execution_space;
3079 ProfilingRegion profile_region(
"Tpetra::BlockCrsMatrix::unpackAndCombine");
3080 const bool verbose = Behavior::verbose ();
3085 std::ostringstream os;
3086 auto map = this->graph_.getRowMap ();
3087 auto comm = map.is_null () ? Teuchos::null : map->getComm ();
3088 const int myRank = comm.is_null () ? -1 : comm->getRank ();
3089 os <<
"Proc " << myRank <<
": Tpetra::BlockCrsMatrix::unpackAndCombine: ";
3092 os <<
"Start" << endl;
3093 std::cerr << os.str ();
3098 if (* (this->localError_)) {
3099 std::ostream& err = this->markLocalErrorAndGetStream ();
3100 std::ostringstream os;
3101 os << prefix <<
"{Im/Ex}port target is already in error." << endl;
3103 std::cerr << os.str ();
3112 if (importLIDs.extent (0) != numPacketsPerLID.extent (0)) {
3113 std::ostream& err = this->markLocalErrorAndGetStream ();
3114 std::ostringstream os;
3115 os << prefix <<
"importLIDs.extent(0) = " << importLIDs.extent (0)
3116 <<
" != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent(0)
3119 std::cerr << os.str ();
3125 if (combineMode !=
ADD && combineMode !=
INSERT &&
3127 combineMode !=
ZERO) {
3128 std::ostream& err = this->markLocalErrorAndGetStream ();
3129 std::ostringstream os;
3131 <<
"Invalid CombineMode value " << combineMode <<
". Valid "
3132 <<
"values include ADD, INSERT, REPLACE, ABSMAX, and ZERO."
3135 std::cerr << os.str ();
3141 if (this->graph_.getColMap ().is_null ()) {
3142 std::ostream& err = this->markLocalErrorAndGetStream ();
3143 std::ostringstream os;
3144 os << prefix <<
"Target matrix's column Map is null." << endl;
3146 std::cerr << os.str ();
3155 const map_type& tgtColMap = * (this->graph_.getColMap ());
3158 const size_t blockSize = this->getBlockSize ();
3159 const size_t numImportLIDs = importLIDs.extent(0);
3166 const size_t numBytesPerValue =
3167 PackTraits<impl_scalar_type, host_exec>::packValueCount
3168 (this->val_.extent (0) ? this->val_.view_host () (0) : impl_scalar_type ());
3169 const size_t maxRowNumEnt = graph_.getNodeMaxNumRowEntries ();
3170 const size_t maxRowNumScalarEnt = maxRowNumEnt * blockSize * blockSize;
3173 std::ostringstream os;
3174 os << prefix <<
"combineMode: "
3175 << ::Tpetra::combineModeToString (combineMode)
3176 <<
", blockSize: " << blockSize
3177 <<
", numImportLIDs: " << numImportLIDs
3178 <<
", numBytesPerValue: " << numBytesPerValue
3179 <<
", maxRowNumEnt: " << maxRowNumEnt
3180 <<
", maxRowNumScalarEnt: " << maxRowNumScalarEnt << endl;
3181 std::cerr << os.str ();
3184 if (combineMode ==
ZERO || numImportLIDs == 0) {
3186 std::ostringstream os;
3187 os << prefix <<
"Nothing to unpack. Done!" << std::endl;
3188 std::cerr << os.str ();
3195 if (imports.need_sync_host ()) {
3196 imports.sync_host ();
3206 if (imports.need_sync_host () || numPacketsPerLID.need_sync_host () ||
3207 importLIDs.need_sync_host ()) {
3208 std::ostream& err = this->markLocalErrorAndGetStream ();
3209 std::ostringstream os;
3210 os << prefix <<
"All input DualViews must be sync'd to host by now. "
3211 <<
"imports_nc.need_sync_host()="
3212 << (imports.need_sync_host () ?
"true" :
"false")
3213 <<
", numPacketsPerLID.need_sync_host()="
3214 << (numPacketsPerLID.need_sync_host () ?
"true" :
"false")
3215 <<
", importLIDs.need_sync_host()="
3216 << (importLIDs.need_sync_host () ?
"true" :
"false")
3219 std::cerr << os.str ();
3225 const auto importLIDsHost = importLIDs.view_host ();
3226 const auto numPacketsPerLIDHost = numPacketsPerLID.view_host ();
3232 Kokkos::View<size_t*, host_exec> offset (
"offset", numImportLIDs+1);
3234 const auto policy = Kokkos::RangePolicy<host_exec>(
size_t(0), numImportLIDs+1);
3235 Kokkos::parallel_scan
3236 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: offsets", policy,
3237 [=] (
const size_t &i,
size_t &update,
const bool &
final) {
3238 if (
final) offset(i) = update;
3239 update += (i == numImportLIDs ? 0 : numPacketsPerLIDHost(i));
3249 Kokkos::View<int, host_exec, Kokkos::MemoryTraits<Kokkos::Atomic> >
3250 errorDuringUnpack (
"errorDuringUnpack");
3251 errorDuringUnpack () = 0;
3253 using policy_type = Kokkos::TeamPolicy<host_exec>;
3254 const auto policy = policy_type (numImportLIDs, 1, 1)
3255 .set_scratch_size (0, Kokkos::PerTeam (
sizeof (GO) * maxRowNumEnt +
3256 sizeof (LO) * maxRowNumEnt +
3257 numBytesPerValue * maxRowNumScalarEnt));
3258 using host_scratch_space =
typename host_exec::scratch_memory_space;
3259 using pair_type = Kokkos::pair<size_t, size_t>;
3260 Kokkos::parallel_for
3261 (
"Tpetra::BlockCrsMatrix::unpackAndCombine: unpack", policy,
3262 [=] (
const typename policy_type::member_type& member) {
3263 const size_t i = member.league_rank();
3265 Kokkos::View<GO*, host_scratch_space> gblColInds
3266 (member.team_scratch (0), maxRowNumEnt);
3267 Kokkos::View<LO*, host_scratch_space> lclColInds
3268 (member.team_scratch (0), maxRowNumEnt);
3269 Kokkos::View<impl_scalar_type*, host_scratch_space> vals
3270 (member.team_scratch (0), maxRowNumScalarEnt);
3272 const size_t offval = offset(i);
3273 const LO lclRow = importLIDsHost(i);
3274 const size_t numBytes = numPacketsPerLIDHost(i);
3275 const size_t numEnt =
3276 unpackRowCount<impl_scalar_type, LO, GO, host_exec>
3277 (imports.view_host (), offval, numBytes, numBytesPerValue);
3280 if (numEnt > maxRowNumEnt) {
3281 errorDuringUnpack() = 1;
3283 std::ostringstream os;
3285 <<
"At i = " << i <<
", numEnt = " << numEnt
3286 <<
" > maxRowNumEnt = " << maxRowNumEnt
3288 std::cerr << os.str ();
3293 const size_t numScalarEnt = numEnt*blockSize*blockSize;
3294 auto gidsOut = Kokkos::subview(gblColInds, pair_type(0, numEnt));
3295 auto lidsOut = Kokkos::subview(lclColInds, pair_type(0, numEnt));
3296 auto valsOut = Kokkos::subview(vals, pair_type(0, numScalarEnt));
3301 size_t numBytesOut = 0;
3304 unpackRowForBlockCrs<impl_scalar_type, LO, GO, host_exec>
3305 (Kokkos::View<GO*,host_exec>(gidsOut.data(), numEnt),
3306 Kokkos::View<impl_scalar_type*,host_exec>(valsOut.data(), numScalarEnt),
3307 imports.view_host(),
3308 offval, numBytes, numEnt,
3309 numBytesPerValue, blockSize);
3311 catch (std::exception& e) {
3312 errorDuringUnpack() = 1;
3314 std::ostringstream os;
3315 os << prefix <<
"At i = " << i <<
", unpackRowForBlockCrs threw: "
3316 << e.what () << endl;
3317 std::cerr << os.str ();
3322 if (numBytes != numBytesOut) {
3323 errorDuringUnpack() = 1;
3325 std::ostringstream os;
3327 <<
"At i = " << i <<
", numBytes = " << numBytes
3328 <<
" != numBytesOut = " << numBytesOut <<
"."
3330 std::cerr << os.str();
3336 for (
size_t k = 0; k < numEnt; ++k) {
3337 lidsOut(k) = tgtColMap.getLocalElement (gidsOut(k));
3338 if (lidsOut(k) == Teuchos::OrdinalTraits<LO>::invalid ()) {
3340 std::ostringstream os;
3342 <<
"At i = " << i <<
", GID " << gidsOut(k)
3343 <<
" is not owned by the calling process."
3345 std::cerr << os.str();
3353 const LO*
const lidsRaw =
const_cast<const LO*
> (lidsOut.data ());
3354 const Scalar*
const valsRaw =
reinterpret_cast<const Scalar*
>
3355 (
const_cast<const impl_scalar_type*
> (valsOut.data ()));
3356 if (combineMode ==
ADD) {
3357 numCombd = this->sumIntoLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3358 }
else if (combineMode ==
INSERT || combineMode ==
REPLACE) {
3359 numCombd = this->replaceLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3360 }
else if (combineMode ==
ABSMAX) {
3361 numCombd = this->absMaxLocalValues (lclRow, lidsRaw, valsRaw, numEnt);
3364 if (
static_cast<LO
> (numEnt) != numCombd) {
3365 errorDuringUnpack() = 1;
3367 std::ostringstream os;
3368 os << prefix <<
"At i = " << i <<
", numEnt = " << numEnt
3369 <<
" != numCombd = " << numCombd <<
"."
3371 std::cerr << os.str ();
3378 if (errorDuringUnpack () != 0) {
3379 std::ostream& err = this->markLocalErrorAndGetStream ();
3380 err << prefix <<
"Unpacking failed.";
3382 err <<
" Please run again with the environment variable setting "
3383 "TPETRA_VERBOSE=1 to get more verbose diagnostic output.";
3389 std::ostringstream os;
3390 const bool lclSuccess = ! (* (this->localError_));
3392 << (lclSuccess ?
"succeeded" :
"FAILED")
3394 std::cerr << os.str ();
3398 template<
class Scalar,
class LO,
class GO,
class Node>
3402 using Teuchos::TypeNameTraits;
3403 std::ostringstream os;
3404 os <<
"\"Tpetra::BlockCrsMatrix\": { "
3405 <<
"Template parameters: { "
3406 <<
"Scalar: " << TypeNameTraits<Scalar>::name ()
3407 <<
"LO: " << TypeNameTraits<LO>::name ()
3408 <<
"GO: " << TypeNameTraits<GO>::name ()
3409 <<
"Node: " << TypeNameTraits<Node>::name ()
3411 <<
", Label: \"" << this->getObjectLabel () <<
"\""
3412 <<
", Global dimensions: ["
3413 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
3414 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]"
3415 <<
", Block size: " << getBlockSize ()
3421 template<
class Scalar,
class LO,
class GO,
class Node>
3424 describe (Teuchos::FancyOStream& out,
3425 const Teuchos::EVerbosityLevel verbLevel)
const
3427 using Teuchos::ArrayRCP;
3428 using Teuchos::CommRequest;
3429 using Teuchos::FancyOStream;
3430 using Teuchos::getFancyOStream;
3431 using Teuchos::ireceive;
3432 using Teuchos::isend;
3433 using Teuchos::outArg;
3434 using Teuchos::TypeNameTraits;
3435 using Teuchos::VERB_DEFAULT;
3436 using Teuchos::VERB_NONE;
3437 using Teuchos::VERB_LOW;
3438 using Teuchos::VERB_MEDIUM;
3440 using Teuchos::VERB_EXTREME;
3442 using Teuchos::wait;
3444 #ifdef HAVE_TPETRA_DEBUG
3445 const char prefix[] =
"Tpetra::BlockCrsMatrix::describe: ";
3446 #endif // HAVE_TPETRA_DEBUG
3449 const Teuchos::EVerbosityLevel vl =
3450 (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
3452 if (vl == VERB_NONE) {
3457 Teuchos::OSTab tab0 (out);
3459 out <<
"\"Tpetra::BlockCrsMatrix\":" << endl;
3460 Teuchos::OSTab tab1 (out);
3462 out <<
"Template parameters:" << endl;
3464 Teuchos::OSTab tab2 (out);
3465 out <<
"Scalar: " << TypeNameTraits<Scalar>::name () << endl
3466 <<
"LO: " << TypeNameTraits<LO>::name () << endl
3467 <<
"GO: " << TypeNameTraits<GO>::name () << endl
3468 <<
"Node: " << TypeNameTraits<Node>::name () << endl;
3470 out <<
"Label: \"" << this->getObjectLabel () <<
"\"" << endl
3471 <<
"Global dimensions: ["
3472 << graph_.getDomainMap ()->getGlobalNumElements () <<
", "
3473 << graph_.getRangeMap ()->getGlobalNumElements () <<
"]" << endl;
3475 const LO blockSize = getBlockSize ();
3476 out <<
"Block size: " << blockSize << endl;
3479 if (vl >= VERB_MEDIUM) {
3480 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3481 const int myRank = comm.getRank ();
3483 out <<
"Row Map:" << endl;
3485 getRowMap()->describe (out, vl);
3487 if (! getColMap ().is_null ()) {
3488 if (getColMap() == getRowMap()) {
3490 out <<
"Column Map: same as row Map" << endl;
3495 out <<
"Column Map:" << endl;
3497 getColMap ()->describe (out, vl);
3500 if (! getDomainMap ().is_null ()) {
3501 if (getDomainMap () == getRowMap ()) {
3503 out <<
"Domain Map: same as row Map" << endl;
3506 else if (getDomainMap () == getColMap ()) {
3508 out <<
"Domain Map: same as column Map" << endl;
3513 out <<
"Domain Map:" << endl;
3515 getDomainMap ()->describe (out, vl);
3518 if (! getRangeMap ().is_null ()) {
3519 if (getRangeMap () == getDomainMap ()) {
3521 out <<
"Range Map: same as domain Map" << endl;
3524 else if (getRangeMap () == getRowMap ()) {
3526 out <<
"Range Map: same as row Map" << endl;
3531 out <<
"Range Map: " << endl;
3533 getRangeMap ()->describe (out, vl);
3538 if (vl >= VERB_EXTREME) {
3544 const_cast<this_type*
> (
this)->sync_host ();
3546 #ifdef HAVE_TPETRA_DEBUG
3547 TEUCHOS_TEST_FOR_EXCEPTION
3548 (this->need_sync_host (), std::logic_error,
3549 prefix <<
"Right after sync to host, the matrix claims that it needs "
3550 "sync to host. Please report this bug to the Tpetra developers.");
3551 #endif // HAVE_TPETRA_DEBUG
3553 const Teuchos::Comm<int>& comm = * (graph_.getMap ()->getComm ());
3554 const int myRank = comm.getRank ();
3555 const int numProcs = comm.getSize ();
3558 RCP<std::ostringstream> lclOutStrPtr (
new std::ostringstream ());
3559 RCP<FancyOStream> osPtr = getFancyOStream (lclOutStrPtr);
3560 FancyOStream& os = *osPtr;
3561 os <<
"Process " << myRank <<
":" << endl;
3562 Teuchos::OSTab tab2 (os);
3564 const map_type& meshRowMap = * (graph_.getRowMap ());
3565 const map_type& meshColMap = * (graph_.getColMap ());
3570 os <<
"Row " << meshGblRow <<
": {";
3572 const LO* lclColInds = NULL;
3573 Scalar* vals = NULL;
3575 this->getLocalRowView (meshLclRow, lclColInds, vals, numInds);
3577 for (LO k = 0; k < numInds; ++k) {
3580 os <<
"Col " << gblCol <<
": [";
3581 for (LO i = 0; i < blockSize; ++i) {
3582 for (LO j = 0; j < blockSize; ++j) {
3583 os << vals[blockSize*blockSize*k + i*blockSize + j];
3584 if (j + 1 < blockSize) {
3588 if (i + 1 < blockSize) {
3593 if (k + 1 < numInds) {
3603 out << lclOutStrPtr->str ();
3604 lclOutStrPtr = Teuchos::null;
3607 const int sizeTag = 1337;
3608 const int dataTag = 1338;
3610 ArrayRCP<char> recvDataBuf;
3614 for (
int p = 1; p < numProcs; ++p) {
3617 ArrayRCP<size_t> recvSize (1);
3619 RCP<CommRequest<int> > recvSizeReq =
3620 ireceive<int, size_t> (recvSize, p, sizeTag, comm);
3621 wait<int> (comm, outArg (recvSizeReq));
3622 const size_t numCharsToRecv = recvSize[0];
3629 if (
static_cast<size_t>(recvDataBuf.size()) < numCharsToRecv + 1) {
3630 recvDataBuf.resize (numCharsToRecv + 1);
3632 ArrayRCP<char> recvData = recvDataBuf.persistingView (0, numCharsToRecv);
3634 RCP<CommRequest<int> > recvDataReq =
3635 ireceive<int, char> (recvData, p, dataTag, comm);
3636 wait<int> (comm, outArg (recvDataReq));
3641 recvDataBuf[numCharsToRecv] =
'\0';
3642 out << recvDataBuf.getRawPtr ();
3644 else if (myRank == p) {
3648 const std::string stringToSend = lclOutStrPtr->str ();
3649 lclOutStrPtr = Teuchos::null;
3652 const size_t numCharsToSend = stringToSend.size ();
3653 ArrayRCP<size_t> sendSize (1);
3654 sendSize[0] = numCharsToSend;
3655 RCP<CommRequest<int> > sendSizeReq =
3656 isend<int, size_t> (sendSize, 0, sizeTag, comm);
3657 wait<int> (comm, outArg (sendSizeReq));
3665 ArrayRCP<const char> sendData (&stringToSend[0], 0, numCharsToSend,
false);
3666 RCP<CommRequest<int> > sendDataReq =
3667 isend<int, char> (sendData, 0, dataTag, comm);
3668 wait<int> (comm, outArg (sendDataReq));
3674 template<
class Scalar,
class LO,
class GO,
class Node>
3675 Teuchos::RCP<const Teuchos::Comm<int> >
3679 return graph_.getComm();
3682 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
3683 template<
class Scalar,
class LO,
class GO,
class Node>
3689 return Teuchos::null;
3692 #endif // TPETRA_ENABLE_DEPRECATED_CODE
3694 template<
class Scalar,
class LO,
class GO,
class Node>
3699 return graph_.getGlobalNumCols();
3702 template<
class Scalar,
class LO,
class GO,
class Node>
3707 return graph_.getNodeNumCols();
3710 template<
class Scalar,
class LO,
class GO,
class Node>
3712 BlockCrsMatrix<Scalar, LO, GO, Node>::
3713 getIndexBase()
const
3715 return graph_.getIndexBase();
3718 template<
class Scalar,
class LO,
class GO,
class Node>
3723 return graph_.getGlobalNumEntries();
3726 template<
class Scalar,
class LO,
class GO,
class Node>
3731 return graph_.getNodeNumEntries();
3734 template<
class Scalar,
class LO,
class GO,
class Node>
3739 return graph_.getNumEntriesInGlobalRow(globalRow);
3742 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
3743 template<
class Scalar,
class LO,
class GO,
class Node>
3748 using HDM = Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers;
3749 return dynamic_cast<const HDM&
> (this->graph_).getGlobalNumDiagsImpl ();
3752 template<
class Scalar,
class LO,
class GO,
class Node>
3753 size_t TPETRA_DEPRECATED
3754 BlockCrsMatrix<Scalar, LO, GO, Node>::
3755 getNodeNumDiags ()
const
3757 using HDM = Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers;
3758 return dynamic_cast<const HDM&
> (this->graph_).getNodeNumDiagsImpl ();
3760 #endif // TPETRA_ENABLE_DEPRECATED_CODE
3762 template<
class Scalar,
class LO,
class GO,
class Node>
3767 return graph_.getGlobalMaxNumRowEntries();
3770 template<
class Scalar,
class LO,
class GO,
class Node>
3775 return graph_.hasColMap();
3778 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
3779 template<
class Scalar,
class LO,
class GO,
class Node>
3780 bool TPETRA_DEPRECATED
3784 using HDM = ::Tpetra::Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers;
3785 return dynamic_cast<const HDM&
> (this->graph_).isLowerTriangularImpl ();
3788 template<
class Scalar,
class LO,
class GO,
class Node>
3789 bool TPETRA_DEPRECATED
3790 BlockCrsMatrix<Scalar, LO, GO, Node>::
3791 isUpperTriangular ()
const
3793 using HDM = ::Tpetra::Details::HasDeprecatedMethods2630_WarningThisClassIsNotForUsers;
3794 return dynamic_cast<const HDM&
> (this->graph_).isUpperTriangularImpl ();
3796 #endif // TPETRA_ENABLE_DEPRECATED_CODE
3798 template<
class Scalar,
class LO,
class GO,
class Node>
3803 return graph_.isLocallyIndexed();
3806 template<
class Scalar,
class LO,
class GO,
class Node>
3811 return graph_.isGloballyIndexed();
3814 template<
class Scalar,
class LO,
class GO,
class Node>
3819 return graph_.isFillComplete ();
3822 template<
class Scalar,
class LO,
class GO,
class Node>
3831 template<
class Scalar,
class LO,
class GO,
class Node>
3835 const Teuchos::ArrayView<GO> &,
3836 const Teuchos::ArrayView<Scalar> &,
3839 TEUCHOS_TEST_FOR_EXCEPTION(
3840 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowCopy: "
3841 "This class doesn't support global matrix indexing.");
3845 template<
class Scalar,
class LO,
class GO,
class Node>
3849 Teuchos::ArrayView<const GO> &,
3850 Teuchos::ArrayView<const Scalar> &)
const
3852 TEUCHOS_TEST_FOR_EXCEPTION(
3853 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getGlobalRowView: "
3854 "This class doesn't support global matrix indexing.");
3858 template<
class Scalar,
class LO,
class GO,
class Node>
3862 Teuchos::ArrayView<const LO>& ,
3863 Teuchos::ArrayView<const Scalar>& )
const
3865 TEUCHOS_TEST_FOR_EXCEPTION(
3866 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getLocalRowView: "
3867 "This class doesn't support local matrix indexing.");
3870 template<
class Scalar,
class LO,
class GO,
class Node>
3875 #ifdef HAVE_TPETRA_DEBUG
3876 const char prefix[] =
3877 "Tpetra::BlockCrsMatrix::getLocalDiagCopy: ";
3878 #endif // HAVE_TPETRA_DEBUG
3880 const size_t lclNumMeshRows = graph_.getNodeNumRows ();
3882 Kokkos::View<size_t*, device_type> diagOffsets (
"diagOffsets", lclNumMeshRows);
3883 graph_.getLocalDiagOffsets (diagOffsets);
3886 auto diagOffsetsHost = Kokkos::create_mirror_view (diagOffsets);
3889 diag.template modify<typename decltype (diagOffsetsHost)::memory_space> ();
3891 #ifdef HAVE_TPETRA_DEBUG
3892 TEUCHOS_TEST_FOR_EXCEPTION
3893 (this->need_sync_host (), std::runtime_error,
3894 prefix <<
"The matrix's data were last modified on device, but have "
3895 "not been sync'd to host. Please sync to host (by calling "
3896 "sync<Kokkos::HostSpace>() on this matrix) before calling this "
3898 #endif // HAVE_TPETRA_DEBUG
3900 auto vals_host_out = getValuesHost ();
3901 Scalar* vals_host_out_raw =
3902 reinterpret_cast<Scalar*
> (vals_host_out.data ());
3905 size_t rowOffset = 0;
3907 LO bs = getBlockSize();
3908 for(
size_t r=0; r<getNodeNumRows(); r++)
3911 offset = rowOffset + diagOffsetsHost(r)*bs*bs;
3912 for(
int b=0; b<bs; b++)
3917 rowOffset += getNumEntriesInLocalRow(r)*bs*bs;
3920 diag.template sync<memory_space> ();
3923 template<
class Scalar,
class LO,
class GO,
class Node>
3926 leftScale (const ::Tpetra::Vector<Scalar, LO, GO, Node>& )
3928 TEUCHOS_TEST_FOR_EXCEPTION(
3929 true, std::logic_error,
"Tpetra::BlockCrsMatrix::leftScale: "
3930 "not implemented.");
3934 template<
class Scalar,
class LO,
class GO,
class Node>
3937 rightScale (const ::Tpetra::Vector<Scalar, LO, GO, Node>& )
3939 TEUCHOS_TEST_FOR_EXCEPTION(
3940 true, std::logic_error,
"Tpetra::BlockCrsMatrix::rightScale: "
3941 "not implemented.");
3945 template<
class Scalar,
class LO,
class GO,
class Node>
3946 Teuchos::RCP<const ::Tpetra::RowGraph<LO, GO, Node> >
3953 template<
class Scalar,
class LO,
class GO,
class Node>
3954 typename ::Tpetra::RowMatrix<Scalar, LO, GO, Node>::mag_type
3958 TEUCHOS_TEST_FOR_EXCEPTION(
3959 true, std::logic_error,
"Tpetra::BlockCrsMatrix::getFrobeniusNorm: "
3960 "not implemented.");
3970 #define TPETRA_BLOCKCRSMATRIX_INSTANT(S,LO,GO,NODE) \
3971 template class BlockCrsMatrix< S, LO, GO, NODE >;
3973 #endif // TPETRA_BLOCKCRSMATRIX_DEF_HPP