42 #ifndef TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_DEF_HPP 43 #define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_DEF_HPP 45 #include "TpetraCore_config.h" 46 #include "Teuchos_Array.hpp" 47 #include "Teuchos_ArrayView.hpp" 55 #include "Kokkos_Core.hpp" 82 #ifndef DOXYGEN_SHOULD_SKIP_THIS 85 #endif // DOXYGEN_SHOULD_SKIP_THIS 92 namespace UnpackAndCombineCrsMatrixImpl {
106 template<
class ST,
class LO,
class GO,
class DT,
class BDT>
111 const Kokkos::View<const char*, BDT>& imports,
113 const size_t num_bytes,
114 const size_t num_ent,
115 const size_t num_bytes_per_value)
121 bool unpack_pids = pids_out.size() > 0;
123 const size_t num_ent_beg = offset;
126 const size_t gids_beg = num_ent_beg + num_ent_len;
127 const size_t gids_len =
130 const size_t pids_beg = gids_beg + gids_len;
131 const size_t pids_len = unpack_pids ?
135 const size_t vals_beg = gids_beg + gids_len + pids_len;
136 const size_t vals_len = num_ent * num_bytes_per_value;
// Raw pointers to the start of each packed section of this row inside
// `imports`.  pids_in stays null when unpack_pids is false, i.e. when the
// caller passed an empty pids_out view.  nullptr is used instead of NULL so
// the code does not depend on a header that defines the NULL macro.
138 const char*
const num_ent_in = imports.data () + num_ent_beg;
139 const char*
const gids_in = imports.data () + gids_beg;
140 const char*
const pids_in = unpack_pids ? imports.data () + pids_beg : nullptr;
141 const char*
const vals_in = imports.data () + vals_beg;
143 size_t num_bytes_out = 0;
146 if (static_cast<size_t> (num_ent_out) != num_ent) {
151 Kokkos::pair<int, size_t> p;
156 num_bytes_out += p.second;
163 num_bytes_out += p.second;
170 num_bytes_out += p.second;
173 const size_t expected_num_bytes = num_ent_len + gids_len + pids_len + vals_len;
174 if (num_bytes_out != expected_num_bytes) {
190 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
192 typedef LocalMatrix local_matrix_type;
195 typedef typename local_matrix_type::value_type ST;
196 typedef typename local_map_type::local_ordinal_type LO;
197 typedef typename local_map_type::global_ordinal_type GO;
198 typedef typename local_map_type::device_type DT;
199 typedef typename DT::execution_space XS;
201 typedef Kokkos::View<const size_t*, BufferDeviceType>
202 num_packets_per_lid_type;
203 typedef Kokkos::View<const size_t*, DT> offsets_type;
204 typedef Kokkos::View<const char*, BufferDeviceType> input_buffer_type;
205 typedef Kokkos::View<const LO*, DT> import_lids_type;
207 typedef Kokkos::View<LO*, DT> lids_scratch_type;
208 typedef Kokkos::View<GO*, DT> gids_scratch_type;
209 typedef Kokkos::View<int*,DT> pids_scratch_type;
210 typedef Kokkos::View<ST*, DT> vals_scratch_type;
212 typedef Kokkos::pair<int, LO> value_type;
214 static_assert (std::is_same<LO, typename local_matrix_type::ordinal_type>::value,
215 "LocalMap::local_ordinal_type and " 216 "LocalMatrix::ordinal_type must be the same.");
218 local_matrix_type local_matrix;
219 local_map_type local_col_map;
220 input_buffer_type imports;
221 num_packets_per_lid_type num_packets_per_lid;
222 import_lids_type import_lids;
223 offsets_type offsets;
227 size_t num_bytes_per_value;
229 Kokkos::Experimental::UniqueToken<XS, Kokkos::Experimental::UniqueTokenScope::Global> tokens;
230 lids_scratch_type lids_scratch;
231 gids_scratch_type gids_scratch;
232 pids_scratch_type pids_scratch;
233 vals_scratch_type vals_scratch;
236 const local_matrix_type& local_matrix_in,
237 const local_map_type& local_col_map_in,
238 const input_buffer_type& imports_in,
239 const num_packets_per_lid_type& num_packets_per_lid_in,
240 const import_lids_type& import_lids_in,
241 const offsets_type& offsets_in,
243 const size_t max_num_ent_in,
244 const bool unpack_pids_in,
245 const size_t num_bytes_per_value_in,
246 const bool atomic_in) :
247 local_matrix (local_matrix_in),
248 local_col_map (local_col_map_in),
249 imports (imports_in),
250 num_packets_per_lid (num_packets_per_lid_in),
251 import_lids (import_lids_in),
252 offsets (offsets_in),
253 combine_mode (combine_mode_in),
254 max_num_ent (max_num_ent_in),
255 unpack_pids (unpack_pids_in),
256 num_bytes_per_value (num_bytes_per_value_in),
// Scratch views, one per unpacked quantity.  Each holds tokens.size() blocks
// of max_num_ent entries; operator() works on the block that starts at
// token * max_num_ent.  The string argument is the view's label and matches
// the member name, so the LID and PID buffers are not confused with each
// other when identified by label.
259 lids_scratch (
"lids_scratch", tokens.size() * max_num_ent),
260 gids_scratch (
"gids_scratch", tokens.size() * max_num_ent),
261 pids_scratch (
"pids_scratch", tokens.size() * max_num_ent),
262 vals_scratch (
"vals_scratch", tokens.size() * max_num_ent)
// Sets dst to the pair (0, OrdinalTraits<LO>::invalid ()).
// value_type is Kokkos::pair<int, LO>.  operator() overwrites dst with
// (error code, row index i) when it rejects a row, and the caller treats a
// nonzero first component as an error and reports the second component as
// the first bad row.
265 KOKKOS_INLINE_FUNCTION
void init(value_type& dst)
const 267 using Tpetra::Details::OrdinalTraits;
268 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
271 KOKKOS_INLINE_FUNCTION
void 272 join (
volatile value_type& dst,
const volatile value_type& src)
const 278 using Tpetra::Details::OrdinalTraits;
279 if (src.second != OrdinalTraits<LO>::invalid ()) {
284 if (dst.second == OrdinalTraits<LO>::invalid () ||
285 src.second < dst.second) {
291 KOKKOS_INLINE_FUNCTION
292 void operator()(
const LO i, value_type& dst)
const 295 using Kokkos::subview;
296 using Kokkos::MemoryUnmanaged;
297 typedef typename XS::size_type size_type;
298 typedef typename Kokkos::pair<size_type, size_type> slice;
299 typedef BufferDeviceType BDT;
301 typedef View<LO*, DT, MemoryUnmanaged> lids_out_type;
302 typedef View<int*,DT, MemoryUnmanaged> pids_out_type;
303 typedef View<GO*, DT, MemoryUnmanaged> gids_out_type;
304 typedef View<ST*, DT, MemoryUnmanaged> vals_out_type;
306 const size_t num_bytes = num_packets_per_lid(i);
309 if (num_bytes == 0) {
314 const LO import_lid = import_lids[i];
315 const size_t buf_size = imports.size();
316 const size_t offset = offsets(i);
320 const char*
const in_buf = imports.data () + offset;
322 const size_t num_ent =
static_cast<size_t> (num_ent_LO);
325 size_t expected_num_bytes = 0;
335 if (expected_num_bytes > num_bytes) {
336 dst = Kokkos::make_pair (1, i);
340 if (offset > buf_size || offset + num_bytes > buf_size) {
341 dst = Kokkos::make_pair (2, i);
348 const size_type token = tokens.acquire();
349 const size_t a =
static_cast<size_t>(token) * max_num_ent;
350 const size_t b = a + num_ent;
351 lids_out_type lids_out = subview(lids_scratch, slice(a, b));
352 gids_out_type gids_out = subview(gids_scratch, slice(a, b));
353 pids_out_type pids_out = subview(pids_scratch, slice(a, (unpack_pids ? b : a)));
354 vals_out_type vals_out = subview(vals_scratch, slice(a, b));
358 unpackRow<ST,LO,GO,DT,BDT>(gids_out, pids_out, vals_out,
359 imports, offset, num_bytes,
360 num_ent, num_bytes_per_value);
361 if (unpack_err != 0) {
362 dst = Kokkos::make_pair (unpack_err, i);
363 tokens.release (token);
370 for (
size_t k = 0; k < num_ent; ++k) {
375 const LO*
const lids_raw =
const_cast<const LO*
> (lids_out.data ());
376 const ST*
const vals_raw =
const_cast<const ST*
> (vals_out.data ());
378 if (combine_mode ==
ADD) {
380 local_matrix.sumIntoValues (import_lid, lids_raw, num_ent,
381 vals_raw,
false, atomic);
383 else if (combine_mode ==
REPLACE) {
385 local_matrix.replaceValues (import_lid, lids_raw, num_ent,
386 vals_raw,
false, atomic);
389 dst = Kokkos::make_pair (4, i);
390 tokens.release (token);
394 tokens.release (token);
// Empty tag type.  It is the work tag of the RangePolicy used by
// compute_maximum_num_entries and selects the operator()/join overloads
// that keep the largest per-row entry count.
398 struct MaxNumEntTag {};
// Empty tag type.  It is the work tag of the RangePolicy used by
// compute_total_num_entries and selects the operator() overload that adds
// each row's entry count to the running total.
399 struct TotNumEntTag {};
409 template<
class LO,
class DT,
class BDT>
412 typedef Kokkos::View<const size_t*, BDT> num_packets_per_lid_type;
413 typedef Kokkos::View<const size_t*, DT> offsets_type;
414 typedef Kokkos::View<const char*, BDT> input_buffer_type;
417 typedef size_t value_type;
420 typedef Kokkos::pair<size_t,size_t> slice;
422 num_packets_per_lid_type num_packets_per_lid;
423 offsets_type offsets;
424 input_buffer_type imports;
428 const offsets_type& offsets_in,
429 const input_buffer_type& imports_in) :
430 num_packets_per_lid (num_packets_per_lid_in),
431 offsets (offsets_in),
435 KOKKOS_INLINE_FUNCTION
void 436 operator() (
const MaxNumEntTag,
const LO i, value_type& update)
const {
438 const size_t num_bytes = num_packets_per_lid(i);
441 const char*
const in_buf = imports.data () + offsets(i);
443 const size_t num_ent =
static_cast<size_t> (num_ent_LO);
445 update = (update < num_ent) ? num_ent : update;
449 KOKKOS_INLINE_FUNCTION
void 450 join (
const MaxNumEntTag,
451 volatile value_type& dst,
452 const volatile value_type& src)
const 454 if (dst < src) dst = src;
457 KOKKOS_INLINE_FUNCTION
void 458 operator() (
const TotNumEntTag,
const LO i, value_type& tot_num_ent)
const {
460 const size_t num_bytes = num_packets_per_lid(i);
463 const char*
const in_buf = imports.data () + offsets(i);
465 tot_num_ent +=
static_cast<size_t> (num_ent_LO);
477 template<
class LO,
class DT,
class BDT>
480 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
481 const Kokkos::View<const size_t*, DT>& offsets,
482 const Kokkos::View<const char*, BDT>& imports)
484 typedef typename DT::execution_space XS;
485 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>,
486 MaxNumEntTag> range_policy;
490 const LO numRowsToUnpack =
491 static_cast<LO
> (num_packets_per_lid.extent (0));
492 size_t max_num_ent = 0;
493 Kokkos::parallel_reduce (
"Max num entries in CRS",
494 range_policy (0, numRowsToUnpack),
495 functor, max_num_ent);
506 template<
class LO,
class DT,
class BDT>
509 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
510 const Kokkos::View<const size_t*, DT>& offsets,
511 const Kokkos::View<const char*, BDT>& imports)
513 typedef typename DT::execution_space XS;
514 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO>, TotNumEntTag> range_policy;
515 size_t tot_num_ent = 0;
518 const LO numRowsToUnpack =
519 static_cast<LO
> (num_packets_per_lid.extent (0));
520 Kokkos::parallel_reduce (
"Total num entries in CRS to unpack",
521 range_policy (0, numRowsToUnpack),
522 functor, tot_num_ent);
533 template<
class LocalMatrix,
class LocalMap,
class BufferDeviceType>
536 const LocalMatrix& local_matrix,
538 const Kokkos::View<const char*, BufferDeviceType>& imports,
539 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
542 const bool unpack_pids,
545 typedef typename LocalMatrix::value_type ST;
546 typedef typename LocalMap::local_ordinal_type LO;
547 typedef typename LocalMap::device_type DT;
548 typedef typename DT::execution_space XS;
549 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO> > range_policy;
552 const char prefix[] =
553 "Tpetra::Details::UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix: ";
555 const size_t num_import_lids =
static_cast<size_t>(import_lids.extent(0));
556 if (num_import_lids == 0) {
563 TEUCHOS_TEST_FOR_EXCEPTION(combine_mode ==
ABSMAX,
564 std::invalid_argument,
565 prefix <<
"ABSMAX combine mode is not yet implemented for a matrix that has a " 566 "static graph (i.e., was constructed with the CrsMatrix constructor " 567 "that takes a const CrsGraph pointer).");
569 TEUCHOS_TEST_FOR_EXCEPTION(combine_mode ==
INSERT,
570 std::invalid_argument,
571 prefix <<
"INSERT combine mode is not allowed if the matrix has a static graph " 572 "(i.e., was constructed with the CrsMatrix constructor that takes a " 573 "const CrsGraph pointer).");
576 TEUCHOS_TEST_FOR_EXCEPTION(!(combine_mode ==
ADD || combine_mode ==
REPLACE),
577 std::invalid_argument,
578 prefix <<
"Invalid combine mode; should never get " 579 "here! Please report this bug to the Tpetra developers.");
582 bool bad_num_import_lids =
583 num_import_lids !=
static_cast<size_t>(num_packets_per_lid.extent(0));
584 TEUCHOS_TEST_FOR_EXCEPTION(bad_num_import_lids,
585 std::invalid_argument,
586 prefix <<
"importLIDs.size() (" << num_import_lids <<
") != " 587 "numPacketsPerLID.size() (" << num_packets_per_lid.extent(0) <<
").");
591 Kokkos::View<size_t*, DT> offsets(
"offsets", num_import_lids+1);
597 size_t max_num_ent = compute_maximum_num_entries<LO,DT>(
598 num_packets_per_lid, offsets, imports);
605 unpack_functor_type f(local_matrix, local_map,
606 imports, num_packets_per_lid, import_lids, offsets, combine_mode,
607 max_num_ent, unpack_pids, num_bytes_per_value, atomic);
609 typename unpack_functor_type::value_type x;
610 Kokkos::parallel_reduce(range_policy(0, static_cast<LO>(num_import_lids)), f, x);
611 auto x_h = x.to_std_pair();
612 TEUCHOS_TEST_FOR_EXCEPTION(x_h.first != 0, std::runtime_error,
613 prefix <<
"UnpackCrsMatrixAndCombineFunctor reported error code " 614 << x_h.first <<
" for the first bad row " << x_h.second);
619 template<
class LocalMatrix,
class BufferDeviceType>
622 const LocalMatrix& local_matrix,
624 const Kokkos::View<const char*, BufferDeviceType>& imports,
625 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
626 const size_t num_same_ids)
628 using Kokkos::parallel_reduce;
629 typedef typename LocalMatrix::ordinal_type LO;
630 typedef typename LocalMatrix::device_type device_type;
631 typedef typename device_type::execution_space XS;
632 typedef typename Kokkos::View<LO*, device_type>::size_type size_type;
633 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<LO> > range_policy;
634 typedef BufferDeviceType BDT;
640 num_items =
static_cast<LO
>(num_same_ids);
643 parallel_reduce(range_policy(0, num_items),
644 KOKKOS_LAMBDA(
const LO lid,
size_t& update) {
645 update +=
static_cast<size_t>(local_matrix.graph.row_map[lid+1]
646 -local_matrix.graph.row_map[lid]);
652 num_items =
static_cast<LO
>(permute_from_lids.extent(0));
655 parallel_reduce(range_policy(0, num_items),
656 KOKKOS_LAMBDA(
const LO i,
size_t& update) {
657 const LO lid = permute_from_lids(i);
658 update +=
static_cast<size_t> (local_matrix.graph.row_map[lid+1]
659 - local_matrix.graph.row_map[lid]);
666 const size_type np = num_packets_per_lid.extent(0);
667 Kokkos::View<size_t*, device_type> offsets(
"offsets", np+1);
670 compute_total_num_entries<LO, device_type, BDT> (num_packets_per_lid,
677 template<
class LO,
class DT,
class BDT>
678 KOKKOS_INLINE_FUNCTION
680 unpackRowCount(
const Kokkos::View<const char*, BDT>& imports,
682 const size_t num_bytes)
687 if (p_num_bytes > num_bytes) {
688 return OrdinalTraits<size_t>::invalid();
690 const char*
const in_buf = imports.data () + offset;
693 return static_cast<size_t>(num_ent_LO);
697 template<
class LO,
class DT,
class BDT>
699 setupRowPointersForRemotes(
702 const Kokkos::View<const char*, BDT>& imports,
703 const Kokkos::View<const size_t*, BDT>& num_packets_per_lid,
706 using Kokkos::parallel_reduce;
707 typedef typename DT::execution_space XS;
709 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type> > range_policy;
711 const size_t InvalidNum = OrdinalTraits<size_t>::invalid();
712 const size_type N = num_packets_per_lid.extent(0);
715 parallel_reduce (
"Setup row pointers for remotes",
717 KOKKOS_LAMBDA (
const size_t i,
int& k_error) {
718 typedef typename std::remove_reference< decltype( tgt_rowptr(0) ) >::type atomic_incr_type;
719 const size_t num_bytes = num_packets_per_lid(i);
720 const size_t offset = offsets(i);
721 const size_t num_ent = unpackRowCount<LO, DT, BDT> (imports, offset, num_bytes);
722 if (num_ent == InvalidNum) {
725 Kokkos::atomic_fetch_add (&tgt_rowptr (import_lids(i)), atomic_incr_type(num_ent));
733 makeCrsRowPtrFromLengths(
735 const Kokkos::View<size_t*,DT>& new_start_row)
737 using Kokkos::parallel_scan;
738 typedef typename DT::execution_space XS;
739 typedef typename Kokkos::View<size_t*,DT>::size_type size_type;
740 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type> > range_policy;
741 const size_type N = new_start_row.extent(0);
742 parallel_scan(range_policy(0, N),
743 KOKKOS_LAMBDA(
const size_t& i,
size_t& update,
const bool&
final) {
744 auto cur_val = tgt_rowptr(i);
746 tgt_rowptr(i) = update;
747 new_start_row(i) = tgt_rowptr(i);
754 template<
class LocalMatrix,
class LocalMap>
760 const Kokkos::View<size_t*,typename LocalMap::device_type>& new_start_row,
763 const LocalMatrix& local_matrix,
765 const size_t num_same_ids,
768 using Kokkos::parallel_for;
769 typedef typename LocalMap::device_type DT;
770 typedef typename LocalMap::local_ordinal_type LO;
771 typedef typename DT::execution_space XS;
772 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t> > range_policy;
774 parallel_for(range_policy(0, num_same_ids),
775 KOKKOS_LAMBDA(
const size_t i) {
776 typedef typename std::remove_reference< decltype( new_start_row(0) ) >::type atomic_incr_type;
778 const LO src_lid =
static_cast<LO
>(i);
779 size_t src_row = local_matrix.graph.row_map(src_lid);
781 const LO tgt_lid =
static_cast<LO
>(i);
782 const size_t tgt_row = tgt_rowptr(tgt_lid);
784 const size_t nsr = local_matrix.graph.row_map(src_lid+1)
785 - local_matrix.graph.row_map(src_lid);
786 Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
788 for (
size_t j=local_matrix.graph.row_map(src_lid);
789 j<local_matrix.graph.row_map(src_lid+1); ++j) {
790 LO src_col = local_matrix.graph.entries(j);
791 tgt_vals(tgt_row + j - src_row) = local_matrix.values(j);
792 tgt_colind(tgt_row + j - src_row) = local_col_map.
getGlobalElement(src_col);
793 tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
799 template<
class LocalMatrix,
class LocalMap>
801 copyDataFromPermuteIDs(
805 const Kokkos::View<size_t*,typename LocalMap::device_type>& new_start_row,
810 const LocalMatrix& local_matrix,
814 using Kokkos::parallel_for;
815 typedef typename LocalMap::device_type DT;
816 typedef typename LocalMap::local_ordinal_type LO;
817 typedef typename DT::execution_space XS;
819 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type> > range_policy;
821 const size_type num_permute_to_lids = permute_to_lids.extent(0);
823 parallel_for(range_policy(0, num_permute_to_lids),
824 KOKKOS_LAMBDA(
const size_t i) {
825 typedef typename std::remove_reference< decltype( new_start_row(0) ) >::type atomic_incr_type;
827 const LO src_lid = permute_from_lids(i);
828 const size_t src_row = local_matrix.graph.row_map(src_lid);
830 const LO tgt_lid = permute_to_lids(i);
831 const size_t tgt_row = tgt_rowptr(tgt_lid);
833 size_t nsr = local_matrix.graph.row_map(src_lid+1)
834 - local_matrix.graph.row_map(src_lid);
835 Kokkos::atomic_fetch_add(&new_start_row(tgt_lid), atomic_incr_type(nsr));
837 for (
size_t j=local_matrix.graph.row_map(src_lid);
838 j<local_matrix.graph.row_map(src_lid+1); ++j) {
839 LO src_col = local_matrix.graph.entries(j);
840 tgt_vals(tgt_row + j - src_row) = local_matrix.values(j);
841 tgt_colind(tgt_row + j - src_row) = local_col_map.
getGlobalElement(src_col);
842 tgt_pids(tgt_row + j - src_row) = (src_pids(src_col) != my_pid) ? src_pids(src_col) : -1;
848 template<
typename LocalMatrix,
typename LocalMap,
typename BufferDeviceType>
850 unpackAndCombineIntoCrsArrays2(
854 const Kokkos::View<size_t*,typename LocalMap::device_type>& new_start_row,
857 const Kokkos::View<const char*, BufferDeviceType>& imports,
858 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
859 const LocalMatrix& local_matrix,
862 const size_t num_bytes_per_value)
865 using Kokkos::subview;
866 using Kokkos::MemoryUnmanaged;
867 using Kokkos::parallel_reduce;
868 using Kokkos::atomic_fetch_add;
870 typedef typename LocalMap::device_type DT;
871 typedef typename LocalMap::local_ordinal_type LO;
872 typedef typename LocalMap::global_ordinal_type GO;
873 typedef typename LocalMatrix::value_type ST;
874 typedef typename DT::execution_space XS;
875 typedef typename Kokkos::View<LO*, DT>::size_type size_type;
876 typedef typename Kokkos::pair<size_type, size_type> slice;
877 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_type> > range_policy;
878 typedef BufferDeviceType BDT;
880 typedef View<int*,DT, MemoryUnmanaged> pids_out_type;
881 typedef View<GO*, DT, MemoryUnmanaged> gids_out_type;
882 typedef View<ST*, DT, MemoryUnmanaged> vals_out_type;
884 const size_t InvalidNum = OrdinalTraits<size_t>::invalid();
887 const size_type num_import_lids = import_lids.size();
890 parallel_reduce (
"Unpack and combine into CRS",
891 range_policy (0, num_import_lids),
892 KOKKOS_LAMBDA (
const size_t i,
int& k_error) {
893 typedef typename std::remove_reference< decltype( new_start_row(0) ) >::type atomic_incr_type;
894 const size_t num_bytes = num_packets_per_lid(i);
895 const size_t offset = offsets(i);
896 if (num_bytes == 0) {
900 size_t num_ent = unpackRowCount<LO,DT,BDT>(imports, offset, num_bytes);
901 if (num_ent == InvalidNum) {
905 const LO lcl_row = import_lids(i);
906 const size_t start_row = atomic_fetch_add(&new_start_row(lcl_row), atomic_incr_type(num_ent));
907 const size_t end_row = start_row + num_ent;
909 gids_out_type gids_out = subview(tgt_colind, slice(start_row, end_row));
910 vals_out_type vals_out = subview(tgt_vals, slice(start_row, end_row));
911 pids_out_type pids_out = subview(tgt_pids, slice(start_row, end_row));
913 k_error += unpackRow<ST,LO,GO,DT,BDT>(gids_out, pids_out, vals_out,
914 imports, offset, num_bytes,
915 num_ent, num_bytes_per_value);
918 for (
size_t j = 0; j < static_cast<size_t>(num_ent); ++j) {
919 const int pid = pids_out(j);
920 pids_out(j) = (pid != my_pid) ? pid : -1;
927 template<
typename LocalMatrix,
typename LocalMap,
typename BufferDeviceType>
930 const LocalMatrix & local_matrix,
933 const Kokkos::View<const char*, BufferDeviceType>& imports,
934 const Kokkos::View<const size_t*, BufferDeviceType>& num_packets_per_lid,
942 const size_t num_same_ids,
943 const size_t tgt_num_rows,
944 const size_t tgt_num_nonzeros,
945 const int my_tgt_pid,
946 const size_t num_bytes_per_value)
949 using Kokkos::subview;
950 using Kokkos::parallel_for;
951 using Kokkos::MemoryUnmanaged;
953 typedef typename LocalMap::device_type DT;
954 typedef typename LocalMap::local_ordinal_type LO;
955 typedef typename DT::execution_space XS;
956 typedef typename Kokkos::View<LO*, DT>::size_type size_type;
957 typedef Kokkos::RangePolicy<XS, Kokkos::IndexType<size_t> > range_policy;
958 typedef BufferDeviceType BDT;
960 const char prefix[] =
"unpackAndCombineIntoCrsArrays: ";
962 const size_t N = tgt_num_rows;
963 const size_t mynnz = tgt_num_nonzeros;
967 const int my_pid = my_tgt_pid;
970 parallel_for(range_policy(0, N+1),
971 KOKKOS_LAMBDA(
const size_t i) {
977 parallel_for(range_policy(0, num_same_ids),
978 KOKKOS_LAMBDA(
const size_t i) {
979 const LO tgt_lid =
static_cast<LO
>(i);
980 const LO src_lid =
static_cast<LO
>(i);
981 tgt_rowptr(tgt_lid) = local_matrix.graph.row_map(src_lid+1)
982 - local_matrix.graph.row_map(src_lid);
987 const size_type num_permute_to_lids = permute_to_lids.extent(0);
988 parallel_for(range_policy(0, num_permute_to_lids),
989 KOKKOS_LAMBDA(
const size_t i) {
990 const LO tgt_lid = permute_to_lids(i);
991 const LO src_lid = permute_from_lids(i);
992 tgt_rowptr(tgt_lid) = local_matrix.graph.row_map(src_lid+1)
993 - local_matrix.graph.row_map(src_lid);
998 const size_type num_import_lids = import_lids.extent(0);
999 View<size_t*, DT> offsets(
"offsets", num_import_lids+1);
1002 #ifdef HAVE_TPETRA_DEBUG 1004 auto nth_offset_h = getEntryOnHost(offsets, num_import_lids);
1005 const bool condition =
1006 nth_offset_h !=
static_cast<size_t>(imports.extent (0));
1007 TEUCHOS_TEST_FOR_EXCEPTION
1008 (condition, std::logic_error, prefix
1009 <<
"The final offset in bytes " << nth_offset_h
1010 <<
" != imports.size() = " << imports.extent(0)
1011 <<
". Please report this bug to the Tpetra developers.");
1013 #endif // HAVE_TPETRA_DEBUG 1017 setupRowPointersForRemotes<LO,DT,BDT>(tgt_rowptr,
1018 import_lids, imports, num_packets_per_lid, offsets);
1019 TEUCHOS_TEST_FOR_EXCEPTION(k_error != 0, std::logic_error, prefix
1020 <<
" Error transferring data to target row pointers. " 1021 "Please report this bug to the Tpetra developers.");
1025 View<size_t*, DT> new_start_row (
"new_start_row", N+1);
1028 makeCrsRowPtrFromLengths(tgt_rowptr, new_start_row);
// Check the row pointers built by makeCrsRowPtrFromLengths: entry N of
// tgt_rowptr, read back on the host, must equal the number of nonzeros the
// caller announced (mynnz).  The message separates the two values with
// " != ", like the other size-mismatch messages in this file.
1030 auto nth_tgt_rowptr_h = getEntryOnHost(tgt_rowptr, N);
1031 bool condition = nth_tgt_rowptr_h != mynnz;
1032 TEUCHOS_TEST_FOR_EXCEPTION(condition, std::invalid_argument,
1033 prefix <<
"CRS_rowptr[last] = " <<
1034 nth_tgt_rowptr_h <<
" != mynnz = " << mynnz <<
".");
1038 copyDataFromSameIDs(tgt_colind, tgt_pids, tgt_vals, new_start_row,
1039 tgt_rowptr, src_pids, local_matrix, local_col_map, num_same_ids, my_pid);
1041 copyDataFromPermuteIDs(tgt_colind, tgt_pids, tgt_vals, new_start_row,
1042 tgt_rowptr, src_pids, permute_to_lids, permute_from_lids,
1043 local_matrix, local_col_map, my_pid);
1045 if (imports.extent(0) <= 0) {
1049 int unpack_err = unpackAndCombineIntoCrsArrays2(tgt_colind, tgt_pids,
1050 tgt_vals, new_start_row, offsets, import_lids, imports, num_packets_per_lid,
1051 local_matrix, local_col_map, my_pid, num_bytes_per_value);
1052 TEUCHOS_TEST_FOR_EXCEPTION(
1053 unpack_err != 0, std::logic_error, prefix <<
"unpack loop failed. This " 1054 "should never happen. Please report this bug to the Tpetra developers.");
1100 template<
typename ST,
typename LO,
typename GO,
typename Node>
1104 const Teuchos::ArrayView<const char>& imports,
1105 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
1106 const Teuchos::ArrayView<const LO>& importLIDs,
1107 size_t constantNumPackets,
1113 typedef typename Node::device_type device_type;
1115 static_assert (std::is_same<device_type, typename local_matrix_type::device_type>::value,
1116 "Node::device_type and LocalMatrix::device_type must be the same.");
1119 typedef typename device_type::execution_space XS;
1122 typename XS::device_type outputDevice;
1127 auto num_packets_per_lid_d =
1129 numPacketsPerLID.size(),
true,
"num_packets_per_lid");
1131 auto import_lids_d =
1133 importLIDs.size(),
true,
"import_lids");
1137 imports.size(),
true,
"imports");
1140 auto local_col_map = sourceMatrix.
getColMap()->getLocalMap();
1143 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix(
1144 local_matrix, local_col_map, imports_d, num_packets_per_lid_d,
1145 import_lids_d, combineMode,
false, atomic);
1150 template<
typename ST,
typename LO,
typename GO,
typename NT>
1155 const Kokkos::DualView<const LO*, typename NT::device_type>& importLIDs,
1156 const size_t constantNumPackets,
1163 typedef typename NT::device_type device_type;
1165 typedef typename crs_matrix_type::local_matrix_type local_matrix_type;
1167 typedef typename dist_object_type::buffer_device_type buffer_device_type;
1168 typedef typename buffer_device_type::memory_space BMS;
1169 typedef typename device_type::memory_space MS;
1171 static_assert (std::is_same<device_type,
1172 typename local_matrix_type::device_type>::value,
1173 "NT::device_type and LocalMatrix::device_type must be " 1178 numPacketsPerLID_nc.template sync<BMS> ();
1180 auto num_packets_per_lid_d = numPacketsPerLID.template view<BMS> ();
1184 importLIDs_nc.template sync<MS> ();
1186 auto import_lids_d = importLIDs.template view<MS> ();
1190 imports_nc.template sync<BMS> ();
1192 auto imports_d = imports.template view<BMS> ();
1195 auto local_col_map = sourceMatrix.
getColMap ()->getLocalMap ();
1196 typedef decltype (local_col_map) local_map_type;
1199 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsMatrix<
1203 > (local_matrix, local_col_map, imports_d, num_packets_per_lid_d,
1204 import_lids_d, combineMode,
false, atomic);
1262 template<
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1266 const Teuchos::ArrayView<const LocalOrdinal> &importLIDs,
1267 const Teuchos::ArrayView<const char> &imports,
1268 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
1269 size_t constantNumPackets,
1273 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
1274 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs)
1276 using Kokkos::MemoryUnmanaged;
1278 typedef typename Node::device_type DT;
1280 const char prefix[] =
"unpackAndCombineWithOwningPIDsCount: ";
1282 TEUCHOS_TEST_FOR_EXCEPTION
1283 (permuteToLIDs.size () != permuteFromLIDs.size (), std::invalid_argument,
1284 prefix <<
"permuteToLIDs.size() = " << permuteToLIDs.size () <<
" != " 1285 "permuteFromLIDs.size() = " << permuteFromLIDs.size() <<
".");
1289 TEUCHOS_TEST_FOR_EXCEPTION
1290 (! locallyIndexed, std::invalid_argument, prefix <<
"The input " 1291 "CrsMatrix 'sourceMatrix' must be locally indexed.");
1292 TEUCHOS_TEST_FOR_EXCEPTION
1293 (importLIDs.size () != numPacketsPerLID.size (), std::invalid_argument,
1294 prefix <<
"importLIDs.size() = " << importLIDs.size () <<
" != " 1295 "numPacketsPerLID.size() = " << numPacketsPerLID.size () <<
".");
1298 auto permute_from_lids_d =
1300 permuteFromLIDs.getRawPtr (),
1301 permuteFromLIDs.size (),
true,
1302 "permute_from_lids");
1305 imports.getRawPtr (),
1306 imports.size (),
true,
1308 auto num_packets_per_lid_d =
1310 numPacketsPerLID.getRawPtr (),
1311 numPacketsPerLID.size (),
true,
1312 "num_packets_per_lid");
1314 return UnpackAndCombineCrsMatrixImpl::unpackAndCombineWithOwningPIDsCount(
1315 local_matrix, permute_from_lids_d, imports_d,
1316 num_packets_per_lid_d, numSameIDs);
1333 template<
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
1337 const Teuchos::ArrayView<const LocalOrdinal>& importLIDs,
1338 const Teuchos::ArrayView<const char>& imports,
1339 const Teuchos::ArrayView<const size_t>& numPacketsPerLID,
1340 const size_t constantNumPackets,
1343 const size_t numSameIDs,
1344 const Teuchos::ArrayView<const LocalOrdinal>& permuteToLIDs,
1345 const Teuchos::ArrayView<const LocalOrdinal>& permuteFromLIDs,
1346 size_t TargetNumRows,
1347 size_t TargetNumNonzeros,
1348 const int MyTargetPID,
1349 const Teuchos::ArrayView<size_t>& CRS_rowptr,
1350 const Teuchos::ArrayView<GlobalOrdinal>& CRS_colind,
1352 const Teuchos::ArrayView<const int>& SourcePids,
1353 Teuchos::Array<int>& TargetPids)
1360 using Teuchos::ArrayView;
1361 using Teuchos::outArg;
1362 using Teuchos::REDUCE_MAX;
1363 using Teuchos::reduceAll;
1365 typedef LocalOrdinal LO;
1367 typedef typename Node::device_type DT;
1368 typedef typename DT::execution_space XS;
1371 typedef typename matrix_type::impl_scalar_type ST;
1372 typedef typename ArrayView<const LO>::size_type size_type;
1374 const char prefix[] =
"Tpetra::Details::unpackAndCombineIntoCrsArrays: ";
// Argument checks.  Each throws std::invalid_argument with a message that
// shows both mismatching sizes, separated by " != " like the other
// size-mismatch messages in this file.
//
// CRS_rowptr must have exactly TargetNumRows + 1 entries.
1376 TEUCHOS_TEST_FOR_EXCEPTION(
1377 TargetNumRows + 1 != static_cast<size_t> (CRS_rowptr.size ()),
1378 std::invalid_argument, prefix <<
"CRS_rowptr.size() = " <<
1379 CRS_rowptr.size () <<
" != TargetNumRows+1 = " << TargetNumRows+1 <<
".");
// permuteToLIDs and permuteFromLIDs must have the same length.
1381 TEUCHOS_TEST_FOR_EXCEPTION(
1382 permuteToLIDs.size () != permuteFromLIDs.size (), std::invalid_argument,
1383 prefix <<
"permuteToLIDs.size() = " << permuteToLIDs.size ()
1384 <<
" != permuteFromLIDs.size() = " << permuteFromLIDs.size () <<
".");
1385 const size_type numImportLIDs = importLIDs.size ();
1387 TEUCHOS_TEST_FOR_EXCEPTION(
1388 numImportLIDs != numPacketsPerLID.size (), std::invalid_argument,
1389 prefix <<
"importLIDs.size() = " << numImportLIDs <<
" != " 1390 "numPacketsPerLID.size() = " << numPacketsPerLID.size() <<
".");
1393 if (static_cast<size_t> (TargetPids.size ()) != TargetNumNonzeros) {
1394 TargetPids.resize (TargetNumNonzeros);
1396 TargetPids.assign (TargetNumNonzeros, -1);
1400 auto local_col_map = sourceMatrix.
getColMap()->getLocalMap();
1403 typename XS::device_type outputDevice;
1404 auto import_lids_d =
1406 importLIDs.size(),
true,
"import_lids");
1410 imports.size(),
true,
"imports");
1412 auto num_packets_per_lid_d =
1414 numPacketsPerLID.size(),
true,
"num_packets_per_lid");
1416 auto permute_from_lids_d =
1418 permuteFromLIDs.size(),
true,
"permute_from_lids");
1420 auto permute_to_lids_d =
1422 permuteToLIDs.size(),
true,
"permute_to_lids");
1426 CRS_rowptr.size(),
true,
"crs_rowptr");
1430 CRS_colind.size(),
true,
"crs_colidx");
1432 #ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE 1433 static_assert (! std::is_same<
1434 typename std::remove_const<
1435 typename std::decay<
1439 std::complex<double> >::value,
1440 "CRS_vals::value_type is std::complex<double>; this should never happen" 1441 ", since std::complex does not work in Kokkos::View objects.");
1442 #endif // HAVE_TPETRA_INST_COMPLEX_DOUBLE 1446 CRS_vals.size(),
true,
"crs_vals");
1448 #ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE 1449 static_assert (! std::is_same<
1450 typename decltype (crs_vals_d)::non_const_value_type,
1451 std::complex<double> >::value,
1452 "crs_vals_d::non_const_value_type is std::complex<double>; this should " 1453 "never happen, since std::complex does not work in Kokkos::View objects.");
1454 #endif // HAVE_TPETRA_INST_COMPLEX_DOUBLE 1458 SourcePids.size(),
true,
"src_pids");
1462 TargetPids.size(),
true,
"tgt_pids");
1464 size_t num_bytes_per_value = 0;
1478 size_t num_bytes_per_value_l = 0;
1479 if (local_matrix.values.extent(0) > 0) {
1480 const ST& val = local_matrix.values(0);
1483 const ST& val = crs_vals_d(0);
1486 Teuchos::reduceAll<int, size_t>(*(sourceMatrix.
getComm()),
1487 Teuchos::REDUCE_MAX,
1488 num_bytes_per_value_l,
1489 outArg(num_bytes_per_value));
1492 #ifdef HAVE_TPETRA_INST_COMPLEX_DOUBLE 1493 static_assert (! std::is_same<
1494 typename decltype (crs_vals_d)::non_const_value_type,
1495 std::complex<double> >::value,
1496 "crs_vals_d::non_const_value_type is std::complex<double>; this should " 1497 "never happen, since std::complex does not work in Kokkos::View objects.");
1498 #endif // HAVE_TPETRA_INST_COMPLEX_DOUBLE 1500 UnpackAndCombineCrsMatrixImpl::unpackAndCombineIntoCrsArrays(
1501 local_matrix, local_col_map, import_lids_d, imports_d,
1502 num_packets_per_lid_d, permute_to_lids_d, permute_from_lids_d,
1503 crs_rowptr_d, crs_colind_d, crs_vals_d, src_pids_d, tgt_pids_d,
1504 numSameIDs, TargetNumRows, TargetNumNonzeros, MyTargetPID,
1505 num_bytes_per_value);
1508 typename decltype(crs_rowptr_d)::HostMirror crs_rowptr_h(
1509 CRS_rowptr.getRawPtr(), CRS_rowptr.size());
1512 typename decltype(crs_colind_d)::HostMirror crs_colind_h(
1513 CRS_colind.getRawPtr(), CRS_colind.size());
1516 typename decltype(crs_vals_d)::HostMirror crs_vals_h(
1517 CRS_vals.getRawPtr(), CRS_vals.size());
1520 typename decltype(tgt_pids_d)::HostMirror tgt_pids_h(
1521 TargetPids.getRawPtr(), TargetPids.size());
1529 #define TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_INSTANT( ST, LO, GO, NT ) \ 1531 Details::unpackCrsMatrixAndCombine<ST, LO, GO, NT> ( \ 1532 const CrsMatrix<ST, LO, GO, NT>&, \ 1533 const Teuchos::ArrayView<const char>&, \ 1534 const Teuchos::ArrayView<const size_t>&, \ 1535 const Teuchos::ArrayView<const LO>&, \ 1541 Details::unpackCrsMatrixAndCombineNew<ST, LO, GO, NT> ( \ 1542 const CrsMatrix<ST, LO, GO, NT>&, \ 1543 const Kokkos::DualView<const char*, typename DistObject<char, LO, GO, NT>::buffer_device_type>&, \ 1544 const Kokkos::DualView<const size_t*, typename DistObject<char, LO, GO, NT>::buffer_device_type>&, \ 1545 const Kokkos::DualView<const LO*, NT::device_type>&, \ 1548 const CombineMode, \ 1551 Details::unpackAndCombineIntoCrsArrays<ST, LO, GO, NT> ( \ 1552 const CrsMatrix<ST, LO, GO, NT> &, \ 1553 const Teuchos::ArrayView<const LO>&, \ 1554 const Teuchos::ArrayView<const char>&, \ 1555 const Teuchos::ArrayView<const size_t>&, \ 1558 const CombineMode, \ 1560 const Teuchos::ArrayView<const LO>&, \ 1561 const Teuchos::ArrayView<const LO>&, \ 1565 const Teuchos::ArrayView<size_t>&, \ 1566 const Teuchos::ArrayView<GO>&, \ 1567 const Teuchos::ArrayView<CrsMatrix<ST, LO, GO, NT>::impl_scalar_type>&, \ 1568 const Teuchos::ArrayView<const int>&, \ 1569 Teuchos::Array<int>&); \ 1571 Details::unpackAndCombineWithOwningPIDsCount<ST, LO, GO, NT> ( \ 1572 const CrsMatrix<ST, LO, GO, NT> &, \ 1573 const Teuchos::ArrayView<const LO> &, \ 1574 const Teuchos::ArrayView<const char> &, \ 1575 const Teuchos::ArrayView<const size_t>&, \ 1580 const Teuchos::ArrayView<const LO>&, \ 1581 const Teuchos::ArrayView<const LO>&); 1583 #endif // TPETRA_DETAILS_UNPACKCRSMATRIXANDCOMBINE_DEF_HPP Base class for distributed Tpetra objects that support data redistribution.
Kokkos::parallel_reduce functor to determine the number of entries (to unpack) in a KokkosSparse::Crs...
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
void unpackAndCombineIntoCrsMatrix(const LocalMatrix &local_matrix, const LocalMap &local_map, const Kokkos::View< const char *, BufferDeviceType > &imports, const Kokkos::View< const size_t *, BufferDeviceType > &num_packets_per_lid, const typename PackTraits< typename LocalMap::local_ordinal_type, typename LocalMap::device_type >::input_array_type import_lids, const Tpetra::CombineMode combine_mode, const bool unpack_pids, const bool atomic)
Perform the unpack operation for the matrix.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types...
static KOKKOS_INLINE_FUNCTION size_t packValueCount(const T &)
Number of bytes required to pack or unpack the given value of type value_type.
KokkosSparse::CrsMatrix< impl_scalar_type, LocalOrdinal, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Traits class for packing / unpacking data of type T, using Kokkos data structures that live in the gi...
KOKKOS_INLINE_FUNCTION LocalOrdinal getLocalElement(const GlobalOrdinal globalIndex) const
Get the local index corresponding to the given global index.
Kokkos::Details::ArithTraits< Scalar >::val_type impl_scalar_type
The type used internally in place of Scalar.
Declaration of the Tpetra::CrsMatrix class.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpac...
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks...
KOKKOS_INLINE_FUNCTION GlobalOrdinal getGlobalElement(const LocalOrdinal localIndex) const
Get the global index corresponding to the given local index.
Implementation details of Tpetra.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
Insert new values that don't currently exist.
static KOKKOS_INLINE_FUNCTION size_t unpackValue(T &outVal, const char inBuf[])
Unpack the given value from the given input buffer.
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
static KOKKOS_INLINE_FUNCTION Kokkos::pair< int, size_t > unpackArray(value_type outBuf[], const char inBuf[], const size_t numEnt)
Unpack numEnt value_type entries from the given input buffer of bytes, to the given output buffer of ...
Sets up and executes a communication plan for a Tpetra DistObject.
CombineMode
Rule for combining data in an Import or Export.
Sum new values into existing values.
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
Kokkos::View< const value_type *, D, Kokkos::MemoryUnmanaged > input_array_type
The type of an input array of value_type.
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Replace old value with maximum of magnitudes of old and new values.
Replace existing values with new values.
"Local" part of Map suitable for Kokkos kernels.
size_t compute_maximum_num_entries(const Kokkos::View< const size_t *, BDT > &num_packets_per_lid, const Kokkos::View< const size_t *, DT > &offsets, const Kokkos::View< const char *, BDT > &imports)
Maximum number of entries in any row of the packed matrix.
Kokkos::View< value_type *, D, Kokkos::MemoryUnmanaged > output_array_type
The type of an output array of value_type.
size_t compute_total_num_entries(const Kokkos::View< const size_t *, BDT > &num_packets_per_lid, const Kokkos::View< const size_t *, DT > &offsets, const Kokkos::View< const char *, BDT > &imports)
Total number of entries over all rows of the packed matrix.
void unpackCrsMatrixAndCombine(const CrsMatrix< ST, LO, GO, NT > &sourceMatrix, const Teuchos::ArrayView< const char > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &importLIDs, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, const bool atomic)
Unpack the imported column indices and values, and combine into matrix.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpe...
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Unpacks and combines a single row of the CrsMatrix.
local_matrix_type getLocalMatrix() const
The local sparse matrix.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary...