42 #ifndef TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP 43 #define TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP 45 #include "TpetraCore_config.h" 46 #include "Teuchos_Array.hpp" 47 #include "Teuchos_ArrayView.hpp" 81 #ifndef DOXYGEN_SHOULD_SKIP_THIS 84 #endif // DOXYGEN_SHOULD_SKIP_THIS 91 namespace PackCrsGraphImpl {
// Functor whose call operator takes (index, running total, final flag); it
// stores per-row packet counts in counts_ and running offsets in
// outputOffsets_.
// NOTE(review): this listing skips source lines (the embedded original line
// numbers are not contiguous), so the class-name line, several braces and some
// guards are not visible. The comments below describe only what is shown.
99 template<
class OutputOffsetsViewType,
100 class CountsViewType,
101 class InputOffsetsViewType,
102 class InputLocalRowIndicesViewType,
103 class InputLocalRowPidsViewType,
105 #ifdef HAVE_TPETRA_DEBUG 109 #endif // HAVE_TPETRA_DEBUG 113 typedef typename OutputOffsetsViewType::non_const_value_type output_offset_type;
114 typedef typename CountsViewType::non_const_value_type count_type;
115 typedef typename InputOffsetsViewType::non_const_value_type input_offset_type;
116 typedef typename InputLocalRowIndicesViewType::non_const_value_type local_row_index_type;
117 typedef typename InputLocalRowPidsViewType::non_const_value_type local_row_pid_type;
// device_type is taken from the output offsets view; the error flag below
// lives on the same device.
119 typedef typename OutputOffsetsViewType::device_type device_type;
// Compile-time checks on the view template parameters.
120 static_assert (std::is_same<
typename CountsViewType::device_type::execution_space,
121 typename device_type::execution_space>::value,
122 "OutputOffsetsViewType and CountsViewType must have the same execution space.");
123 static_assert (Kokkos::Impl::is_view<OutputOffsetsViewType>::value,
124 "OutputOffsetsViewType must be a Kokkos::View.");
125 static_assert (std::is_same<typename OutputOffsetsViewType::value_type, output_offset_type>::value,
126 "OutputOffsetsViewType must be a nonconst Kokkos::View.");
127 static_assert (std::is_integral<output_offset_type>::value,
128 "The type of each entry of OutputOffsetsViewType must be a built-in integer type.");
129 static_assert (Kokkos::Impl::is_view<CountsViewType>::value,
130 "CountsViewType must be a Kokkos::View.");
// NOTE(review): the next check compares CountsViewType::value_type against
// output_offset_type rather than count_type, although its message is about
// CountsViewType being nonconst. Confirm whether count_type was intended.
131 static_assert (std::is_same<typename CountsViewType::value_type, output_offset_type>::value,
132 "CountsViewType must be a nonconst Kokkos::View.");
133 static_assert (std::is_integral<count_type>::value,
134 "The type of each entry of CountsViewType must be a built-in integer type.");
135 static_assert (Kokkos::Impl::is_view<InputOffsetsViewType>::value,
136 "InputOffsetsViewType must be a Kokkos::View.");
137 static_assert (std::is_integral<input_offset_type>::value,
138 "The type of each entry of InputOffsetsViewType must be a built-in integer type.");
139 static_assert (Kokkos::Impl::is_view<InputLocalRowIndicesViewType>::value,
140 "InputLocalRowIndicesViewType must be a Kokkos::View.");
141 static_assert (std::is_integral<local_row_index_type>::value,
142 "The type of each entry of InputLocalRowIndicesViewType must be a built-in integer type.");
// Constructor: the view arguments are stored in the members of the same name
// (with a trailing underscore).
145 const CountsViewType& counts,
146 const InputOffsetsViewType& rowOffsets,
147 const InputLocalRowIndicesViewType& lclRowInds,
148 const InputLocalRowPidsViewType& lclRowPids) :
149 outputOffsets_ (outputOffsets),
151 rowOffsets_ (rowOffsets),
152 lclRowInds_ (lclRowInds),
153 lclRowPids_ (lclRowPids),
// Size checks: counts must have one entry per row to pack and outputOffsets
// one more than that; a mismatch throws std::invalid_argument.
// NOTE(review): the condition enclosing these checks is not visible here.
157 const size_t numRowsToPack =
static_cast<size_t> (lclRowInds_.extent (0));
159 if (numRowsToPack != static_cast<size_t> (counts_.extent (0))) {
160 std::ostringstream os;
161 os <<
"lclRowInds.extent(0) = " << numRowsToPack
162 <<
" != counts.extent(0) = " << counts_.extent (0)
164 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
166 if (static_cast<size_t> (numRowsToPack + 1) !=
167 static_cast<size_t> (outputOffsets_.extent (0))) {
168 std::ostringstream os;
169 os <<
"lclRowInds.extent(0) + 1 = " << (numRowsToPack + 1)
170 <<
" != outputOffsets.extent(0) = " << outputOffsets_.extent (0)
172 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::invalid_argument, os.str ());
// Call operator: curInd is the index being processed, update is the running
// total of packets, final is the flag passed in by the scan.
// NOTE(review): the bodies of the two range checks below, and the guards
// around the writes to outputOffsets_ and counts_, are not visible here.
177 KOKKOS_INLINE_FUNCTION
void 178 operator() (
const local_row_index_type& curInd,
179 output_offset_type& update,
180 const bool final)
const 183 if (curInd < static_cast<local_row_index_type> (0)) {
191 if (curInd >= static_cast<local_row_index_type> (outputOffsets_.extent (0))) {
// The running total so far is the offset for index curInd.
196 outputOffsets_(curInd) = update;
// Only indices that have a counts_ entry contribute; the scan range may
// extend one past the last row.
199 if (curInd < static_cast<local_row_index_type> (counts_.extent (0))) {
200 const auto lclRow = lclRowInds_(curInd);
// Reject a local row index that is negative or has no rowOffsets_ entry at
// lclRow + 1.
201 if (static_cast<size_t> (lclRow + 1) >= static_cast<size_t> (rowOffsets_.extent (0)) ||
202 static_cast<local_row_index_type> (lclRow) <
static_cast<local_row_index_type
> (0)) {
// Number of entries in the row: difference of consecutive row offsets,
// converted to count_type.
210 const count_type count =
211 static_cast<count_type
> (rowOffsets_(lclRow+1) - rowOffsets_(lclRow));
// One packet per entry, or two per entry when lclRowPids_ is nonempty; an
// empty row contributes nothing.
215 const count_type numEntToPack = (count == 0)
216 ? static_cast<count_type>(0)
217 : count * (1 + (lclRowPids_.size() > 0 ? 1 : 0));
220 counts_(curInd) = numEntToPack;
222 update += numEntToPack;
// Host mirror of the error flag view (presumably part of getError(); the
// surrounding lines are not visible here).
232 auto error_h = Kokkos::create_mirror_view (error_);
// Data members. The three input views are held as their const_type variants.
238 OutputOffsetsViewType outputOffsets_;
239 CountsViewType counts_;
240 typename InputOffsetsViewType::const_type rowOffsets_;
241 typename InputLocalRowIndicesViewType::const_type lclRowInds_;
242 typename InputLocalRowPidsViewType::const_type lclRowPids_;
// Rank-0 int view holding an error code.
243 Kokkos::View<int, device_type> error_;
// Computes, for the rows listed in lclRowInds, the per-row packet counts
// (into counts) and their running offsets (into outputOffsets) with a
// Kokkos::parallel_scan, and returns a value read back from outputOffsets on
// host.
// Throws std::invalid_argument when the view extents are inconsistent, and
// std::runtime_error when the functor reports a nonzero error code.
// NOTE(review): this listing skips source lines (the embedded original line
// numbers are not contiguous); some braces, declarations and early returns
// are not visible.
255 template<
class OutputOffsetsViewType,
256 class CountsViewType,
257 class InputOffsetsViewType,
258 class InputLocalRowIndicesViewType,
259 class InputLocalRowPidsViewType>
260 typename CountsViewType::non_const_value_type
261 computeNumPacketsAndOffsets(
const OutputOffsetsViewType& outputOffsets,
262 const CountsViewType& counts,
263 const InputOffsetsViewType& rowOffsets,
264 const InputLocalRowIndicesViewType& lclRowInds,
265 const InputLocalRowPidsViewType& lclRowPids)
// functor_type is instantiated with the const_type variants of the three
// input views.
268 CountsViewType,
typename InputOffsetsViewType::const_type,
269 typename InputLocalRowIndicesViewType::const_type,
270 typename InputLocalRowPidsViewType::const_type> functor_type;
271 typedef typename CountsViewType::non_const_value_type count_type;
272 typedef typename OutputOffsetsViewType::size_type size_type;
273 typedef typename OutputOffsetsViewType::execution_space execution_space;
274 typedef typename functor_type::local_row_index_type LO;
275 typedef Kokkos::RangePolicy<execution_space, LO> range_type;
276 const char prefix[] =
"computeNumPacketsAndOffsets: ";
278 count_type count = 0;
// The number of rows to pack is the length of lclRowInds, held as count_type.
279 const count_type numRowsToPack = lclRowInds.extent (0);
// NOTE(review): the body of this zero-row branch is not visible here.
281 if (numRowsToPack == 0) {
// Argument validation: the graph must have at least one row, outputOffsets
// must have numRowsToPack + 1 entries, and counts must have numRowsToPack.
285 TEUCHOS_TEST_FOR_EXCEPTION
286 (rowOffsets.extent (0) <=
static_cast<size_type
> (1),
287 std::invalid_argument, prefix <<
"There is at least one row to pack, " 288 "but the graph has no rows. lclRowInds.extent(0) = " <<
289 numRowsToPack <<
", but rowOffsets.extent(0) = " <<
290 rowOffsets.extent (0) <<
" <= 1.");
291 TEUCHOS_TEST_FOR_EXCEPTION
292 (outputOffsets.extent (0) !=
293 static_cast<size_type
> (numRowsToPack + 1), std::invalid_argument,
294 prefix <<
"Output dimension does not match number of rows to pack. " 295 <<
"outputOffsets.extent(0) = " << outputOffsets.extent (0)
296 <<
" != lclRowInds.extent(0) + 1 = " 297 <<
static_cast<size_type
> (numRowsToPack + 1) <<
".");
298 TEUCHOS_TEST_FOR_EXCEPTION
299 (counts.extent (0) != numRowsToPack, std::invalid_argument,
300 prefix <<
"counts.extent(0) = " << counts.extent (0)
301 <<
" != numRowsToPack = " << numRowsToPack <<
".");
// Scan over numRowsToPack + 1 indices: one more than the number of rows, to
// match the length of outputOffsets.
303 functor_type f (outputOffsets, counts, rowOffsets, lclRowInds, lclRowPids);
304 Kokkos::parallel_scan (range_type (0, numRowsToPack + 1), f);
// The functor reports problems through an error code, not by throwing.
307 const int errCode = f.getError ();
308 TEUCHOS_TEST_FOR_EXCEPTION
309 (errCode != 0, std::runtime_error, prefix <<
"parallel_scan error code " 310 << errCode <<
" != 0.");
// Consistency check: compares outputOffsets(numRowsToPack) against `total`
// and, on mismatch, builds a diagnostic string. The closing
// #endif // HAVE_TPETRA_DEBUG further down suggests this section is
// debug-only; the matching #ifdef is not visible here.
// NOTE(review): k (type LO) is compared with numRowsToPack (count_type);
// confirm the signedness mix is benign.
314 for (LO k = 0; k < numRowsToPack; ++k) {
317 if (outputOffsets(numRowsToPack) != total) {
// Allocate the error stream on first use.
318 if (errStr.get () == NULL) {
319 errStr = std::unique_ptr<std::ostringstream> (
new std::ostringstream ());
321 std::ostringstream& os = *errStr;
323 <<
"outputOffsets(numRowsToPack=" << numRowsToPack <<
") " 324 << outputOffsets(numRowsToPack) <<
" != sum of counts = " 325 << total <<
"." << std::endl;
326 if (numRowsToPack != 0) {
// For fewer than 10 rows the full arrays are printed; the later output
// reports only the last row's offset.
328 if (numRowsToPack < static_cast<LO> (10)) {
329 os <<
"outputOffsets: [";
330 for (LO i = 0; i <= numRowsToPack; ++i) {
331 os << outputOffsets(i);
332 if (static_cast<LO> (i + 1) <= numRowsToPack) {
336 os <<
"]" << std::endl;
338 for (LO i = 0; i < numRowsToPack; ++i) {
340 if (static_cast<LO> (i + 1) < numRowsToPack) {
344 os <<
"]" << std::endl;
347 os <<
"outputOffsets(" << (numRowsToPack-1) <<
") = " 348 << outputOffsets(numRowsToPack-1) <<
"." << std::endl;
351 count = outputOffsets(numRowsToPack);
// NOTE(review): this returns a two-element braced list, but the declared
// return type is CountsViewType::non_const_value_type. Confirm this path
// compiles when it is enabled.
352 return {
false, errStr};
// Result: an entry of outputOffsets fetched with getEntryOnHost and converted
// to count_type (the index argument is not visible here).
354 #endif // HAVE_TPETRA_DEBUG 358 using Tpetra::Details::getEntryOnHost;
359 return static_cast<count_type
> (getEntryOnHost (outputOffsets,
// Packs one graph row into `exports`, starting at `offset`: first the global
// column index of each of the num_ent entries, then (in the second loop) one
// process id per entry. Returns the number of entries written.
// NOTE(review): this listing skips source lines (the embedded original line
// numbers are not contiguous). The lids_in, pids_in and offset parameters and
// the guards around the early return and the second loop are not visible.
374 template<
class Packet,
class ColumnMap,
class BufferDeviceType>
377 packRow(
const ColumnMap& col_map,
378 const Kokkos::View<Packet*, BufferDeviceType>& exports,
382 const size_t num_ent,
383 const bool pack_pids)
385 using Kokkos::subview;
386 typedef typename ColumnMap::local_ordinal_type LO;
387 typedef typename ColumnMap::global_ordinal_type GO;
// Early exit that packs nothing (its condition is not visible here).
391 return static_cast<size_t>(0);
// With PIDs, each entry takes two slots: its global index and its process id.
394 size_t num_ent_packed = num_ent;
395 if (pack_pids) num_ent_packed += num_ent;
// Convert each local column index to a global one through the column map and
// store it at offset + k.
399 for (
size_t k = 0; k < num_ent; ++k) {
400 const LO lid = lids_in[k];
401 const GO gid = col_map.getGlobalElement (lid);
402 exports(offset+k) = gid;
// Process ids are looked up by local column index (pids_in[lid]) and stored
// after the num_ent global indices, converted to GO.
406 for (
size_t k = 0; k < num_ent; ++k) {
407 const LO lid = lids_in[k];
408 const int pid = pids_in[lid];
409 exports(offset+num_ent+k) =
static_cast<GO
>(pid);
413 return num_ent_packed;
// Reduction functor that packs one exported row per index into `exports`
// (through packRow) and reduces to a pair (error code, index); error code 0
// is the value set by init().
// NOTE(review): this listing skips source lines (the embedded original line
// numbers are not contiguous); several typedef names, braces and return
// statements are not visible.
416 template<
class Packet,
class LocalGraph,
class LocalMap,
class BufferDeviceType>
417 struct PackCrsGraphFunctor {
418 typedef LocalGraph local_graph_type;
420 typedef typename local_map_type::local_ordinal_type LO;
421 typedef typename local_map_type::global_ordinal_type GO;
422 typedef typename local_graph_type::device_type device_type;
// Buffer-side views: packet counts and offsets are read-only size_t arrays;
// exports is the writable output buffer.
424 typedef Kokkos::View<const size_t*, BufferDeviceType>
425 num_packets_per_lid_view_type;
426 typedef Kokkos::View<const size_t*, BufferDeviceType> offsets_view_type;
427 typedef Kokkos::View<Packet*, BufferDeviceType> exports_view_type;
429 export_lids_view_type;
431 source_pids_view_type;
433 typedef typename num_packets_per_lid_view_type::non_const_value_type
435 typedef typename offsets_view_type::non_const_value_type
// Reduction value: first = error code, second = an index of type LO.
437 typedef Kokkos::pair<int, LO> value_type;
439 static_assert (std::is_same<LO, typename local_graph_type::data_type>::value,
440 "local_map_type::local_ordinal_type and " 441 "local_graph_type::data_type must be the same.");
443 local_graph_type local_graph;
444 local_map_type local_col_map;
445 exports_view_type exports;
446 num_packets_per_lid_view_type num_packets_per_lid;
447 export_lids_view_type export_lids;
448 source_pids_view_type source_pids;
449 offsets_view_type offsets;
// Constructor: stores all arguments, then checks that a nonempty graph's
// row_map has numRows + 1 entries (std::logic_error otherwise).
452 PackCrsGraphFunctor(
const local_graph_type& local_graph_in,
453 const local_map_type& local_col_map_in,
454 const exports_view_type& exports_in,
455 const num_packets_per_lid_view_type& num_packets_per_lid_in,
456 const export_lids_view_type& export_lids_in,
457 const source_pids_view_type& source_pids_in,
458 const offsets_view_type& offsets_in,
459 const bool pack_pids_in) :
460 local_graph (local_graph_in),
461 local_col_map (local_col_map_in),
462 exports (exports_in),
463 num_packets_per_lid (num_packets_per_lid_in),
464 export_lids (export_lids_in),
465 source_pids (source_pids_in),
466 offsets (offsets_in),
467 pack_pids (pack_pids_in)
469 const LO numRows = local_graph_in.numRows ();
471 static_cast<LO
> (local_graph.row_map.extent (0));
472 TEUCHOS_TEST_FOR_EXCEPTION
473 (numRows != 0 && rowMapDim != numRows + static_cast<LO> (1),
474 std::logic_error,
"local_graph.row_map.extent(0) = " 475 << rowMapDim <<
" != numRows (= " << numRows <<
" ) + 1.");
// Reduction identity: error code 0 paired with the invalid ordinal.
478 KOKKOS_INLINE_FUNCTION
void init (value_type& dst)
const 480 using ::Tpetra::Details::OrdinalTraits;
481 dst = Kokkos::make_pair (0, OrdinalTraits<LO>::invalid ());
// Join: acts only when src carries an error and dst does not yet (the body
// is not visible here).
484 KOKKOS_INLINE_FUNCTION
void 485 join (
volatile value_type& dst,
const volatile value_type& src)
const 489 if (src.first != 0 && dst.first == 0) {
// Packs the row for export index i; error codes 1, 2 and 3 are written to
// dst at the three checks below.
494 KOKKOS_INLINE_FUNCTION
495 void operator() (
const LO i, value_type& dst)
const 497 const size_t offset = offsets[i];
498 const LO export_lid = export_lids[i];
499 const size_t buf_size = exports.size();
500 const size_t num_packets_this_lid = num_packets_per_lid(i);
// NOTE(review): row_map is indexed with export_lid and export_lid+1 here,
// before export_lid is compared against numRows() below. Confirm the read
// cannot go out of bounds for an invalid export_lid.
501 const size_t num_ent =
502 static_cast<size_t> (local_graph.row_map[export_lid+1]
503 - local_graph.row_map[export_lid]);
// NOTE(review): init() sets dst.first to 0 and join() adopts src only when
// dst.first == 0, yet the guards below assign an error code only when
// dst.first != 0. As written, an error is recorded only if one was already
// present; `== 0` may have been intended. Confirm.
// Code 1: export_lid is not a valid local row.
513 if (export_lid >= static_cast<LO>(local_graph.numRows())) {
514 if (dst.first != 0) {
515 dst = Kokkos::make_pair (1, i);
// Code 2: this row's packets would not fit inside the exports buffer.
519 else if ((offset > buf_size || offset + num_packets_this_lid > buf_size)) {
520 if (dst.first != 0) {
521 dst = Kokkos::make_pair (2, i);
// Take the slice of local column indices for this row and pack it.
531 const auto row_beg = local_graph.row_map[export_lid];
532 const auto row_end = local_graph.row_map[export_lid + 1];
533 auto lids_in = subview (local_graph.entries,
534 Kokkos::make_pair (row_beg, row_end));
535 typedef local_map_type LMT;
537 typedef BufferDeviceType BDT;
538 size_t num_ent_packed_this_row =
539 packRow<PT,LMT,BDT>(local_col_map, exports, lids_in,
540 source_pids, offset, num_ent, pack_pids);
// Code 3: packRow wrote a different number of packets than expected.
541 if (num_ent_packed_this_row != num_packets_this_lid) {
542 if (dst.first != 0) {
543 dst = Kokkos::make_pair (3, i);
// Validates the argument extents, runs PackCrsGraphFunctor in a
// Kokkos::parallel_reduce over [0, num_packets_per_lid.extent(0)), and throws
// std::runtime_error if the reduction result carries a nonzero error code.
// NOTE(review): this listing skips source lines (the embedded original line
// numbers are not contiguous); parts of the parameter list and several
// braces are not visible.
556 template<
class Packet,
class LocalGraph,
class LocalMap,
class BufferDeviceType>
558 do_pack(
const LocalGraph& local_graph,
560 const Kokkos::View<Packet*, BufferDeviceType>& exports,
564 >::input_array_type& num_packets_per_lid,
566 typename LocalMap::local_ordinal_type,
567 typename LocalGraph::device_type
568 >::input_array_type& export_lids,
571 typename LocalGraph::device_type
572 >::input_array_type& source_pids,
573 const Kokkos::View<const size_t*, BufferDeviceType>& offsets,
574 const bool pack_pids)
576 typedef typename LocalMap::local_ordinal_type LO;
577 typedef typename LocalGraph::device_type device_type;
578 typedef Kokkos::RangePolicy<typename device_type::execution_space, LO> range_type;
579 const char prefix[] =
"Tpetra::Details::PackCrsGraphImpl::do_pack: ";
// With at least one export LID: offsets needs one more entry than
// export_lids, and num_packets_per_lid needs the same number of entries.
581 if (export_lids.extent (0) != 0) {
582 TEUCHOS_TEST_FOR_EXCEPTION
583 (static_cast<size_t> (offsets.extent (0)) !=
584 static_cast<size_t> (export_lids.extent (0) + 1),
585 std::invalid_argument, prefix <<
"offsets.extent(0) = " 586 << offsets.extent (0) <<
" != export_lids.extent(0) (= " 587 << export_lids.extent (0) <<
") + 1.");
588 TEUCHOS_TEST_FOR_EXCEPTION
589 (export_lids.extent (0) != num_packets_per_lid.extent (0),
590 std::invalid_argument, prefix <<
"export_lids.extent(0) = " <<
591 export_lids.extent (0) <<
" != num_packets_per_lid.extent(0) = " 592 << num_packets_per_lid.extent (0) <<
".");
// Packing PIDs into a nonempty buffer requires a nonempty source_pids.
596 TEUCHOS_TEST_FOR_EXCEPTION
597 (pack_pids && exports.extent (0) != 0 &&
598 source_pids.extent (0) == 0, std::invalid_argument, prefix <<
599 "pack_pids is true, and exports.extent(0) = " <<
600 exports.extent (0) <<
" != 0, meaning that we need to pack at " 601 "least one graph entry, but source_pids.extent(0) = 0.");
// Build the functor and reduce; result.first is the error code and
// result.second the index reported with it.
604 typedef PackCrsGraphFunctor<Packet,LocalGraph,LocalMap,BufferDeviceType> pack_functor_type;
605 pack_functor_type f (local_graph, local_map, exports,
606 num_packets_per_lid, export_lids,
607 source_pids, offsets, pack_pids);
609 typename pack_functor_type::value_type result;
610 range_type range (0, num_packets_per_lid.extent (0));
611 Kokkos::parallel_reduce (range, f, result);
// On error, add detail for codes 1 (bad export LID) and 2 (bad offset) using
// host mirrors of the relevant views, then throw.
// NOTE(review): no copy from the device views into these mirrors is visible
// in this listing. Confirm one exists in the skipped lines.
613 if (result.first != 0) {
614 std::ostringstream os;
616 if (result.first == 1) {
617 auto export_lids_h = Kokkos::create_mirror_view (export_lids);
619 const auto firstBadLid = export_lids_h(result.second);
620 os <<
"First bad export LID: export_lids(i=" << result.second <<
") = " 623 else if (result.first == 2) {
624 auto offsets_h = Kokkos::create_mirror_view (offsets);
626 const auto firstBadOffset = offsets_h(result.second);
628 auto num_packets_per_lid_h =
629 Kokkos::create_mirror_view (num_packets_per_lid);
631 os <<
"First bad offset: offsets(i=" << result.second <<
") = " 632 << firstBadOffset <<
", num_packets_per_lid(i) = " 633 << num_packets_per_lid_h(result.second) <<
", buf_size = " 637 TEUCHOS_TEST_FOR_EXCEPTION
638 (
true, std::runtime_error, prefix <<
"PackCrsGraphFunctor reported " 639 "error code " << result.first <<
" for the first bad row " 640 << result.second <<
". " << os.str ());
// Implementation-level pack routine working on Kokkos views: validates the
// inputs, computes per-row packet counts and offsets, makes sure `exports`
// is large enough, and calls do_pack to fill it. constant_num_packets is set
// to 0.
// NOTE(review): this listing skips source lines (the embedded original line
// numbers are not contiguous); the function-name line, part of the parameter
// list, and several braces and guards are not visible.
670 template<
typename LO,
typename GO,
typename NT>
675 const Kokkos::View<
size_t*,
677 const Kokkos::View<const LO*, typename NT::device_type>& export_lids,
678 const Kokkos::View<const int*, typename NT::device_type>& export_pids,
679 size_t& constant_num_packets,
680 const bool pack_pids,
686 typedef typename buffer_device_type::execution_space execution_space;
687 typedef Kokkos::DualView<packet_type*,buffer_device_type> exports_view_type;
688 const char prefix[] =
"Tpetra::Details::packCrsGraph: ";
// Compile-time switch, off here (presumably gating the std::cerr output
// below; those guards are not visible).
689 constexpr
bool debug =
false;
692 auto local_col_map = sourceGraph.
getColMap ()->getLocalMap ();
697 constant_num_packets = 0;
// Input checks: export_lids and num_packets_per_lid must have the same
// length, and num_packets_per_lid must not be null when there is anything to
// export.
699 const size_t num_export_lids =
700 static_cast<size_t> (export_lids.extent (0));
701 TEUCHOS_TEST_FOR_EXCEPTION
703 static_cast<size_t> (num_packets_per_lid.extent (0)),
704 std::invalid_argument, prefix <<
"num_export_lids.extent(0) = " 705 << num_export_lids <<
" != num_packets_per_lid.extent(0) = " 706 << num_packets_per_lid.extent (0) <<
".");
707 if (num_export_lids != 0) {
708 TEUCHOS_TEST_FOR_EXCEPTION
709 (num_packets_per_lid.data () == NULL, std::invalid_argument,
710 prefix <<
"num_export_lids = "<< num_export_lids <<
" != 0, but " 711 "num_packets_per_lid.data() = " 712 << num_packets_per_lid.data () <<
" == NULL.");
// Nothing to export: replace exports with an empty DualView, fencing the
// execution space before and after.
715 if (num_export_lids == 0) {
719 execution_space::fence ();
720 exports = exports_view_type (
"exports", 0);
721 execution_space::fence ();
// One offset per exported row plus one.
726 Kokkos::View<size_t*,buffer_device_type> offsets (
"offsets", num_export_lids + 1);
731 computeNumPacketsAndOffsets(offsets, num_packets_per_lid,
732 local_graph.row_map, export_lids, export_pids);
// Reallocate exports only when the required count exceeds its current
// length.
735 if (count > static_cast<size_t> (exports.extent (0))) {
739 execution_space::fence ();
740 exports = exports_view_type (
"exports", count);
742 std::ostringstream os;
743 os <<
"*** exports resized to " << count << std::endl;
744 std::cerr << os.str ();
746 execution_space::fence ();
749 std::ostringstream os;
750 os <<
"*** count: " << count <<
", exports.extent(0): " 751 << exports.extent (0) << std::endl;
752 std::cerr << os.str ();
// Packing PIDs into a nonempty buffer requires a nonempty export_pids.
758 TEUCHOS_TEST_FOR_EXCEPTION
759 (pack_pids && exports.extent (0) != 0 &&
760 export_pids.extent (0) == 0, std::invalid_argument, prefix <<
761 "pack_pids is true, and exports.extent(0) = " <<
762 exports.extent (0) <<
" != 0, meaning that we need to pack at least " 763 "one graph entry, but export_pids.extent(0) = 0.");
765 typedef typename std::decay<decltype (local_graph)>::type
767 typedef typename std::decay<decltype (local_col_map)>::type
// Mark the buffer-memory-space copy of exports as modified and pack into
// that view.
769 typedef typename exports_view_type::t_dev dev_exports_view_type;
770 typedef typename dev_exports_view_type::memory_space buf_mem_space;
771 exports.template modify<buf_mem_space> ();
772 auto exports_d = exports.template view<buf_mem_space> ();
773 do_pack<packet_type,local_graph_type,local_map_type,buffer_device_type>
774 (local_graph, local_col_map, exports_d, num_packets_per_lid,
775 export_lids, export_pids, offsets, pack_pids);
// Teuchos::ArrayView-based entry point: builds Kokkos views from the raw
// host arrays, calls PackCrsGraphImpl::packCrsGraph without PIDs
// (pack_pids = false, empty export_pids view), and resizes the host
// `exports` array to match the packed buffer.
// NOTE(review): this listing skips source lines (the embedded original line
// numbers are not contiguous); the function-name line, part of the parameter
// list, the calls that create the device views, and the copies back to host
// are not visible.
781 template<
typename LO,
typename GO,
typename NT>
785 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
786 const Teuchos::ArrayView<const LO>& exportLIDs,
787 size_t& constantNumPackets,
792 typedef typename local_graph_type::device_type device_type;
793 typedef typename Kokkos::View<size_t*, device_type>::HostMirror::execution_space host_exec_space;
794 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
// The buffer memory space is chosen conditionally when KOKKOS_ENABLE_CUDA is
// defined, and is the device's memory space otherwise.
799 typedef typename device_type::execution_space buffer_exec_space;
800 #ifdef KOKKOS_ENABLE_CUDA 801 typedef typename std::conditional<
803 buffer_exec_space, Kokkos::Cuda
806 typename device_type::memory_space
807 >::type buffer_memory_space;
809 typedef typename device_type::memory_space buffer_memory_space;
810 #endif // KOKKOS_ENABLE_CUDA 812 typedef Kokkos::Device<buffer_exec_space,
813 buffer_memory_space> buffer_device_type;
// Device-side views over the caller's arrays. The bool argument is false for
// num_packets_per_lid and true for the export LIDs (presumably a "copy"
// flag; confirm against the helper's declaration).
819 typename local_graph_type::device_type outputDevice;
820 auto num_packets_per_lid_d =
822 numPacketsPerLID.getRawPtr (),
823 numPacketsPerLID.size (),
false,
824 "num_packets_per_lid");
829 exportLIDs.getRawPtr (),
830 exportLIDs.size (),
true,
// This overload packs no PIDs: empty PID view, pack_pids fixed to false.
833 Kokkos::View<int*, device_type> export_pids_d (
"export_pids", 0);
835 Kokkos::DualView<packet_type*,buffer_device_type> exports_dv (
"exports", 0);
836 constexpr
bool pack_pids =
false;
837 PackCrsGraphImpl::packCrsGraph<LO,GO,NT>(
838 sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
839 export_pids_d, constantNumPackets, pack_pids, distor);
// Unmanaged host view over the caller's numPacketsPerLID storage.
842 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
843 (numPacketsPerLID.getRawPtr (),
844 numPacketsPerLID.size ());
// Make the output array the same length as the packed buffer, then view it
// on host.
852 if (static_cast<size_t> (exports.size ()) !=
853 static_cast<size_t> (exports_dv.extent (0))) {
854 exports.resize (exports_dv.extent (0));
856 Kokkos::View<packet_type*, host_dev_type> exports_h (exports.getRawPtr (),
// Kokkos::DualView-based entry point: takes device-side views of
// numPacketsPerLID and exportLIDs and calls PackCrsGraphImpl::packCrsGraph
// without PIDs (pack_pids = false, empty exportPIDs view).
// NOTE(review): this listing skips source lines (the embedded original line
// numbers are not contiguous); the function-name line, part of the parameter
// list and the declaration of exportLIDs_nc are not visible.
861 template<
typename LO,
typename GO,
typename NT>
866 const Kokkos::DualView<
size_t*,
868 const Kokkos::DualView<const LO*, typename NT::device_type>& exportLIDs,
869 size_t& constantNumPackets,
873 typedef typename local_graph_type::device_type device_type;
// The buffer memory space is chosen conditionally when KOKKOS_ENABLE_CUDA is
// defined, and is the device's memory space otherwise.
878 #ifdef KOKKOS_ENABLE_CUDA 879 typedef typename device_type::execution_space buffer_exec_space;
880 typedef typename std::conditional<
882 buffer_exec_space, Kokkos::Cuda
885 typename device_type::memory_space
886 >::type buffer_memory_space;
888 typedef typename device_type::memory_space buffer_memory_space;
889 #endif // KOKKOS_ENABLE_CUDA 892 Kokkos::View<int*, device_type> exportPIDs_d (
"exportPIDs", 0);
893 constexpr
bool pack_pids =
false;
// Work on a copy of the DualView handle so its modified flags can be set
// even though the argument is const: host flag cleared, device flag set.
896 auto numPacketsPerLID_nc = numPacketsPerLID;
897 numPacketsPerLID_nc.modified_host() = 0;
898 numPacketsPerLID_nc.modified_device() = 1;
899 auto numPacketsPerLID_d = numPacketsPerLID.template view<buffer_memory_space> ();
// Sync the export LIDs to the device memory space before taking the device
// view.
903 exportLIDs_nc.template sync<typename device_type::memory_space> ();
904 auto exportLIDs_d = exportLIDs.template view<typename device_type::memory_space> ();
906 PackCrsGraphImpl::packCrsGraph<LO,GO,NT>(
907 sourceGraph, exports, numPacketsPerLID_d, exportLIDs_d,
908 exportPIDs_d, constantNumPackets, pack_pids, distor);
// Teuchos::ArrayView-based entry point that also packs process ids: builds
// views from the raw host arrays (including sourcePIDs) and calls
// PackCrsGraphImpl::packCrsGraph with pack_pids = true.
// NOTE(review): this listing skips source lines (the embedded original line
// numbers are not contiguous); the function-name line, part of the parameter
// list, the calls that create the device views, and any copy back to host
// are not visible.
911 template<
typename LO,
typename GO,
typename NT>
917 const Teuchos::ArrayView<size_t>& numPacketsPerLID,
918 const Teuchos::ArrayView<const LO>& exportLIDs,
919 const Teuchos::ArrayView<const int>& sourcePIDs,
920 size_t& constantNumPackets,
926 typedef typename Kokkos::DualView<packet_type*, buffer_device_type>::t_host::execution_space host_exec_space;
927 typedef Kokkos::Device<host_exec_space, Kokkos::HostSpace> host_dev_type;
929 typename local_graph_type::device_type outputDevice;
// Views over the caller's arrays. The bool argument is false for
// num_packets_per_lid and true for exportLIDs and sourcePIDs (presumably a
// "copy" flag; confirm against the helper's declaration).
935 auto num_packets_per_lid_d =
937 numPacketsPerLID.getRawPtr (),
938 numPacketsPerLID.size (),
false,
939 "num_packets_per_lid");
945 exportLIDs.getRawPtr (),
946 exportLIDs.size (),
true,
952 sourcePIDs.getRawPtr (),
953 sourcePIDs.size (),
true,
// Unlike the other entry points in this file, this one packs PIDs.
955 constexpr
bool pack_pids =
true;
956 PackCrsGraphImpl::packCrsGraph<LO,GO,NT>(
957 sourceGraph, exports_dv, num_packets_per_lid_d, export_lids_d,
958 export_pids_d, constantNumPackets, pack_pids, distor);
// Unmanaged host view over the caller's numPacketsPerLID storage.
962 Kokkos::View<size_t*, host_dev_type> num_packets_per_lid_h
963 (numPacketsPerLID.getRawPtr (), numPacketsPerLID.size ());
// Macro taking (LO, GO, NT) that names Details::packCrsGraph,
// Details::packCrsGraphNew and Details::packCrsGraphWithOwningPIDs with those
// template arguments (presumably explicit instantiations; the leading
// keywords of each entry are not visible in this listing). The include guard
// is closed on the same line.
970 #define TPETRA_DETAILS_PACKCRSGRAPH_INSTANT( LO, GO, NT ) \ 972 Details::packCrsGraph<LO, GO, NT> ( \ 973 const CrsGraph<LO, GO, NT>&, \ 974 Teuchos::Array<CrsGraph<LO,GO,NT>::packet_type>&, \ 975 const Teuchos::ArrayView<size_t>&, \ 976 const Teuchos::ArrayView<const LO>&, \ 980 Details::packCrsGraphNew<LO, GO, NT> ( \ 981 const CrsGraph<LO, GO, NT>&, \ 982 Kokkos::DualView<CrsGraph<LO,GO,NT>::packet_type*, CrsGraph<LO,GO,NT>::buffer_device_type>&, \ 983 const Kokkos::DualView<size_t*, CrsGraph<LO,GO,NT>::buffer_device_type>&, \ 984 const Kokkos::DualView<const LO*, NT::device_type>&, \ 988 Details::packCrsGraphWithOwningPIDs<LO, GO, NT> ( \ 989 const CrsGraph<LO, GO, NT>&, \ 990 Kokkos::DualView<CrsGraph<LO,GO,NT>::packet_type*, CrsGraph<LO,GO,NT>::buffer_device_type>&, \ 991 const Teuchos::ArrayView<size_t>&, \ 992 const Teuchos::ArrayView<const LO>&, \ 993 const Teuchos::ArrayView<const int>&, \ 997 #endif // TPETRA_DETAILS_PACKCRSGRAPH_DEF_HPP Namespace Tpetra contains the class and methods constituting the Tpetra library.
Impl::CreateMirrorViewFromUnmanagedHostArray< ValueType, OutputDeviceType >::output_view_type create_mirror_view_from_raw_host_array(const OutputDeviceType &, ValueType *inPtr, const size_t inSize, const bool copy=true, const char label[]="")
Variant of Kokkos::create_mirror_view that takes a raw host 1-d array as input.
Import KokkosSparse::OrdinalTraits, a traits class for "invalid" (flag) values of integer types, into the Tpetra::Details namespace.
local_graph_type getLocalGraph() const
Get the local graph.
Traits class for packing / unpacking data of type T, using Kokkos data structures that live in the given space D.
Declaration and generic definition of traits class that tells Tpetra::CrsMatrix how to pack and unpack data.
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Declaration of the Tpetra::CrsGraph class.
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, const Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &numPacketsPerLID, const Kokkos::DualView< const LO *, typename NT::device_type > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication, for the "new" DistObject interface.
Implementation details of Tpetra.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Compute the number of packets and offsets for the pack procedure.
int getError() const
Host function for getting the error.
Sets up and executes a communication plan for a Tpetra DistObject.
Kokkos::View< const value_type *, D, Kokkos::MemoryUnmanaged > input_array_type
The type of an input array of value_type.
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
"Local" part of Map suitable for Kokkos kernels.
::Kokkos::Details::ArithTraits< GlobalOrdinal >::val_type packet_type
The type of each datum being sent or received in an Import or Export.
Kokkos::StaticCrsGraph< LocalOrdinal, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
Teuchos::RCP< const map_type > getColMap() const override
Returns the Map that describes the column distribution in this graph.
Declaration and definition of Tpetra::Details::castAwayConstDualView, an implementation detail of Tpetra.
Kokkos::Device< typename device_type::execution_space, buffer_memory_space > buffer_device_type
Kokkos::Device specialization for communication buffers.
Kokkos::DualView< ValueType *, DeviceType > castAwayConstDualView(const Kokkos::DualView< const ValueType *, DeviceType > &input_dv)
Cast away const-ness of a 1-D Kokkos::DualView.
Declaration and definition of Tpetra::Details::getEntryOnHost.
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
Functions that wrap Kokkos::create_mirror_view, in order to avoid deep copies when not necessary, even if the inputs are const.