41 #ifndef TPETRA_MATRIXMATRIX_DEF_HPP 42 #define TPETRA_MATRIXMATRIX_DEF_HPP 43 #include "Tpetra_ConfigDefs.hpp" 45 #include "Teuchos_VerboseObject.hpp" 46 #include "Teuchos_Array.hpp" 48 #include "Tpetra_CrsMatrix.hpp" 50 #include "Tpetra_RowMatrixTransposer.hpp" 52 #include "Tpetra_Details_radixSort.hpp" 53 #include "Tpetra_ConfigDefs.hpp" 54 #include "Tpetra_Map.hpp" 55 #include "Tpetra_Export.hpp" 59 #include <type_traits> 60 #include "Teuchos_FancyOStream.hpp" 62 #include "TpetraExt_MatrixMatrix_ExtraKernels_def.hpp" 65 #include "KokkosSparse_spgemm.hpp" 66 #include "KokkosSparse_spadd.hpp" 80 #include "TpetraExt_MatrixMatrix_OpenMP.hpp" 81 #include "TpetraExt_MatrixMatrix_Cuda.hpp" 86 namespace MatrixMatrix{
94 template <
class Scalar,
104 bool call_FillComplete_on_result,
105 const std::string& label,
106 const Teuchos::RCP<Teuchos::ParameterList>& params)
111 typedef LocalOrdinal LO;
112 typedef GlobalOrdinal GO;
120 #ifdef HAVE_TPETRA_MMM_TIMINGS 121 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
122 using Teuchos::TimeMonitor;
123 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All Setup"))));
126 const std::string prefix =
"TpetraExt::MatrixMatrix::Multiply(): ";
131 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
132 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error, prefix <<
"Matrix B is not fill complete.");
137 RCP<const crs_matrix_type> Aprime = null;
141 RCP<const crs_matrix_type> Bprime = null;
151 const bool newFlag = !C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
153 bool use_optimized_ATB =
false;
154 if (transposeA && !transposeB && call_FillComplete_on_result && newFlag)
155 use_optimized_ATB =
true;
157 #ifdef USE_OLD_TRANSPOSE // NOTE: For Grey Ballard's use. Remove this later. 158 use_optimized_ATB =
false;
161 if (!use_optimized_ATB && transposeA) {
162 transposer_type transposer(rcpFromRef (A));
163 Aprime = transposer.createTranspose();
166 Aprime = rcpFromRef(A);
170 transposer_type transposer(rcpFromRef (B));
171 Bprime = transposer.createTranspose();
174 Bprime = rcpFromRef(B);
184 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
185 prefix <<
"ERROR, inner dimensions of op(A) and op(B) " 186 "must match for matrix-matrix product. op(A) is " 187 << Aouter <<
"x" << Ainner <<
", op(B) is "<< Binner <<
"x" << Bouter);
193 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
194 prefix <<
"ERROR, dimensions of result C must " 196 <<
" rows, should have at least " << Aouter << std::endl);
208 int numProcs = A.
getComm()->getSize();
212 crs_matrix_struct_type Aview;
213 crs_matrix_struct_type Bview;
215 RCP<const map_type> targetMap_A = Aprime->getRowMap();
216 RCP<const map_type> targetMap_B = Bprime->getRowMap();
218 #ifdef HAVE_TPETRA_MMM_TIMINGS 219 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All I&X"))));
225 if (!use_optimized_ATB) {
226 RCP<const import_type> dummyImporter;
227 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
true, label, params);
233 targetMap_B = Aprime->getColMap();
236 if (!use_optimized_ATB)
237 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(),
false, label, params);
239 #ifdef HAVE_TPETRA_MMM_TIMINGS 240 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All Multiply"))));
244 if (use_optimized_ATB) {
245 MMdetails::mult_AT_B_newmatrix(A, B, C, label,params);
247 }
else if (call_FillComplete_on_result && newFlag) {
248 MMdetails::mult_A_B_newmatrix(Aview, Bview, C, label,params);
250 }
else if (call_FillComplete_on_result) {
251 MMdetails::mult_A_B_reuse(Aview, Bview, C, label,params);
257 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
259 MMdetails::mult_A_B(Aview, Bview, crsmat, label,params);
261 #ifdef HAVE_TPETRA_MMM_TIMINGS 262 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM All FillComplete"))));
264 if (call_FillComplete_on_result) {
272 C.
fillComplete(Bprime->getDomainMap(), Aprime->getRangeMap());
278 template <
class Scalar,
287 bool call_FillComplete_on_result,
288 const std::string& label,
289 const Teuchos::RCP<Teuchos::ParameterList>& params)
293 typedef LocalOrdinal LO;
294 typedef GlobalOrdinal GO;
301 #ifdef HAVE_TPETRA_MMM_TIMINGS 302 std::string prefix_mmm = std::string(
"TpetraExt ")+ label + std::string(
": ");
303 using Teuchos::TimeMonitor;
304 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm+std::string(
"Jacobi All Setup"))));
307 const std::string prefix =
"TpetraExt::MatrixMatrix::Jacobi(): ";
312 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error, prefix <<
"Matrix A is not fill complete.");
313 TEUCHOS_TEST_FOR_EXCEPTION(!B.
isFillComplete(), std::runtime_error, prefix <<
"Matrix B is not fill complete.");
315 RCP<const crs_matrix_type> Aprime = rcpFromRef(A);
316 RCP<const crs_matrix_type> Bprime = rcpFromRef(B);
325 TEUCHOS_TEST_FOR_EXCEPTION(Ainner != Binner, std::runtime_error,
326 prefix <<
"ERROR, inner dimensions of op(A) and op(B) " 327 "must match for matrix-matrix product. op(A) is " 328 << Aouter <<
"x" << Ainner <<
", op(B) is "<< Binner <<
"x" << Bouter);
334 TEUCHOS_TEST_FOR_EXCEPTION(Aouter > C.
getGlobalNumRows(), std::runtime_error,
335 prefix <<
"ERROR, dimensions of result C must " 337 <<
" rows, should have at least "<< Aouter << std::endl);
349 int numProcs = A.
getComm()->getSize();
353 crs_matrix_struct_type Aview;
354 crs_matrix_struct_type Bview;
356 RCP<const map_type> targetMap_A = Aprime->getRowMap();
357 RCP<const map_type> targetMap_B = Bprime->getRowMap();
359 #ifdef HAVE_TPETRA_MMM_TIMINGS 360 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi All I&X"))));
365 RCP<Teuchos::ParameterList> importParams1 = Teuchos::rcp(
new Teuchos::ParameterList);
366 if(!params.is_null()) importParams1->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
369 RCP<const import_type> dummyImporter;
370 MMdetails::import_and_extract_views(*Aprime, targetMap_A, Aview, dummyImporter,
false, label,importParams1);
375 targetMap_B = Aprime->getColMap();
380 RCP<Teuchos::ParameterList> importParams2 = Teuchos::rcp(
new Teuchos::ParameterList);
381 if(!params.is_null()) importParams2->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
382 MMdetails::import_and_extract_views(*Bprime, targetMap_B, Bview, Aprime->getGraph()->getImporter(),
false, label,importParams2);
384 #ifdef HAVE_TPETRA_MMM_TIMINGS 385 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi All Multiply"))));
389 CrsWrapper_CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> crsmat(C);
392 bool newFlag = !C.
getGraph()->isLocallyIndexed() && !C.
getGraph()->isGloballyIndexed();
394 if (call_FillComplete_on_result && newFlag) {
395 MMdetails::jacobi_A_B_newmatrix(omega, Dinv, Aview, Bview, C, label, params);
397 }
else if (call_FillComplete_on_result) {
398 MMdetails::jacobi_A_B_reuse(omega, Dinv, Aview, Bview, C, label, params);
401 TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
"jacobi_A_B_general not implemented");
424 template <
class Scalar,
435 using Teuchos::Array;
439 typedef LocalOrdinal LO;
440 typedef GlobalOrdinal GO;
445 const std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
447 TEUCHOS_TEST_FOR_EXCEPTION(!A.
isFillComplete(), std::runtime_error,
448 prefix <<
"ERROR, input matrix A.isFillComplete() is false; it is required to be true. " 449 "(Result matrix B is not required to be isFillComplete()).");
450 TEUCHOS_TEST_FOR_EXCEPTION(B.
isFillComplete() , std::runtime_error,
451 prefix <<
"ERROR, input matrix B must not be fill complete!");
452 TEUCHOS_TEST_FOR_EXCEPTION(B.
isStaticGraph() , std::runtime_error,
453 prefix <<
"ERROR, input matrix B must not have static graph!");
455 prefix <<
"ERROR, input matrix B must not be locally indexed!");
457 prefix <<
"ERROR, input matrix B must have a dynamic profile!");
460 RCP<const crs_matrix_type> Aprime = null;
462 transposer_type transposer(rcpFromRef (A));
463 Aprime = transposer.createTranspose();
465 Aprime = rcpFromRef(A);
473 if (scalarB != Teuchos::ScalarTraits<SC>::one())
478 if (scalarA != Teuchos::ScalarTraits<SC>::zero()) {
479 for (LO i = 0; (size_t)i < numMyRows; ++i) {
480 row = B.
getRowMap()->getGlobalElement(i);
481 Aprime->getGlobalRowCopy(row, a_inds(), a_vals(), a_numEntries);
483 if (scalarA != Teuchos::ScalarTraits<SC>::one())
484 for (
size_t j = 0; j < a_numEntries; ++j)
485 a_vals[j] *= scalarA;
495 namespace ColMapFunctors
497 template<
typename ByteView,
typename GView>
500 UnionEntries(ByteView entryUnion_,
const GView gids_) : entryUnion(entryUnion_), gids(gids_) {}
501 KOKKOS_INLINE_FUNCTION
void operator()(
const size_t i)
const 503 entryUnion(gids(i)) = 1;
509 template<
typename LView,
typename GView>
510 struct ConvertGlobalToLocal
512 ConvertGlobalToLocal(
const LView gtol_,
const GView gids_, LView lids_) : gtol(gtol_), gids(gids_), lids(lids_) {}
513 KOKKOS_INLINE_FUNCTION
void operator()(
const size_t i)
const 515 lids(i) = gtol(gids(i));
525 template<
typename Scalar,
typename LocalOrdinal,
typename GlobalOrdinal,
typename Node>
526 Teuchos::RCP<Map<LocalOrdinal, GlobalOrdinal, Node> > AddDetails::AddKernels<Scalar, LocalOrdinal, GlobalOrdinal, Node>::
527 makeColMapAndConvertGids(GlobalOrdinal ncols,
528 const typename AddDetails::AddKernels<Scalar, LocalOrdinal, GlobalOrdinal, Node>::global_col_inds_array& gids,
529 typename AddDetails::AddKernels<Scalar, LocalOrdinal, GlobalOrdinal, Node>::col_inds_array& lids,
530 const Teuchos::RCP<
const Teuchos::Comm<int>>& comm)
532 using namespace ColMapFunctors;
535 typedef Kokkos::View<char*, device_type> ByteView;
536 typedef global_col_inds_array GView;
537 typedef col_inds_array LView;
539 auto nentries = gids.extent(0);
541 ByteView entryUnion(
"entry union", ncols);
542 UnionEntries<ByteView, GView> ue(entryUnion, gids);
543 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_unionEntries", range_type(0, nentries), ue);
545 LView gtol(
"global col -> local col", ncols + 1);
546 ::Tpetra::Details::computeOffsetsFromCounts<decltype(gtol), decltype(entryUnion)>(gtol, entryUnion);
548 ConvertGlobalToLocal<LView, GView> cgtl(gtol, gids, lids);
549 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertGlobalToLocal", range_type(0, gids.extent(0)), cgtl);
551 execution_space::fence();
552 GView colmap(
"column map", gtol(ncols));
553 size_t localIter = 0;
554 execution_space::fence();
555 for(
size_t i = 0; i < entryUnion.extent(0); i++)
557 if(entryUnion(i) != 0)
559 colmap(localIter++) = i;
562 execution_space::fence();
564 return rcp(
new map_type(Teuchos::OrdinalTraits<GlobalOrdinal>::invalid(), colmap, 0, comm));
567 template <
class Scalar,
571 Teuchos::RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >
573 const bool transposeA,
576 const bool transposeB,
580 const Teuchos::RCP<Teuchos::ParameterList>& params)
586 Teuchos::RCP<crs_matrix_type> C = rcp(
new crs_matrix_type(CrowMap, 0));
588 add(alpha,transposeA,A,beta,transposeB,B,*C,domainMap,rangeMap,params);
592 template <
class Scalar,
598 const bool transposeA,
601 const bool transposeB,
606 const Teuchos::RCP<Teuchos::ParameterList>& params)
610 using Teuchos::rcpFromRef;
611 using Teuchos::rcp_implicit_cast;
612 using Teuchos::rcp_dynamic_cast;
613 using Teuchos::TimeMonitor;
615 typedef LocalOrdinal LO;
616 typedef GlobalOrdinal GO;
623 typedef AddDetails::AddKernels<SC,LO,GO,NO> AddKern;
624 const char* prefix_mmm =
"TpetraExt::MatrixMatrix::add: ";
625 constexpr
bool debug =
false;
627 #ifdef HAVE_TPETRA_MMM_TIMINGS 628 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"transpose"))));
632 std::ostringstream os;
633 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": " 634 <<
"TpetraExt::MatrixMatrix::add" << std::endl;
635 std::cerr << os.str ();
639 prefix_mmm <<
"A and B must both be fill complete.");
640 #ifdef HAVE_TPETRA_DEBUG 643 const bool domainMapsSame =
647 TEUCHOS_TEST_FOR_EXCEPTION(domainMapsSame, std::invalid_argument,
648 prefix_mmm <<
"The domain Maps of Op(A) and Op(B) are not the same.");
650 const bool rangeMapsSame =
654 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapsSame, std::invalid_argument,
655 prefix_mmm <<
"The range Maps of Op(A) and Op(B) are not the same.");
657 #endif // HAVE_TPETRA_DEBUG 660 RCP<const crs_matrix_type> Aprime = rcpFromRef(A);
662 transposer_type transposer(Aprime);
663 Aprime = transposer.createTranspose ();
665 #ifdef HAVE_TPETRA_DEBUG 666 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
667 prefix_mmm <<
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
668 #endif // HAVE_TPETRA_DEBUG 670 RCP<const crs_matrix_type> Bprime = rcpFromRef(B);
673 std::ostringstream os;
674 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": " 675 <<
"Form explicit xpose of B" << std::endl;
676 std::cerr << os.str ();
678 transposer_type transposer(Bprime);
679 Bprime = transposer.createTranspose ();
681 #ifdef HAVE_TPETRA_DEBUG 682 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
683 prefix_mmm <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
684 TEUCHOS_TEST_FOR_EXCEPTION(
685 !Aprime->isFillComplete () || !Bprime->isFillComplete (), std::invalid_argument,
686 prefix_mmm <<
"Aprime and Bprime must both be fill complete. " 687 "Please report this bug to the Tpetra developers.");
688 #endif // HAVE_TPETRA_DEBUG 689 RCP<const map_type> CDomainMap = domainMap;
690 RCP<const map_type> CRangeMap = rangeMap;
691 if(CDomainMap.is_null())
693 CDomainMap = Bprime->getDomainMap();
695 if(CRangeMap.is_null())
697 CRangeMap = Bprime->getRangeMap();
699 assert(!(CDomainMap.is_null()));
700 assert(!(CRangeMap.is_null()));
701 typedef typename AddKern::values_array values_array;
702 typedef typename AddKern::row_ptrs_array row_ptrs_array;
703 typedef typename AddKern::col_inds_array col_inds_array;
704 bool AGraphSorted = Aprime->getCrsGraph()->isSorted();
705 bool BGraphSorted = Bprime->getCrsGraph()->isSorted();
707 row_ptrs_array rowptrs;
708 col_inds_array colinds;
709 auto acolmap = Aprime->getColMap()->getMyGlobalIndices();
710 auto bcolmap = Bprime->getColMap()->getMyGlobalIndices();
711 #ifdef HAVE_TPETRA_MMM_TIMINGS 712 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"rowmap check/import"))));
714 if(!(Aprime->getRowMap()->isSameAs(*(Bprime->getRowMap()))))
717 auto import = rcp(
new import_type(Aprime->getRowMap(), Bprime->getRowMap()));
718 Aprime = importAndFillCompleteCrsMatrix<crs_matrix_type>(Aprime, *
import, Bprime->getDomainMap(), Bprime->getRangeMap());
720 bool matchingColMaps = Aprime->getColMap()->isSameAs(*(Bprime->getColMap()));
721 bool sorted = AGraphSorted && BGraphSorted;
722 RCP<const map_type> CrowMap;
723 RCP<const map_type> CcolMap;
724 RCP<const import_type> Cimport = Teuchos::null;
725 RCP<export_type> Cexport = Teuchos::null;
727 if(!matchingColMaps && !(CDomainMap->isContiguous()))
730 #ifdef HAVE_TPETRA_MMM_TIMINGS 731 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"fallback to CrsMatrix::add"))));
734 std::ostringstream os;
735 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": " 736 <<
"Call Bprime->add(...)" << std::endl;
737 std::cerr << os.str ();
739 Teuchos::RCP<crs_matrix_type> C_ = Teuchos::rcp_static_cast<crs_matrix_type>(Bprime->add(alpha, *Aprime, beta, CDomainMap, CRangeMap, params));
741 C.
setAllValues(C_->getLocalMatrix().graph.row_map,C_->getLocalMatrix().graph.entries,C_->getLocalMatrix().values);
742 C.
expertStaticFillComplete(CDomainMap, CRangeMap, C_->getGraph()->getImporter(), C_->getGraph()->getExporter(), params);
745 else if(!matchingColMaps)
747 #ifdef HAVE_TPETRA_MMM_TIMINGS 748 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"mismatched col map full kernel"))));
751 auto Acolmap = Aprime->getColMap()->getMyGlobalIndices();
752 auto Bcolmap = Bprime->getColMap()->getMyGlobalIndices();
753 typename AddKern::global_col_inds_array globalColinds(
"", 0);
755 std::ostringstream os;
756 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": " 757 <<
"Call AddKern::convertToGlobalAndAdd(...)" << std::endl;
758 std::cerr << os.str ();
760 AddKern::convertToGlobalAndAdd(
761 Aprime->getLocalMatrix(), alpha, Bprime->getLocalMatrix(), beta, Acolmap, Bcolmap,
762 CRangeMap->getMinGlobalIndex(), Aprime->getGlobalNumCols(), vals, rowptrs, globalColinds);
763 colinds = col_inds_array(
"C colinds", globalColinds.extent(0));
765 std::ostringstream os;
766 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": " 767 <<
"Finished AddKern::convertToGlobalAndAdd(...)" << std::endl;
768 std::cerr << os.str ();
770 #ifdef HAVE_TPETRA_MMM_TIMINGS 771 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"building optimized column map"))));
773 CrowMap = Bprime->getRowMap();
776 CcolMap = AddKern::makeColMapAndConvertGids(Aprime->getGlobalNumCols(), globalColinds, colinds, comm);
781 auto localA = Aprime->getLocalMatrix();
782 auto localB = Bprime->getLocalMatrix();
783 auto Avals = localA.values;
784 auto Bvals = localB.values;
785 auto Arowptrs = localA.graph.row_map;
786 auto Browptrs = localB.graph.row_map;
787 auto Acolinds = localA.graph.entries;
788 auto Bcolinds = localB.graph.entries;
792 #ifdef HAVE_TPETRA_MMM_TIMINGS 793 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"sorted entries full kernel"))));
796 std::ostringstream os;
797 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": " 798 <<
"Call AddKern::addSorted(...)" << std::endl;
799 std::cerr << os.str ();
801 AddKern::addSorted(Avals, Arowptrs, Acolinds, alpha, Bvals, Browptrs, Bcolinds, beta, vals, rowptrs, colinds);
806 #ifdef HAVE_TPETRA_MMM_TIMINGS 807 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"mm add unsorted entries full kernel"))));
810 std::ostringstream os;
811 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": " 812 <<
"Call AddKern::addUnsorted(...)" << std::endl;
813 std::cerr << os.str ();
815 AddKern::addUnsorted(Avals, Arowptrs, Acolinds, alpha, Bvals, Browptrs, Bcolinds, beta, Aprime->getGlobalNumCols(), vals, rowptrs, colinds);
817 CrowMap = Bprime->getRowMap();
818 CcolMap = Bprime->getColMap();
821 #ifdef HAVE_TPETRA_MMM_TIMINGS 822 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Tpetra::Crs constructor"))));
827 #ifdef HAVE_TPETRA_MMM_TIMINGS 828 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Tpetra::Crs expertStaticFillComplete"))));
830 if(!CDomainMap->isSameAs(*CcolMap))
833 std::ostringstream os;
834 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": " 835 <<
"Create Cimport" << std::endl;
836 std::cerr << os.str ();
838 Cimport = rcp(
new import_type(CDomainMap, CcolMap));
840 if(!CrowMap->isSameAs(*CRangeMap))
843 std::ostringstream os;
844 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": " 845 <<
"Create Cexport" << std::endl;
846 std::cerr << os.str ();
848 Cexport = rcp(
new export_type(CrowMap, CRangeMap));
852 std::ostringstream os;
853 os <<
"Proc " << A.
getMap ()->getComm ()->getRank () <<
": " 854 <<
"Call C->expertStaticFillComplete(...)" << std::endl;
855 std::cerr << os.str ();
861 template <
class Scalar,
874 using Teuchos::Array;
875 using Teuchos::ArrayRCP;
876 using Teuchos::ArrayView;
879 using Teuchos::rcp_dynamic_cast;
880 using Teuchos::rcpFromRef;
881 using Teuchos::tuple;
884 typedef Teuchos::ScalarTraits<Scalar> STS;
892 std::string prefix =
"TpetraExt::MatrixMatrix::Add(): ";
894 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
895 prefix <<
"The case C == null does not actually work. Fixing this will require an interface change.");
897 TEUCHOS_TEST_FOR_EXCEPTION(
899 prefix <<
"Both input matrices must be fill complete before calling this function.");
901 #ifdef HAVE_TPETRA_DEBUG 903 const bool domainMapsSame =
907 TEUCHOS_TEST_FOR_EXCEPTION(domainMapsSame, std::invalid_argument,
908 prefix <<
"The domain Maps of Op(A) and Op(B) are not the same.");
910 const bool rangeMapsSame =
914 TEUCHOS_TEST_FOR_EXCEPTION(rangeMapsSame, std::invalid_argument,
915 prefix <<
"The range Maps of Op(A) and Op(B) are not the same.");
917 #endif // HAVE_TPETRA_DEBUG 920 RCP<const crs_matrix_type> Aprime;
922 transposer_type theTransposer (rcpFromRef (A));
923 Aprime = theTransposer.createTranspose ();
925 Aprime = rcpFromRef (A);
928 #ifdef HAVE_TPETRA_DEBUG 929 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
930 prefix <<
"Failed to compute Op(A). Please report this bug to the Tpetra developers.");
931 #endif // HAVE_TPETRA_DEBUG 934 RCP<const crs_matrix_type> Bprime;
936 transposer_type theTransposer (rcpFromRef (B));
937 Bprime = theTransposer.createTranspose ();
939 Bprime = rcpFromRef (B);
942 #ifdef HAVE_TPETRA_DEBUG 943 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
944 prefix <<
"Failed to compute Op(B). Please report this bug to the Tpetra developers.");
945 #endif // HAVE_TPETRA_DEBUG 948 if (! C.is_null ()) {
949 C->setAllToScalar (STS::zero ());
957 if (Aprime->getRowMap ()->isSameAs (* (Bprime->getRowMap ())) {
958 RCP<const map_type> rowMap = Aprime->getRowMap ();
960 RCP<const crs_graph_type> A_graph =
961 rcp_dynamic_cast<
const crs_graph_type> (Aprime->getGraph (),
true);
962 #ifdef HAVE_TPETRA_DEBUG 963 TEUCHOS_TEST_FOR_EXCEPTION(A_graph.is_null (), std::logic_error,
964 "Tpetra::MatrixMatrix::Add: Graph of Op(A) is null. " 965 "Please report this bug to the Tpetra developers.");
966 #endif // HAVE_TPETRA_DEBUG 967 RCP<const crs_graph_type> B_graph =
968 rcp_dynamic_cast<
const crs_graph_type> (Bprime->getGraph (),
true);
969 #ifdef HAVE_TPETRA_DEBUG 970 TEUCHOS_TEST_FOR_EXCEPTION(B_graph.is_null (), std::logic_error,
971 "Tpetra::MatrixMatrix::Add: Graph of Op(B) is null. " 972 "Please report this bug to the Tpetra developers.");
973 #endif // HAVE_TPETRA_DEBUG 974 RCP<const map_type> A_colMap = A_graph->getColMap ();
975 #ifdef HAVE_TPETRA_DEBUG 976 TEUCHOS_TEST_FOR_EXCEPTION(A_colMap.is_null (), std::logic_error,
977 "Tpetra::MatrixMatrix::Add: Column Map of Op(A) is null. " 978 "Please report this bug to the Tpetra developers.");
979 #endif // HAVE_TPETRA_DEBUG 980 RCP<const map_type> B_colMap = B_graph->getColMap ();
981 #ifdef HAVE_TPETRA_DEBUG 982 TEUCHOS_TEST_FOR_EXCEPTION(B_colMap.is_null (), std::logic_error,
983 "Tpetra::MatrixMatrix::Add: Column Map of Op(B) is null. " 984 "Please report this bug to the Tpetra developers.");
985 TEUCHOS_TEST_FOR_EXCEPTION(A_graph->getImporter ().is_null (),
987 "Tpetra::MatrixMatrix::Add: Op(A)'s Import is null. " 988 "Please report this bug to the Tpetra developers.");
989 TEUCHOS_TEST_FOR_EXCEPTION(B_graph->getImporter ().is_null (),
991 "Tpetra::MatrixMatrix::Add: Op(B)'s Import is null. " 992 "Please report this bug to the Tpetra developers.");
993 #endif // HAVE_TPETRA_DEBUG 996 RCP<const import_type> sumImport =
997 A_graph->getImporter ()->setUnion (* (B_graph->getImporter ()));
998 RCP<const map_type> C_colMap = sumImport->getTargetMap ();
1006 ArrayView<const LocalOrdinal> A_local_ind;
1007 Array<GlobalOrdinal> A_global_ind;
1008 ArrayView<const LocalOrdinal> B_local_ind;
1009 Array<GlobalOrdinal> B_global_ind;
1011 const size_t localNumRows = rowMap->getNodeNumElements ();
1012 ArrayRCP<size_t> numEntriesPerRow (localNumRows);
1015 size_t maxNumEntriesPerRow = 0;
1016 for (
size_t localRow = 0; localRow < localNumRows; ++localRow) {
1018 A_graph->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind);
1019 const size_type A_numEnt = A_local_ind.size ();
1020 if (A_numEnt > A_global_ind.size ()) {
1021 A_global_ind.resize (A_numEnt);
1024 for (size_type k = 0; k < A_numEnt; ++k) {
1025 A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
1029 B_graph->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind);
1030 const size_type B_numEnt = B_local_ind.size ();
1031 if (B_numEnt > B_global_ind.size ()) {
1032 B_global_ind.resize (B_numEnt);
1035 for (size_type k = 0; k < B_numEnt; ++k) {
1036 B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
1040 const size_t curNumEntriesPerRow =
1041 keyMergeCount (A_global_ind.begin (), A_global_ind.end (),
1042 B_global_ind.begin (), B_global_ind.end ());
1043 numEntriesPerRow[localRow] = curNumEntriesPerRow;
1044 maxNumEntriesPerRow = std::max (maxNumEntriesPerRow, curNumEntriesPerRow);
1051 C = rcp (
new crs_matrix_type (rowMap, C_colMap, numEntriesPerRow,
StaticProfile));
1056 ArrayView<const Scalar> A_val;
1057 ArrayView<const Scalar> B_val;
1059 Array<LocalOrdinal> AplusB_local_ind (maxNumEntriesPerRow);
1060 Array<GlobalOrdinal> AplusB_global_ind (maxNumEntriesPerRow);
1061 Array<Scalar> AplusB_val (maxNumEntriesPerRow);
1063 for (
size_t localRow = 0; localRow < localNumRows; ++localRow) {
1065 Aprime->getLocalRowView (as<LocalOrdinal> (localRow), A_local_ind, A_val);
1067 for (size_type k = 0; k < A_local_ind.size (); ++k) {
1068 A_global_ind[k] = A_colMap->getGlobalElement (A_local_ind[k]);
1072 Bprime->getLocalRowView (as<LocalOrdinal> (localRow), B_local_ind, B_val);
1074 for (size_type k = 0; k < B_local_ind.size (); ++k) {
1075 B_global_ind[k] = B_colMap->getGlobalElement (B_local_ind[k]);
1078 const size_t curNumEntries = numEntriesPerRow[localRow];
1079 ArrayView<LocalOrdinal> C_local_ind = AplusB_local_ind (0, curNumEntries);
1080 ArrayView<GlobalOrdinal> C_global_ind = AplusB_global_ind (0, curNumEntries);
1081 ArrayView<Scalar> C_val = AplusB_val (0, curNumEntries);
1085 A_val.begin (), A_val.end (),
1086 B_global_ind.begin (), B_global_ind.end (),
1087 B_val.begin (), B_val.end (),
1088 C_global_ind.begin (), C_val.begin (),
1089 std::plus<Scalar> ());
1091 for (size_type k = 0; k < as<size_type> (numEntriesPerRow[localRow]); ++k) {
1092 C_local_ind[k] = C_colMap->getLocalElement (C_global_ind[k]);
1095 C->replaceLocalValues (localRow, C_local_ind, C_val);
1100 RCP<const map_type> domainMap = A_graph->getDomainMap ();
1101 RCP<const map_type> rangeMap = A_graph->getRangeMap ();
1102 C->expertStaticFillComplete (domainMap, rangeMap, sumImport, A_graph->getExporter ());
1110 C = rcp (
new crs_matrix_type (Aprime->getRowMap (), null));
1117 C = rcp (
new crs_matrix_type (Aprime->getRowMap (), 0));
1121 #ifdef HAVE_TPETRA_DEBUG 1122 TEUCHOS_TEST_FOR_EXCEPTION(Aprime.is_null (), std::logic_error,
1123 prefix <<
"At this point, Aprime is null. Please report this bug to the Tpetra developers.");
1124 TEUCHOS_TEST_FOR_EXCEPTION(Bprime.is_null (), std::logic_error,
1125 prefix <<
"At this point, Bprime is null. Please report this bug to the Tpetra developers.");
1126 TEUCHOS_TEST_FOR_EXCEPTION(C.is_null (), std::logic_error,
1127 prefix <<
"At this point, C is null. Please report this bug to the Tpetra developers.");
1128 #endif // HAVE_TPETRA_DEBUG 1130 Array<RCP<const crs_matrix_type> > Mat =
1131 tuple<RCP<const crs_matrix_type> > (Aprime, Bprime);
1132 Array<Scalar> scalar = tuple<Scalar> (scalarA, scalarB);
1135 for (
int k = 0; k < 2; ++k) {
1136 Array<GlobalOrdinal> Indices;
1137 Array<Scalar> Values;
1145 #ifdef HAVE_TPETRA_DEBUG 1146 TEUCHOS_TEST_FOR_EXCEPTION(Mat[k].is_null (), std::logic_error,
1147 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1148 #endif // HAVE_TPETRA_DEBUG 1149 RCP<const map_type> curRowMap = Mat[k]->getRowMap ();
1150 #ifdef HAVE_TPETRA_DEBUG 1151 TEUCHOS_TEST_FOR_EXCEPTION(curRowMap.is_null (), std::logic_error,
1152 prefix <<
"At this point, curRowMap is null. Please report this bug to the Tpetra developers.");
1153 #endif // HAVE_TPETRA_DEBUG 1155 const size_t localNumRows = Mat[k]->getNodeNumRows ();
1156 for (
size_t i = 0; i < localNumRows; ++i) {
1157 const GlobalOrdinal globalRow = curRowMap->getGlobalElement (i);
1158 size_t numEntries = Mat[k]->getNumEntriesInGlobalRow (globalRow);
1159 if (numEntries > 0) {
1160 Indices.resize (numEntries);
1161 Values.resize (numEntries);
1162 Mat[k]->getGlobalRowCopy (globalRow, Indices (), Values (), numEntries);
1164 if (scalar[k] != STS::one ()) {
1165 for (
size_t j = 0; j < numEntries; ++j) {
1166 Values[j] *= scalar[k];
1170 if (C->isFillComplete ()) {
1171 C->sumIntoGlobalValues (globalRow, Indices, Values);
1173 C->insertGlobalValues (globalRow, Indices, Values);
1180 template<
typename SC,
typename LO,
typename GO,
typename NO>
1181 void AddDetails::AddKernels<SC, LO, GO, NO>::
1183 const typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
1184 const typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
1185 const typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
1186 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
1187 const typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
1188 const typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
1189 const typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
1190 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
1191 typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
1192 typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
1193 typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds)
1195 using Teuchos::TimeMonitor;
1196 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
1197 auto nrows = Arowptrs.extent(0) - 1;
1198 Crowptrs = row_ptrs_array(
"C row ptrs", nrows + 1);
1199 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
typename col_inds_array::size_type, LO, impl_scalar_type,
1200 execution_space, memory_space, memory_space> KKH;
1202 handle.create_spadd_handle(
true);
1203 auto addHandle = handle.get_spadd_handle();
1204 #ifdef HAVE_TPETRA_MMM_TIMINGS 1205 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted symbolic")));
1207 KokkosSparse::Experimental::spadd_symbolic
1209 typename row_ptrs_array::const_type,
typename col_inds_array::const_type,
1210 typename row_ptrs_array::const_type,
typename col_inds_array::const_type,
1211 row_ptrs_array, col_inds_array>
1212 (&handle, Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
1213 Cvals = values_array(
"C values", addHandle->get_max_result_nnz());
1214 Ccolinds = col_inds_array(
"C colinds", addHandle->get_max_result_nnz());
1215 #ifdef HAVE_TPETRA_MMM_TIMINGS 1216 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted numeric")));
1218 KokkosSparse::Experimental::spadd_numeric(&handle,
1219 Arowptrs, Acolinds, Avals, scalarA,
1220 Browptrs, Bcolinds, Bvals, scalarB,
1221 Crowptrs, Ccolinds, Cvals);
1224 template<
typename SC,
typename LO,
typename GO,
typename NO>
1225 void AddDetails::AddKernels<SC, LO, GO, NO>::
1227 const typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Avals,
1228 const typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Arowptrs,
1229 const typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Acolinds,
1230 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
1231 const typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Bvals,
1232 const typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array_const& Browptrs,
1233 const typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Bcolinds,
1234 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
1236 typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
1237 typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
1238 typename AddDetails::AddKernels<SC, LO, GO, NO>::col_inds_array& Ccolinds)
1240 using Teuchos::TimeMonitor;
1241 TEUCHOS_TEST_FOR_EXCEPTION(Arowptrs.extent(0) != Browptrs.extent(0), std::runtime_error,
"Can't add matrices with different numbers of rows.");
1242 auto nrows = Arowptrs.extent(0) - 1;
1243 Crowptrs = row_ptrs_array(
"C row ptrs", nrows + 1);
1244 typedef AddDetails::AddKernels<SC, LO, GO, NO> AddKern;
1245 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
typename col_inds_array::size_type, LO, AddKern::impl_scalar_type,
1246 AddKern::execution_space, AddKern::memory_space, AddKern::memory_space> KKH;
1248 handle.create_spadd_handle(
false);
1249 auto addHandle = handle.get_spadd_handle();
1250 #ifdef HAVE_TPETRA_MMM_TIMINGS 1251 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted symbolic")));
1253 KokkosSparse::Experimental::spadd_symbolic
1255 typename row_ptrs_array::const_type,
typename col_inds_array::const_type,
1256 typename row_ptrs_array::const_type,
typename col_inds_array::const_type,
1257 row_ptrs_array, col_inds_array>
1258 (&handle, Arowptrs, Acolinds, Browptrs, Bcolinds, Crowptrs);
1259 Cvals = values_array(
"C values", addHandle->get_max_result_nnz());
1260 Ccolinds = col_inds_array(
"C colinds", addHandle->get_max_result_nnz());
1261 #ifdef HAVE_TPETRA_MMM_TIMINGS 1262 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() sorted kernel: sorted numeric")));
1264 KokkosSparse::Experimental::spadd_numeric(&handle,
1265 Arowptrs, Acolinds, Avals, scalarA,
1266 Browptrs, Bcolinds, Bvals, scalarB,
1267 Crowptrs, Ccolinds, Cvals);
1270 template<
typename GO,
1271 typename LocalIndicesType,
1272 typename GlobalIndicesType,
1273 typename ColMapType>
1274 struct ConvertColIndsFunctor
1276 ConvertColIndsFunctor (
const GO minGlobal_,
1277 const LocalIndicesType& colindsOrig_,
1278 const GlobalIndicesType& colindsConverted_,
1279 const ColMapType& colmap_) :
1280 minGlobal (minGlobal_),
1281 colindsOrig (colindsOrig_),
1282 colindsConverted (colindsConverted_),
1285 KOKKOS_INLINE_FUNCTION
void 1286 operator() (
const size_t& i)
const 1288 colindsConverted[i] = colmap[colindsOrig[i]];
1291 LocalIndicesType colindsOrig;
1292 GlobalIndicesType colindsConverted;
1296 template<
typename SC,
typename LO,
typename GO,
typename NO>
1297 void AddDetails::AddKernels<SC, LO, GO, NO>::
1298 convertToGlobalAndAdd(
1299 const typename AddDetails::AddKernels<SC, LO, GO, NO>::KCRS& A,
1300 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarA,
1301 const typename AddDetails::AddKernels<SC, LO, GO, NO>::KCRS& B,
1302 const typename AddDetails::AddKernels<SC, LO, GO, NO>::impl_scalar_type scalarB,
1303 const typename AddDetails::AddKernels<SC, LO, GO, NO>::local_map_type& AcolMap,
1304 const typename AddDetails::AddKernels<SC, LO, GO, NO>::local_map_type& BcolMap,
1307 typename AddDetails::AddKernels<SC, LO, GO, NO>::values_array& Cvals,
1308 typename AddDetails::AddKernels<SC, LO, GO, NO>::row_ptrs_array& Crowptrs,
1309 typename AddDetails::AddKernels<SC, LO, GO, NO>::global_col_inds_array& Ccolinds)
1311 using Teuchos::TimeMonitor;
1313 const values_array& Avals = A.values;
1314 const values_array& Bvals = B.values;
1315 const col_inds_array& Acolinds = A.graph.entries;
1316 const col_inds_array& Bcolinds = B.graph.entries;
1317 auto Arowptrs = A.graph.row_map;
1318 auto Browptrs = B.graph.row_map;
1319 global_col_inds_array AcolindsConverted(
"A colinds (converted)", Acolinds.extent(0));
1320 global_col_inds_array BcolindsConverted(
"B colinds (converted)", Bcolinds.extent(0));
1321 #ifdef HAVE_TPETRA_MMM_TIMINGS 1322 auto MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: " + std::string(
"column map conversion"))));
1324 ConvertColIndsFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertA(minGlobalCol, Acolinds, AcolindsConverted, AcolMap);
1325 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsA", range_type(0, Acolinds.extent(0)), convertA);
1326 ConvertColIndsFunctor<GO, col_inds_array, global_col_inds_array, local_map_type> convertB(minGlobalCol, Bcolinds, BcolindsConverted, BcolMap);
1327 Kokkos::parallel_for(
"Tpetra_MatrixMatrix_convertColIndsB", range_type(0, Bcolinds.extent(0)), convertB);
1328 typedef KokkosKernels::Experimental::KokkosKernelsHandle<
typename col_inds_array::size_type, GO, impl_scalar_type,
1329 execution_space, memory_space, memory_space> KKH;
1331 handle.create_spadd_handle(
false);
1332 auto addHandle = handle.get_spadd_handle();
1333 #ifdef HAVE_TPETRA_MMM_TIMINGS 1334 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: unsorted symbolic")));
1336 auto nrows = Arowptrs.extent(0) - 1;
1337 Crowptrs = row_ptrs_array(
"C row ptrs", nrows + 1);
1338 KokkosSparse::Experimental::spadd_symbolic
1339 <KKH,
typename row_ptrs_array::const_type,
typename global_col_inds_array::const_type,
typename row_ptrs_array::const_type,
typename global_col_inds_array::const_type, row_ptrs_array, global_col_inds_array>
1340 (&handle, Arowptrs, AcolindsConverted, Browptrs, BcolindsConverted, Crowptrs);
1341 Cvals = values_array(
"C values", addHandle->get_max_result_nnz());
1342 Ccolinds = global_col_inds_array(
"C colinds", addHandle->get_max_result_nnz());
1343 #ifdef HAVE_TPETRA_MMM_TIMINGS 1344 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(
"TpetraExt::MatrixMatrix::add() diff col map kernel: unsorted numeric")));
1346 KokkosSparse::Experimental::spadd_numeric(&handle,
1347 Arowptrs, AcolindsConverted, Avals, scalarA,
1348 Browptrs, BcolindsConverted, Bvals, scalarB,
1349 Crowptrs, Ccolinds, Cvals);
1354 namespace MMdetails{
1358 template <
class TransferType>
1359 void printMultiplicationStatistics(Teuchos::RCP<TransferType >
Transfer,
const std::string &label) {
1360 if (Transfer.is_null())
1363 const Distributor & Distor = Transfer->getDistributor();
1364 Teuchos::RCP<const Teuchos::Comm<int> > Comm = Transfer->getSourceMap()->getComm();
1366 size_t rows_send = Transfer->getNumExportIDs();
1367 size_t rows_recv = Transfer->getNumRemoteIDs();
1369 size_t round1_send = Transfer->getNumExportIDs() *
sizeof(size_t);
1370 size_t round1_recv = Transfer->getNumRemoteIDs() *
sizeof(size_t);
1373 size_t round2_send, round2_recv;
1376 int myPID = Comm->getRank();
1377 int NumProcs = Comm->getSize();
1384 size_t lstats[8] = {num_send_neighbors,num_recv_neighbors,rows_send,rows_recv,round1_send,round1_recv,round2_send,round2_recv};
1385 size_t gstats_min[8], gstats_max[8];
1387 double lstats_avg[8], gstats_avg[8];
1388 for(
int i=0; i<8; i++)
1389 lstats_avg[i] = ((
double)lstats[i])/NumProcs;
1391 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MIN,8,lstats,gstats_min);
1392 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_MAX,8,lstats,gstats_max);
1393 Teuchos::reduceAll(*Comm(),Teuchos::REDUCE_SUM,8,lstats_avg,gstats_avg);
1396 printf(
"%s Send Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n", label.c_str(),
1397 (int)gstats_min[0],gstats_avg[0],(
int)gstats_max[0], (int)gstats_min[2],gstats_avg[2],(
int)gstats_max[2],
1398 (int)gstats_min[4],gstats_avg[4],(
int)gstats_max[4], (int)gstats_min[6],gstats_avg[6],(
int)gstats_max[6]);
1399 printf(
"%s Recv Statistics[min/avg/max]: neigh=%d/%4.1f/%d rows=%d/%4.1f/%d round1=%d/%4.1f/%d round2=%d/%4.1f/%d\n", label.c_str(),
1400 (int)gstats_min[1],gstats_avg[1],(
int)gstats_max[1], (int)gstats_min[3],gstats_avg[3],(
int)gstats_max[3],
1401 (int)gstats_min[5],gstats_avg[5],(
int)gstats_max[5], (int)gstats_min[7],gstats_avg[7],(
int)gstats_max[7]);
1408 template<
class Scalar,
1410 class GlobalOrdinal,
1412 void mult_AT_B_newmatrix(
1416 const std::string & label,
1417 const Teuchos::RCP<Teuchos::ParameterList>& params)
1423 typedef LocalOrdinal LO;
1424 typedef GlobalOrdinal GO;
1429 #ifdef HAVE_TPETRA_MMM_TIMINGS 1430 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1431 using Teuchos::TimeMonitor;
1432 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T Transpose"))));
1438 transposer_type transposer (rcpFromRef (A),label+std::string(
"XP: "));
1439 RCP<Teuchos::ParameterList> transposeParams = Teuchos::rcp(
new Teuchos::ParameterList);
1440 if(!params.is_null()) transposeParams->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
1441 RCP<CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Atrans = transposer.createTransposeLocal(transposeParams);
1446 #ifdef HAVE_TPETRA_MMM_TIMINGS 1447 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T I&X"))));
1451 crs_matrix_struct_type Aview;
1452 crs_matrix_struct_type Bview;
1453 RCP<const Import<LocalOrdinal,GlobalOrdinal, Node> > dummyImporter;
1456 RCP<Teuchos::ParameterList> importParams1 = Teuchos::rcp(
new Teuchos::ParameterList);
1457 if(!params.is_null()) importParams1->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
1458 MMdetails::import_and_extract_views(*Atrans, Atrans->getRowMap(), Aview, dummyImporter,
true, label,importParams1);
1460 RCP<Teuchos::ParameterList> importParams2 = Teuchos::rcp(
new Teuchos::ParameterList);
1461 if(!params.is_null()) importParams2->set(
"compute global constants",params->get(
"compute global constants: temporaries",
false));
1463 if(B.
getRowMap()->isSameAs(*Atrans->getColMap())){
1464 MMdetails::import_and_extract_views(B, B.
getRowMap(), Bview, dummyImporter,
true, label,importParams2);
1467 MMdetails::import_and_extract_views(B, Atrans->
getColMap(), Bview, dummyImporter,
false, label,importParams2);
1470 #ifdef HAVE_TPETRA_MMM_TIMINGS 1471 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T AB-core"))));
1474 RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> >Ctemp;
1477 bool needs_final_export = !Atrans->getGraph()->getExporter().is_null();
1478 if (needs_final_export)
1481 Ctemp = rcp(&C,
false);
1484 mult_A_B_newmatrix(Aview, Bview, *Ctemp, label,params);
1489 #ifdef HAVE_TPETRA_MMM_TIMINGS 1490 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM-T exportAndFillComplete"))));
1493 Teuchos::RCP<Tpetra::CrsMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node> > Crcp(&C,
false);
1494 if (needs_final_export) {
1495 Teuchos::ParameterList labelList;
1496 labelList.set(
"Timer Label", label);
1497 if(!params.is_null()) labelList.set(
"compute global constants",params->get(
"compute global constants",
true));
1499 Ctemp->exportAndFillComplete(Crcp,*Ctemp->getGraph()->getExporter(),
1502 #ifdef HAVE_TPETRA_MMM_STATISTICS 1503 printMultiplicationStatistics(Ctemp->getGraph()->getExporter(), label+std::string(
" AT_B MMM"));
1509 template<
class Scalar,
1511 class GlobalOrdinal,
1516 CrsWrapper<Scalar, LocalOrdinal, GlobalOrdinal, Node>& C,
1517 const std::string& label,
1518 const Teuchos::RCP<Teuchos::ParameterList>& params)
1520 using Teuchos::Array;
1521 using Teuchos::ArrayRCP;
1522 using Teuchos::ArrayView;
1523 using Teuchos::OrdinalTraits;
1524 using Teuchos::null;
1526 typedef Teuchos::ScalarTraits<Scalar> STS;
1528 LocalOrdinal C_firstCol = Bview.
colMap->getMinLocalIndex();
1529 LocalOrdinal C_lastCol = Bview.
colMap->getMaxLocalIndex();
1531 LocalOrdinal C_firstCol_import = OrdinalTraits<LocalOrdinal>::zero();
1532 LocalOrdinal C_lastCol_import = OrdinalTraits<LocalOrdinal>::invalid();
1534 ArrayView<const GlobalOrdinal> bcols = Bview.
colMap->getNodeElementList();
1535 ArrayView<const GlobalOrdinal> bcols_import = null;
1537 C_firstCol_import = Bview.
importColMap->getMinLocalIndex();
1538 C_lastCol_import = Bview.
importColMap->getMaxLocalIndex();
1540 bcols_import = Bview.
importColMap->getNodeElementList();
1543 size_t C_numCols = C_lastCol - C_firstCol +
1544 OrdinalTraits<LocalOrdinal>::one();
1545 size_t C_numCols_import = C_lastCol_import - C_firstCol_import +
1546 OrdinalTraits<LocalOrdinal>::one();
1548 if (C_numCols_import > C_numCols)
1549 C_numCols = C_numCols_import;
1551 Array<Scalar> dwork = Array<Scalar>(C_numCols);
1552 Array<GlobalOrdinal> iwork = Array<GlobalOrdinal>(C_numCols);
1553 Array<size_t> iwork2 = Array<size_t>(C_numCols);
1555 Array<Scalar> C_row_i = dwork;
1556 Array<GlobalOrdinal> C_cols = iwork;
1557 Array<size_t> c_index = iwork2;
1558 Array<GlobalOrdinal> combined_index = Array<GlobalOrdinal>(2*C_numCols);
1559 Array<Scalar> combined_values = Array<Scalar>(2*C_numCols);
1561 size_t C_row_i_length, j, k, last_index;
1564 LocalOrdinal LO_INVALID = OrdinalTraits<LocalOrdinal>::invalid();
1565 Array<LocalOrdinal> Acol2Brow(Aview.
colMap->getNodeNumElements(),LO_INVALID);
1566 Array<LocalOrdinal> Acol2Irow(Aview.
colMap->getNodeNumElements(),LO_INVALID);
1569 for(LocalOrdinal i=Aview.
colMap->getMinLocalIndex(); i <=
1570 Aview.
colMap->getMaxLocalIndex(); i++)
1575 for(LocalOrdinal i=Aview.
colMap->getMinLocalIndex(); i <=
1576 Aview.
colMap->getMaxLocalIndex(); i++) {
1577 GlobalOrdinal GID = Aview.
colMap->getGlobalElement(i);
1578 LocalOrdinal BLID = Bview.
origMatrix->getRowMap()->getLocalElement(GID);
1579 if(BLID != LO_INVALID) Acol2Brow[i] = BLID;
1580 else Acol2Irow[i] = Bview.
importMatrix->getRowMap()->getLocalElement(GID);
1590 ArrayRCP<const size_t> Arowptr_RCP, Browptr_RCP, Irowptr_RCP;
1591 ArrayRCP<const LocalOrdinal> Acolind_RCP, Bcolind_RCP, Icolind_RCP;
1592 ArrayRCP<const Scalar> Avals_RCP, Bvals_RCP, Ivals_RCP;
1593 ArrayView<const size_t> Arowptr, Browptr, Irowptr;
1594 ArrayView<const LocalOrdinal> Acolind, Bcolind, Icolind;
1595 ArrayView<const Scalar> Avals, Bvals, Ivals;
1596 Aview.
origMatrix->getAllValues(Arowptr_RCP,Acolind_RCP,Avals_RCP);
1597 Bview.
origMatrix->getAllValues(Browptr_RCP,Bcolind_RCP,Bvals_RCP);
1598 Arowptr = Arowptr_RCP(); Acolind = Acolind_RCP(); Avals = Avals_RCP();
1599 Browptr = Browptr_RCP(); Bcolind = Bcolind_RCP(); Bvals = Bvals_RCP();
1601 Bview.
importMatrix->getAllValues(Irowptr_RCP,Icolind_RCP,Ivals_RCP);
1602 Irowptr = Irowptr_RCP(); Icolind = Icolind_RCP(); Ivals = Ivals_RCP();
1605 bool C_filled = C.isFillComplete();
1607 for (
size_t i = 0; i < C_numCols; i++)
1608 c_index[i] = OrdinalTraits<size_t>::invalid();
1611 size_t Arows = Aview.
rowMap->getNodeNumElements();
1612 for(
size_t i=0; i<Arows; ++i) {
1616 GlobalOrdinal global_row = Aview.
rowMap->getGlobalElement(i);
1622 C_row_i_length = OrdinalTraits<size_t>::zero();
1624 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1625 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1626 const Scalar Aval = Avals[k];
1627 if (Aval == STS::zero())
1630 if (Ak == LO_INVALID)
1633 for (j = Browptr[Ak]; j < Browptr[Ak+1]; ++j) {
1634 LocalOrdinal col = Bcolind[j];
1637 if (c_index[col] == OrdinalTraits<size_t>::invalid()){
1640 C_row_i[C_row_i_length] = Aval*Bvals[j];
1641 C_cols[C_row_i_length] = col;
1642 c_index[col] = C_row_i_length;
1646 C_row_i[c_index[col]] += Aval*Bvals[j];
1651 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1652 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1653 C_cols[ii] = bcols[C_cols[ii]];
1654 combined_index[ii] = C_cols[ii];
1655 combined_values[ii] = C_row_i[ii];
1657 last_index = C_row_i_length;
1663 C_row_i_length = OrdinalTraits<size_t>::zero();
1665 for (k = Arowptr[i]; k < Arowptr[i+1]; ++k) {
1666 LocalOrdinal Ak = Acol2Brow[Acolind[k]];
1667 const Scalar Aval = Avals[k];
1668 if (Aval == STS::zero())
1671 if (Ak!=LO_INVALID)
continue;
1673 Ak = Acol2Irow[Acolind[k]];
1674 for (j = Irowptr[Ak]; j < Irowptr[Ak+1]; ++j) {
1675 LocalOrdinal col = Icolind[j];
1678 if (c_index[col] == OrdinalTraits<size_t>::invalid()) {
1681 C_row_i[C_row_i_length] = Aval*Ivals[j];
1682 C_cols[C_row_i_length] = col;
1683 c_index[col] = C_row_i_length;
1688 C_row_i[c_index[col]] += Aval*Ivals[j];
1693 for (
size_t ii = 0; ii < C_row_i_length; ii++) {
1694 c_index[C_cols[ii]] = OrdinalTraits<size_t>::invalid();
1695 C_cols[ii] = bcols_import[C_cols[ii]];
1696 combined_index[last_index] = C_cols[ii];
1697 combined_values[last_index] = C_row_i[ii];
1704 C.sumIntoGlobalValues(
1706 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1707 combined_values.view(OrdinalTraits<size_t>::zero(), last_index))
1709 C.insertGlobalValues(
1711 combined_index.view(OrdinalTraits<size_t>::zero(), last_index),
1712 combined_values.view(OrdinalTraits<size_t>::zero(), last_index));
1718 template<
class Scalar,
1720 class GlobalOrdinal,
1723 typedef typename Teuchos::Array<Teuchos::ArrayView<const LocalOrdinal> >::size_type local_length_size;
1724 Mview.maxNumRowEntries = Teuchos::OrdinalTraits<local_length_size>::zero();
1726 if (Mview.indices.size() > Teuchos::OrdinalTraits<local_length_size>::zero()) {
1727 Mview.maxNumRowEntries = Mview.indices[0].size();
1729 for (local_length_size i = 1; i < Mview.indices.size(); ++i)
1730 if (Mview.indices[i].size() > Mview.maxNumRowEntries)
1731 Mview.maxNumRowEntries = Mview.indices[i].size();
1736 template<
class CrsMatrixType>
1737 size_t C_estimate_nnz(CrsMatrixType & A, CrsMatrixType &B){
1739 size_t Aest = 100, Best=100;
1740 if (A.getNodeNumEntries() > 0)
1741 Aest = (A.getNodeNumRows() > 0)? A.getNodeNumEntries()/A.getNodeNumRows() : 100;
1742 if (B.getNodeNumEntries() > 0)
1743 Best = (B.getNodeNumRows() > 0) ? B.getNodeNumEntries()/B.getNodeNumRows() : 100;
1745 size_t nnzperrow = (size_t)(sqrt((
double)Aest) + sqrt((
double)Best) - 1);
1746 nnzperrow *= nnzperrow;
1748 return (
size_t)(A.getNodeNumRows()*nnzperrow*0.75 + 100);
1758 template<
class Scalar,
1760 class GlobalOrdinal,
1762 void mult_A_B_newmatrix(
1766 const std::string& label,
1767 const Teuchos::RCP<Teuchos::ParameterList>& params)
1769 using Teuchos::Array;
1770 using Teuchos::ArrayRCP;
1771 using Teuchos::ArrayView;
1776 typedef LocalOrdinal LO;
1777 typedef GlobalOrdinal GO;
1783 typedef typename map_type::local_map_type local_map_type;
1785 typedef typename KCRS::StaticCrsGraphType graph_t;
1786 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1787 typedef typename NO::execution_space execution_space;
1788 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
1789 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
1791 #ifdef HAVE_TPETRA_MMM_TIMINGS 1792 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1793 using Teuchos::TimeMonitor;
1794 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*(TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM M5 Cmap")))));
1796 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1799 RCP<const import_type> Cimport;
1800 RCP<const map_type> Ccolmap;
1801 RCP<const import_type> Bimport = Bview.
origMatrix->getGraph()->getImporter();
1802 RCP<const import_type> Iimport = Bview.
importMatrix.is_null() ?
1803 Teuchos::null : Bview.
importMatrix->getGraph()->getImporter();
1804 local_map_type Acolmap_local = Aview.
colMap->getLocalMap();
1805 local_map_type Browmap_local = Bview.
origMatrix->getRowMap()->getLocalMap();
1806 local_map_type Irowmap_local;
if(!Bview.
importMatrix.is_null()) Irowmap_local = Bview.
importMatrix->getRowMap()->getLocalMap();
1807 local_map_type Bcolmap_local = Bview.
origMatrix->getColMap()->getLocalMap();
1808 local_map_type Icolmap_local;
if(!Bview.
importMatrix.is_null()) Icolmap_local = Bview.
importMatrix->getColMap()->getLocalMap();
1815 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.
colMap->getNodeNumElements()), Icol2Ccol;
1821 const LO colMapSize =
static_cast<LO
>(Bview.
colMap->getNodeNumElements());
1823 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_fill",
1824 Kokkos::RangePolicy<execution_space, LO>(0, colMapSize),
1825 KOKKOS_LAMBDA(
const LO i) {
1838 if (!Bimport.is_null() && !Iimport.is_null()) {
1839 Cimport = Bimport->setUnion(*Iimport);
1841 else if (!Bimport.is_null() && Iimport.is_null()) {
1842 Cimport = Bimport->setUnion();
1844 else if (Bimport.is_null() && !Iimport.is_null()) {
1845 Cimport = Iimport->setUnion();
1848 throw std::runtime_error(
"TpetraExt::MMM status of matrix importers is nonsensical");
1850 Ccolmap = Cimport->getTargetMap();
1855 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.
origMatrix->getDomainMap()),
1856 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
1863 Kokkos::resize(Icol2Ccol,Bview.
importMatrix->getColMap()->getNodeNumElements());
1864 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
1865 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Bcol2Ccol_getGlobalElement",range_type(0,Bview.
origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
1866 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
1868 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::Icol2Ccol_getGlobalElement",range_type(0,Bview.
importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
1869 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
1896 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.
colMap->getNodeNumElements());
1897 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.
colMap->getNodeNumElements());
1899 Kokkos::parallel_for(
"Tpetra::mult_A_B_newmatrix::construct_tables",range_type(Aview.
colMap->getMinLocalIndex(), Aview.
colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
1900 GO aidx = Acolmap_local.getGlobalElement(i);
1901 LO B_LID = Browmap_local.getLocalElement(aidx);
1902 if (B_LID != LO_INVALID) {
1903 targetMapToOrigRow(i) = B_LID;
1904 targetMapToImportRow(i) = LO_INVALID;
1906 LO I_LID = Irowmap_local.getLocalElement(aidx);
1907 targetMapToOrigRow(i) = LO_INVALID;
1908 targetMapToImportRow(i) = I_LID;
1913 #ifdef HAVE_TPETRA_MMM_TIMINGS 1919 KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::mult_A_B_newmatrix_kernel_wrapper(Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
1926 template<
class Scalar,
1928 class GlobalOrdinal,
1930 class LocalOrdinalViewType>
1933 const LocalOrdinalViewType & targetMapToOrigRow,
1934 const LocalOrdinalViewType & targetMapToImportRow,
1935 const LocalOrdinalViewType & Bcol2Ccol,
1936 const LocalOrdinalViewType & Icol2Ccol,
1939 const std::string& label,
1940 const Teuchos::RCP<Teuchos::ParameterList>& params) {
1941 #ifdef HAVE_TPETRA_MMM_TIMINGS 1942 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
1943 using Teuchos::TimeMonitor;
1944 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore"))));
1945 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
1948 using Teuchos::Array;
1949 using Teuchos::ArrayRCP;
1950 using Teuchos::ArrayView;
1956 typedef typename KCRS::StaticCrsGraphType graph_t;
1957 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
1958 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
1959 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
1960 typedef typename KCRS::values_type::non_const_type scalar_view_t;
1963 typedef LocalOrdinal LO;
1964 typedef GlobalOrdinal GO;
1967 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1968 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
1969 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
1972 RCP<const map_type> Ccolmap = C.
getColMap();
1973 size_t m = Aview.
origMatrix->getNodeNumRows();
1974 size_t n = Ccolmap->getNodeNumElements();
1975 size_t b_max_nnz_per_row = Bview.
origMatrix->getNodeMaxNumRowEntries();
1978 const KCRS & Amat = Aview.
origMatrix->getLocalMatrix();
1979 const KCRS & Bmat = Bview.
origMatrix->getLocalMatrix();
1981 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
1982 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
1983 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
1985 c_lno_view_t Irowptr;
1986 lno_nnz_view_t Icolind;
1987 scalar_view_t Ivals;
1989 Irowptr = Bview.
importMatrix->getLocalMatrix().graph.row_map;
1990 Icolind = Bview.
importMatrix->getLocalMatrix().graph.entries;
1992 b_max_nnz_per_row = std::max(b_max_nnz_per_row,Bview.
importMatrix->getNodeMaxNumRowEntries());
1995 #ifdef HAVE_TPETRA_MMM_TIMINGS 1996 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
2008 lno_view_t Crowptr(
"Crowptr",m+1);
2009 lno_nnz_view_t Ccolind(
"Ccolind",CSR_alloc);
2010 scalar_view_t Cvals(
"Cvals",CSR_alloc);
2020 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2021 std::vector<size_t> c_status(n, ST_INVALID);
2031 size_t CSR_ip = 0, OLD_ip = 0;
2032 for (
size_t i = 0; i < m; i++) {
2035 Crowptr[i] = CSR_ip;
2038 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2039 LO Aik = Acolind[k];
2040 const SC Aval = Avals[k];
2041 if (Aval == SC_ZERO)
2044 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2051 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2054 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2055 LO Bkj = Bcolind[j];
2056 LO Cij = Bcol2Ccol[Bkj];
2058 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2060 c_status[Cij] = CSR_ip;
2061 Ccolind[CSR_ip] = Cij;
2062 Cvals[CSR_ip] = Aval*Bvals[j];
2066 Cvals[c_status[Cij]] += Aval*Bvals[j];
2077 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2078 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2079 LO Ikj = Icolind[j];
2080 LO Cij = Icol2Ccol[Ikj];
2082 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip){
2084 c_status[Cij] = CSR_ip;
2085 Ccolind[CSR_ip] = Cij;
2086 Cvals[CSR_ip] = Aval*Ivals[j];
2089 Cvals[c_status[Cij]] += Aval*Ivals[j];
2096 if (i+1 < m && CSR_ip + std::min(n,(Arowptr[i+2]-Arowptr[i+1])*b_max_nnz_per_row) > CSR_alloc) {
2098 Kokkos::resize(Ccolind,CSR_alloc);
2099 Kokkos::resize(Cvals,CSR_alloc);
2104 Crowptr[m] = CSR_ip;
2107 Kokkos::resize(Ccolind,CSR_ip);
2108 Kokkos::resize(Cvals,CSR_ip);
2110 #ifdef HAVE_TPETRA_MMM_TIMINGS 2111 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix Final Sort"))));
2112 MM2 = Teuchos::null;
2116 if (params.is_null() || params->get(
"sort entries",
true))
2117 Import_Util::sortCrsEntries(Crowptr,Ccolind, Cvals);
2121 #ifdef HAVE_TPETRA_MMM_TIMINGS 2122 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix ESFC"))));
2133 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2134 labelList->set(
"Timer Label",label);
2135 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
2136 RCP<const Export<LO,GO,NO> > dummyExport;
2137 C.
expertStaticFillComplete(Bview. origMatrix->getDomainMap(), Aview. origMatrix->getRangeMap(), Cimport,dummyExport,labelList);
2146 template<
class Scalar,
2148 class GlobalOrdinal,
2150 void mult_A_B_reuse(
2154 const std::string& label,
2155 const Teuchos::RCP<Teuchos::ParameterList>& params)
2157 using Teuchos::Array;
2158 using Teuchos::ArrayRCP;
2159 using Teuchos::ArrayView;
2164 typedef LocalOrdinal LO;
2165 typedef GlobalOrdinal GO;
2171 typedef typename map_type::local_map_type local_map_type;
2173 typedef typename KCRS::StaticCrsGraphType graph_t;
2174 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2175 typedef typename NO::execution_space execution_space;
2176 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2177 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2179 #ifdef HAVE_TPETRA_MMM_TIMINGS 2180 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2181 using Teuchos::TimeMonitor;
2182 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse Cmap"))));
2184 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2187 RCP<const import_type> Cimport = C.
getGraph()->getImporter();
2188 RCP<const map_type> Ccolmap = C.
getColMap();
2189 local_map_type Acolmap_local = Aview.
colMap->getLocalMap();
2190 local_map_type Browmap_local = Bview.
origMatrix->getRowMap()->getLocalMap();
2191 local_map_type Irowmap_local;
if(!Bview.
importMatrix.is_null()) Irowmap_local = Bview.
importMatrix->getRowMap()->getLocalMap();
2192 local_map_type Bcolmap_local = Bview.
origMatrix->getColMap()->getLocalMap();
2193 local_map_type Icolmap_local;
if(!Bview.
importMatrix.is_null()) Icolmap_local = Bview.
importMatrix->getColMap()->getLocalMap();
2194 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2197 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.
colMap->getNodeNumElements()), Icol2Ccol;
2201 Kokkos::parallel_for(range_type(0,Bview.
origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2202 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2206 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.
origMatrix->getDomainMap()),
2207 std::runtime_error,
"Tpetra::MMM: Import setUnion messed with the DomainMap in an unfortunate way");
2209 Kokkos::resize(Icol2Ccol,Bview.
importMatrix->getColMap()->getNodeNumElements());
2210 Kokkos::parallel_for(range_type(0,Bview.
importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2211 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2217 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.
colMap->getNodeNumElements());
2218 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.
colMap->getNodeNumElements());
2219 Kokkos::parallel_for(range_type(Aview.
colMap->getMinLocalIndex(), Aview.
colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2220 GO aidx = Acolmap_local.getGlobalElement(i);
2221 LO B_LID = Browmap_local.getLocalElement(aidx);
2222 if (B_LID != LO_INVALID) {
2223 targetMapToOrigRow(i) = B_LID;
2224 targetMapToImportRow(i) = LO_INVALID;
2226 LO I_LID = Irowmap_local.getLocalElement(aidx);
2227 targetMapToOrigRow(i) = LO_INVALID;
2228 targetMapToImportRow(i) = I_LID;
2233 #ifdef HAVE_TPETRA_MMM_TIMINGS 2239 KernelWrappers<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::mult_A_B_reuse_kernel_wrapper(Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2243 template<
class Scalar,
2245 class GlobalOrdinal,
2247 class LocalOrdinalViewType>
2250 const LocalOrdinalViewType & targetMapToOrigRow,
2251 const LocalOrdinalViewType & targetMapToImportRow,
2252 const LocalOrdinalViewType & Bcol2Ccol,
2253 const LocalOrdinalViewType & Icol2Ccol,
2256 const std::string& label,
2257 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2258 #ifdef HAVE_TPETRA_MMM_TIMINGS 2259 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2260 using Teuchos::TimeMonitor;
2261 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse SerialCore"))));
2262 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
2270 typedef typename KCRS::StaticCrsGraphType graph_t;
2271 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2272 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2273 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2276 typedef LocalOrdinal LO;
2277 typedef GlobalOrdinal GO;
2280 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2281 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2282 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2285 RCP<const map_type> Ccolmap = C.
getColMap();
2286 size_t m = Aview.
origMatrix->getNodeNumRows();
2287 size_t n = Ccolmap->getNodeNumElements();
2290 const KCRS & Amat = Aview.
origMatrix->getLocalMatrix();
2291 const KCRS & Bmat = Bview.
origMatrix->getLocalMatrix();
2294 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
2295 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
2296 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2297 scalar_view_t Cvals = Cmat.values;
2299 c_lno_view_t Irowptr;
2300 lno_nnz_view_t Icolind;
2301 scalar_view_t Ivals;
2303 Irowptr = Bview.
importMatrix->getLocalMatrix().graph.row_map;
2304 Icolind = Bview.
importMatrix->getLocalMatrix().graph.entries;
2308 #ifdef HAVE_TPETRA_MMM_TIMINGS 2309 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Newmatrix SerialCore - Compare"))));
2321 std::vector<size_t> c_status(n, ST_INVALID);
2324 size_t CSR_ip = 0, OLD_ip = 0;
2325 for (
size_t i = 0; i < m; i++) {
2328 OLD_ip = Crowptr[i];
2329 CSR_ip = Crowptr[i+1];
2330 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2331 c_status[Ccolind[k]] = k;
2337 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2338 LO Aik = Acolind[k];
2339 const SC Aval = Avals[k];
2340 if (Aval == SC_ZERO)
2343 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2345 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2347 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2348 LO Bkj = Bcolind[j];
2349 LO Cij = Bcol2Ccol[Bkj];
2351 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2352 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
2353 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2355 Cvals[c_status[Cij]] += Aval * Bvals[j];
2360 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2361 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2362 LO Ikj = Icolind[j];
2363 LO Cij = Icol2Ccol[Ikj];
2365 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2366 std::runtime_error,
"Trying to insert a new entry (" << i <<
"," << Cij <<
") into a static graph " <<
2367 "(c_status = " << c_status[Cij] <<
" of [" << OLD_ip <<
"," << CSR_ip <<
"))");
2369 Cvals[c_status[Cij]] += Aval * Ivals[j];
2375 #ifdef HAVE_TPETRA_MMM_TIMINGS 2377 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM Reuse ESFC"))));
2386 template<
class Scalar,
2388 class GlobalOrdinal,
2390 void jacobi_A_B_newmatrix(
2396 const std::string& label,
2397 const Teuchos::RCP<Teuchos::ParameterList>& params)
2399 using Teuchos::Array;
2400 using Teuchos::ArrayRCP;
2401 using Teuchos::ArrayView;
2405 typedef LocalOrdinal LO;
2406 typedef GlobalOrdinal GO;
2411 typedef typename map_type::local_map_type local_map_type;
2415 typedef typename KCRS::StaticCrsGraphType graph_t;
2416 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2417 typedef typename NO::execution_space execution_space;
2418 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2419 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2422 #ifdef HAVE_TPETRA_MMM_TIMINGS 2423 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2424 using Teuchos::TimeMonitor;
2425 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi M5 Cmap"))));
2427 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2430 RCP<const import_type> Cimport;
2431 RCP<const map_type> Ccolmap;
2432 RCP<const import_type> Bimport = Bview.
origMatrix->getGraph()->getImporter();
2433 RCP<const import_type> Iimport = Bview.
importMatrix.is_null() ?
2434 Teuchos::null : Bview.
importMatrix->getGraph()->getImporter();
2435 local_map_type Acolmap_local = Aview.
colMap->getLocalMap();
2436 local_map_type Browmap_local = Bview.
origMatrix->getRowMap()->getLocalMap();
2437 local_map_type Irowmap_local;
if(!Bview.
importMatrix.is_null()) Irowmap_local = Bview.
importMatrix->getRowMap()->getLocalMap();
2438 local_map_type Bcolmap_local = Bview.
origMatrix->getColMap()->getLocalMap();
2439 local_map_type Icolmap_local;
if(!Bview.
importMatrix.is_null()) Icolmap_local = Bview.
importMatrix->getColMap()->getLocalMap();
2446 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.
colMap->getNodeNumElements()), Icol2Ccol;
2455 Kokkos::RangePolicy<execution_space, LO> range (0, static_cast<LO> (Bview.
colMap->getNodeNumElements ()));
2456 Kokkos::parallel_for (range, KOKKOS_LAMBDA (
const size_t i) {
2457 Bcol2Ccol(i) =
static_cast<LO
> (i);
2468 if (!Bimport.is_null() && !Iimport.is_null()){
2469 Cimport = Bimport->setUnion(*Iimport);
2470 Ccolmap = Cimport->getTargetMap();
2472 }
else if (!Bimport.is_null() && Iimport.is_null()) {
2473 Cimport = Bimport->setUnion();
2475 }
else if(Bimport.is_null() && !Iimport.is_null()) {
2476 Cimport = Iimport->setUnion();
2479 throw std::runtime_error(
"TpetraExt::Jacobi status of matrix importers is nonsensical");
2481 Ccolmap = Cimport->getTargetMap();
2483 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.
origMatrix->getDomainMap()),
2484 std::runtime_error,
"Tpetra:Jacobi Import setUnion messed with the DomainMap in an unfortunate way");
2491 Kokkos::resize(Icol2Ccol,Bview.
importMatrix->getColMap()->getNodeNumElements());
2492 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2493 Kokkos::parallel_for(range_type(0,Bview.
origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2494 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2496 Kokkos::parallel_for(range_type(0,Bview.
importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2497 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2524 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.
colMap->getNodeNumElements());
2525 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.
colMap->getNodeNumElements());
2526 Kokkos::parallel_for(range_type(Aview.
colMap->getMinLocalIndex(), Aview.
colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2527 GO aidx = Acolmap_local.getGlobalElement(i);
2528 LO B_LID = Browmap_local.getLocalElement(aidx);
2529 if (B_LID != LO_INVALID) {
2530 targetMapToOrigRow(i) = B_LID;
2531 targetMapToImportRow(i) = LO_INVALID;
2533 LO I_LID = Irowmap_local.getLocalElement(aidx);
2534 targetMapToOrigRow(i) = LO_INVALID;
2535 targetMapToImportRow(i) = I_LID;
2542 KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::jacobi_A_B_newmatrix_kernel_wrapper(omega,Dinv,Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2551 template<
class Scalar,
2553 class GlobalOrdinal,
2555 class LocalOrdinalViewType>
2556 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::jacobi_A_B_newmatrix_kernel_wrapper(Scalar omega,
2560 const LocalOrdinalViewType & targetMapToOrigRow,
2561 const LocalOrdinalViewType & targetMapToImportRow,
2562 const LocalOrdinalViewType & Bcol2Ccol,
2563 const LocalOrdinalViewType & Icol2Ccol,
2566 const std::string& label,
2567 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2569 #ifdef HAVE_TPETRA_MMM_TIMINGS 2570 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2571 using Teuchos::TimeMonitor;
2572 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Nemwmatrix SerialCore"))));
2576 using Teuchos::Array;
2577 using Teuchos::ArrayRCP;
2578 using Teuchos::ArrayView;
2584 typedef typename KCRS::StaticCrsGraphType graph_t;
2585 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2586 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2587 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2588 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2591 typedef typename scalar_view_t::memory_space scalar_memory_space;
2594 typedef LocalOrdinal LO;
2595 typedef GlobalOrdinal GO;
2599 size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2600 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2603 RCP<const map_type> Ccolmap = C.
getColMap();
2604 size_t m = Aview.
origMatrix->getNodeNumRows();
2605 size_t n = Ccolmap->getNodeNumElements();
2606 size_t b_max_nnz_per_row = Bview.
origMatrix->getNodeMaxNumRowEntries();
2609 const KCRS & Amat = Aview.
origMatrix->getLocalMatrix();
2610 const KCRS & Bmat = Bview.
origMatrix->getLocalMatrix();
2612 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map;
2613 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries;
2614 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2616 c_lno_view_t Irowptr;
2617 lno_nnz_view_t Icolind;
2618 scalar_view_t Ivals;
2620 Irowptr = Bview.
importMatrix->getLocalMatrix().graph.row_map;
2621 Icolind = Bview.
importMatrix->getLocalMatrix().graph.entries;
2623 b_max_nnz_per_row = std::max(b_max_nnz_per_row,Bview.
importMatrix->getNodeMaxNumRowEntries());
2627 auto Dvals = Dinv.template getLocalView<scalar_memory_space>();
2635 size_t INVALID = Teuchos::OrdinalTraits<size_t>::invalid();
2636 Array<size_t> c_status(n, ST_INVALID);
2646 lno_view_t Crowptr(
"Crowptr",m+1);
2647 lno_nnz_view_t Ccolind(
"Ccolind",CSR_alloc);
2648 scalar_view_t Cvals(
"Cvals",CSR_alloc);
2649 size_t CSR_ip = 0, OLD_ip = 0;
2651 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2665 for (
size_t i = 0; i < m; i++) {
2668 Crowptr[i] = CSR_ip;
2669 SC minusOmegaDval = -omega*Dvals(i,0);
2672 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
2673 Scalar Bval = Bvals[j];
2674 if (Bval == SC_ZERO)
2676 LO Bij = Bcolind[j];
2677 LO Cij = Bcol2Ccol[Bij];
2680 c_status[Cij] = CSR_ip;
2681 Ccolind[CSR_ip] = Cij;
2682 Cvals[CSR_ip] = Bvals[j];
2687 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
2688 LO Aik = Acolind[k];
2689 const SC Aval = Avals[k];
2690 if (Aval == SC_ZERO)
2693 if (targetMapToOrigRow[Aik] != LO_INVALID) {
2695 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
2697 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
2698 LO Bkj = Bcolind[j];
2699 LO Cij = Bcol2Ccol[Bkj];
2701 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2703 c_status[Cij] = CSR_ip;
2704 Ccolind[CSR_ip] = Cij;
2705 Cvals[CSR_ip] = minusOmegaDval* Aval * Bvals[j];
2709 Cvals[c_status[Cij]] += minusOmegaDval* Aval * Bvals[j];
2715 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
2716 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
2717 LO Ikj = Icolind[j];
2718 LO Cij = Icol2Ccol[Ikj];
2720 if (c_status[Cij] == INVALID || c_status[Cij] < OLD_ip) {
2722 c_status[Cij] = CSR_ip;
2723 Ccolind[CSR_ip] = Cij;
2724 Cvals[CSR_ip] = minusOmegaDval* Aval * Ivals[j];
2727 Cvals[c_status[Cij]] += minusOmegaDval* Aval * Ivals[j];
2734 if (i+1 < m && CSR_ip + std::min(n,(Arowptr[i+2]-Arowptr[i+1]+1)*b_max_nnz_per_row) > CSR_alloc) {
2736 Kokkos::resize(Ccolind,CSR_alloc);
2737 Kokkos::resize(Cvals,CSR_alloc);
2741 Crowptr[m] = CSR_ip;
2744 Kokkos::resize(Ccolind,CSR_ip);
2745 Kokkos::resize(Cvals,CSR_ip);
2749 #ifdef HAVE_TPETRA_MMM_TIMINGS 2750 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix Final Sort"))));
2764 if (params.is_null() || params->get(
"sort entries",
true))
2765 Import_Util::sortCrsEntries(Crowptr,Ccolind, Cvals);
2768 #ifdef HAVE_TPETRA_MMM_TIMINGS 2769 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Newmatrix ESFC"))));
2780 RCP<Teuchos::ParameterList> labelList = rcp(
new Teuchos::ParameterList);
2781 labelList->set(
"Timer Label",label);
2782 if(!params.is_null()) labelList->set(
"compute global constants",params->get(
"compute global constants",
true));
2783 RCP<const Export<LO,GO,NO> > dummyExport;
2790 template<
class Scalar,
2792 class GlobalOrdinal,
2794 void jacobi_A_B_reuse(
2800 const std::string& label,
2801 const Teuchos::RCP<Teuchos::ParameterList>& params)
2803 using Teuchos::Array;
2804 using Teuchos::ArrayRCP;
2805 using Teuchos::ArrayView;
2809 typedef LocalOrdinal LO;
2810 typedef GlobalOrdinal GO;
2817 typedef typename map_type::local_map_type local_map_type;
2819 typedef typename KCRS::StaticCrsGraphType graph_t;
2820 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
2821 typedef typename NO::execution_space execution_space;
2822 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
2823 typedef Kokkos::View<LO*, typename lno_view_t::array_layout, typename lno_view_t::device_type> lo_view_t;
2825 #ifdef HAVE_TPETRA_MMM_TIMINGS 2826 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2827 using Teuchos::TimeMonitor;
2828 RCP<TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse Cmap"))));
2830 LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2833 RCP<const import_type> Cimport = C.
getGraph()->getImporter();
2834 RCP<const map_type> Ccolmap = C.
getColMap();
2835 local_map_type Acolmap_local = Aview.
colMap->getLocalMap();
2836 local_map_type Browmap_local = Bview.
origMatrix->getRowMap()->getLocalMap();
2837 local_map_type Irowmap_local;
if(!Bview.
importMatrix.is_null()) Irowmap_local = Bview.
importMatrix->getRowMap()->getLocalMap();
2838 local_map_type Bcolmap_local = Bview.
origMatrix->getColMap()->getLocalMap();
2839 local_map_type Icolmap_local;
if(!Bview.
importMatrix.is_null()) Icolmap_local = Bview.
importMatrix->getColMap()->getLocalMap();
2840 local_map_type Ccolmap_local = Ccolmap->getLocalMap();
2843 lo_view_t Bcol2Ccol(Kokkos::ViewAllocateWithoutInitializing(
"Bcol2Ccol"),Bview.
colMap->getNodeNumElements()), Icol2Ccol;
2847 Kokkos::parallel_for(range_type(0,Bview.
origMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2848 Bcol2Ccol(i) = Ccolmap_local.getLocalElement(Bcolmap_local.getGlobalElement(i));
2852 TEUCHOS_TEST_FOR_EXCEPTION(!Cimport->getSourceMap()->isSameAs(*Bview.
origMatrix->getDomainMap()),
2853 std::runtime_error,
"Tpetra::Jacobi: Import setUnion messed with the DomainMap in an unfortunate way");
2855 Kokkos::resize(Icol2Ccol,Bview.
importMatrix->getColMap()->getNodeNumElements());
2856 Kokkos::parallel_for(range_type(0,Bview.
importMatrix->getColMap()->getNodeNumElements()),KOKKOS_LAMBDA(
const LO i) {
2857 Icol2Ccol(i) = Ccolmap_local.getLocalElement(Icolmap_local.getGlobalElement(i));
2863 lo_view_t targetMapToOrigRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToOrigRow"),Aview.
colMap->getNodeNumElements());
2864 lo_view_t targetMapToImportRow(Kokkos::ViewAllocateWithoutInitializing(
"targetMapToImportRow"),Aview.
colMap->getNodeNumElements());
2865 Kokkos::parallel_for(range_type(Aview.
colMap->getMinLocalIndex(), Aview.
colMap->getMaxLocalIndex()+1),KOKKOS_LAMBDA(
const LO i) {
2866 GO aidx = Acolmap_local.getGlobalElement(i);
2867 LO B_LID = Browmap_local.getLocalElement(aidx);
2868 if (B_LID != LO_INVALID) {
2869 targetMapToOrigRow(i) = B_LID;
2870 targetMapToImportRow(i) = LO_INVALID;
2872 LO I_LID = Irowmap_local.getLocalElement(aidx);
2873 targetMapToOrigRow(i) = LO_INVALID;
2874 targetMapToImportRow(i) = I_LID;
2879 #ifdef HAVE_TPETRA_MMM_TIMINGS 2885 KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,lo_view_t>::jacobi_A_B_reuse_kernel_wrapper(omega,Dinv,Aview,Bview,targetMapToOrigRow,targetMapToImportRow,Bcol2Ccol,Icol2Ccol,C,Cimport,label,params);
2891 template<
class Scalar,
2893 class GlobalOrdinal,
2895 class LocalOrdinalViewType>
2896 void KernelWrappers2<Scalar,LocalOrdinal,GlobalOrdinal,Node,LocalOrdinalViewType>::jacobi_A_B_reuse_kernel_wrapper(Scalar omega,
2900 const LocalOrdinalViewType & targetMapToOrigRow,
2901 const LocalOrdinalViewType & targetMapToImportRow,
2902 const LocalOrdinalViewType & Bcol2Ccol,
2903 const LocalOrdinalViewType & Icol2Ccol,
2906 const std::string& label,
2907 const Teuchos::RCP<Teuchos::ParameterList>& params) {
2908 #ifdef HAVE_TPETRA_MMM_TIMINGS 2909 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
2910 using Teuchos::TimeMonitor;
2911 Teuchos::RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SerialCore"))));
2912 Teuchos::RCP<Teuchos::TimeMonitor> MM2;
2919 typedef typename KCRS::StaticCrsGraphType graph_t;
2920 typedef typename graph_t::row_map_type::const_type c_lno_view_t;
2921 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
2922 typedef typename KCRS::values_type::non_const_type scalar_view_t;
2923 typedef typename scalar_view_t::memory_space scalar_memory_space;
2926 typedef LocalOrdinal LO;
2927 typedef GlobalOrdinal GO;
2930 const size_t ST_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2931 const LO LO_INVALID = Teuchos::OrdinalTraits<LO>::invalid();
2932 const SC SC_ZERO = Teuchos::ScalarTraits<Scalar>::zero();
2935 RCP<const map_type> Ccolmap = C.
getColMap();
2936 size_t m = Aview.
origMatrix->getNodeNumRows();
2937 size_t n = Ccolmap->getNodeNumElements();
2940 const KCRS & Amat = Aview.
origMatrix->getLocalMatrix();
2941 const KCRS & Bmat = Bview.
origMatrix->getLocalMatrix();
2944 c_lno_view_t Arowptr = Amat.graph.row_map, Browptr = Bmat.graph.row_map, Crowptr = Cmat.graph.row_map;
2945 const lno_nnz_view_t Acolind = Amat.graph.entries, Bcolind = Bmat.graph.entries, Ccolind = Cmat.graph.entries;
2946 const scalar_view_t Avals = Amat.values, Bvals = Bmat.values;
2947 scalar_view_t Cvals = Cmat.values;
2949 c_lno_view_t Irowptr;
2950 lno_nnz_view_t Icolind;
2951 scalar_view_t Ivals;
2953 Irowptr = Bview.
importMatrix->getLocalMatrix().graph.row_map;
2954 Icolind = Bview.
importMatrix->getLocalMatrix().graph.entries;
2959 auto Dvals = Dinv.template getLocalView<scalar_memory_space>();
2961 #ifdef HAVE_TPETRA_MMM_TIMINGS 2962 MM2 = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse SerialCore - Compare"))));
2970 std::vector<size_t> c_status(n, ST_INVALID);
2973 size_t CSR_ip = 0, OLD_ip = 0;
2974 for (
size_t i = 0; i < m; i++) {
2978 OLD_ip = Crowptr[i];
2979 CSR_ip = Crowptr[i+1];
2980 for (
size_t k = OLD_ip; k < CSR_ip; k++) {
2981 c_status[Ccolind[k]] = k;
2987 SC minusOmegaDval = -omega*Dvals(i,0);
2990 for (
size_t j = Browptr[i]; j < Browptr[i+1]; j++) {
2991 Scalar Bval = Bvals[j];
2992 if (Bval == SC_ZERO)
2994 LO Bij = Bcolind[j];
2995 LO Cij = Bcol2Ccol[Bij];
2997 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
2998 std::runtime_error,
"Trying to insert a new entry into a static graph");
3000 Cvals[c_status[Cij]] = Bvals[j];
3004 for (
size_t k = Arowptr[i]; k < Arowptr[i+1]; k++) {
3005 LO Aik = Acolind[k];
3006 const SC Aval = Avals[k];
3007 if (Aval == SC_ZERO)
3010 if (targetMapToOrigRow[Aik] != LO_INVALID) {
3012 size_t Bk =
static_cast<size_t> (targetMapToOrigRow[Aik]);
3014 for (
size_t j = Browptr[Bk]; j < Browptr[Bk+1]; ++j) {
3015 LO Bkj = Bcolind[j];
3016 LO Cij = Bcol2Ccol[Bkj];
3018 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3019 std::runtime_error,
"Trying to insert a new entry into a static graph");
3021 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Bvals[j];
3026 size_t Ik =
static_cast<size_t> (targetMapToImportRow[Aik]);
3027 for (
size_t j = Irowptr[Ik]; j < Irowptr[Ik+1]; ++j) {
3028 LO Ikj = Icolind[j];
3029 LO Cij = Icol2Ccol[Ikj];
3031 TEUCHOS_TEST_FOR_EXCEPTION(c_status[Cij] < OLD_ip || c_status[Cij] >= CSR_ip,
3032 std::runtime_error,
"Trying to insert a new entry into a static graph");
3034 Cvals[c_status[Cij]] += minusOmegaDval * Aval * Ivals[j];
3040 #ifdef HAVE_TPETRA_MMM_TIMINGS 3042 MM = rcp(
new TimeMonitor (*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"Jacobi Reuse ESFC"))));
3051 template<
class Scalar,
3053 class GlobalOrdinal,
3055 void import_and_extract_views(
3060 bool userAssertsThereAreNoRemotes,
3061 const std::string& label,
3062 const Teuchos::RCP<Teuchos::ParameterList>& params)
3064 using Teuchos::Array;
3065 using Teuchos::ArrayView;
3068 using Teuchos::null;
3071 typedef LocalOrdinal LO;
3072 typedef GlobalOrdinal GO;
3079 #ifdef HAVE_TPETRA_MMM_TIMINGS 3080 std::string prefix_mmm = std::string(
"TpetraExt ") + label + std::string(
": ");
3081 using Teuchos::TimeMonitor;
3082 RCP<Teuchos::TimeMonitor> MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Alloc"))));
3090 Aview.deleteContents();
3094 Aview.
rowMap = targetMap;
3100 if (userAssertsThereAreNoRemotes)
3103 RCP<const import_type> importer;
3104 if (params != null && params->isParameter(
"importer")) {
3105 importer = params->get<RCP<const import_type> >(
"importer");
3108 #ifdef HAVE_TPETRA_MMM_TIMINGS 3109 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X RemoteMap"))));
3114 RCP<const map_type> rowMap = A.
getRowMap(), remoteRowMap;
3115 size_t numRemote = 0;
3117 if (!prototypeImporter.is_null() &&
3118 prototypeImporter->getSourceMap()->isSameAs(*rowMap) &&
3119 prototypeImporter->getTargetMap()->isSameAs(*targetMap)) {
3121 ArrayView<const LO> remoteLIDs = prototypeImporter->getRemoteLIDs();
3122 numRemote = prototypeImporter->getNumRemoteIDs();
3124 Array<GO> remoteRows(numRemote);
3125 for (
size_t i = 0; i < numRemote; i++)
3126 remoteRows[i] = targetMap->getGlobalElement(remoteLIDs[i]);
3128 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3129 rowMap->getIndexBase(), rowMap->getComm()));
3132 }
else if (prototypeImporter.is_null()) {
3134 ArrayView<const GO> rows = targetMap->getNodeElementList();
3135 size_t numRows = targetMap->getNodeNumElements();
3137 Array<GO> remoteRows(numRows);
3138 for(
size_t i = 0; i < numRows; ++i) {
3139 const LO mlid = rowMap->getLocalElement(rows[i]);
3141 if (mlid == Teuchos::OrdinalTraits<LO>::invalid())
3142 remoteRows[numRemote++] = rows[i];
3144 remoteRows.resize(numRemote);
3145 remoteRowMap = rcp(
new map_type(Teuchos::OrdinalTraits<global_size_t>::invalid(), remoteRows(),
3146 rowMap->getIndexBase(), rowMap->getComm()));
3154 const int numProcs = rowMap->getComm()->getSize();
3156 TEUCHOS_TEST_FOR_EXCEPTION(numRemote > 0, std::runtime_error,
3157 "MatrixMatrix::import_and_extract_views ERROR, numProcs < 2 but attempting to import remote matrix rows.");
3166 #ifdef HAVE_TPETRA_MMM_TIMINGS 3167 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Collective-0"))));
3171 Teuchos::reduceAll(*(rowMap->getComm()), Teuchos::REDUCE_MAX, (
global_size_t)numRemote, Teuchos::outArg(globalMaxNumRemote) );
3173 if (globalMaxNumRemote > 0) {
3174 #ifdef HAVE_TPETRA_MMM_TIMINGS 3175 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-2"))));
3179 importer = prototypeImporter->createRemoteOnlyImport(remoteRowMap);
3181 importer = rcp(
new import_type(rowMap, remoteRowMap));
3183 throw std::runtime_error(
"prototypeImporter->SourceMap() does not match A.getRowMap()!");
3187 params->set(
"importer", importer);
3190 if (importer != null) {
3191 #ifdef HAVE_TPETRA_MMM_TIMINGS 3192 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-3"))));
3196 Teuchos::ParameterList labelList;
3197 labelList.set(
"Timer Label", label);
3199 if(!params.is_null())
3200 labelList.set(
"compute global constants", params->get(
"compute global constants",
false));
3201 Aview.
importMatrix = Tpetra::importAndFillCompleteCrsMatrix<crs_matrix_type>(rcpFromRef(A), *importer,
3202 A.
getDomainMap(), importer->getTargetMap(), rcpFromRef(labelList));
3208 sprintf(str,
"import_matrix.%d.dat",count);
3213 #ifdef HAVE_TPETRA_MMM_STATISTICS 3214 printMultiplicationStatistics(importer, label + std::string(
" I&X MMM"));
3218 #ifdef HAVE_TPETRA_MMM_TIMINGS 3219 MM = rcp(
new TimeMonitor(*TimeMonitor::getNewTimer(prefix_mmm + std::string(
"MMM I&X Import-4"))));
3233 template<
class Scalar,
class LocalOrdinal,
class GlobalOrdinal,
class Node,
class LocalOrdinalViewType>
3237 const LocalOrdinalViewType & Acol2Brow,
3238 const LocalOrdinalViewType & Acol2Irow,
3239 const LocalOrdinalViewType & Bcol2Ccol,
3240 const LocalOrdinalViewType & Icol2Ccol,
3241 const size_t mergedNodeNumCols) {
3245 typedef typename KCRS::StaticCrsGraphType graph_t;
3246 typedef typename graph_t::row_map_type::non_const_type lno_view_t;
3247 typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
3248 typedef typename KCRS::values_type::non_const_type scalar_view_t;
3250 const KCRS & Ak = Aview.
origMatrix->getLocalMatrix();
3251 const KCRS & Bk = Bview.
origMatrix->getLocalMatrix();
3258 RCP<const KCRS> Ik_;
3260 const KCRS * Ik = Bview.
importMatrix.is_null() ? 0 : &*Ik_;
3262 if(Ik!=0) Iks = *Ik;
3263 size_t merge_numrows = Ak.numCols();
3264 lno_view_t Mrowptr(
"Mrowptr", merge_numrows + 1);
3266 const LocalOrdinal LO_INVALID =Teuchos::OrdinalTraits<LocalOrdinal>::invalid();
3269 typedef typename Node::execution_space execution_space;
3270 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3271 Kokkos::parallel_scan (
"Tpetra_MatrixMatrix_merge_matrices_buildRowptr", range_type (0, merge_numrows),
3272 KOKKOS_LAMBDA(
const size_t i,
size_t& update,
const bool final) {
3273 if(
final) Mrowptr(i) = update;
3276 if(Acol2Brow(i)!=LO_INVALID)
3277 ct = Bk.graph.row_map(Acol2Brow(i)+1) - Bk.graph.row_map(Acol2Brow(i));
3279 ct = Iks.graph.row_map(Acol2Irow(i)+1) - Iks.graph.row_map(Acol2Irow(i));
3282 if(
final && i+1==merge_numrows)
3283 Mrowptr(i+1)=update;
3287 size_t merge_nnz = ::Tpetra::Details::getEntryOnHost(Mrowptr,merge_numrows);
3288 lno_nnz_view_t Mcolind(
"Mcolind",merge_nnz);
3289 scalar_view_t Mvalues(
"Mvals",merge_nnz);
3292 typedef Kokkos::RangePolicy<execution_space, size_t> range_type;
3293 Kokkos::parallel_for (
"Tpetra_MatrixMatrix_merg_matrices_buildColindValues", range_type (0, merge_numrows),KOKKOS_LAMBDA(
const size_t i) {
3294 if(Acol2Brow(i)!=LO_INVALID) {
3295 size_t row = Acol2Brow(i);
3296 size_t start = Bk.graph.row_map(row);
3297 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3298 Mvalues(j) = Bk.values(j-Mrowptr(i)+start);
3299 Mcolind(j) = Bcol2Ccol(Bk.graph.entries(j-Mrowptr(i)+start));
3303 size_t row = Acol2Irow(i);
3304 size_t start = Iks.graph.row_map(row);
3305 for(
size_t j= Mrowptr(i); j<Mrowptr(i+1); j++) {
3306 Mvalues(j) = Iks.values(j-Mrowptr(i)+start);
3307 Mcolind(j) = Icol2Ccol(Iks.graph.entries(j-Mrowptr(i)+start));
3312 KCRS newmat(
"CrsMatrix",merge_numrows,mergedNodeNumCols,merge_nnz,Mvalues,Mrowptr,Mcolind);
3335 #define TPETRA_MATRIXMATRIX_INSTANT(SCALAR,LO,GO,NODE) \ 3337 void MatrixMatrix::Multiply( \ 3338 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 3340 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 3342 CrsMatrix< SCALAR , LO , GO , NODE >& C, \ 3343 bool call_FillComplete_on_result, \ 3344 const std::string & label, \ 3345 const Teuchos::RCP<Teuchos::ParameterList>& params); \ 3348 void MatrixMatrix::Jacobi( \ 3350 const Vector< SCALAR, LO, GO, NODE > & Dinv, \ 3351 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 3352 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 3353 CrsMatrix< SCALAR , LO , GO , NODE >& C, \ 3354 bool call_FillComplete_on_result, \ 3355 const std::string & label, \ 3356 const Teuchos::RCP<Teuchos::ParameterList>& params); \ 3359 void MatrixMatrix::Add( \ 3360 const CrsMatrix< SCALAR , LO , GO , NODE >& A, \ 3363 const CrsMatrix< SCALAR , LO , GO , NODE >& B, \ 3366 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > C); \ 3369 void MatrixMatrix::Add( \ 3370 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \ 3373 CrsMatrix<SCALAR, LO, GO, NODE>& B, \ 3377 Teuchos::RCP<CrsMatrix< SCALAR , LO , GO , NODE > > \ 3378 MatrixMatrix::add<SCALAR, LO, GO, NODE> \ 3379 (const SCALAR & alpha, \ 3380 const bool transposeA, \ 3381 const CrsMatrix<SCALAR, LO, GO, NODE>& A, \ 3382 const SCALAR & beta, \ 3383 const bool transposeB, \ 3384 const CrsMatrix<SCALAR, LO, GO, NODE>& B, \ 3385 const Teuchos::RCP<const Map<LO, GO, NODE> >& domainMap, \ 3386 const Teuchos::RCP<const Map<LO, GO, NODE> >& rangeMap, \ 3387 const Teuchos::RCP<Teuchos::ParameterList>& params); \ 3389 template struct MatrixMatrix::AddDetails::AddKernels<SCALAR, LO, GO, NODE>; 3393 #endif // TPETRA_MATRIXMATRIX_DEF_HPP Teuchos::RCP< const map_type > importColMap
Colmap garnered as a result of the import.
Teuchos::RCP< const map_type > getDomainMap() const override
The domain Map of this matrix.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
static void writeSparseFile(const std::string &filename, const Teuchos::RCP< const sparse_matrix_type > &pMatrix, const std::string &matrixName, const std::string &matrixDescription, const bool debug=false)
Print the sparse matrix in Matrix Market format, with comments.
void Jacobi(Scalar omega, const Vector< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Dinv, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Construct and (optionally) redistribute the explicitly stored transpose of a CrsMatrix.
bool isStaticGraph() const
Indicates that the graph is static, so that new entries cannot be added to this matrix.
Teuchos::RCP< const map_type > domainMap
Domain map for original matrix.
A distributed dense vector.
KokkosSparse::CrsMatrix< impl_scalar_type, LocalOrdinal, execution_space, void, typename local_graph_type::size_type > local_matrix_type
The specialization of Kokkos::CrsMatrix that represents the part of the sparse matrix on each MPI pro...
Teuchos::RCP< const map_type > getRowMap() const override
The Map that describes the row distribution in this matrix.
Teuchos::RCP< const map_type > colMap
Col map for the original version of the matrix.
size_t getNodeNumRows() const override
The number of matrix rows owned by the calling process.
Classes::Transfer< LocalOrdinal, GlobalOrdinal, Node > Transfer
Alias for Tpetra::Classes::Details::Transfer.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
ProfileType getProfileType() const
Returns true if the matrix was allocated with static data structures.
bool isFillActive() const
Whether the matrix is not fill complete.
LocalOrdinal sumIntoGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals, const bool atomic=useAtomicUpdatesByDefault)
Sum into one or more sparse matrix entries, using global indices.
Teuchos::RCP< const RowGraph< LocalOrdinal, GlobalOrdinal, Node > > getGraph() const override
This matrix's graph, as a RowGraph.
Teuchos::RCP< const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > origMatrix
The original matrix.
size_t getNumReceives() const
The number of processes from which we will receive data.
size_t getNumSends() const
The number of processes to which we will send data.
Sparse matrix that presents a row-oriented interface that lets users read or modify entries...
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
The communicator over which the matrix is distributed.
size_t global_size_t
Global size_t object.
Teuchos::RCP< const map_type > getColMap() const override
The Map that describes the column distribution in this matrix.
void replaceColMap(const Teuchos::RCP< const map_type > &newColMap)
Replace the matrix's column Map with the given Map.
bool isFillComplete() const override
Whether the matrix is fill complete.
void Add(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, Scalar scalarA, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, Scalar scalarB)
void keyValueMerge(KeyInputIterType keyBeg1, KeyInputIterType keyEnd1, ValueInputIterType valBeg1, ValueInputIterType valEnd1, KeyInputIterType keyBeg2, KeyInputIterType keyEnd2, ValueInputIterType valBeg2, ValueInputIterType valEnd2, KeyOutputIterType keyOut, ValueOutputIterType valOut, BinaryFunction f)
Merge two sorted (by keys) sequences of unique (key,value) pairs by combining pairs with equal keys...
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > importMatrix
The imported matrix.
Declare and define the function Tpetra::Details::computeOffsetsFromCounts, an implementation detail o...
Sets up and executes a communication plan for a Tpetra DistObject.
size_t getNodeMaxNumRowEntries() const override
Maximum number of entries in any row of the matrix, on this process.
bool isLocallyIndexed() const override
Whether the matrix is locally indexed on the calling process.
Utility functions for packing and unpacking sparse matrix entries.
void fillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Tell the matrix that you are done changing its structure or values, and that you are ready to do comp...
void setAllValues(const typename local_matrix_type::row_map_type &ptr, const typename local_graph_type::entries_type::non_const_type &ind, const typename local_matrix_type::values_type &val)
Set the local matrix using three (compressed sparse row) arrays.
void Multiply(const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, bool transposeB, CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &C, bool call_FillComplete_on_result=true, const std::string &label=std::string(), const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Sparse matrix-matrix multiply.
void scale(const Scalar &alpha)
Scale the matrix's values: this := alpha*this.
void insertGlobalValues(const GlobalOrdinal globalRow, const Teuchos::ArrayView< const GlobalOrdinal > &cols, const Teuchos::ArrayView< const Scalar > &vals)
Insert one or more entries into the matrix, using global column indices.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
void expertStaticFillComplete(const Teuchos::RCP< const map_type > &domainMap, const Teuchos::RCP< const map_type > &rangeMap, const Teuchos::RCP< const import_type > &importer=Teuchos::null, const Teuchos::RCP< const export_type > &exporter=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Perform a fillComplete on a matrix that already has data.
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects...
Stand-alone utility functions and macros.
Teuchos::RCP< const map_type > rowMap
Desired row map for "imported" version of the matrix.
void getLastDoStatistics(size_t &bytes_sent, size_t &bytes_recvd) const
Information on the last call to do/doReverse.
Struct that holds views of the contents of a CrsMatrix.
global_size_t getGlobalNumRows() const override
Number of global elements in the row map of this matrix.
A parallel distribution of indices over processes.
Matrix Market file readers and writers for Tpetra objects.
Teuchos::RCP< CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > add(const Scalar &alpha, const bool transposeA, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &A, const Scalar &beta, const bool transposeB, const CrsMatrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &B, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap=Teuchos::null, const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &rangeMap=Teuchos::null, const Teuchos::RCP< Teuchos::ParameterList > ¶ms=Teuchos::null)
Compute the sparse matrix sum C = scalarA * Op(A) + scalarB * Op(B), where Op(X) is either X or its t...
Teuchos::RCP< const map_type > getRangeMap() const override
The range Map of this matrix.
Declaration and definition of Tpetra::Details::getEntryOnHost.
Teuchos::RCP< const map_type > origRowMap
Original row map of matrix.
local_matrix_type getLocalMatrix() const
The local sparse matrix.