42 #ifndef BELOS_BLOCK_CG_SOLMGR_HPP
43 #define BELOS_BLOCK_CG_SOLMGR_HPP
66 #include "Teuchos_BLAS.hpp"
67 #include "Teuchos_LAPACK.hpp"
68 #ifdef BELOS_TEUCHOS_TIME_MONITOR
69 # include "Teuchos_TimeMonitor.hpp"
71 #if defined(HAVE_TEUCHOSCORE_CXX11)
72 # include <type_traits>
73 #endif // defined(HAVE_TEUCHOSCORE_CXX11)
118 template<
class ScalarType,
class MV,
class OP,
119 const bool lapackSupportsScalarType =
124 static const bool requiresLapack =
134 const Teuchos::RCP<Teuchos::ParameterList>& pl) :
144 template<
class ScalarType,
class MV,
class OP>
171 typedef Teuchos::ScalarTraits<ScalarType> SCT;
172 typedef typename Teuchos::ScalarTraits<ScalarType>::magnitudeType MagnitudeType;
173 typedef Teuchos::ScalarTraits<MagnitudeType> MT;
224 const Teuchos::RCP<Teuchos::ParameterList> &pl );
230 Teuchos::RCP<SolverManager<ScalarType, MV, OP> >
clone ()
const override {
244 Teuchos::RCP<const Teuchos::ParameterList> getValidParameters()
const override;
255 Teuchos::Array<Teuchos::RCP<Teuchos::Time> >
getTimers()
const {
256 return Teuchos::tuple(timerSolve_);
286 void setParameters(
const Teuchos::RCP<Teuchos::ParameterList> ¶ms )
override;
327 std::string description()
const override;
334 Teuchos::RCP<LinearProblem<ScalarType,MV,OP> > problem_;
337 Teuchos::RCP<OutputManager<ScalarType> > printer_;
339 Teuchos::RCP<std::ostream> outputStream_;
345 Teuchos::RCP<StatusTest<ScalarType,MV,OP> > sTest_;
348 Teuchos::RCP<StatusTestMaxIters<ScalarType,MV,OP> > maxIterTest_;
351 Teuchos::RCP<StatusTestGenResNorm<ScalarType,MV,OP> > convTest_;
354 Teuchos::RCP<StatusTestOutput<ScalarType,MV,OP> > outputTest_;
357 Teuchos::RCP<MatOrthoManager<ScalarType,MV,OP> > ortho_;
360 Teuchos::RCP<Teuchos::ParameterList> params_;
365 static constexpr
int maxIters_default_ = 1000;
366 static constexpr
bool adaptiveBlockSize_default_ =
true;
367 static constexpr
bool showMaxResNormOnly_default_ =
false;
368 static constexpr
bool useSingleReduction_default_ =
false;
369 static constexpr
int blockSize_default_ = 1;
372 static constexpr
int outputFreq_default_ = -1;
373 static constexpr
const char * resNorm_default_ =
"TwoNorm";
374 static constexpr
bool foldConvergenceDetectionIntoAllreduce_default_ =
false;
375 static constexpr
const char * resScale_default_ =
"Norm of Initial Residual";
376 static constexpr
const char * label_default_ =
"Belos";
377 static constexpr
const char * orthoType_default_ =
"DGKS";
378 static constexpr std::ostream * outputStream_default_ = &std::cout;
385 MagnitudeType convtol_;
388 MagnitudeType orthoKappa_;
395 MagnitudeType achievedTol_;
404 int blockSize_, verbosity_, outputStyle_, outputFreq_;
405 bool adaptiveBlockSize_, showMaxResNormOnly_, useSingleReduction_;
406 std::string orthoType_, resScale_;
407 bool foldConvergenceDetectionIntoAllreduce_;
413 Teuchos::RCP<Teuchos::Time> timerSolve_;
421 template<
class ScalarType,
class MV,
class OP>
423 outputStream_(Teuchos::rcp(outputStream_default_,false)),
426 achievedTol_(Teuchos::ScalarTraits<MagnitudeType>::zero()),
427 maxIters_(maxIters_default_),
429 blockSize_(blockSize_default_),
430 verbosity_(verbosity_default_),
431 outputStyle_(outputStyle_default_),
432 outputFreq_(outputFreq_default_),
433 adaptiveBlockSize_(adaptiveBlockSize_default_),
434 showMaxResNormOnly_(showMaxResNormOnly_default_),
435 useSingleReduction_(useSingleReduction_default_),
436 orthoType_(orthoType_default_),
437 resScale_(resScale_default_),
438 foldConvergenceDetectionIntoAllreduce_(foldConvergenceDetectionIntoAllreduce_default_),
439 label_(label_default_),
445 template<
class ScalarType,
class MV,
class OP>
448 const Teuchos::RCP<Teuchos::ParameterList> &pl) :
450 outputStream_(Teuchos::rcp(outputStream_default_,false)),
453 achievedTol_(Teuchos::ScalarTraits<MagnitudeType>::zero()),
454 maxIters_(maxIters_default_),
456 blockSize_(blockSize_default_),
457 verbosity_(verbosity_default_),
458 outputStyle_(outputStyle_default_),
459 outputFreq_(outputFreq_default_),
460 adaptiveBlockSize_(adaptiveBlockSize_default_),
461 showMaxResNormOnly_(showMaxResNormOnly_default_),
462 useSingleReduction_(useSingleReduction_default_),
463 orthoType_(orthoType_default_),
464 resScale_(resScale_default_),
465 foldConvergenceDetectionIntoAllreduce_(foldConvergenceDetectionIntoAllreduce_default_),
466 label_(label_default_),
469 TEUCHOS_TEST_FOR_EXCEPTION(problem_.is_null(), std::invalid_argument,
470 "BlockCGSolMgr's constructor requires a nonnull LinearProblem instance.");
475 if (! pl.is_null()) {
480 template<
class ScalarType,
class MV,
class OP>
483 setParameters (
const Teuchos::RCP<Teuchos::ParameterList> ¶ms)
486 if (params_ == Teuchos::null) {
487 params_ = Teuchos::rcp(
new Teuchos::ParameterList(*getValidParameters()) );
490 params->validateParameters(*getValidParameters());
494 if (params->isParameter(
"Maximum Iterations")) {
495 maxIters_ = params->get(
"Maximum Iterations",maxIters_default_);
498 params_->set(
"Maximum Iterations", maxIters_);
499 if (maxIterTest_!=Teuchos::null)
500 maxIterTest_->setMaxIters( maxIters_ );
504 if (params->isParameter(
"Block Size")) {
505 blockSize_ = params->get(
"Block Size",blockSize_default_);
506 TEUCHOS_TEST_FOR_EXCEPTION(blockSize_ <= 0, std::invalid_argument,
507 "Belos::BlockCGSolMgr: \"Block Size\" must be strictly positive.");
510 params_->set(
"Block Size", blockSize_);
514 if (params->isParameter(
"Adaptive Block Size")) {
515 adaptiveBlockSize_ = params->get(
"Adaptive Block Size",adaptiveBlockSize_default_);
518 params_->set(
"Adaptive Block Size", adaptiveBlockSize_);
522 if (params->isParameter(
"Use Single Reduction")) {
523 useSingleReduction_ = params->get(
"Use Single Reduction", useSingleReduction_default_);
524 if (useSingleReduction_)
525 foldConvergenceDetectionIntoAllreduce_ = params->get(
"Fold Convergence Detection Into Allreduce",
526 foldConvergenceDetectionIntoAllreduce_default_);
530 if (params->isParameter(
"Timer Label")) {
531 std::string tempLabel = params->get(
"Timer Label", label_default_);
534 if (tempLabel != label_) {
536 params_->set(
"Timer Label", label_);
537 std::string solveLabel = label_ +
": BlockCGSolMgr total solve time";
538 #ifdef BELOS_TEUCHOS_TIME_MONITOR
539 timerSolve_ = Teuchos::TimeMonitor::getNewCounter(solveLabel);
541 if (ortho_ != Teuchos::null) {
542 ortho_->setLabel( label_ );
548 if (params->isParameter(
"Orthogonalization")) {
549 std::string tempOrthoType = params->get(
"Orthogonalization",orthoType_default_);
550 TEUCHOS_TEST_FOR_EXCEPTION( tempOrthoType !=
"DGKS" && tempOrthoType !=
"ICGS" && tempOrthoType !=
"IMGS",
551 std::invalid_argument,
552 "Belos::BlockCGSolMgr: \"Orthogonalization\" must be either \"DGKS\", \"ICGS\", or \"IMGS\".");
553 if (tempOrthoType != orthoType_) {
554 orthoType_ = tempOrthoType;
555 params_->set(
"Orthogonalization", orthoType_);
557 if (orthoType_==
"DGKS") {
558 if (orthoKappa_ <= 0) {
563 Teuchos::rcp_dynamic_cast<DGKSOrthoManager<ScalarType,MV,OP> >(ortho_)->setDepTol( orthoKappa_ );
566 else if (orthoType_==
"ICGS") {
569 else if (orthoType_==
"IMGS") {
576 if (params->isParameter(
"Orthogonalization Constant")) {
577 if (params->isType<MagnitudeType> (
"Orthogonalization Constant")) {
578 orthoKappa_ = params->get (
"Orthogonalization Constant",
582 orthoKappa_ = params->get (
"Orthogonalization Constant",
587 params_->set(
"Orthogonalization Constant",orthoKappa_);
588 if (orthoType_==
"DGKS") {
589 if (orthoKappa_ > 0 && ortho_ != Teuchos::null) {
590 Teuchos::rcp_dynamic_cast<DGKSOrthoManager<ScalarType,MV,OP> >(ortho_)->setDepTol( orthoKappa_ );
596 if (params->isParameter(
"Verbosity")) {
597 if (Teuchos::isParameterType<int>(*params,
"Verbosity")) {
598 verbosity_ = params->get(
"Verbosity", verbosity_default_);
600 verbosity_ = (int)Teuchos::getParameter<Belos::MsgType>(*params,
"Verbosity");
604 params_->set(
"Verbosity", verbosity_);
605 if (printer_ != Teuchos::null)
606 printer_->setVerbosity(verbosity_);
610 if (params->isParameter(
"Output Style")) {
611 if (Teuchos::isParameterType<int>(*params,
"Output Style")) {
612 outputStyle_ = params->get(
"Output Style", outputStyle_default_);
614 outputStyle_ = (int)Teuchos::getParameter<Belos::OutputType>(*params,
"Output Style");
618 params_->set(
"Output Style", outputStyle_);
619 outputTest_ = Teuchos::null;
623 if (params->isParameter(
"Output Stream")) {
624 outputStream_ = Teuchos::getParameter<Teuchos::RCP<std::ostream> >(*params,
"Output Stream");
627 params_->set(
"Output Stream", outputStream_);
628 if (printer_ != Teuchos::null)
629 printer_->setOStream( outputStream_ );
634 if (params->isParameter(
"Output Frequency")) {
635 outputFreq_ = params->get(
"Output Frequency", outputFreq_default_);
639 params_->set(
"Output Frequency", outputFreq_);
640 if (outputTest_ != Teuchos::null)
641 outputTest_->setOutputFrequency( outputFreq_ );
645 if (printer_ == Teuchos::null) {
654 if (params->isParameter(
"Convergence Tolerance")) {
655 if (params->isType<MagnitudeType> (
"Convergence Tolerance")) {
656 convtol_ = params->get (
"Convergence Tolerance",
664 params_->set(
"Convergence Tolerance", convtol_);
665 if (convTest_ != Teuchos::null)
669 if (params->isParameter(
"Show Maximum Residual Norm Only")) {
670 showMaxResNormOnly_ = Teuchos::getParameter<bool>(*params,
"Show Maximum Residual Norm Only");
673 params_->set(
"Show Maximum Residual Norm Only", showMaxResNormOnly_);
674 if (convTest_ != Teuchos::null)
675 convTest_->setShowMaxResNormOnly( showMaxResNormOnly_ );
679 bool newResTest =
false;
681 std::string tempResScale = resScale_;
682 if (params->isParameter (
"Implicit Residual Scaling")) {
683 tempResScale = params->get<std::string> (
"Implicit Residual Scaling");
687 if (resScale_ != tempResScale) {
690 resScale_ = tempResScale;
693 params_->set (
"Implicit Residual Scaling", resScale_);
695 if (! convTest_.is_null ()) {
698 if (params->isParameter(
"Residual Norm")) {
699 if (params->isType<std::string> (
"Residual Norm")) {
703 convTest_->defineResForm(StatusTestResNorm_t::Implicit, normType);
706 catch (std::exception& e) {
717 if (maxIterTest_ == Teuchos::null)
721 if (convTest_.is_null () || newResTest) {
724 if (params->isParameter(
"Residual Norm")) {
725 if (params->isType<std::string> (
"Residual Norm")) {
730 convTest_ = rcp (
new StatusTestResNorm_t (convtol_, 1, showMaxResNormOnly_));
731 convTest_->defineResForm(StatusTestResNorm_t::Implicit, normType);
735 if (sTest_.is_null () || newResTest)
736 sTest_ = Teuchos::rcp(
new StatusTestCombo_t( StatusTestCombo_t::OR, maxIterTest_, convTest_ ) );
738 if (outputTest_.is_null () || newResTest) {
746 std::string solverDesc =
" Block CG ";
747 outputTest_->setSolverDesc( solverDesc );
752 if (ortho_ == Teuchos::null) {
753 params_->set(
"Orthogonalization", orthoType_);
754 if (orthoType_==
"DGKS") {
755 if (orthoKappa_ <= 0) {
760 Teuchos::rcp_dynamic_cast<DGKSOrthoManager<ScalarType,MV,OP> >(ortho_)->setDepTol( orthoKappa_ );
763 else if (orthoType_==
"ICGS") {
766 else if (orthoType_==
"IMGS") {
770 TEUCHOS_TEST_FOR_EXCEPTION(orthoType_!=
"ICGS"&&orthoType_!=
"DGKS"&&orthoType_!=
"IMGS",std::logic_error,
771 "Belos::BlockCGSolMgr(): Invalid orthogonalization type.");
776 if (timerSolve_ == Teuchos::null) {
777 std::string solveLabel = label_ +
": BlockCGSolMgr total solve time";
778 #ifdef BELOS_TEUCHOS_TIME_MONITOR
779 timerSolve_ = Teuchos::TimeMonitor::getNewCounter(solveLabel);
788 template<
class ScalarType,
class MV,
class OP>
789 Teuchos::RCP<const Teuchos::ParameterList>
792 static Teuchos::RCP<const Teuchos::ParameterList> validPL;
795 if(is_null(validPL)) {
796 Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
798 "The relative residual tolerance that needs to be achieved by the\n"
799 "iterative solver in order for the linear system to be declared converged.");
800 pl->set(
"Maximum Iterations",
static_cast<int>(maxIters_default_),
801 "The maximum number of block iterations allowed for each\n"
802 "set of RHS solved.");
803 pl->set(
"Block Size",
static_cast<int>(blockSize_default_),
804 "The number of vectors in each block.");
805 pl->set(
"Adaptive Block Size",
static_cast<bool>(adaptiveBlockSize_default_),
806 "Whether the solver manager should adapt to the block size\n"
807 "based on the number of RHS to solve.");
808 pl->set(
"Verbosity",
static_cast<int>(verbosity_default_),
809 "What type(s) of solver information should be outputted\n"
810 "to the output stream.");
811 pl->set(
"Output Style",
static_cast<int>(outputStyle_default_),
812 "What style is used for the solver information outputted\n"
813 "to the output stream.");
814 pl->set(
"Output Frequency",
static_cast<int>(outputFreq_default_),
815 "How often convergence information should be outputted\n"
816 "to the output stream.");
817 pl->set(
"Output Stream", Teuchos::rcp(outputStream_default_,
false),
818 "A reference-counted pointer to the output stream where all\n"
819 "solver output is sent.");
820 pl->set(
"Show Maximum Residual Norm Only",
static_cast<bool>(showMaxResNormOnly_default_),
821 "When convergence information is printed, only show the maximum\n"
822 "relative residual norm when the block size is greater than one.");
823 pl->set(
"Use Single Reduction",
static_cast<bool>(useSingleReduction_default_),
824 "Use single reduction iteration when the block size is one.");
825 pl->set(
"Implicit Residual Scaling", resScale_default_,
826 "The type of scaling used in the residual convergence test.");
827 pl->set(
"Timer Label",
static_cast<const char *
>(label_default_),
828 "The string to use as a prefix for the timer labels.");
829 pl->set(
"Orthogonalization",
static_cast<const char *
>(orthoType_default_),
830 "The type of orthogonalization to use: DGKS, ICGS, or IMGS.");
832 "The constant used by DGKS orthogonalization to determine\n"
833 "whether another step of classical Gram-Schmidt is necessary.");
834 pl->set(
"Residual Norm",
static_cast<const char *
>(resNorm_default_),
835 "Norm used for the convergence check on the residual.");
836 pl->set(
"Fold Convergence Detection Into Allreduce",
static_cast<bool>(foldConvergenceDetectionIntoAllreduce_default_),
837 "Merge the allreduce for convergence detection with the one for CG.\n"
838 "This saves one all-reduce, but incurs more computation.");
846 template<
class ScalarType,
class MV,
class OP>
850 using Teuchos::rcp_const_cast;
851 using Teuchos::rcp_dynamic_cast;
858 setParameters(Teuchos::parameterList(*getValidParameters()));
861 Teuchos::BLAS<int,ScalarType> blas;
862 Teuchos::LAPACK<int,ScalarType> lapack;
864 TEUCHOS_TEST_FOR_EXCEPTION( !problem_->isProblemSet(),
866 "Belos::BlockCGSolMgr::solve(): Linear problem is not ready, setProblem() "
867 "has not been called.");
871 int numRHS2Solve = MVT::GetNumberVecs( *(problem_->getRHS()) );
872 int numCurrRHS = ( numRHS2Solve < blockSize_) ? numRHS2Solve : blockSize_;
874 std::vector<int> currIdx, currIdx2;
879 if ( adaptiveBlockSize_ ) {
880 blockSize_ = numCurrRHS;
881 currIdx.resize( numCurrRHS );
882 currIdx2.resize( numCurrRHS );
883 for (
int i=0; i<numCurrRHS; ++i)
884 { currIdx[i] = startPtr+i; currIdx2[i]=i; }
888 currIdx.resize( blockSize_ );
889 currIdx2.resize( blockSize_ );
890 for (
int i=0; i<numCurrRHS; ++i)
891 { currIdx[i] = startPtr+i; currIdx2[i]=i; }
892 for (
int i=numCurrRHS; i<blockSize_; ++i)
893 { currIdx[i] = -1; currIdx2[i] = i; }
897 problem_->setLSIndex( currIdx );
901 Teuchos::ParameterList plist;
902 plist.set(
"Block Size",blockSize_);
905 outputTest_->reset();
909 bool isConverged =
true;
914 RCP<CGIteration<ScalarType,MV,OP> > block_cg_iter;
915 if (blockSize_ == 1) {
919 if (useSingleReduction_) {
920 plist.set(
"Fold Convergence Detection Into Allreduce",
921 foldConvergenceDetectionIntoAllreduce_);
924 outputTest_, convTest_, plist));
929 outputTest_, plist));
940 #ifdef BELOS_TEUCHOS_TIME_MONITOR
941 Teuchos::TimeMonitor slvtimer(*timerSolve_);
944 while ( numRHS2Solve > 0 ) {
947 std::vector<int> convRHSIdx;
948 std::vector<int> currRHSIdx( currIdx );
949 currRHSIdx.resize(numCurrRHS);
952 block_cg_iter->resetNumIters();
955 outputTest_->resetNumCalls();
958 RCP<MV> R_0 = MVT::CloneViewNonConst( *(rcp_const_cast<MV>(problem_->getInitResVec())), currIdx );
963 block_cg_iter->initializeCG(newstate);
969 block_cg_iter->iterate();
973 if (convTest_->getStatus() ==
Passed) {
978 std::vector<int> convIdx =
979 rcp_dynamic_cast<conv_test_type>(convTest_)->
convIndices();
984 if (convIdx.size() == currRHSIdx.size())
989 problem_->setCurrLS();
994 std::vector<int> unconvIdx(currRHSIdx.size());
995 for (
unsigned int i=0; i<currRHSIdx.size(); ++i) {
997 for (
unsigned int j=0; j<convIdx.size(); ++j) {
998 if (currRHSIdx[i] == convIdx[j]) {
1004 currIdx2[have] = currIdx2[i];
1005 currRHSIdx[have++] = currRHSIdx[i];
1010 currRHSIdx.resize(have);
1011 currIdx2.resize(have);
1014 problem_->setLSIndex( currRHSIdx );
1017 std::vector<MagnitudeType> norms;
1018 R_0 = MVT::CloneCopy( *(block_cg_iter->getNativeResiduals(&norms)),currIdx2 );
1019 for (
int i=0; i<have; ++i) { currIdx2[i] = i; }
1022 block_cg_iter->setBlockSize( have );
1027 block_cg_iter->initializeCG(defstate);
1033 else if (maxIterTest_->getStatus() ==
Passed) {
1034 isConverged =
false;
1042 TEUCHOS_TEST_FOR_EXCEPTION(
true,std::logic_error,
1043 "Belos::BlockCGSolMgr::solve(): Neither the convergence test nor "
1044 "the maximum iteration count test passed. Please report this bug "
1045 "to the Belos developers.");
1048 catch (
const std::exception &e) {
1049 std::ostream& err = printer_->stream (
Errors);
1050 err <<
"Error! Caught std::exception in CGIteration::iterate() at "
1051 <<
"iteration " << block_cg_iter->getNumIters() << std::endl
1052 << e.what() << std::endl;
1059 problem_->setCurrLS();
1062 startPtr += numCurrRHS;
1063 numRHS2Solve -= numCurrRHS;
1064 if ( numRHS2Solve > 0 ) {
1065 numCurrRHS = ( numRHS2Solve < blockSize_) ? numRHS2Solve : blockSize_;
1067 if ( adaptiveBlockSize_ ) {
1068 blockSize_ = numCurrRHS;
1069 currIdx.resize( numCurrRHS );
1070 currIdx2.resize( numCurrRHS );
1071 for (
int i=0; i<numCurrRHS; ++i)
1072 { currIdx[i] = startPtr+i; currIdx2[i] = i; }
1075 currIdx.resize( blockSize_ );
1076 currIdx2.resize( blockSize_ );
1077 for (
int i=0; i<numCurrRHS; ++i)
1078 { currIdx[i] = startPtr+i; currIdx2[i] = i; }
1079 for (
int i=numCurrRHS; i<blockSize_; ++i)
1080 { currIdx[i] = -1; currIdx2[i] = i; }
1083 problem_->setLSIndex( currIdx );
1086 block_cg_iter->setBlockSize( blockSize_ );
1089 currIdx.resize( numRHS2Solve );
1100 #ifdef BELOS_TEUCHOS_TIME_MONITOR
1106 Teuchos::TimeMonitor::summarize( printer_->stream(
TimingDetails) );
1111 numIters_ = maxIterTest_->getNumIters();
1117 const std::vector<MagnitudeType>* pTestValues =
1118 rcp_dynamic_cast<conv_test_type>(convTest_)->
getTestValue();
1120 TEUCHOS_TEST_FOR_EXCEPTION(pTestValues == NULL, std::logic_error,
1121 "Belos::BlockCGSolMgr::solve(): The convergence test's getTestValue() "
1122 "method returned NULL. Please report this bug to the Belos developers.");
1124 TEUCHOS_TEST_FOR_EXCEPTION(pTestValues->size() < 1, std::logic_error,
1125 "Belos::BlockCGSolMgr::solve(): The convergence test's getTestValue() "
1126 "method returned a vector of length zero. Please report this bug to the "
1127 "Belos developers.");
1132 achievedTol_ = *std::max_element (pTestValues->begin(), pTestValues->end());
1142 template<
class ScalarType,
class MV,
class OP>
1145 std::ostringstream oss;
1146 oss <<
"Belos::BlockCGSolMgr<...,"<<Teuchos::ScalarTraits<ScalarType>::name()<<
">";
1148 oss <<
"Ortho Type='"<<orthoType_<<
"\', Block Size=" << blockSize_;