Tpetra parallel linear algebra  Version of the Day
Tpetra_CrsGraph_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ************************************************************************
38 // @HEADER
39 
40 #ifndef TPETRA_CRSGRAPH_DEF_HPP
41 #define TPETRA_CRSGRAPH_DEF_HPP
42 
50 
54 #include "Tpetra_Details_gathervPrint.hpp"
55 #include "Tpetra_Details_getGraphDiagOffsets.hpp"
56 #include "Tpetra_Details_makeColMap.hpp"
60 #include "Tpetra_Distributor.hpp"
61 #include "Teuchos_SerialDenseMatrix.hpp"
62 #include "Tpetra_Vector.hpp"
63 #include "Tpetra_Import_Util.hpp"
64 #include "Tpetra_Import_Util2.hpp"
65 #include "Tpetra_Details_packCrsGraph.hpp"
66 #include "Tpetra_Details_unpackCrsGraphAndCombine.hpp"
68 #include <algorithm>
69 #include <limits>
70 #include <map>
71 #include <sstream>
72 #include <string>
73 #include <type_traits>
74 #include <utility>
75 #include <vector>
76 
77 namespace Tpetra {
78  namespace Details {
79  namespace Impl {
80 
81  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
82  class ConvertColumnIndicesFromGlobalToLocal {
83  public:
84  ConvertColumnIndicesFromGlobalToLocal (const ::Kokkos::View<LO*, DT>& lclColInds,
85  const ::Kokkos::View<const GO*, DT>& gblColInds,
86  const ::Kokkos::View<const OffsetType*, DT>& ptr,
87  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
88  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt) :
89  lclColInds_ (lclColInds),
90  gblColInds_ (gblColInds),
91  ptr_ (ptr),
92  lclColMap_ (lclColMap),
93  numRowEnt_ (numRowEnt)
94  {}
95 
96  KOKKOS_FUNCTION void
97  operator () (const LO& lclRow, OffsetType& curNumBad) const
98  {
99  const OffsetType offset = ptr_(lclRow);
100  // NOTE (mfh 26 Jun 2016) It's always legal to cast the number
101  // of entries in a row to LO, as long as the row doesn't have
102  // too many duplicate entries.
103  const LO numEnt = static_cast<LO> (numRowEnt_(lclRow));
104  for (LO j = 0; j < numEnt; ++j) {
105  const GO gid = gblColInds_(offset + j);
106  const LO lid = lclColMap_.getLocalElement (gid);
107  lclColInds_(offset + j) = lid;
108  if (lid == ::Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
109  ++curNumBad;
110  }
111  }
112  }
113 
114  static OffsetType
115  run (const ::Kokkos::View<LO*, DT>& lclColInds,
116  const ::Kokkos::View<const GO*, DT>& gblColInds,
117  const ::Kokkos::View<const OffsetType*, DT>& ptr,
118  const ::Tpetra::Details::LocalMap<LO, GO, DT>& lclColMap,
119  const ::Kokkos::View<const NumEntType*, DT>& numRowEnt)
120  {
121  typedef ::Kokkos::RangePolicy<typename DT::execution_space, LO> range_type;
122  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> functor_type;
123 
124  const LO lclNumRows = ptr.extent (0) == 0 ?
125  static_cast<LO> (0) : static_cast<LO> (ptr.extent (0) - 1);
126  OffsetType numBad = 0;
127  // Count of "bad" column indices is a reduction over rows.
128  ::Kokkos::parallel_reduce (range_type (0, lclNumRows),
129  functor_type (lclColInds, gblColInds, ptr,
130  lclColMap, numRowEnt),
131  numBad);
132  return numBad;
133  }
134 
135  private:
136  ::Kokkos::View<LO*, DT> lclColInds_;
137  ::Kokkos::View<const GO*, DT> gblColInds_;
138  ::Kokkos::View<const OffsetType*, DT> ptr_;
140  ::Kokkos::View<const NumEntType*, DT> numRowEnt_;
141  };
142 
143  } // namespace Impl
144 
159  template<class LO, class GO, class DT, class OffsetType, class NumEntType>
160  OffsetType
161  convertColumnIndicesFromGlobalToLocal (const Kokkos::View<LO*, DT>& lclColInds,
162  const Kokkos::View<const GO*, DT>& gblColInds,
163  const Kokkos::View<const OffsetType*, DT>& ptr,
164  const LocalMap<LO, GO, DT>& lclColMap,
165  const Kokkos::View<const NumEntType*, DT>& numRowEnt)
166  {
167  using Impl::ConvertColumnIndicesFromGlobalToLocal;
168  typedef ConvertColumnIndicesFromGlobalToLocal<LO, GO, DT, OffsetType, NumEntType> impl_type;
169  return impl_type::run (lclColInds, gblColInds, ptr, lclColMap, numRowEnt);
170  }
171 
172  template<class ViewType, class LO>
173  class MaxDifference {
174  public:
175  MaxDifference (const ViewType& ptr) : ptr_ (ptr) {}
176 
177  KOKKOS_INLINE_FUNCTION void init (LO& dst) const {
178  dst = 0;
179  }
180 
181  KOKKOS_INLINE_FUNCTION void
182  join (volatile LO& dst, const volatile LO& src) const
183  {
184  dst = (src > dst) ? src : dst;
185  }
186 
187  KOKKOS_INLINE_FUNCTION void
188  operator () (const LO lclRow, LO& maxNumEnt) const
189  {
190  const LO numEnt = static_cast<LO> (ptr_(lclRow+1) - ptr_(lclRow));
191  maxNumEnt = (numEnt > maxNumEnt) ? numEnt : maxNumEnt;
192  }
193  private:
194  typename ViewType::const_type ptr_;
195  };
196 
197  template<class ViewType, class LO>
198  typename ViewType::non_const_value_type
199  maxDifference (const char kernelLabel[],
200  const ViewType& ptr,
201  const LO lclNumRows)
202  {
203  if (lclNumRows == 0) {
204  // mfh 07 May 2018: Weirdly, I need this special case,
205  // otherwise I get the wrong answer.
206  return static_cast<LO> (0);
207  }
208  else {
209  using execution_space = typename ViewType::execution_space;
210  using range_type = Kokkos::RangePolicy<execution_space, LO>;
211  LO theMaxNumEnt {0};
212  Kokkos::parallel_reduce (kernelLabel,
213  range_type (0, lclNumRows),
214  MaxDifference<ViewType, LO> (ptr),
215  theMaxNumEnt);
216  return theMaxNumEnt;
217  }
218  }
219 
220  } // namespace Details
221 
222  template <class LocalOrdinal, class GlobalOrdinal, class Node>
224  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
225  const size_t maxNumEntriesPerRow,
226  const ProfileType pftype,
227  const Teuchos::RCP<Teuchos::ParameterList>& params) :
228  dist_object_type (rowMap)
229  , rowMap_ (rowMap)
230  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
231  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
232  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
233  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
234  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
235  , pftype_ (pftype)
236  , numAllocForAllRows_ (maxNumEntriesPerRow)
237  , storageStatus_ (pftype == StaticProfile ?
238  ::Tpetra::Details::STORAGE_1D_UNPACKED :
239  ::Tpetra::Details::STORAGE_2D)
240  , indicesAreAllocated_ (false)
241  , indicesAreLocal_ (false)
242  , indicesAreGlobal_ (false)
243  , fillComplete_ (false)
244  , lowerTriangular_ (false)
245  , upperTriangular_ (false)
246  , indicesAreSorted_ (true)
247  , noRedundancies_ (true)
248  , haveLocalConstants_ (false)
249  , haveGlobalConstants_ (false)
250  , sortGhostsAssociatedWithEachProcessor_ (true)
251  {
252  const char tfecfFuncName[] = "CrsGraph(rowMap,maxNumEntriesPerRow,"
253  "pftype,params): ";
254  staticAssertions ();
255  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
256  (maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
257  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
258  "a valid size_t value, which in this case means it must not be "
259  "Teuchos::OrdinalTraits<size_t>::invalid().");
260  resumeFill (params);
262  }
263 
264  template <class LocalOrdinal, class GlobalOrdinal, class Node>
266  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
267  const Teuchos::RCP<const map_type>& colMap,
268  const size_t maxNumEntriesPerRow,
269  const ProfileType pftype,
270  const Teuchos::RCP<Teuchos::ParameterList>& params) :
271  dist_object_type (rowMap)
272  , rowMap_ (rowMap)
273  , colMap_ (colMap)
274  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
275  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
276  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
277  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
278  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
279  , pftype_ (pftype)
280  , numAllocForAllRows_ (maxNumEntriesPerRow)
281  , storageStatus_ (pftype == StaticProfile ?
282  ::Tpetra::Details::STORAGE_1D_UNPACKED :
283  ::Tpetra::Details::STORAGE_2D)
284  , indicesAreAllocated_ (false)
285  , indicesAreLocal_ (false)
286  , indicesAreGlobal_ (false)
287  , fillComplete_ (false)
288  , lowerTriangular_ (false)
289  , upperTriangular_ (false)
290  , indicesAreSorted_ (true)
291  , noRedundancies_ (true)
292  , haveLocalConstants_ (false)
293  , haveGlobalConstants_ (false)
294  , sortGhostsAssociatedWithEachProcessor_ (true)
295  {
296  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,maxNumEntriesPerRow,"
297  "pftype,params): ";
298  staticAssertions ();
299  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
300  maxNumEntriesPerRow == Teuchos::OrdinalTraits<size_t>::invalid (),
301  std::invalid_argument, "The allocation hint maxNumEntriesPerRow must be "
302  "a valid size_t value, which in this case means it must not be "
303  "Teuchos::OrdinalTraits<size_t>::invalid().");
304  resumeFill (params);
306  }
307 
308  template <class LocalOrdinal, class GlobalOrdinal, class Node>
310  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
311  const Teuchos::ArrayView<const size_t>& numEntPerRow,
312  const ProfileType pftype,
313  const Teuchos::RCP<Teuchos::ParameterList>& params) :
314  dist_object_type (rowMap)
315  , rowMap_ (rowMap)
316  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
317  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
318  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
319  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
320  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
321  , pftype_ (pftype)
322  , numAllocForAllRows_ (0)
323  , storageStatus_ (pftype == StaticProfile ?
324  ::Tpetra::Details::STORAGE_1D_UNPACKED :
325  ::Tpetra::Details::STORAGE_2D)
326  , indicesAreAllocated_ (false)
327  , indicesAreLocal_ (false)
328  , indicesAreGlobal_ (false)
329  , fillComplete_ (false)
330  , lowerTriangular_ (false)
331  , upperTriangular_ (false)
332  , indicesAreSorted_ (true)
333  , noRedundancies_ (true)
334  , haveLocalConstants_ (false)
335  , haveGlobalConstants_ (false)
336  , sortGhostsAssociatedWithEachProcessor_ (true)
337  {
338  const char tfecfFuncName[] = "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
339  staticAssertions ();
340 
341  const size_t lclNumRows = rowMap.is_null () ?
342  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
343  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
344  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
345  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
346  << " != the local number of rows " << lclNumRows << " as specified by "
347  "the input row Map.");
348 
349  const bool debug = ::Tpetra::Details::Behavior::debug ();
350  if (debug) {
351  for (size_t r = 0; r < lclNumRows; ++r) {
352  const size_t curRowCount = numEntPerRow[r];
353  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
354  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
355  std::invalid_argument, "numEntPerRow(" << r << ") "
356  "specifies an invalid number of entries "
357  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
358  }
359  }
360 
361  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
362  // The latter is a const View, so we have to copy into a nonconst
363  // View first, then assign.
364  typedef decltype (k_numAllocPerRow_) out_view_type;
365  typedef typename out_view_type::non_const_type nc_view_type;
366  typedef Kokkos::View<const size_t*,
367  typename nc_view_type::array_layout,
368  Kokkos::HostSpace,
369  Kokkos::MemoryUnmanaged> in_view_type;
370  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
371  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
372  lclNumRows);
373  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
374  k_numAllocPerRow_ = numAllocPerRowOut;
375 
376  resumeFill (params);
378  }
379 
380 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
381  template <class LocalOrdinal, class GlobalOrdinal, class Node>
383  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
384  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
385  const ProfileType pftype,
386  const Teuchos::RCP<Teuchos::ParameterList>& params) :
387  dist_object_type (rowMap)
388  , rowMap_ (rowMap)
389  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
390  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
391  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
392  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
393  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
394  , pftype_ (pftype)
395  , numAllocForAllRows_ (0)
396  , storageStatus_ (pftype == StaticProfile ?
397  ::Tpetra::Details::STORAGE_1D_UNPACKED :
398  ::Tpetra::Details::STORAGE_2D)
399  , indicesAreAllocated_ (false)
400  , indicesAreLocal_ (false)
401  , indicesAreGlobal_ (false)
402  , fillComplete_ (false)
403  , lowerTriangular_ (false)
404  , upperTriangular_ (false)
405  , indicesAreSorted_ (true)
406  , noRedundancies_ (true)
407  , haveLocalConstants_ (false)
408  , haveGlobalConstants_ (false)
409  , sortGhostsAssociatedWithEachProcessor_ (true)
410  {
411  const char tfecfFuncName[] = "CrsGraph(RCP<const Map>,"
412  "ArrayRCP<const size_t>,ProfileType,RCP<ParameterList>): ";
413  staticAssertions ();
414 
415  const size_t lclNumRows = rowMap.is_null () ?
416  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
417  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
418  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
419  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
420  << " != the local number of rows " << lclNumRows << " as specified by "
421  "the input row Map.");
422 
423  const bool debug = ::Tpetra::Details::Behavior::debug ();
424  if (debug) {
425  for (size_t r = 0; r < lclNumRows; ++r) {
426  const size_t curRowCount = numEntPerRow[r];
427  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
428  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
429  std::invalid_argument, "numEntPerRow(" << r << ") "
430  "specifies an invalid number of entries "
431  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
432  }
433  }
434 
435  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
436  // The latter is a const View, so we have to copy into a nonconst
437  // View first, then assign.
438  typedef decltype (k_numAllocPerRow_) out_view_type;
439  typedef typename out_view_type::non_const_type nc_view_type;
440  typedef Kokkos::View<const size_t*,
441  typename nc_view_type::array_layout,
442  Kokkos::HostSpace,
443  Kokkos::MemoryUnmanaged> in_view_type;
444  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
445  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
446  lclNumRows);
447  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
448  k_numAllocPerRow_ = numAllocPerRowOut;
449 
450  resumeFill (params);
451  checkInternalState ();
452  }
453 #endif // TPETRA_ENABLE_DEPRECATED_CODE
454 
455 
456  template <class LocalOrdinal, class GlobalOrdinal, class Node>
458  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
459  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
460  const ProfileType pftype,
461  const Teuchos::RCP<Teuchos::ParameterList>& params) :
462  dist_object_type (rowMap)
463  , rowMap_ (rowMap)
464  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
465  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
466  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
467  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
468  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
469  , pftype_ (pftype)
470  , k_numAllocPerRow_ (numEntPerRow.h_view)
471  , numAllocForAllRows_ (0)
472  , storageStatus_ (pftype == StaticProfile ?
473  ::Tpetra::Details::STORAGE_1D_UNPACKED :
474  ::Tpetra::Details::STORAGE_2D)
475  , indicesAreAllocated_ (false)
476  , indicesAreLocal_ (false)
477  , indicesAreGlobal_ (false)
478  , fillComplete_ (false)
479  , lowerTriangular_ (false)
480  , upperTriangular_ (false)
481  , indicesAreSorted_ (true)
482  , noRedundancies_ (true)
483  , haveLocalConstants_ (false)
484  , haveGlobalConstants_ (false)
485  , sortGhostsAssociatedWithEachProcessor_ (true)
486  {
487  const char tfecfFuncName[] = "CrsGraph(rowMap,numEntPerRow,pftype,params): ";
488  staticAssertions ();
489 
490  const size_t lclNumRows = rowMap.is_null () ?
491  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
492  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
493  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
494  std::invalid_argument, "numEntPerRow has length " <<
495  numEntPerRow.extent (0) << " != the local number of rows " <<
496  lclNumRows << " as specified by " "the input row Map.");
497 
498  const bool debug = ::Tpetra::Details::Behavior::debug ();
499  if (debug) {
500  for (size_t r = 0; r < lclNumRows; ++r) {
501  const size_t curRowCount = numEntPerRow.h_view(r);
502  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
503  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
504  std::invalid_argument, "numEntPerRow(" << r << ") "
505  "specifies an invalid number of entries "
506  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
507  }
508  }
509 
510  resumeFill (params);
511  checkInternalState ();
512  }
513 
514 
515  template <class LocalOrdinal, class GlobalOrdinal, class Node>
517  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
518  const Teuchos::RCP<const map_type>& colMap,
519  const Kokkos::DualView<const size_t*, execution_space>& numEntPerRow,
520  const ProfileType pftype,
521  const Teuchos::RCP<Teuchos::ParameterList>& params) :
522  dist_object_type (rowMap)
523  , rowMap_ (rowMap)
524  , colMap_ (colMap)
525  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
526  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
527  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
528  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
529  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
530  , pftype_ (pftype)
531  , k_numAllocPerRow_ (numEntPerRow.h_view)
532  , numAllocForAllRows_ (0)
533  , storageStatus_ (pftype == StaticProfile ?
534  ::Tpetra::Details::STORAGE_1D_UNPACKED :
535  ::Tpetra::Details::STORAGE_2D)
536  , indicesAreAllocated_ (false)
537  , indicesAreLocal_ (false)
538  , indicesAreGlobal_ (false)
539  , fillComplete_ (false)
540  , lowerTriangular_ (false)
541  , upperTriangular_ (false)
542  , indicesAreSorted_ (true)
543  , noRedundancies_ (true)
544  , haveLocalConstants_ (false)
545  , haveGlobalConstants_ (false)
546  , sortGhostsAssociatedWithEachProcessor_ (true)
547  {
548  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,numEntPerRow,pftype,params): ";
549  staticAssertions ();
550 
551  const size_t lclNumRows = rowMap.is_null () ?
552  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
553  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
554  static_cast<size_t> (numEntPerRow.extent (0)) != lclNumRows,
555  std::invalid_argument, "numEntPerRow has length " <<
556  numEntPerRow.extent (0) << " != the local number of rows " <<
557  lclNumRows << " as specified by " "the input row Map.");
558 
559  const bool debug = ::Tpetra::Details::Behavior::debug ();
560  if (debug) {
561  for (size_t r = 0; r < lclNumRows; ++r) {
562  const size_t curRowCount = numEntPerRow.h_view(r);
563  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
564  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
565  std::invalid_argument, "numEntPerRow(" << r << ") "
566  "specifies an invalid number of entries "
567  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
568  }
569  }
570 
571  resumeFill (params);
572  checkInternalState ();
573  }
574 
575 
576  template <class LocalOrdinal, class GlobalOrdinal, class Node>
578  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
579  const Teuchos::RCP<const map_type>& colMap,
580  const Teuchos::ArrayView<const size_t>& numEntPerRow,
581  const ProfileType pftype,
582  const Teuchos::RCP<Teuchos::ParameterList>& params) :
583  dist_object_type (rowMap)
584  , rowMap_ (rowMap)
585  , colMap_ (colMap)
586  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
587  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
588  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
589  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
590  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
591  , pftype_ (pftype)
592  , numAllocForAllRows_ (0)
593  , storageStatus_ (pftype == StaticProfile ?
594  ::Tpetra::Details::STORAGE_1D_UNPACKED :
595  ::Tpetra::Details::STORAGE_2D)
596  , indicesAreAllocated_ (false)
597  , indicesAreLocal_ (false)
598  , indicesAreGlobal_ (false)
599  , fillComplete_ (false)
600  , lowerTriangular_ (false)
601  , upperTriangular_ (false)
602  , indicesAreSorted_ (true)
603  , noRedundancies_ (true)
604  , haveLocalConstants_ (false)
605  , haveGlobalConstants_ (false)
606  , sortGhostsAssociatedWithEachProcessor_ (true)
607  {
608  const char tfecfFuncName[] = "CrsGraph(rowMap,colMap,numEntPerRow,pftype,"
609  "params): ";
610  staticAssertions ();
611 
612  const size_t lclNumRows = rowMap.is_null () ?
613  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
614  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
615  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
616  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
617  << " != the local number of rows " << lclNumRows << " as specified by "
618  "the input row Map.");
619 
620  const bool debug = ::Tpetra::Details::Behavior::debug ();
621  if (debug) {
622  for (size_t r = 0; r < lclNumRows; ++r) {
623  const size_t curRowCount = numEntPerRow[r];
624  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
625  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
626  std::invalid_argument, "numEntPerRow(" << r << ") "
627  "specifies an invalid number of entries "
628  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
629  }
630  }
631 
632  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
633  // The latter is a const View, so we have to copy into a nonconst
634  // View first, then assign.
635  typedef decltype (k_numAllocPerRow_) out_view_type;
636  typedef typename out_view_type::non_const_type nc_view_type;
637  typedef Kokkos::View<const size_t*,
638  typename nc_view_type::array_layout,
639  Kokkos::HostSpace,
640  Kokkos::MemoryUnmanaged> in_view_type;
641  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
642  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
643  lclNumRows);
644  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
645  k_numAllocPerRow_ = numAllocPerRowOut;
646 
647  resumeFill (params);
648  checkInternalState ();
649  }
650 
651 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
652  template <class LocalOrdinal, class GlobalOrdinal, class Node>
654  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
655  const Teuchos::RCP<const map_type>& colMap,
656  const Teuchos::ArrayRCP<const size_t>& numEntPerRow,
657  const ProfileType pftype,
658  const Teuchos::RCP<Teuchos::ParameterList>& params) :
659  dist_object_type (rowMap)
660  , rowMap_ (rowMap)
661  , colMap_ (colMap)
662  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
663  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
664  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
665  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
666  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
667  , pftype_ (pftype)
668  , numAllocForAllRows_ (0)
669  , storageStatus_ (pftype == StaticProfile ?
670  ::Tpetra::Details::STORAGE_1D_UNPACKED :
671  ::Tpetra::Details::STORAGE_2D)
672  , indicesAreAllocated_ (false)
673  , indicesAreLocal_ (false)
674  , indicesAreGlobal_ (false)
675  , fillComplete_ (false)
676  , lowerTriangular_ (false)
677  , upperTriangular_ (false)
678  , indicesAreSorted_ (true)
679  , noRedundancies_ (true)
680  , haveLocalConstants_ (false)
681  , haveGlobalConstants_ (false)
682  , sortGhostsAssociatedWithEachProcessor_ (true)
683  {
684  const char tfecfFuncName[] = "CrsGraph(RCP<const Map>,RCP<const Map>,"
685  "ArrayRCP<const size_t>,ProfileType,RCP<ParameterList>): ";
686  staticAssertions ();
687 
688  const size_t lclNumRows = rowMap.is_null () ?
689  static_cast<size_t> (0) : rowMap->getNodeNumElements ();
690  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
691  static_cast<size_t> (numEntPerRow.size ()) != lclNumRows,
692  std::invalid_argument, "numEntPerRow has length " << numEntPerRow.size ()
693  << " != the local number of rows " << lclNumRows << " as specified by "
694  "the input row Map.");
695 
696  const bool debug = ::Tpetra::Details::Behavior::debug ();
697  if (debug) {
698  for (size_t r = 0; r < lclNumRows; ++r) {
699  const size_t curRowCount = numEntPerRow[r];
700  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
701  (curRowCount == Teuchos::OrdinalTraits<size_t>::invalid (),
702  std::invalid_argument, "numEntPerRow(" << r << ") "
703  "specifies an invalid number of entries "
704  "(Teuchos::OrdinalTraits<size_t>::invalid()).");
705  }
706  }
707 
708  // Deep-copy the (host-accessible) input into k_numAllocPerRow_.
709  // The latter is a const View, so we have to copy into a nonconst
710  // View first, then assign.
711  typedef decltype (k_numAllocPerRow_) out_view_type;
712  typedef typename out_view_type::non_const_type nc_view_type;
713  typedef Kokkos::View<const size_t*,
714  typename nc_view_type::array_layout,
715  Kokkos::HostSpace,
716  Kokkos::MemoryUnmanaged> in_view_type;
717  in_view_type numAllocPerRowIn (numEntPerRow.getRawPtr (), lclNumRows);
718  nc_view_type numAllocPerRowOut ("Tpetra::CrsGraph::numAllocPerRow",
719  lclNumRows);
720  Kokkos::deep_copy (numAllocPerRowOut, numAllocPerRowIn);
721  k_numAllocPerRow_ = numAllocPerRowOut;
722 
723  resumeFill (params);
724  checkInternalState ();
725  }
726 #endif // TPETRA_ENABLE_DEPRECATED_CODE
727 
728  template <class LocalOrdinal, class GlobalOrdinal, class Node>
730  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
731  const Teuchos::RCP<const map_type>& colMap,
732  const typename local_graph_type::row_map_type& rowPointers,
733  const typename local_graph_type::entries_type::non_const_type& columnIndices,
734  const Teuchos::RCP<Teuchos::ParameterList>& /* params */) :
735  dist_object_type (rowMap)
736  , rowMap_(rowMap)
737  , colMap_(colMap)
738  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
739  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
740  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
741  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
742  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
743  , pftype_(StaticProfile)
744  , numAllocForAllRows_(0)
745  , storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED)
746  , indicesAreAllocated_(true)
747  , indicesAreLocal_(true)
748  , indicesAreGlobal_(false)
749  , fillComplete_(false)
750  , lowerTriangular_ (false)
751  , upperTriangular_ (false)
752  , indicesAreSorted_(true)
753  , noRedundancies_(true)
754  , haveLocalConstants_ (false)
755  , haveGlobalConstants_ (false)
756  , sortGhostsAssociatedWithEachProcessor_(true)
757  {
758  staticAssertions ();
759  setAllIndices (rowPointers, columnIndices);
760  checkInternalState ();
761  }
762 
763  template <class LocalOrdinal, class GlobalOrdinal, class Node>
765  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
766  const Teuchos::RCP<const map_type>& colMap,
767  const Teuchos::ArrayRCP<size_t>& rowPointers,
768  const Teuchos::ArrayRCP<LocalOrdinal> & columnIndices,
769  const Teuchos::RCP<Teuchos::ParameterList>& /* params */) :
770  dist_object_type (rowMap)
771  , rowMap_ (rowMap)
772  , colMap_ (colMap)
773  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
774  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
775  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
776  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
777  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
778  , pftype_ (StaticProfile)
779  , numAllocForAllRows_ (0)
780  , storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED)
781  , indicesAreAllocated_ (true)
782  , indicesAreLocal_ (true)
783  , indicesAreGlobal_ (false)
784  , fillComplete_ (false)
785  , lowerTriangular_ (false)
786  , upperTriangular_ (false)
787  , indicesAreSorted_ (true)
788  , noRedundancies_ (true)
789  , haveLocalConstants_ (false)
790  , haveGlobalConstants_ (false)
791  , sortGhostsAssociatedWithEachProcessor_ (true)
792  {
793  staticAssertions ();
794  setAllIndices (rowPointers, columnIndices);
795  checkInternalState ();
796  }
797 
798  template <class LocalOrdinal, class GlobalOrdinal, class Node>
800  CrsGraph (const Teuchos::RCP<const map_type>& rowMap,
801  const Teuchos::RCP<const map_type>& colMap,
802  const local_graph_type& k_local_graph_,
803  const Teuchos::RCP<Teuchos::ParameterList>& params)
804  : CrsGraph (k_local_graph_,
805  rowMap,
806  colMap,
807  Teuchos::null,
808  Teuchos::null,
809  params)
810  {}
811 
812  template <class LocalOrdinal, class GlobalOrdinal, class Node>
814  CrsGraph (const local_graph_type& k_local_graph_,
815  const Teuchos::RCP<const map_type>& rowMap,
816  const Teuchos::RCP<const map_type>& colMap,
817  const Teuchos::RCP<const map_type>& domainMap,
818  const Teuchos::RCP<const map_type>& rangeMap,
819  const Teuchos::RCP<Teuchos::ParameterList>& params)
820  : DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap)
821  , rowMap_ (rowMap)
822  , colMap_ (colMap)
823  , lclGraph_ (k_local_graph_)
824  , nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ())
825  , nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ())
826  , globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
827  , globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
828  , globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ())
829  , pftype_ (StaticProfile)
830  , numAllocForAllRows_ (0)
831  , storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED)
832  , indicesAreAllocated_ (true)
833  , indicesAreLocal_ (true)
834  , indicesAreGlobal_ (false)
835  , fillComplete_ (false)
836  , lowerTriangular_ (false)
837  , upperTriangular_ (false)
838  , indicesAreSorted_ (true)
839  , noRedundancies_ (true)
840  , haveLocalConstants_ (false)
841  , haveGlobalConstants_ (false)
842  , sortGhostsAssociatedWithEachProcessor_ (true)
843  {
844  staticAssertions();
845  const char tfecfFuncName[] = "CrsGraph(Kokkos::LocalStaticCrsGraph,Map,Map,Map,Map)";
846 
847  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
848  colMap.is_null (), std::runtime_error,
849  ": The input column Map must be nonnull.");
850  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
851  k_local_graph_.numRows () != rowMap->getNodeNumElements (),
852  std::runtime_error,
853  ": The input row Map and the input local graph need to have the same "
854  "number of rows. The row Map claims " << rowMap->getNodeNumElements ()
855  << " row(s), but the local graph claims " << k_local_graph_.numRows ()
856  << " row(s).");
857  // NOTE (mfh 17 Mar 2014) getNodeNumRows() returns
858  // rowMap_->getNodeNumElements(), but it doesn't have to.
859  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
860  // k_local_graph_.numRows () != getNodeNumRows (), std::runtime_error,
861  // ": The input row Map and the input local graph need to have the same "
862  // "number of rows. The row Map claims " << getNodeNumRows () << " row(s), "
863  // "but the local graph claims " << k_local_graph_.numRows () << " row(s).");
864  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
865  k_lclInds1D_.extent (0) != 0 || k_gblInds1D_.extent (0) != 0, std::logic_error,
866  ": cannot have 1D data structures allocated.");
867  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
868  ! lclInds2D_.is_null () || ! gblInds2D_.is_null (), std::logic_error,
869  ": cannot have 2D data structures allocated.");
870 
871  setDomainRangeMaps (domainMap.is_null() ? rowMap_ : domainMap,
872  rangeMap .is_null() ? rowMap_ : rangeMap);
873  Teuchos::Array<int> remotePIDs (0); // unused output argument
874  this->makeImportExport (remotePIDs, false);
875 
876  k_lclInds1D_ = lclGraph_.entries;
877  k_rowPtrs_ = lclGraph_.row_map;
878 
879  const bool callComputeGlobalConstants = params.get () == nullptr ||
880  params->get ("compute global constants", true);
881  const bool computeLocalTriangularConstants = params.get () == nullptr ||
882  params->get ("compute local triangular constants", true);
883 
884  if (callComputeGlobalConstants) {
885  this->computeGlobalConstants (computeLocalTriangularConstants);
886  }
887  this->fillComplete_ = true;
888  this->checkInternalState ();
889  }
890 
891  template <class LocalOrdinal, class GlobalOrdinal, class Node>
893  CrsGraph (const local_graph_type& lclGraph,
894  const Teuchos::RCP<const map_type>& rowMap,
895  const Teuchos::RCP<const map_type>& colMap,
896  const Teuchos::RCP<const map_type>& domainMap,
897  const Teuchos::RCP<const map_type>& rangeMap,
898  const Teuchos::RCP<const import_type>& importer,
899  const Teuchos::RCP<const export_type>& exporter,
900  const Teuchos::RCP<Teuchos::ParameterList>& params) :
901  DistObject<GlobalOrdinal, LocalOrdinal, GlobalOrdinal, node_type> (rowMap),
902  rowMap_ (rowMap),
903  colMap_ (colMap),
904  rangeMap_ (rangeMap.is_null () ? rowMap : rangeMap),
905  domainMap_ (domainMap.is_null () ? rowMap : domainMap),
906  importer_ (importer),
907  exporter_ (exporter),
908  lclGraph_ (lclGraph),
909  nodeNumDiags_ (Teuchos::OrdinalTraits<size_t>::invalid ()),
910  nodeMaxNumRowEntries_ (Teuchos::OrdinalTraits<size_t>::invalid ()),
911  globalNumEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ()),
912  globalNumDiags_ (Teuchos::OrdinalTraits<global_size_t>::invalid ()),
913  globalMaxNumRowEntries_ (Teuchos::OrdinalTraits<global_size_t>::invalid ()),
914  pftype_ (StaticProfile),
915  numAllocForAllRows_ (0),
916  storageStatus_ (::Tpetra::Details::STORAGE_1D_PACKED),
917  indicesAreAllocated_ (true),
918  indicesAreLocal_ (true),
919  indicesAreGlobal_ (false),
920  fillComplete_ (false), // not yet, but see below
921  lowerTriangular_ (false),
922  upperTriangular_ (false),
923  indicesAreSorted_ (true),
924  noRedundancies_ (true),
925  haveLocalConstants_ (false),
926  haveGlobalConstants_ (false),
927  sortGhostsAssociatedWithEachProcessor_ (true)
928  {
929  staticAssertions();
930  const char tfecfFuncName[] = "Tpetra::CrsGraph(local_graph_type,"
931  "Map,Map,Map,Map,Import,Export,params): ";
932 
933  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
934  (colMap.is_null (), std::runtime_error,
935  "The input column Map must be nonnull.");
936 
937  k_lclInds1D_ = lclGraph_.entries;
938  k_rowPtrs_ = lclGraph_.row_map;
939  const bool callComputeGlobalConstants =
940  params.get () == nullptr ||
941  params->get ("compute global constants", true);
942  const bool computeLocalTriangularConstants =
943  params.get () == nullptr ||
944  params->get ("compute local triangular constants", true);
945  if (callComputeGlobalConstants) {
946  this->computeGlobalConstants (computeLocalTriangularConstants);
947  }
948  fillComplete_ = true;
949  checkInternalState ();
950  }
951 
952  template <class LocalOrdinal, class GlobalOrdinal, class Node>
953  Teuchos::RCP<const Teuchos::ParameterList>
955  getValidParameters () const
956  {
957  using Teuchos::RCP;
958  using Teuchos::ParameterList;
959  using Teuchos::parameterList;
960 
961  RCP<ParameterList> params = parameterList ("Tpetra::CrsGraph");
962 
963  // Make a sublist for the Import.
964  RCP<ParameterList> importSublist = parameterList ("Import");
965 
966  // FIXME (mfh 02 Apr 2012) We should really have the Import and
967  // Export objects fill in these lists. However, we don't want to
968  // create an Import or Export unless we need them. For now, we
969  // know that the Import and Export just pass the list directly to
970  // their Distributor, so we can create a Distributor here
971  // (Distributor's constructor is a lightweight operation) and have
972  // it fill in the list.
973 
974  // Fill in Distributor default parameters by creating a
975  // Distributor and asking it to do the work.
976  Distributor distributor (rowMap_->getComm (), importSublist);
977  params->set ("Import", *importSublist, "How the Import performs communication.");
978 
979  // Make a sublist for the Export. For now, it's a clone of the
980  // Import sublist. It's not a shallow copy, though, since we
981  // might like the Import to do communication differently than the
982  // Export.
983  params->set ("Export", *importSublist, "How the Export performs communication.");
984 
985  return params;
986  }
987 
988  template <class LocalOrdinal, class GlobalOrdinal, class Node>
989  void
991  setParameterList (const Teuchos::RCP<Teuchos::ParameterList>& params)
992  {
993  Teuchos::RCP<const Teuchos::ParameterList> validParams =
994  getValidParameters ();
995  params->validateParametersAndSetDefaults (*validParams);
996  this->setMyParamList (params);
997  }
998 
999  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1002  getGlobalNumRows () const
1003  {
1004  return rowMap_->getGlobalNumElements ();
1005  }
1006 
1007  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1010  getGlobalNumCols () const
1011  {
1012  const char tfecfFuncName[] = "getGlobalNumCols: ";
1013  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1014  ! isFillComplete () || getDomainMap ().is_null (), std::runtime_error,
1015  "The graph does not have a domain Map. You may not call this method in "
1016  "that case.");
1017  return getDomainMap ()->getGlobalNumElements ();
1018  }
1019 
1020  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1021  size_t
1023  getNodeNumRows () const
1024  {
1025  return this->rowMap_.is_null () ?
1026  static_cast<size_t> (0) :
1027  this->rowMap_->getNodeNumElements ();
1028  }
1029 
1030  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1031  size_t
1033  getNodeNumCols () const
1034  {
1035  const char tfecfFuncName[] = "getNodeNumCols: ";
1036  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
1037  ! hasColMap (), std::runtime_error,
1038  "The graph does not have a column Map. You may not call this method "
1039  "unless the graph has a column Map. This requires either that a custom "
1040  "column Map was given to the constructor, or that fillComplete() has "
1041  "been called.");
1042  return colMap_.is_null () ? static_cast<size_t> (0) :
1043  colMap_->getNodeNumElements ();
1044  }
1045 
1046 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1047  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1048  global_size_t TPETRA_DEPRECATED
1050  getGlobalNumDiags () const
1051  {
1052  return this->getGlobalNumDiagsImpl ();
1053  }
1054 
1055  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1056  size_t TPETRA_DEPRECATED
1058  getNodeNumDiags () const
1059  {
1060  return this->getNodeNumDiagsImpl ();
1061  }
1062 
1063  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1066  getGlobalNumDiagsImpl () const
1067  {
1068  const char tfecfFuncName[] = "getGlobalNumDiags: ";
1069  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1070  (! this->haveGlobalConstants_, std::logic_error,
1071  "The graph does not have global constants computed, "
1072  "but the user has requested them.");
1073 
1074  return globalNumDiags_;
1075  }
1076 
1077  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1078  size_t
1080  getNodeNumDiagsImpl () const
1081  {
1082  return nodeNumDiags_;
1083  }
1084 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1085 
1086 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1087  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1088  TPETRA_DEPRECATED
1089  Teuchos::RCP<Node>
1091  getNode () const
1092  {
1093  return Teuchos::null;
1094  }
1095 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1096 
1097  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1098  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
1100  getRowMap () const
1101  {
1102  return rowMap_;
1103  }
1104 
1105  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1106  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
1108  getColMap () const
1109  {
1110  return colMap_;
1111  }
1112 
1113  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1114  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
1116  getDomainMap () const
1117  {
1118  return domainMap_;
1119  }
1120 
1121  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1122  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::map_type>
1124  getRangeMap () const
1125  {
1126  return rangeMap_;
1127  }
1128 
1129  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1130  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::import_type>
1132  getImporter () const
1133  {
1134  return importer_;
1135  }
1136 
1137  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1138  Teuchos::RCP<const typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::export_type>
1140  getExporter () const
1141  {
1142  return exporter_;
1143  }
1144 
1145  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1146  bool
1148  hasColMap () const
1149  {
1150  return ! colMap_.is_null ();
1151  }
1152 
1153  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1154  bool
1156  isStorageOptimized () const
1157  {
1158  // FIXME (mfh 07 Aug 2014) Why wouldn't storage be optimized if
1159  // getNodeNumRows() is zero?
1160 
1161  const bool isOpt = indicesAreAllocated_ &&
1162  k_numRowEntries_.extent (0) == 0 &&
1163  getNodeNumRows () > 0;
1164 
1165  const char tfecfFuncName[] = "isStorageOptimized: ";
1166  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1167  (isOpt && getProfileType () != StaticProfile, std::logic_error,
1168  "The matrix claims to have optimized storage, but getProfileType() "
1169  "returns DynamicProfile. This should never happen. Please report this "
1170  "bug to the Tpetra developers.");
1171 
1172  return isOpt;
1173  }
1174 
1175  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1176  ProfileType
1178  getProfileType () const
1179  {
1180  return pftype_;
1181  }
1182 
1183  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1186  getGlobalNumEntries () const
1187  {
1188  const char tfecfFuncName[] = "getGlobalNumEntries: ";
1189  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1190  (! this->haveGlobalConstants_, std::logic_error,
1191  "The graph does not have global constants computed, "
1192  "but the user has requested them.");
1193 
1194  return globalNumEntries_;
1195  }
1196 
1197  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1198  size_t
1200  getNodeNumEntries () const
1201  {
1202  typedef LocalOrdinal LO;
1203 
1204  if (this->indicesAreAllocated_) {
1205  const LO lclNumRows = this->getNodeNumRows ();
1206  if (lclNumRows == 0) {
1207  return static_cast<size_t> (0);
1208  }
1209  else {
1210  // Avoid the "*this capture" issue by creating a local Kokkos::View.
1211  auto numEntPerRow = this->k_numRowEntries_;
1212  const LO numNumEntPerRow = numEntPerRow.extent (0);
1213  if (numNumEntPerRow == 0) {
1214  if (static_cast<LO> (this->lclGraph_.row_map.extent (0)) <
1215  static_cast<LO> (lclNumRows + 1)) {
1216  return static_cast<size_t> (0);
1217  }
1218  else {
1219  return ::Tpetra::Details::getEntryOnHost (this->lclGraph_.row_map, lclNumRows);
1220  }
1221  }
1222  else { // k_numRowEntries_ is populated
1223  // k_numRowEntries_ is actually be a host View, so we run
1224  // the sum in its native execution space. This also means
1225  // that we can use explicit capture (which could perhaps
1226  // improve build time) instead of KOKKOS_LAMBDA, and avoid
1227  // any CUDA build issues with trying to run a __device__ -
1228  // only function on host.
1229  typedef typename num_row_entries_type::execution_space
1230  host_exec_space;
1231  typedef Kokkos::RangePolicy<host_exec_space, LO> range_type;
1232 
1233  const LO upperLoopBound = lclNumRows < numNumEntPerRow ?
1234  lclNumRows :
1235  numNumEntPerRow;
1236  size_t nodeNumEnt = 0;
1237  Kokkos::parallel_reduce ("Tpetra::CrsGraph::getNumNodeEntries",
1238  range_type (0, upperLoopBound),
1239  [=] (const LO& k, size_t& lclSum) {
1240  lclSum += numEntPerRow(k);
1241  }, nodeNumEnt);
1242  return nodeNumEnt;
1243  }
1244  }
1245  }
1246  else { // nothing allocated on this process, so no entries
1247  return static_cast<size_t> (0);
1248  }
1249  }
1250 
1251  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1252  global_size_t
1255  {
1256  const char tfecfFuncName[] = "getGlobalMaxNumRowEntries: ";
1257  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1258  (! this->haveGlobalConstants_, std::logic_error,
1259  "The graph does not have global constants computed, "
1260  "but the user has requested them.");
1262  return globalMaxNumRowEntries_;
1263  }
1264 
1265  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1266  size_t
1268  getNodeMaxNumRowEntries () const
1269  {
1270  return nodeMaxNumRowEntries_;
1271  }
1272 
1273  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1274  bool
1276  isFillComplete () const
1277  {
1278  return fillComplete_;
1279  }
1280 
1281  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1282  bool
1284  isFillActive () const
1285  {
1286  return ! fillComplete_;
1287  }
1288 
1289 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
1290  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1291  bool TPETRA_DEPRECATED
1293  isLowerTriangular () const
1294  {
1295  return this->isLowerTriangularImpl ();
1296  }
1297 
1298  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1299  bool TPETRA_DEPRECATED
1301  isUpperTriangular () const
1302  {
1303  return this->isUpperTriangularImpl ();
1304  }
1305 
1306  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1307  bool
1308  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1309  isLowerTriangularImpl () const
1310  {
1311  return this->lowerTriangular_;
1312  }
1313 
1314  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1315  bool
1317  isUpperTriangularImpl () const
1318  {
1319  return this->upperTriangular_;
1320  }
1321 #endif // TPETRA_ENABLE_DEPRECATED_CODE
1322 
1323  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1324  bool
1326  isLocallyIndexed () const
1327  {
1328  return indicesAreLocal_;
1329  }
1330 
1331  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1332  bool
1334  isGloballyIndexed () const
1335  {
1336  return indicesAreGlobal_;
1337  }
1338 
1339  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1340  size_t
1342  getNodeAllocationSize () const
1343  {
1344  typedef LocalOrdinal LO;
1345 
1346  if (this->indicesAreAllocated_) {
1347  const LO lclNumRows = this->getNodeNumRows ();
1348  if (lclNumRows == 0) {
1349  return static_cast<size_t> (0);
1350  }
1351  else if (this->storageStatus_ == ::Tpetra::Details::STORAGE_1D_PACKED) {
1352  if (static_cast<LO> (this->lclGraph_.row_map.extent (0)) <
1353  static_cast<LO> (lclNumRows + 1)) {
1354  return static_cast<size_t> (0);
1355  }
1356  else {
1357  return ::Tpetra::Details::getEntryOnHost (this->lclGraph_.row_map, lclNumRows);
1358  }
1359  }
1360  else if (this->storageStatus_ == ::Tpetra::Details::STORAGE_1D_UNPACKED) {
1361  if (this->k_rowPtrs_.extent (0) == 0) {
1362  return static_cast<size_t> (0);
1363  }
1364  else {
1365  return ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, lclNumRows);
1366  }
1367  }
1368  else if (this->storageStatus_ == ::Tpetra::Details::STORAGE_2D) {
1369  size_t numAllocated = 0;
1370  if (this->isLocallyIndexed ()) {
1371  for (LocalOrdinal lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1372  numAllocated += this->lclInds2D_[lclRow].size ();
1373  }
1374  }
1375  else if (this->isGloballyIndexed ()) {
1376  for (LocalOrdinal lclRow = 0; lclRow < lclNumRows; ++lclRow) {
1377  numAllocated += this->gblInds2D_[lclRow].size ();
1378  }
1379  }
1380  // Neither locally nor globally indexed, means no indices allocated.
1381  return numAllocated;
1382  }
1383  else {
1384  return static_cast<size_t> (0);
1385  }
1386  }
1387  else {
1388  return Tpetra::Details::OrdinalTraits<size_t>::invalid ();
1389  }
1390  }
1391 
1392  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1393  Teuchos::RCP<const Teuchos::Comm<int> >
1395  getComm () const
1396  {
1397  return this->rowMap_.is_null () ? Teuchos::null : this->rowMap_->getComm ();
1398  }
1399 
1400  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1401  GlobalOrdinal
1403  getIndexBase () const
1404  {
1405  return rowMap_->getIndexBase ();
1406  }
1407 
1408  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1409  bool
1411  indicesAreAllocated () const
1412  {
1413  return indicesAreAllocated_;
1414  }
1415 
1416  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1417  bool
1419  isSorted () const
1420  {
1421  return indicesAreSorted_;
1422  }
1423 
1424  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1425  bool
1427  isMerged () const
1428  {
1429  return noRedundancies_;
1430  }
1431 
1432  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1433  void
1436  {
1437  // FIXME (mfh 07 May 2013) How do we know that the change
1438  // introduced a redundancy, or even that it invalidated the sorted
1439  // order of indices? CrsGraph has always made this conservative
1440  // guess. It could be a bit costly to check at insertion time,
1441  // though.
1442  indicesAreSorted_ = false;
1443  noRedundancies_ = false;
1444 
1445  // We've modified the graph, so we'll have to recompute local
1446  // constants like the number of diagonal entries on this process.
1447  haveLocalConstants_ = false;
1448  }
1449 
1450  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1451  void
1453  allocateIndices (const ELocalGlobal lg)
1454  {
1455  using Teuchos::arcp;
1456  using Teuchos::Array;
1457  using Teuchos::ArrayRCP;
1458  typedef Teuchos::ArrayRCP<size_t>::size_type size_type;
1459  typedef typename local_graph_type::row_map_type::non_const_type
1460  non_const_row_map_type;
1461  typedef typename local_graph_type::entries_type::non_const_type
1462  lcl_col_inds_type;
1463  typedef Kokkos::View<GlobalOrdinal*,
1464  typename lcl_col_inds_type::array_layout,
1465  device_type> gbl_col_inds_type;
1466  const char tfecfFuncName[] = "allocateIndices: ";
1467  const char suffix[] = " Please report this bug to the Tpetra developers.";
1468 
1469  // This is a protected function, only callable by us. If it was
1470  // called incorrectly, it is our fault. That's why the tests
1471  // below throw std::logic_error instead of std::invalid_argument.
1472  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1473  (this->isLocallyIndexed () && lg == GlobalIndices, std::logic_error,
1474  "The graph is locally indexed, but Tpetra code is calling this method "
1475  "with lg=GlobalIndices." << suffix);
1476  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1477  (this->isGloballyIndexed () && lg == LocalIndices, std::logic_error,
1478  "The graph is globally indexed, but Tpetra code is calling this method "
1479  "with lg=LocalIndices. " << suffix);
1480  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1481  (this->indicesAreAllocated (), std::logic_error, "The graph's indices "
1482  "are already allocated, but Tpetra is calling allocateIndices again."
1483  << suffix);
1484  const size_t numRows = this->getNodeNumRows ();
1485 
1486  if (this->getProfileType () == StaticProfile) {
1487  //
1488  // STATIC ALLOCATION PROFILE
1489  //
1490  non_const_row_map_type k_rowPtrs ("Tpetra::CrsGraph::ptr", numRows + 1);
1491 
1492  if (this->k_numAllocPerRow_.extent (0) != 0) {
1493  // It's OK to throw std::invalid_argument here, because we
1494  // haven't incurred any side effects yet. Throwing that
1495  // exception (and not, say, std::logic_error) implies that the
1496  // instance can recover.
1497  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1498  (this->k_numAllocPerRow_.extent (0) != numRows,
1499  std::invalid_argument, "k_numAllocPerRow_ is allocated, that is, "
1500  "has nonzero length " << this->k_numAllocPerRow_.extent (0)
1501  << ", but its length != numRows = " << numRows << ".");
1503  // k_numAllocPerRow_ is a host View, but k_rowPtrs (the thing
1504  // we want to compute here) lives on device. That's OK;
1505  // computeOffsetsFromCounts can handle this case.
1507 
1508  // FIXME (mfh 27 Jun 2016) Currently, computeOffsetsFromCounts
1509  // doesn't attempt to check its input for "invalid" flag
1510  // values. For now, we omit that feature of the sequential
1511  // code disabled below.
1512  computeOffsetsFromCounts (k_rowPtrs, k_numAllocPerRow_);
1513  }
1514  else {
1515  // It's OK to throw std::invalid_argument here, because we
1516  // haven't incurred any side effects yet. Throwing that
1517  // exception (and not, say, std::logic_error) implies that the
1518  // instance can recover.
1519  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1520  (this->numAllocForAllRows_ ==
1521  Tpetra::Details::OrdinalTraits<size_t>::invalid (),
1522  std::invalid_argument, "numAllocForAllRows_ has an invalid value, "
1523  "namely Tpetra::Details::OrdinalTraits<size_t>::invalid() = " <<
1524  Tpetra::Details::OrdinalTraits<size_t>::invalid () << ".");
1525 
1527  computeOffsetsFromConstantCount (k_rowPtrs, this->numAllocForAllRows_);
1528  }
1530  // "Commit" the resulting row offsets.
1531  this->k_rowPtrs_ = k_rowPtrs;
1532 
1533  const size_type numInds = ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, numRows);
1534  // const size_type numInds = static_cast<size_type> (this->k_rowPtrs_(numRows));
1535  if (lg == LocalIndices) {
1536  k_lclInds1D_ = lcl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1537  }
1538  else {
1539  k_gblInds1D_ = gbl_col_inds_type ("Tpetra::CrsGraph::ind", numInds);
1540  }
1541  storageStatus_ = ::Tpetra::Details::STORAGE_1D_UNPACKED;
1542  }
1543  else {
1544  //
1545  // DYNAMIC ALLOCATION PROFILE
1546  //
1547  const bool useNumAllocPerRow =
1548  (this->k_numAllocPerRow_.extent (0) != 0);
1549 
1550  if (lg == LocalIndices) {
1551  this->lclInds2D_ = arcp<Array<LocalOrdinal> > (numRows);
1552  for (size_t i = 0; i < numRows; ++i) {
1553  const size_t howMany = useNumAllocPerRow ?
1554  this->k_numAllocPerRow_(i) :
1555  this->numAllocForAllRows_;
1556  if (howMany > 0) {
1557  this->lclInds2D_[i].resize (howMany);
1558  }
1559  }
1560  }
1561  else { // allocate global indices
1562  this->gblInds2D_ = arcp<Array<GlobalOrdinal> > (numRows);
1563  for (size_t i = 0; i < numRows; ++i) {
1564  const size_t howMany = useNumAllocPerRow ?
1565  this->k_numAllocPerRow_(i) :
1566  this->numAllocForAllRows_;
1567  if (howMany > 0) {
1568  this->gblInds2D_[i].resize (howMany);
1569  }
1570  }
1571  }
1572  this->storageStatus_ = ::Tpetra::Details::STORAGE_2D;
1573  }
1574 
1575  this->indicesAreLocal_ = (lg == LocalIndices);
1576  this->indicesAreGlobal_ = (lg == GlobalIndices);
1577 
1578  if (numRows > 0) { // reallocate k_numRowEntries_ & fill w/ 0s
1579  using Kokkos::ViewAllocateWithoutInitializing;
1580  typedef decltype (k_numRowEntries_) row_ent_type;
1581  const char label[] = "Tpetra::CrsGraph::numRowEntries";
1582 
1583  row_ent_type numRowEnt (ViewAllocateWithoutInitializing (label), numRows);
1584  Kokkos::deep_copy (numRowEnt, static_cast<size_t> (0)); // fill w/ 0s
1585  this->k_numRowEntries_ = numRowEnt; // "commit" our allocation
1586  }
1587 
1588  // Once indices are allocated, CrsGraph needs to free this information.
1589  this->numAllocForAllRows_ = 0;
1590  this->k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
1591  this->indicesAreAllocated_ = true;
1592 
1593  try {
1594  this->checkInternalState ();
1595  }
1596  catch (std::logic_error& e) {
1597  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1598  (true, std::logic_error, "At end of allocateIndices, "
1599  "checkInternalState threw std::logic_error: "
1600  << e.what ());
1601  }
1602  catch (std::exception& e) {
1603  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1604  (true, std::runtime_error, "At end of allocateIndices, "
1605  "checkInternalState threw std::exception: "
1606  << e.what ());
1607  }
1608  catch (...) {
1609  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
1610  (true, std::runtime_error, "At end of allocateIndices, "
1611  "checkInternalState threw an exception "
1612  "not a subclass of std::exception.");
1613  }
1614  }
1615 
1616  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1617  Teuchos::ArrayView<const LocalOrdinal>
1619  getLocalView (const RowInfo& rowinfo) const
1620  {
1621  using Kokkos::subview;
1622  typedef LocalOrdinal LO;
1623  typedef Kokkos::View<const LO*, execution_space,
1624  Kokkos::MemoryUnmanaged> row_view_type;
1625 
1626  if (rowinfo.allocSize == 0) {
1627  return Teuchos::ArrayView<const LO> ();
1628  }
1629  else { // nothing in the row to view
1630  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1631  const size_t start = rowinfo.offset1D;
1632  const size_t len = rowinfo.allocSize;
1633  const std::pair<size_t, size_t> rng (start, start + len);
1634  // mfh 23 Nov 2015: Don't just create a subview of
1635  // k_lclInds1D_ directly, because that first creates a
1636  // _managed_ subview, then returns an unmanaged version of
1637  // that. That touches the reference count, which costs
1638  // performance in a measurable way.
1639  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1640  const LO* const rowViewRaw = (len == 0) ? nullptr : rowView.data ();
1641  return Teuchos::ArrayView<const LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1642  }
1643  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1644  return lclInds2D_[rowinfo.localRow] ();
1645  }
1646  else {
1647  return Teuchos::ArrayView<const LO> (); // nothing in the row to view
1648  }
1649  }
1650  }
1651 
1652  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1653  LocalOrdinal
1655  getLocalViewRawConst (const LocalOrdinal*& lclInds,
1656  LocalOrdinal& capacity,
1657  const RowInfo& rowInfo) const
1658  {
1659  lclInds = nullptr;
1660  capacity = 0;
1661 #ifdef HAVE_TPETRA_DEBUG
1662  constexpr bool debug = true;
1663 #else
1664  constexpr bool debug = false;
1665 #endif // HAVE_TPETRA_DEBUG
1666 
1667  if (rowInfo.allocSize != 0) {
1668  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1669  if (debug) {
1670  if (rowInfo.offset1D + rowInfo.allocSize >
1671  static_cast<size_t> (k_lclInds1D_.extent (0))) {
1672  return static_cast<LocalOrdinal> (-1);
1673  }
1674  }
1675  lclInds = &k_lclInds1D_[rowInfo.offset1D];
1676  capacity = rowInfo.allocSize;
1677  }
1678  else { // 2-D storage
1679  if (debug) {
1680  if (rowInfo.localRow >= static_cast<size_t> (lclInds2D_.size ())) {
1681  return static_cast<LocalOrdinal> (-1);
1682  }
1683  }
1684  // Use a const reference so we don't touch the ArrayRCP's ref
1685  // count, since ArrayRCP's ref count is not thread safe.
1686  const auto& curRow = lclInds2D_[rowInfo.localRow];
1687  if (! curRow.empty ()) {
1688  lclInds = curRow.getRawPtr ();
1689  capacity = curRow.size ();
1690  }
1691  }
1692  }
1693  return static_cast<LocalOrdinal> (0);
1694  }
1695 
1696  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1697  Teuchos::ArrayView<LocalOrdinal>
1699  getLocalViewNonConst (const RowInfo& rowinfo)
1700  {
1701  using Kokkos::subview;
1702  typedef LocalOrdinal LO;
1703  typedef Kokkos::View<LO*, execution_space,
1704  Kokkos::MemoryUnmanaged> row_view_type;
1705 
1706  if (rowinfo.allocSize == 0) { // nothing in the row to view
1707  return Teuchos::ArrayView<LO> ();
1708  }
1709  else {
1710  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1711  const size_t start = rowinfo.offset1D;
1712  const size_t len = rowinfo.allocSize;
1713  const std::pair<size_t, size_t> rng (start, start + len);
1714  // mfh 23 Nov 2015: Don't just create a subview of
1715  // k_lclInds1D_ directly, because that first creates a
1716  // _managed_ subview, then returns an unmanaged version of
1717  // that. That touches the reference count, which costs
1718  // performance in a measurable way.
1719  row_view_type rowView = subview (row_view_type (k_lclInds1D_), rng);
1720  LO* const rowViewRaw = (len == 0) ? nullptr : rowView.data ();
1721  return Teuchos::ArrayView<LO> (rowViewRaw, len, Teuchos::RCP_DISABLE_NODE_LOOKUP);
1722  }
1723  else if (! lclInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1724  return lclInds2D_[rowinfo.localRow] ();
1725  }
1726  else {
1727  return Teuchos::ArrayView<LO> (); // nothing in the row to view
1728  }
1729  }
1730  }
1731 
1732 
1733  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1734  Kokkos::View<const LocalOrdinal*,
1736  Kokkos::MemoryUnmanaged>
1738  getLocalKokkosRowView (const RowInfo& rowInfo) const
1739  {
1740  typedef LocalOrdinal LO;
1741  typedef Kokkos::View<const LO*, execution_space,
1742  Kokkos::MemoryUnmanaged> row_view_type;
1743 
1744  if (rowInfo.allocSize == 0) {
1745  return row_view_type ();
1746  }
1747  else { // nothing in the row to view
1748  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1749  const size_t start = rowInfo.offset1D;
1750  const size_t len = rowInfo.allocSize;
1751  const std::pair<size_t, size_t> rng (start, start + len);
1752  // mfh 23 Nov 2015: Don't just create a subview of
1753  // k_lclInds1D_ directly, because that first creates a
1754  // _managed_ subview, then returns an unmanaged version of
1755  // that. That touches the reference count, which costs
1756  // performance in a measurable way.
1757  return Kokkos::subview (row_view_type (k_lclInds1D_), rng);
1758  }
1759  else if (! this->lclInds2D_[rowInfo.localRow].empty ()) { // 2-D storage
1760  // Use a reference, so that I don't touch the
1761  // Teuchos::ArrayView reference count in a debug build. (It
1762  // has no reference count in a release build.) This ensures
1763  // thread safety.
1764  //
1765  // lclInds2D_ lives on host, so this code does not assume UVM.
1766  Teuchos::Array<LO>& lclInds = this->lclInds2D_[rowInfo.localRow];
1767  return row_view_type (lclInds.getRawPtr (), lclInds.size ());
1768  }
1769  else {
1770  return row_view_type (); // nothing in the row to view
1771  }
1772  }
1773  }
1774 
1775 
1776  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1777  Kokkos::View<LocalOrdinal*,
1778  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::execution_space,
1779  Kokkos::MemoryUnmanaged>
1780  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1781  getLocalKokkosRowViewNonConst (const RowInfo& rowInfo)
1782  {
1783  typedef LocalOrdinal LO;
1784  typedef Kokkos::View<LO*, execution_space,
1785  Kokkos::MemoryUnmanaged> row_view_type;
1786 
1787  if (rowInfo.allocSize == 0) {
1788  return row_view_type ();
1789  }
1790  else { // nothing in the row to view
1791  if (k_lclInds1D_.extent (0) != 0) { // 1-D storage
1792  const size_t start = rowInfo.offset1D;
1793  const size_t len = rowInfo.allocSize;
1794  const std::pair<size_t, size_t> rng (start, start + len);
1795  // mfh 23 Nov 2015: Don't just create a subview of
1796  // k_lclInds1D_ directly, because that first creates a
1797  // _managed_ subview, then returns an unmanaged version of
1798  // that. That touches the reference count, which costs
1799  // performance in a measurable way.
1800  return Kokkos::subview (row_view_type (this->k_lclInds1D_), rng);
1801  }
1802  else if (! this->lclInds2D_[rowInfo.localRow].empty ()) { // 2-D storage
1803  // Use a reference, so that I don't touch the
1804  // Teuchos::ArrayView reference count in a debug build. (It
1805  // has no reference count in a release build.) This ensures
1806  // thread safety.
1807  //
1808  // lclInds2D_ lives on host, so this code does not assume UVM.
1809  Teuchos::Array<LO>& cols = this->lclInds2D_[rowInfo.localRow];
1810  LO* const colsRaw = cols.getRawPtr ();
1811  return row_view_type (colsRaw, cols.size ());
1812  }
1813  else {
1814  return row_view_type (); // nothing in the row to view
1815  }
1816  }
1817  }
1818 
1819 
1820  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1821  Kokkos::View<const GlobalOrdinal*,
1822  typename CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::execution_space,
1823  Kokkos::MemoryUnmanaged>
1824  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
1825  getGlobalKokkosRowView (const RowInfo& rowinfo) const
1826  {
1827  typedef GlobalOrdinal GO;
1828  typedef Kokkos::View<const GO*, execution_space,
1829  Kokkos::MemoryUnmanaged> row_view_type;
1830 
1831  if (rowinfo.allocSize == 0) {
1832  return row_view_type ();
1833  }
1834  else { // nothing in the row to view
1835  if (this->k_gblInds1D_.extent (0) != 0) { // 1-D storage
1836  const size_t start = rowinfo.offset1D;
1837  const size_t len = rowinfo.allocSize;
1838  const std::pair<size_t, size_t> rng (start, start + len);
1839  // mfh 23 Nov 2015: Don't just create a subview of
1840  // k_gblInds1D_ directly, because that first creates a
1841  // _managed_ subview, then returns an unmanaged version of
1842  // that. That touches the reference count, which costs
1843  // performance in a measurable way.
1844  return Kokkos::subview (row_view_type (this->k_gblInds1D_), rng);
1845  }
1846  else if (! this->gblInds2D_[rowinfo.localRow].empty ()) { // 2-D storage
1847  // Use a reference, so that I don't touch the
1848  // Teuchos::ArrayView reference count in a debug build. (It
1849  // has no reference count in a release build.) This ensures
1850  // thread safety.
1851  //
1852  // gblInds2D_ lives on host, so this code does not assume UVM.
1853  Teuchos::Array<GO>& cols = this->gblInds2D_[rowinfo.localRow];
1854  return row_view_type (cols.getRawPtr (), cols.size ());
1855  }
1856  else {
1857  return row_view_type (); // nothing in the row to view
1858  }
1859  }
1860  }
1861 
1862 
1863  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1864  Teuchos::ArrayView<const GlobalOrdinal>
1866  getGlobalView (const RowInfo& rowinfo) const
1867  {
1868  Teuchos::ArrayView<const GlobalOrdinal> view;
1869  if (rowinfo.allocSize > 0) {
1870  if (k_gblInds1D_.extent (0) != 0) {
1871  auto rng = std::make_pair (rowinfo.offset1D,
1872  rowinfo.offset1D + rowinfo.allocSize);
1873  // mfh 23 Nov 2015: Don't just create a subview of
1874  // k_gblInds1D_ directly, because that first creates a
1875  // _managed_ subview, then returns an unmanaged version of
1876  // that. That touches the reference count, which costs
1877  // performance in a measurable way.
1878  Kokkos::View<const GlobalOrdinal*, execution_space,
1879  Kokkos::MemoryUnmanaged> k_gblInds1D_unmanaged = k_gblInds1D_;
1880  view = Kokkos::Compat::getConstArrayView (Kokkos::subview (k_gblInds1D_unmanaged, rng));
1881  }
1882  else if (! gblInds2D_[rowinfo.localRow].empty()) {
1883  view = gblInds2D_[rowinfo.localRow] ();
1884  }
1885  }
1886  return view;
1887  }
1888 
1889 
1890  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1891  LocalOrdinal
1893  getGlobalViewRawConst (const GlobalOrdinal*& gblInds,
1894  LocalOrdinal& capacity,
1895  const RowInfo& rowInfo) const
1896  {
1897  gblInds = nullptr;
1898  capacity = 0;
1899 #ifdef HAVE_TPETRA_DEBUG
1900  constexpr bool debug = true;
1901 #else
1902  constexpr bool debug = false;
1903 #endif // HAVE_TPETRA_DEBUG
1904 
1905  if (rowInfo.allocSize != 0) {
1906  if (k_gblInds1D_.extent (0) != 0) { // 1-D storage
1907  if (debug) {
1908  if (rowInfo.offset1D + rowInfo.allocSize >
1909  static_cast<size_t> (k_gblInds1D_.extent (0))) {
1910  return static_cast<LocalOrdinal> (-1);
1911  }
1912  }
1913  gblInds = &k_gblInds1D_[rowInfo.offset1D];
1914  capacity = rowInfo.allocSize;
1915  }
1916  else {
1917  if (debug) {
1918  if (rowInfo.localRow >= static_cast<size_t> (gblInds2D_.size ())) {
1919  return static_cast<LocalOrdinal> (-1);
1920  }
1921  }
1922  const auto& curRow = gblInds2D_[rowInfo.localRow];
1923  if (! curRow.empty ()) {
1924  gblInds = curRow.getRawPtr ();
1925  capacity = curRow.size ();
1926  }
1927  }
1928  }
1929  return static_cast<LocalOrdinal> (0);
1930  }
1931 
1932 
1933  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1934  Teuchos::ArrayView<GlobalOrdinal>
1936  getGlobalViewNonConst (const RowInfo& rowinfo)
1937  {
1938  Teuchos::ArrayView<GlobalOrdinal> view;
1939  if (rowinfo.allocSize > 0) {
1940  if (k_gblInds1D_.extent (0) != 0) {
1941  auto rng = std::make_pair (rowinfo.offset1D,
1942  rowinfo.offset1D + rowinfo.allocSize);
1943  // mfh 23 Nov 2015: Don't just create a subview of
1944  // k_gblInds1D_ directly, because that first creates a
1945  // _managed_ subview, then returns an unmanaged version of
1946  // that. That touches the reference count, which costs
1947  // performance in a measurable way.
1948  Kokkos::View<GlobalOrdinal*, execution_space,
1949  Kokkos::MemoryUnmanaged> k_gblInds1D_unmanaged = k_gblInds1D_;
1950  view = Kokkos::Compat::getArrayView (Kokkos::subview (k_gblInds1D_unmanaged, rng));
1951  }
1952  else if (! gblInds2D_[rowinfo.localRow].empty()) {
1953  view = gblInds2D_[rowinfo.localRow] ();
1954  }
1955  }
1956  return view;
1957  }
1958 
1959 
1960  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1961  RowInfo
1963  getRowInfo (const LocalOrdinal myRow) const
1964  {
1965  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
1966  RowInfo ret;
1967  if (this->rowMap_.is_null () || ! this->rowMap_->isNodeLocalElement (myRow)) {
1968  ret.localRow = STINV;
1969  ret.allocSize = 0;
1970  ret.numEntries = 0;
1971  ret.offset1D = STINV;
1972  return ret;
1973  }
1974 
1975  ret.localRow = static_cast<size_t> (myRow);
1976  if (this->indicesAreAllocated ()) {
1977  if (this->getProfileType () == StaticProfile) {
1978  // Offsets tell us the allocation size in this case.
1979  if (this->k_rowPtrs_.extent (0) == 0) {
1980  ret.offset1D = 0;
1981  ret.allocSize = 0;
1982  }
1983  else {
1984  ret.offset1D = this->k_rowPtrs_(myRow);
1985  ret.allocSize = this->k_rowPtrs_(myRow+1) - this->k_rowPtrs_(myRow);
1986  }
1987 
1988  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
1989  ret.allocSize :
1990  this->k_numRowEntries_(myRow);
1991  }
1992  else { // DynamicProfile
1993  ret.offset1D = STINV;
1994  if (this->isLocallyIndexed ()) {
1995  ret.allocSize = (this->lclInds2D_.size () == 0) ?
1996  size_t (0) :
1997  this->lclInds2D_[myRow].size ();
1998  }
1999  else if (this->isGloballyIndexed ()) {
2000  ret.allocSize = (this->gblInds2D_.size () == 0) ?
2001  size_t (0) :
2002  this->gblInds2D_[myRow].size ();
2003  }
2004  else { // neither locally nor globally indexed means no indices alloc'd
2005  ret.allocSize = 0;
2006  }
2007 
2008  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
2009  size_t (0) :
2010  this->k_numRowEntries_(myRow);
2011  }
2012  }
2013  else { // haven't performed allocation yet; probably won't hit this code
2014  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
2015  // allocate, rather than doing lazy allocation at first insert.
2016  // This will make k_numAllocPerRow_ obsolete.
2017  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
2018  this->k_numAllocPerRow_(myRow) : // this is a host View
2019  this->numAllocForAllRows_;
2020  ret.numEntries = 0;
2021  ret.offset1D = STINV;
2022  }
2023 
2024  return ret;
2025  }
2026 
2027 
2028  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2029  RowInfo
2031  getRowInfoFromGlobalRowIndex (const GlobalOrdinal gblRow) const
2032  {
2033  const size_t STINV = Teuchos::OrdinalTraits<size_t>::invalid ();
2034  RowInfo ret;
2035  if (this->rowMap_.is_null ()) {
2036  ret.localRow = STINV;
2037  ret.allocSize = 0;
2038  ret.numEntries = 0;
2039  ret.offset1D = STINV;
2040  return ret;
2041  }
2042  const LocalOrdinal myRow = this->rowMap_->getLocalElement (gblRow);
2043  if (myRow == Teuchos::OrdinalTraits<LocalOrdinal>::invalid ()) {
2044  ret.localRow = STINV;
2045  ret.allocSize = 0;
2046  ret.numEntries = 0;
2047  ret.offset1D = STINV;
2048  return ret;
2049  }
2050 
2051  ret.localRow = static_cast<size_t> (myRow);
2052  if (this->indicesAreAllocated ()) {
2053  // graph data structures have the info that we need
2054  //
2055  // if static graph, offsets tell us the allocation size
2056  if (this->getProfileType() == StaticProfile) {
2057  if (this->k_rowPtrs_.extent (0) == 0) {
2058  ret.offset1D = 0;
2059  ret.allocSize = 0;
2060  }
2061  else {
2062  ret.offset1D = this->k_rowPtrs_(myRow);
2063  ret.allocSize = this->k_rowPtrs_(myRow+1) - this->k_rowPtrs_(myRow);
2064  }
2065 
2066  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
2067  ret.allocSize :
2068  this->k_numRowEntries_(myRow);
2069  }
2070  else { // DynamicProfile
2071  ret.offset1D = STINV;
2072  if (this->isLocallyIndexed ()) {
2073  ret.allocSize = (this->lclInds2D_.size () == 0) ?
2074  size_t (0) :
2075  this->lclInds2D_[myRow].size ();
2076  }
2077  else {
2078  ret.allocSize = (this->gblInds2D_.size () == 0) ?
2079  size_t (0) :
2080  this->gblInds2D_[myRow].size ();
2081  }
2082 
2083  ret.numEntries = (this->k_numRowEntries_.extent (0) == 0) ?
2084  size_t (0) :
2085  this->k_numRowEntries_(myRow);
2086  }
2087  }
2088  else { // haven't performed allocation yet; probably won't hit this code
2089  // FIXME (mfh 07 Aug 2014) We want graph's constructors to
2090  // allocate, rather than doing lazy allocation at first insert.
2091  // This will make k_numAllocPerRow_ obsolete.
2092  ret.allocSize = (this->k_numAllocPerRow_.extent (0) != 0) ?
2093  this->k_numAllocPerRow_(myRow) : // this is a host View
2094  this->numAllocForAllRows_;
2095  ret.numEntries = 0;
2096  ret.offset1D = STINV;
2097  }
2098 
2099  return ret;
2100  }
2101 
2102 
2103  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2104  void
2105  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
2106  staticAssertions () const
2107  {
2108  using Teuchos::OrdinalTraits;
2109  typedef LocalOrdinal LO;
2110  typedef GlobalOrdinal GO;
2111  typedef global_size_t GST;
2112 
2113  // Assumption: sizeof(GlobalOrdinal) >= sizeof(LocalOrdinal):
2114  // This is so that we can store local indices in the memory
2115  // formerly occupied by global indices.
2116  static_assert (sizeof (GlobalOrdinal) >= sizeof (LocalOrdinal),
2117  "Tpetra::CrsGraph: sizeof(GlobalOrdinal) must be >= sizeof(LocalOrdinal).");
2118  // Assumption: max(size_t) >= max(LocalOrdinal)
2119  // This is so that we can represent any LocalOrdinal as a size_t.
2120  static_assert (sizeof (size_t) >= sizeof (LocalOrdinal),
2121  "Tpetra::CrsGraph: sizeof(size_t) must be >= sizeof(LocalOrdinal).");
2122  static_assert (sizeof(GST) >= sizeof(size_t),
2123  "Tpetra::CrsGraph: sizeof(Tpetra::global_size_t) must be >= sizeof(size_t).");
2124 
2125  // FIXME (mfh 30 Sep 2015) We're not using
2126  // Teuchos::CompileTimeAssert any more. Can we do these checks
2127  // with static_assert?
2128 
2129  // can't call max() with CompileTimeAssert, because it isn't a
2130  // constant expression; will need to make this a runtime check
2131  const char msg[] = "Tpetra::CrsGraph: Object cannot be created with the "
2132  "given template arguments: size assumptions are not valid.";
2133  TEUCHOS_TEST_FOR_EXCEPTION(
2134  static_cast<size_t> (Teuchos::OrdinalTraits<LO>::max ()) > Teuchos::OrdinalTraits<size_t>::max (),
2135  std::runtime_error, msg);
2136  TEUCHOS_TEST_FOR_EXCEPTION(
2137  static_cast<GST> (Teuchos::OrdinalTraits<LO>::max ()) > static_cast<GST> (Teuchos::OrdinalTraits<GO>::max ()),
2138  std::runtime_error, msg);
2139  TEUCHOS_TEST_FOR_EXCEPTION(
2140  static_cast<size_t> (Teuchos::OrdinalTraits<GO>::max ()) > Teuchos::OrdinalTraits<GST>::max(),
2141  std::runtime_error, msg);
2142  TEUCHOS_TEST_FOR_EXCEPTION(
2143  Teuchos::OrdinalTraits<size_t>::max () > Teuchos::OrdinalTraits<GST>::max (),
2144  std::runtime_error, msg);
2145  }
2146 
2147 
2148  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2149  size_t
2151  insertIndices (RowInfo& rowinfo,
2152  const SLocalGlobalViews &newInds,
2153  const ELocalGlobal lg,
2154  const ELocalGlobal I)
2155  {
2156  using Teuchos::ArrayView;
2157  typedef LocalOrdinal LO;
2158  typedef GlobalOrdinal GO;
2159  const char tfecfFuncName[] = "insertIndices: ";
2160 #ifdef HAVE_TPETRA_DEBUG
2161  constexpr bool debug = true;
2162 #else
2163  constexpr bool debug = false;
2164 #endif // HAVE_TPETRA_DEBUG
2165 
2166  size_t oldNumEnt = 0;
2167  if (debug) {
2168  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2169  (lg != GlobalIndices && lg != LocalIndices, std::invalid_argument,
2170  "lg must be either GlobalIndices or LocalIndices.");
2171  oldNumEnt = this->getNumEntriesInLocalRow (rowinfo.localRow);
2172  }
2173 
2174  size_t numNewInds = 0;
2175  if (lg == GlobalIndices) { // input indices are global
2176  ArrayView<const GO> new_ginds = newInds.ginds;
2177  numNewInds = new_ginds.size();
2178  if (I == GlobalIndices) { // store global indices
2179  ArrayView<GO> gind_view = this->getGlobalViewNonConst (rowinfo);
2180  if (debug) {
2181  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2182  (static_cast<size_t> (gind_view.size ()) <
2183  rowinfo.numEntries + numNewInds, std::logic_error,
2184  "gind_view.size() = " << gind_view.size ()
2185  << " < rowinfo.numEntries (= " << rowinfo.numEntries
2186  << ") + numNewInds (= " << numNewInds << ").");
2187  }
2188  GO* const gblColInds_out = gind_view.getRawPtr () + rowinfo.numEntries;
2189  for (size_t k = 0; k < numNewInds; ++k) {
2190  gblColInds_out[k] = new_ginds[k];
2191  }
2192  }
2193  else if (I == LocalIndices) { // store local indices
2194  ArrayView<LO> lind_view = this->getLocalViewNonConst (rowinfo);
2195  if (debug) {
2196  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2197  (static_cast<size_t> (lind_view.size ()) <
2198  rowinfo.numEntries + numNewInds, std::logic_error,
2199  "lind_view.size() = " << lind_view.size ()
2200  << " < rowinfo.numEntries (= " << rowinfo.numEntries
2201  << ") + numNewInds (= " << numNewInds << ").");
2202  }
2203  LO* const lclColInds_out = lind_view.getRawPtr () + rowinfo.numEntries;
2204  for (size_t k = 0; k < numNewInds; ++k) {
2205  lclColInds_out[k] = colMap_->getLocalElement (new_ginds[k]);
2206  }
2207  }
2208  }
2209  else if (lg == LocalIndices) { // input indices are local
2210  ArrayView<const LO> new_linds = newInds.linds;
2211  numNewInds = new_linds.size();
2212  if (I == LocalIndices) { // store local indices
2213  ArrayView<LO> lind_view = this->getLocalViewNonConst (rowinfo);
2214  if (debug) {
2215  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2216  (static_cast<size_t> (lind_view.size ()) <
2217  rowinfo.numEntries + numNewInds, std::logic_error,
2218  "lind_view.size() = " << lind_view.size ()
2219  << " < rowinfo.numEntries (= " << rowinfo.numEntries
2220  << ") + numNewInds (= " << numNewInds << ").");
2221  }
2222  LO* const lclColInds_out = lind_view.getRawPtr () + rowinfo.numEntries;
2223  for (size_t k = 0; k < numNewInds; ++k) {
2224  lclColInds_out[k] = new_linds[k];
2225  }
2226  }
2227  else if (I == GlobalIndices) {
2228  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2229  (true, std::logic_error, "The case where the input indices are local "
2230  "and the indices to write are global (lg=LocalIndices, I="
2231  "GlobalIndices) is not implemented, because it does not make sense."
2232  << std::endl << "If you have correct local column indices, that "
2233  "means the graph has a column Map. In that case, you should be "
2234  "storing local indices.");
2235  }
2236  }
2237 
2238  rowinfo.numEntries += numNewInds;
2239  this->k_numRowEntries_(rowinfo.localRow) += numNewInds;
2240  this->setLocallyModified ();
2241 
2242  if (debug) {
2243  const size_t chkNewNumEnt =
2244  this->getNumEntriesInLocalRow (rowinfo.localRow);
2245  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2246  (chkNewNumEnt != oldNumEnt + numNewInds, std::logic_error,
2247  "chkNewNumEnt = " << chkNewNumEnt
2248  << " != oldNumEnt (= " << oldNumEnt
2249  << ") + numNewInds (= " << numNewInds << ").");
2250  }
2251 
2252  return numNewInds;
2253  }
2254 
2255  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2256  size_t
2258  insertGlobalIndicesImpl (const LocalOrdinal lclRow,
2259  const GlobalOrdinal inputGblColInds[],
2260  const size_t numInputInds)
2261  {
2262  return this->insertGlobalIndicesImpl (this->getRowInfo (lclRow),
2263  inputGblColInds, numInputInds);
2264  }
2265 
2266  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2267  size_t
2269  insertGlobalIndicesImpl (const RowInfo& rowInfo,
2270  const GlobalOrdinal inputGblColInds[],
2271  const size_t numInputInds,
2272  std::function<void(const size_t, const size_t, const size_t)> fun)
2273  {
2274  using Kokkos::View;
2275  using Kokkos::subview;
2276  using Kokkos::MemoryUnmanaged;
2277  using LO = LocalOrdinal;
2278  using GO = GlobalOrdinal;
2279  const char tfecfFuncName[] = "insertGlobalIndicesImpl: ";
2280 #ifdef HAVE_TPETRA_DEBUG
2281  constexpr bool debug = true;
2282 #else
2283  constexpr bool debug = false;
2284 #endif // HAVE_TPETRA_DEBUG
2285 
2286  const LO lclRow = static_cast<LO> (rowInfo.localRow);
2287 
2288  if (this->getProfileType () == StaticProfile) {
2289  auto numEntries = rowInfo.numEntries;
2290  using inp_view_type = View<const GO*, execution_space, MemoryUnmanaged>;
2291  inp_view_type inputInds(inputGblColInds, numInputInds);
2292  size_t numInserted = Details::insertCrsIndices(lclRow, k_rowPtrs_,
2293  this->k_gblInds1D_, numEntries, inputInds, fun);
2294  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2295  numInserted == Teuchos::OrdinalTraits<size_t>::invalid(),
2296  std::runtime_error,
2297  "There is not enough capacity to insert indices in to row " << lclRow <<
2298  ". The upper bound on the number of entries in this row must be increased to "
2299  "accommodate one or more of the new indices.");
2300  this->k_numRowEntries_(lclRow) += numInserted;
2301  this->setLocallyModified();
2302  return numInserted;
2303  }
2304  else {
2305  // NOTE (DYNAMICPROFILE_REMOVAL) remove block
2306  size_t newNumEntries = rowInfo.numEntries + numInputInds; // preliminary
2307  if (newNumEntries > rowInfo.allocSize) {
2308  // update allocation, doubling size to reduce # reallocations
2309  size_t newAllocSize = 2*rowInfo.allocSize;
2310  if (newAllocSize < newNumEntries) {
2311  newAllocSize = newNumEntries;
2312  }
2313  this->gblInds2D_[lclRow].resize (newAllocSize);
2314  } // newNumEntries > rowInfo.allocSize
2315 
2316  // Copy new indices at end of global index array
2317  GO* const whereToPutGblColInds =
2318  this->gblInds2D_[lclRow].getRawPtr () + rowInfo.numEntries;
2319  for (size_t k_new = 0; k_new < numInputInds; ++k_new) {
2320  whereToPutGblColInds[k_new] = inputGblColInds[k_new];
2321  }
2322  this->k_numRowEntries_(lclRow) += numInputInds;
2323  this->setLocallyModified ();
2324 
2325  if (debug) {
2326  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (lclRow);
2327  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2328  (chkNewNumEntries != newNumEntries, std::logic_error,
2329  "getNumEntriesInLocalRow(lclRow=" << lclRow << ") = "
2330  << chkNewNumEntries << " != newNumEntries = " << newNumEntries
2331  << ". Please report this bug to the Tpetra developers.");
2332  }
2333  return numInputInds;
2334  }
2335  }
2336 
2337 
2338  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2339  void
2340  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
2341  insertLocalIndicesImpl (const LocalOrdinal myRow,
2342  const Teuchos::ArrayView<const LocalOrdinal>& indices,
2343  std::function<void(const size_t, const size_t, const size_t)> fun)
2344  {
2345  using Kokkos::MemoryUnmanaged;
2346  using Kokkos::subview;
2347  using Kokkos::View;
2348  using LO = LocalOrdinal;
2349  const char tfecfFuncName[] = "insertLocallIndicesImpl: ";
2350 
2351  const RowInfo rowInfo = this->getRowInfo(myRow);
2352 
2353  size_t numNewInds = 0;
2354  size_t newNumEntries = 0;
2355 
2356  if (this->getProfileType () == StaticProfile) {
2357  auto numEntries = rowInfo.numEntries;
2358  // Note: Teuchos::ArrayViews are in HostSpace
2359  using inp_view_type = View<const LO*, Kokkos::HostSpace, MemoryUnmanaged>;
2360  inp_view_type inputInds(indices.getRawPtr(), indices.size());
2361  auto numInserted = Details::insertCrsIndices(myRow, k_rowPtrs_,
2362  this->k_lclInds1D_, numEntries, inputInds, fun);
2363  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2364  numInserted == Teuchos::OrdinalTraits<size_t>::invalid(),
2365  std::runtime_error,
2366  "There is not enough capacity to insert indices in to row " << myRow <<
2367  ". The upper bound on the number of entries in this row must be increased to "
2368  "accommodate one or more of the new indices.");
2369  numNewInds = numInserted;
2370  newNumEntries = rowInfo.numEntries + numNewInds;
2371  }
2372  else {
2373  // NOTE (DYNAMICPROFILE_REMOVAL) remove block
2374  numNewInds = indices.size();
2375  newNumEntries = rowInfo.numEntries + numNewInds;
2376  if (newNumEntries > rowInfo.allocSize) {
2377  // update allocation, doubling size to reduce number of reallocations
2378  size_t newAllocSize = 2*rowInfo.allocSize;
2379  if (newAllocSize < newNumEntries) {
2380  newAllocSize = newNumEntries;
2381  }
2382  this->lclInds2D_[myRow].resize(newAllocSize);
2383  }
2384  std::copy (indices.begin (), indices.end (),
2385  this->lclInds2D_[myRow].begin () + rowInfo.numEntries);
2386  }
2387 
2388  this->k_numRowEntries_(myRow) += numNewInds;
2389  this->setLocallyModified ();
2390 
2391 #ifdef HAVE_TPETRA_DEBUG
2392  constexpr bool debug = true;
2393 #else
2394  constexpr bool debug = false;
2395 #endif // HAVE_TPETRA_DEBUG
2396 
2397  if (debug) {
2398  const size_t chkNewNumEntries = this->getNumEntriesInLocalRow (myRow);
2399  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2400  (chkNewNumEntries != newNumEntries, std::logic_error,
2401  "getNumEntriesInLocalRow(" << myRow << ") = " << chkNewNumEntries
2402  << " != newNumEntries = " << newNumEntries
2403  << ". Please report this bug to the Tpetra developers.");
2404  }
2405  }
2406 
2407 
2408  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2409  size_t
2411  findLocalIndices(const RowInfo& rowInfo,
2412  const Teuchos::ArrayView<const LocalOrdinal>& indices,
2413  std::function<void(const size_t, const size_t, const size_t)> fun) const
2414  {
2415 #ifdef HAVE_TPETRA_DEBUG
2416  const char tfecfFuncName[] = "findLocalIndices: ";
2417  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2418  (this->getProfileType() != StaticProfile, std::runtime_error,
2419  "findLocalIndices requires that the graph have StaticProfile.");
2420 #endif // HAVE_TPETRA_DEBUG
2421  using LO = LocalOrdinal;
2422  using inp_view_type = Kokkos::View<const LO*, Kokkos::HostSpace,
2423  Kokkos::MemoryUnmanaged>;
2424  inp_view_type inputInds(indices.getRawPtr(), indices.size());
2425 
2426  size_t numFound = 0;
2427  LO lclRow = rowInfo.localRow;
2428  if (this->isLocallyIndexed())
2429  {
2430  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_, rowInfo.numEntries,
2431  this->k_lclInds1D_, inputInds, fun);
2432  }
2433  else if (this->isGloballyIndexed())
2434  {
2435  if (this->colMap_.is_null())
2436  return Teuchos::OrdinalTraits<size_t>::invalid();
2437  const auto& colMap = *(this->colMap_);
2438  auto map = [&](LO const lclInd){return colMap.getGlobalElement(lclInd);};
2439  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_, rowInfo.numEntries,
2440  this->k_gblInds1D_, inputInds, map, fun);
2441  }
2442  return numFound;
2443  }
2444 
2445 
2446  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2447  size_t
2449  findGlobalIndices(const RowInfo& rowInfo,
2450  const Teuchos::ArrayView<const GlobalOrdinal>& indices,
2451  std::function<void(const size_t, const size_t, const size_t)> fun) const
2452  {
2453  const char tfecfFuncName[] = "findGlobalIndices: ";
2454  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
2455  this->getProfileType() != StaticProfile,
2456  std::runtime_error,
2457  "findLocalIndices requires the graph have StaticProfile");
2458 
2459  using GO = GlobalOrdinal;
2460  using Kokkos::View;
2461  using Kokkos::MemoryUnmanaged;
2462  auto invalidCount = Teuchos::OrdinalTraits<size_t>::invalid();
2463 
2464  using inp_view_type = View<const GO*, execution_space, MemoryUnmanaged>;
2465  inp_view_type inputInds(indices.getRawPtr(), indices.size());
2466 
2467  size_t numFound = 0;
2468  LocalOrdinal lclRow = rowInfo.localRow;
2469  if (this->isLocallyIndexed())
2470  {
2471  if (this->colMap_.is_null())
2472  return invalidCount;
2473  const auto& colMap = *(this->colMap_);
2474  auto map = [&](GO const gblInd){return colMap.getLocalElement(gblInd);};
2475  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_, rowInfo.numEntries,
2476  this->k_lclInds1D_, inputInds, map, fun);
2477  }
2478  else if (this->isGloballyIndexed())
2479  {
2480  numFound = Details::findCrsIndices(lclRow, k_rowPtrs_, rowInfo.numEntries,
2481  this->k_gblInds1D_, inputInds, fun);
2482  }
2483  return numFound;
2484  }
2485 
2486 
2487  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2488  size_t
2490  sortAndMergeRowIndices (const RowInfo& rowInfo,
2491  const bool sorted,
2492  const bool merged)
2493  {
2494  const size_t origNumEnt = rowInfo.numEntries;
2495  if (origNumEnt != Tpetra::Details::OrdinalTraits<size_t>::invalid () &&
2496  origNumEnt != 0) {
2497  auto lclColInds = this->getLocalKokkosRowViewNonConst (rowInfo);
2498 
2499  LocalOrdinal* const lclColIndsRaw = lclColInds.data ();
2500  if (! sorted) {
2501  // FIXME (mfh 08 May 2017) This assumes CUDA UVM.
2502  std::sort (lclColIndsRaw, lclColIndsRaw + origNumEnt);
2503  }
2504 
2505  if (! merged) {
2506  LocalOrdinal* const beg = lclColIndsRaw;
2507  LocalOrdinal* const end = beg + rowInfo.numEntries;
2508  // FIXME (mfh 08 May 2017) This assumes CUDA UVM.
2509  LocalOrdinal* const newend = std::unique (beg, end);
2510  const size_t newNumEnt = newend - beg;
2511 
2512  // NOTE (mfh 08 May 2017) This is a host View, so it does not assume UVM.
2513  this->k_numRowEntries_(rowInfo.localRow) = newNumEnt;
2514  return origNumEnt - newNumEnt; // the number of duplicates in the row
2515  }
2516  else {
2517  return static_cast<size_t> (0); // assume no duplicates
2518  }
2519  }
2520  else {
2521  return static_cast<size_t> (0); // no entries in the row
2522  }
2523  }
2524 
2525 
2526  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2527  void
2529  setDomainRangeMaps (const Teuchos::RCP<const map_type>& domainMap,
2530  const Teuchos::RCP<const map_type>& rangeMap)
2531  {
2532  // simple pointer comparison for equality
2533  if (domainMap_ != domainMap) {
2534  domainMap_ = domainMap;
2535  importer_ = Teuchos::null;
2536  }
2537  if (rangeMap_ != rangeMap) {
2538  rangeMap_ = rangeMap;
2539  exporter_ = Teuchos::null;
2540  }
2541  }
2542 
2543 
2544  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2545  void
2548  {
2549  globalNumEntries_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2550  globalNumDiags_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2551  globalMaxNumRowEntries_ = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2552  haveGlobalConstants_ = false;
2553  }
2554 
2555 
2556  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2557  void
2559  checkInternalState () const
2560  {
2561  const bool debug = ::Tpetra::Details::Behavior::debug ();
2562  if (debug) {
2563  const char tfecfFuncName[] = "checkInternalState: ";
2564  const char suffix[] = " Please report this bug to the Tpetra developers.";
2565 
2566  const global_size_t GSTI = Teuchos::OrdinalTraits<global_size_t>::invalid ();
2567  //const size_t STI = Teuchos::OrdinalTraits<size_t>::invalid (); // unused
2568  // check the internal state of this data structure
2569  // this is called by numerous state-changing methods, in a debug build, to ensure that the object
2570  // always remains in a valid state
2571 
2572  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2573  (this->rowMap_.is_null (), std::logic_error,
2574  "Row Map is null." << suffix);
2575  // This may access the row Map, so we need to check first (above)
2576  // whether the row Map is null.
2577  const LocalOrdinal lclNumRows =
2578  static_cast<LocalOrdinal> (this->getNodeNumRows ());
2579 
2580  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2581  (this->isFillActive () == this->isFillComplete (), std::logic_error,
2582  "Graph cannot be both fill active and fill complete." << suffix);
2583  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2584  (this->isFillComplete () &&
2585  (this->colMap_.is_null () ||
2586  this->rangeMap_.is_null () ||
2587  this->domainMap_.is_null ()),
2588  std::logic_error,
2589  "Graph is full complete, but at least one of {column, range, domain} "
2590  "Map is null." << suffix);
2591  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2592  (this->isStorageOptimized () && ! this->indicesAreAllocated (),
2593  std::logic_error, "Storage is optimized, but indices are not "
2594  "allocated, not even trivially." << suffix);
2595  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2596  (this->indicesAreAllocated_ &&
2597  (this->storageStatus_ == ::Tpetra::Details::STORAGE_1D_PACKED ||
2598  this->storageStatus_ == ::Tpetra::Details::STORAGE_1D_UNPACKED) &&
2599  this->pftype_ != StaticProfile, std::logic_error,
2600  "Graph claims to have allocated indices and 1-D storage "
2601  "(either packed or unpacked), but also claims to be DynamicProfile.");
2602  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2603  (this->indicesAreAllocated_ &&
2604  this->storageStatus_ == ::Tpetra::Details::STORAGE_2D &&
2605  this->pftype_ == StaticProfile, std::logic_error,
2606  "Graph claims to have allocated indices and 2-D storage, "
2607  "but also claims to be StaticProfile.");
2608  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2609  (this->indicesAreAllocated_ &&
2610  this->storageStatus_ == ::Tpetra::Details::STORAGE_2D &&
2611  this->isLocallyIndexed () &&
2612  static_cast<LocalOrdinal> (this->lclInds2D_.size ()) != lclNumRows,
2613  std::logic_error,
2614  "Graph claims to have allocated indices, be locally indexed, and have "
2615  "2-D storage, but lclInds2D_.size() = " << this->lclInds2D_.size ()
2616  << " != getNodeNumRows() = " << lclNumRows << ".");
2617  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2618  (this->indicesAreAllocated_ &&
2619  this->storageStatus_ == ::Tpetra::Details::STORAGE_2D &&
2620  this->isGloballyIndexed () &&
2621  static_cast<LocalOrdinal> (this->gblInds2D_.size ()) != lclNumRows,
2622  std::logic_error,
2623  "Graph claims to have allocated indices, be globally indexed, and have "
2624  "2-D storage, but gblInds2D_.size() = " << this->gblInds2D_.size ()
2625  << " != getNodeNumRows() = " << lclNumRows << ".");
2626 
2627  size_t nodeAllocSize = 0;
2628  try {
2629  nodeAllocSize = this->getNodeAllocationSize ();
2630  }
2631  catch (std::logic_error& e) {
2632  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2633  (true, std::runtime_error, "getNodeAllocationSize threw "
2634  "std::logic_error: " << e.what ());
2635  }
2636  catch (std::exception& e) {
2637  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2638  (true, std::runtime_error, "getNodeAllocationSize threw an "
2639  "std::exception: " << e.what ());
2640  }
2641  catch (...) {
2642  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2643  (true, std::runtime_error, "getNodeAllocationSize threw an exception "
2644  "not a subclass of std::exception.");
2645  }
2646 
2647  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2648  (this->isStorageOptimized () &&
2649  nodeAllocSize != this->getNodeNumEntries (),
2650  std::logic_error, "Storage is optimized, but "
2651  "this->getNodeAllocationSize() = " << nodeAllocSize
2652  << " != this->getNodeNumEntries() = " << this->getNodeNumEntries ()
2653  << "." << suffix);
2654  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2655  (! this->haveGlobalConstants_ &&
2656  (this->globalNumEntries_ != GSTI ||
2657  this->globalMaxNumRowEntries_ != GSTI),
2658  std::logic_error, "Graph claims not to have global constants, but "
2659  "some of the global constants are not marked as invalid." << suffix);
2660  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2661  (this->haveGlobalConstants_ &&
2662  (this->globalNumEntries_ == GSTI ||
2663  this->globalMaxNumRowEntries_ == GSTI),
2664  std::logic_error, "Graph claims to have global constants, but "
2665  "some of them are marked as invalid." << suffix);
2666  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2667  (this->haveGlobalConstants_ &&
2668  (this->globalNumEntries_ < this->getNodeNumEntries () ||
2669  this->globalMaxNumRowEntries_ < this->nodeMaxNumRowEntries_),
2670  std::logic_error, "Graph claims to have global constants, and "
2671  "all of the values of the global constants are valid, but "
2672  "some of the local constants are greater than "
2673  "their corresponding global constants." << suffix);
2674  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2675  (this->indicesAreAllocated () &&
2676  (this->numAllocForAllRows_ != 0 ||
2677  this->k_numAllocPerRow_.extent (0) != 0),
2678  std::logic_error, "The graph claims that its indices are allocated, but "
2679  "either numAllocForAllRows_ (= " << this->numAllocForAllRows_ << ") is "
2680  "nonzero, or k_numAllocPerRow_ has nonzero dimension. In other words, "
2681  "the graph is supposed to release its \"allocation specifications\" "
2682  "when it allocates its indices." << suffix);
2683  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2684  (this->isStorageOptimized () && this->pftype_ != StaticProfile,
2685  std::logic_error,
2686  "Storage is optimized, but graph is not StaticProfile." << suffix);
2687  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2688  (this->isGloballyIndexed () &&
2689  this->k_rowPtrs_.extent (0) != 0 &&
2690  (static_cast<size_t> (this->k_rowPtrs_.extent (0)) != static_cast<size_t> (lclNumRows + 1) ||
2691  this->k_rowPtrs_(lclNumRows) != static_cast<size_t> (this->k_gblInds1D_.extent (0))),
2692  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2693  "the graph is globally indexed, then "
2694  "k_rowPtrs_ must have N+1 rows, and "
2695  "k_rowPtrs_(N) must equal k_gblInds1D_.extent(0)." << suffix);
2696  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2697  (this->isLocallyIndexed () &&
2698  this->k_rowPtrs_.extent (0) != 0 &&
2699  (static_cast<size_t> (k_rowPtrs_.extent (0)) != static_cast<size_t> (lclNumRows + 1) ||
2700  this->k_rowPtrs_(lclNumRows) != static_cast<size_t> (this->k_lclInds1D_.extent (0))),
2701  std::logic_error, "If k_rowPtrs_ has nonzero size and "
2702  "the graph is locally indexed, then "
2703  "k_rowPtrs_ must have N+1 rows, and "
2704  "k_rowPtrs_(N) must equal k_lclInds1D_.extent(0)." << suffix);
2705 
2706  if (this->pftype_ != StaticProfile) {
2707  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2708  (this->indicesAreAllocated () &&
2709  this->getNodeNumRows () > 0 &&
2710  this->lclInds2D_.is_null () &&
2711  this->gblInds2D_.is_null (),
2712  std::logic_error, "Graph has DynamicProfile, indices are allocated, and "
2713  "the calling process has nonzero rows, but 2-D column index storage "
2714  "(whether local or global) is not present." << suffix);
2715  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2716  (this->indicesAreAllocated () &&
2717  this->getNodeNumRows () > 0 &&
2718  this->k_numRowEntries_.extent (0) == 0,
2719  std::logic_error, "Graph has DynamicProfile, indices are allocated, and "
2720  "the calling process has nonzero rows, but k_numRowEntries_ is not "
2721  "present." << suffix);
2722  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2723  (this->k_lclInds1D_.extent (0) != 0 ||
2724  this->k_gblInds1D_.extent (0) != 0,
2725  std::logic_error, "Graph has DynamicProfile, but "
2726  "1-D allocations are present." << suffix);
2727  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2728  (this->k_rowPtrs_.extent (0) != 0,
2729  std::logic_error, "Graph has DynamicProfile, but "
2730  "row offsets are present." << suffix);
2731  }
2732  else if (this->pftype_ == StaticProfile) {
2733  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2734  (this->indicesAreAllocated () &&
2735  nodeAllocSize > 0 &&
2736  this->k_lclInds1D_.extent (0) == 0 &&
2737  this->k_gblInds1D_.extent (0) == 0,
2738  std::logic_error, "Graph has StaticProfile and is allocated "
2739  "nonnontrivally, but 1-D allocations are not present." << suffix);
2740  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2741  (this->lclInds2D_ != Teuchos::null || this->gblInds2D_ != Teuchos::null,
2742  std::logic_error, "Graph has StaticProfile, but 2-D allocations are "
2743  "present." << suffix);
2744  }
2745 
2746  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2747  (! this->indicesAreAllocated () &&
2748  ((this->k_rowPtrs_.extent (0) != 0 ||
2749  this->k_numRowEntries_.extent (0) != 0) ||
2750  this->k_lclInds1D_.extent (0) != 0 ||
2751  this->lclInds2D_ != Teuchos::null ||
2752  this->k_gblInds1D_.extent (0) != 0 ||
2753  this->gblInds2D_ != Teuchos::null),
2754  std::logic_error, "If indices are not allocated, "
2755  "then none of the buffers should be." << suffix);
2756  // indices may be local or global only if they are allocated
2757  // (numAllocated is redundant; could simply be indicesAreLocal_ ||
2758  // indicesAreGlobal_)
2759  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2760  ((this->indicesAreLocal_ || this->indicesAreGlobal_) &&
2761  ! this->indicesAreAllocated_,
2762  std::logic_error, "Indices may be local or global only if they are "
2763  "allocated." << suffix);
2764  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2765  (this->indicesAreLocal_ && this->indicesAreGlobal_,
2766  std::logic_error, "Indices may not be both local and global." << suffix);
2767  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2768  (this->indicesAreLocal_ &&
2769  (this->k_gblInds1D_.extent (0) != 0 || ! this->gblInds2D_.is_null ()),
2770  std::logic_error, "Indices are local, but either "
2771  "k_gblInds1D_.extent(0) (= "
2772  << this->k_gblInds1D_.extent (0) << ") != 0, or "
2773  "gblInds2D_ is not null. In other words, if indices are local, "
2774  "then global allocations should not be present." << suffix);
2775  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2776  (this->indicesAreGlobal_ &&
2777  (this->k_lclInds1D_.extent (0) != 0 ||
2778  ! this->lclInds2D_.is_null ()),
2779  std::logic_error, "Indices are global, but either "
2780  "k_lclInds1D_.extent(0) (= "
2781  << this->k_lclInds1D_.extent (0) << ") != 0, or "
2782  "lclInds2D_ is not null. In other words, if indices are global, "
2783  "then local allocations should not be present." << suffix);
2784  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2785  (this->indicesAreLocal_ &&
2786  nodeAllocSize > 0 &&
2787  this->k_lclInds1D_.extent (0) == 0 &&
2788  this->getNodeNumRows () > 0 &&
2789  this->lclInds2D_.is_null (),
2790  std::logic_error, "Indices are local, getNodeAllocationSize() = "
2791  << nodeAllocSize << " > 0, k_lclInds1D_.extent(0) = 0, "
2792  "getNodeNumRows() = " << this->getNodeNumRows () << " > 0, and "
2793  "lclInds2D_ is null." << suffix);
2794  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2795  (this->indicesAreGlobal_ &&
2796  nodeAllocSize > 0 &&
2797  this->k_gblInds1D_.extent (0) == 0 &&
2798  this->getNodeNumRows () > 0 &&
2799  this->gblInds2D_.is_null (),
2800  std::logic_error, "Indices are global, getNodeAllocationSize() = "
2801  << nodeAllocSize << " > 0, k_gblInds1D_.extent(0) = 0, "
2802  "getNodeNumRows() = " << this->getNodeNumRows () << " > 0, and "
2803  "gblInds2D_ is null." << suffix);
2804  // check the actual allocations
2805  if (this->indicesAreAllocated () &&
2806  this->pftype_ == StaticProfile &&
2807  this->k_rowPtrs_.extent (0) != 0) {
2808  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2809  (static_cast<size_t> (this->k_rowPtrs_.extent (0)) !=
2810  this->getNodeNumRows () + 1,
2811  std::logic_error, "Graph is StaticProfile, indices are allocated, and "
2812  "k_rowPtrs_ has nonzero length, but k_rowPtrs_.extent(0) = "
2813  << this->k_rowPtrs_.extent (0) << " != getNodeNumRows()+1 = "
2814  << (this->getNodeNumRows () + 1) << "." << suffix);
2815  const size_t actualNumAllocated =
2816  ::Tpetra::Details::getEntryOnHost (this->k_rowPtrs_, this->getNodeNumRows ());
2817  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2818  (this->isLocallyIndexed () &&
2819  static_cast<size_t> (this->k_lclInds1D_.extent (0)) != actualNumAllocated,
2820  std::logic_error, "Graph is StaticProfile and locally indexed, "
2821  "indices are allocated, and k_rowPtrs_ has nonzero length, but "
2822  "k_lclInds1D_.extent(0) = " << this->k_lclInds1D_.extent (0)
2823  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2824  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
2825  (this->isGloballyIndexed () &&
2826  static_cast<size_t> (this->k_gblInds1D_.extent (0)) != actualNumAllocated,
2827  std::logic_error, "Graph is StaticProfile and globally indexed, "
2828  "indices are allocated, and k_rowPtrs_ has nonzero length, but "
2829  "k_gblInds1D_.extent(0) = " << this->k_gblInds1D_.extent (0)
2830  << " != actualNumAllocated = " << actualNumAllocated << suffix);
2831  }
2832  }
2833  }
2834 
2835 
2836  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2837  size_t
2839  getNumEntriesInGlobalRow (GlobalOrdinal globalRow) const
2840  {
2841  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2842  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2843  return Teuchos::OrdinalTraits<size_t>::invalid ();
2844  }
2845  else {
2846  return rowInfo.numEntries;
2847  }
2848  }
2849 
2850 
2851  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2852  size_t
2854  getNumEntriesInLocalRow (LocalOrdinal localRow) const
2855  {
2856  const RowInfo rowInfo = this->getRowInfo (localRow);
2857  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2858  return Teuchos::OrdinalTraits<size_t>::invalid ();
2859  }
2860  else {
2861  return rowInfo.numEntries;
2862  }
2863  }
2864 
2865 
2866  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2867  size_t
2869  getNumAllocatedEntriesInGlobalRow (GlobalOrdinal globalRow) const
2870  {
2871  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (globalRow);
2872  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2873  return Teuchos::OrdinalTraits<size_t>::invalid ();
2874  }
2875  else {
2876  return rowInfo.allocSize;
2877  }
2878  }
2879 
2880 
2881  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2882  size_t
2884  getNumAllocatedEntriesInLocalRow (LocalOrdinal localRow) const
2885  {
2886  const RowInfo rowInfo = this->getRowInfo (localRow);
2887  if (rowInfo.localRow == Teuchos::OrdinalTraits<size_t>::invalid ()) {
2888  return Teuchos::OrdinalTraits<size_t>::invalid ();
2889  }
2890  else {
2891  return rowInfo.allocSize;
2892  }
2893  }
2894 
2895 
2896  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2897  Teuchos::ArrayRCP<const size_t>
2899  getNodeRowPtrs () const
2900  {
2901  using Kokkos::ViewAllocateWithoutInitializing;
2902  using Kokkos::create_mirror_view;
2903  using Teuchos::ArrayRCP;
2904  typedef typename local_graph_type::row_map_type row_map_type;
2905  typedef typename row_map_type::non_const_value_type row_offset_type;
2906  const char prefix[] = "Tpetra::CrsGraph::getNodeRowPtrs: ";
2907  const char suffix[] = " Please report this bug to the Tpetra developers.";
2908  const bool debug = ::Tpetra::Details::Behavior::debug ();
2909 
2910  const size_t size = k_rowPtrs_.extent (0);
2911  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
2912 
2913  if (size == 0) {
2914  return ArrayRCP<const size_t> ();
2915  }
2916 
2917  ArrayRCP<const row_offset_type> ptr_rot;
2918  ArrayRCP<const size_t> ptr_st;
2919  if (same) { // size_t == row_offset_type
2920  // NOTE (mfh 22 Mar 2015) In a debug build of Kokkos, the result
2921  // of create_mirror_view might actually be a new allocation.
2922  // This helps with debugging when there are two memory spaces.
2923  typename row_map_type::HostMirror ptr_h = create_mirror_view (k_rowPtrs_);
2924  Kokkos::deep_copy (ptr_h, k_rowPtrs_);
2925  if (debug) {
2926  TEUCHOS_TEST_FOR_EXCEPTION
2927  (ptr_h.extent (0) != k_rowPtrs_.extent (0), std::logic_error,
2928  prefix << "size_t == row_offset_type, but ptr_h.extent(0) = "
2929  << ptr_h.extent (0) << " != k_rowPtrs_.extent(0) = "
2930  << k_rowPtrs_.extent (0) << ".");
2931  TEUCHOS_TEST_FOR_EXCEPTION
2932  (same && size != 0 && k_rowPtrs_.data () == nullptr, std::logic_error,
2933  prefix << "size_t == row_offset_type and k_rowPtrs_.extent(0) = "
2934  << size << " != 0, but k_rowPtrs_.data() == nullptr." << suffix);
2935  TEUCHOS_TEST_FOR_EXCEPTION
2936  (same && size != 0 && ptr_h.data () == nullptr, std::logic_error,
2937  prefix << "size_t == row_offset_type and k_rowPtrs_.extent(0) = "
2938  << size << " != 0, but create_mirror_view(k_rowPtrs_).data() "
2939  "== nullptr." << suffix);
2940  }
2941  ptr_rot = Kokkos::Compat::persistingView (ptr_h);
2942  }
2943  else { // size_t != row_offset_type
2944  typedef Kokkos::View<size_t*, device_type> ret_view_type;
2945  ret_view_type ptr_d (ViewAllocateWithoutInitializing ("ptr"), size);
2946  ::Tpetra::Details::copyOffsets (ptr_d, k_rowPtrs_);
2947  typename ret_view_type::HostMirror ptr_h = create_mirror_view (ptr_d);
2948  Kokkos::deep_copy (ptr_h, ptr_d);
2949  ptr_st = Kokkos::Compat::persistingView (ptr_h);
2950  }
2951  if (debug) {
2952  TEUCHOS_TEST_FOR_EXCEPTION
2953  (same && size != 0 && ptr_rot.is_null (), std::logic_error,
2954  prefix << "size_t == row_offset_type and size = " << size
2955  << " != 0, but ptr_rot is null." << suffix);
2956  TEUCHOS_TEST_FOR_EXCEPTION
2957  (! same && size != 0 && ptr_st.is_null (), std::logic_error,
2958  prefix << "size_t != row_offset_type and size = " << size
2959  << " != 0, but ptr_st is null." << suffix);
2960  }
2961 
2962  // If size_t == row_offset_type, return a persisting host view of
2963  // k_rowPtrs_. Otherwise, return a size_t host copy of k_rowPtrs_.
2964  ArrayRCP<const size_t> retval =
2965  Kokkos::Impl::if_c<same,
2966  ArrayRCP<const row_offset_type>,
2967  ArrayRCP<const size_t> >::select (ptr_rot, ptr_st);
2968  if (debug) {
2969  TEUCHOS_TEST_FOR_EXCEPTION
2970  (size != 0 && retval.is_null (), std::logic_error,
2971  prefix << "size = " << size << " != 0, but retval is null." << suffix);
2972  }
2973  return retval;
2974  }
2975 
2976 
2977  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2978  Teuchos::ArrayRCP<const LocalOrdinal>
2980  getNodePackedIndices () const
2981  {
2982  return Kokkos::Compat::persistingView (k_lclInds1D_);
2983  }
2984 
2985 
2986  template <class LocalOrdinal, class GlobalOrdinal, class Node>
2987  void
2989  getLocalRowCopy (LocalOrdinal localRow,
2990  const Teuchos::ArrayView<LocalOrdinal>&indices,
2991  size_t& numEntries) const
2992  {
2993  using Teuchos::ArrayView;
2994  typedef LocalOrdinal LO;
2995  typedef GlobalOrdinal GO;
2996  const char tfecfFuncName[] = "getLocalRowCopy: ";
2997 
2998  TEUCHOS_TEST_FOR_EXCEPTION(
2999  isGloballyIndexed () && ! hasColMap (), std::runtime_error,
3000  "Tpetra::CrsGraph::getLocalRowCopy: The graph is globally indexed and "
3001  "does not have a column Map yet. That means we don't have local indices "
3002  "for columns yet, so it doesn't make sense to call this method. If the "
3003  "graph doesn't have a column Map yet, you should call fillComplete on "
3004  "it first.");
3005 
3006  // This does the right thing (reports an empty row) if the input
3007  // row is invalid.
3008  const RowInfo rowinfo = this->getRowInfo (localRow);
3009  // No side effects on error.
3010  const size_t theNumEntries = rowinfo.numEntries;
3011  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3012  (static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
3013  "Specified storage (size==" << indices.size () << ") does not suffice "
3014  "to hold all " << theNumEntries << " entry/ies for this row.");
3015  numEntries = theNumEntries;
3016 
3017  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3018  if (isLocallyIndexed ()) {
3019  ArrayView<const LO> lview = getLocalView (rowinfo);
3020  for (size_t j = 0; j < theNumEntries; ++j) {
3021  indices[j] = lview[j];
3022  }
3023  }
3024  else if (isGloballyIndexed ()) {
3025  ArrayView<const GO> gview = getGlobalView (rowinfo);
3026  for (size_t j = 0; j < theNumEntries; ++j) {
3027  indices[j] = colMap_->getLocalElement (gview[j]);
3028  }
3029  }
3030  }
3031  }
3032 
3033 
3034  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3035  void
3037  getGlobalRowCopy (GlobalOrdinal globalRow,
3038  const Teuchos::ArrayView<GlobalOrdinal>& indices,
3039  size_t& numEntries) const
3040  {
3041  using Teuchos::ArrayView;
3042  const char tfecfFuncName[] = "getGlobalRowCopy: ";
3043 
3044  // This does the right thing (reports an empty row) if the input
3045  // row is invalid.
3046  const RowInfo rowinfo = getRowInfoFromGlobalRowIndex (globalRow);
3047  const size_t theNumEntries = rowinfo.numEntries;
3048  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3049  static_cast<size_t> (indices.size ()) < theNumEntries, std::runtime_error,
3050  "Specified storage (size==" << indices.size () << ") does not suffice "
3051  "to hold all " << theNumEntries << " entry/ies for this row.");
3052  numEntries = theNumEntries; // first side effect
3053 
3054  if (rowinfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid ()) {
3055  if (isLocallyIndexed ()) {
3056  ArrayView<const LocalOrdinal> lview = getLocalView (rowinfo);
3057  for (size_t j = 0; j < theNumEntries; ++j) {
3058  indices[j] = colMap_->getGlobalElement (lview[j]);
3059  }
3060  }
3061  else if (isGloballyIndexed ()) {
3062  ArrayView<const GlobalOrdinal> gview = getGlobalView (rowinfo);
3063  for (size_t j = 0; j < theNumEntries; ++j) {
3064  indices[j] = gview[j];
3065  }
3066  }
3067  }
3068  }
3069 
3070 
3071  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3072  void
3074  getLocalRowView (const LocalOrdinal localRow,
3075  Teuchos::ArrayView<const LocalOrdinal>& indices) const
3076  {
3077  const char tfecfFuncName[] = "getLocalRowView: ";
3078 #ifdef HAVE_TPETRA_DEBUG
3079  constexpr bool debug = true;
3080 #else
3081  constexpr bool debug = false;
3082 #endif // HAVE_TPETRA_DEBUG
3083 
3084  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3085  (isGloballyIndexed (), std::runtime_error, "The graph's indices are "
3086  "currently stored as global indices, so we cannot return a view with "
3087  "local column indices, whether or not the graph has a column Map. If "
3088  "the graph _does_ have a column Map, use getLocalRowCopy() instead.");
3089 
3090  // This does the right thing (reports an empty row) if the input
3091  // row is invalid.
3092  const RowInfo rowInfo = getRowInfo (localRow);
3093  indices = Teuchos::null;
3094  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3095  rowInfo.numEntries > 0) {
3096  indices = this->getLocalView (rowInfo);
3097  // getLocalView returns a view of the _entire_ row, including
3098  // any extra space at the end (which 1-D unpacked storage
3099  // might have, for example). That's why we have to take a
3100  // subview of the returned view.
3101  indices = indices (0, rowInfo.numEntries);
3102  }
3103 
3104  if (debug) {
3105  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3106  (static_cast<size_t> (indices.size ()) !=
3107  getNumEntriesInLocalRow (localRow), std::logic_error, "indices.size() "
3108  "= " << indices.size () << " != getNumEntriesInLocalRow(localRow=" <<
3109  localRow << ") = " << getNumEntriesInLocalRow (localRow) <<
3110  ". Please report this bug to the Tpetra developers.");
3111  }
3112  }
3113 
3114 
3115  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3116  void
3118  getGlobalRowView (const GlobalOrdinal globalRow,
3119  Teuchos::ArrayView<const GlobalOrdinal>& indices) const
3120  {
3121  const char tfecfFuncName[] = "getGlobalRowView: ";
3122 #ifdef HAVE_TPETRA_DEBUG
3123  constexpr bool debug = true;
3124 #else
3125  constexpr bool debug = false;
3126 #endif // HAVE_TPETRA_DEBUG
3127 
3128  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3129  (isLocallyIndexed (), std::runtime_error, "The graph's indices are "
3130  "currently stored as local indices, so we cannot return a view with "
3131  "global column indices. Use getGlobalRowCopy() instead.");
3132 
3133  // This does the right thing (reports an empty row) if the input
3134  // row is invalid.
3135  const RowInfo rowInfo = getRowInfoFromGlobalRowIndex (globalRow);
3136  indices = Teuchos::null;
3137  if (rowInfo.localRow != Teuchos::OrdinalTraits<size_t>::invalid () &&
3138  rowInfo.numEntries > 0) {
3139  indices = (this->getGlobalView (rowInfo)) (0, rowInfo.numEntries);
3140  }
3141 
3142  if (debug) {
3143  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3144  (static_cast<size_t> (indices.size ()) !=
3145  getNumEntriesInGlobalRow (globalRow),
3146  std::logic_error, "indices.size() = " << indices.size ()
3147  << " != getNumEntriesInGlobalRow(globalRow=" << globalRow << ") = "
3148  << getNumEntriesInGlobalRow (globalRow)
3149  << ". Please report this bug to the Tpetra developers.");
3150  }
3151  }
3152 
3153 
3154  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3155  void
3157  insertLocalIndices (const LocalOrdinal localRow,
3158  const Teuchos::ArrayView<const LocalOrdinal>& indices)
3159  {
3160  const char tfecfFuncName[] = "insertLocalIndices: ";
3161 
3162  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3163  (! isFillActive (), std::runtime_error, "Fill must be active.");
3164  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3165  (isGloballyIndexed (), std::runtime_error,
3166  "Graph indices are global; use insertGlobalIndices().");
3167  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3168  (! hasColMap (), std::runtime_error,
3169  "Cannot insert local indices without a column Map.");
3170  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3171  (! rowMap_->isNodeLocalElement (localRow), std::runtime_error,
3172  "Local row index " << localRow << " is not in the row Map "
3173  "on the calling process.");
3174  if (! indicesAreAllocated ()) {
3175  allocateIndices (LocalIndices);
3176  }
3177 
3178 #ifdef HAVE_TPETRA_DEBUG
3179  constexpr bool debug = true;
3180 #else
3181  constexpr bool debug = false;
3182 #endif // HAVE_TPETRA_DEBUG
3183 
3184  if (debug) {
3185  // In debug mode, if the graph has a column Map, test whether any
3186  // of the given column indices are not in the column Map. Keep
3187  // track of the invalid column indices so we can tell the user
3188  // about them.
3189  if (hasColMap ()) {
3190  using Teuchos::Array;
3191  using Teuchos::toString;
3192  using std::endl;
3193  typedef typename Teuchos::ArrayView<const LocalOrdinal>::size_type size_type;
3194 
3195  const map_type& colMap = *colMap_;
3196  Array<LocalOrdinal> badColInds;
3197  bool allInColMap = true;
3198  for (size_type k = 0; k < indices.size (); ++k) {
3199  if (! colMap.isNodeLocalElement (indices[k])) {
3200  allInColMap = false;
3201  badColInds.push_back (indices[k]);
3202  }
3203  }
3204  if (! allInColMap) {
3205  std::ostringstream os;
3206  os << "Tpetra::CrsGraph::insertLocalIndices: You attempted to insert "
3207  "entries in owned row " << localRow << ", at the following column "
3208  "indices: " << toString (indices) << "." << endl;
3209  os << "Of those, the following indices are not in the column Map on "
3210  "this process: " << toString (badColInds) << "." << endl << "Since "
3211  "the graph has a column Map already, it is invalid to insert entries "
3212  "at those locations.";
3213  TEUCHOS_TEST_FOR_EXCEPTION(! allInColMap, std::invalid_argument, os.str ());
3214  }
3215  }
3216  }
3217 
3218  insertLocalIndicesImpl (localRow, indices);
3219 
3220  if (debug) {
3221  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3222  (! indicesAreAllocated () || ! isLocallyIndexed (), std::logic_error,
3223  "At the end of insertLocalIndices, ! indicesAreAllocated() || "
3224  "! isLocallyIndexed() is true. Please report this bug to the "
3225  "Tpetra developers.");
3226  }
3227  }
3228 
3229  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3230  void
3232  insertLocalIndices (const LocalOrdinal localRow,
3233  const LocalOrdinal numEnt,
3234  const LocalOrdinal inds[])
3235  {
3236  Teuchos::ArrayView<const LocalOrdinal> indsT (inds, numEnt);
3237  this->insertLocalIndices (localRow, indsT);
3238  }
3239 
3240 
3241  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3242  void
3244  insertGlobalIndices (const GlobalOrdinal gblRow,
3245  const LocalOrdinal numInputInds,
3246  const GlobalOrdinal inputGblColInds[])
3247  {
3248  typedef LocalOrdinal LO;
3249  const char tfecfFuncName[] = "insertGlobalIndices: ";
3250 #ifdef HAVE_TPETRA_DEBUG
3251  constexpr bool debug = true;
3252 #else
3253  constexpr bool debug = false;
3254 #endif // HAVE_TPETRA_DEBUG
3255 
3256  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3257  (this->isLocallyIndexed (), std::runtime_error,
3258  "graph indices are local; use insertLocalIndices().");
3259  // This can't really be satisfied for now, because if we are
3260  // fillComplete(), then we are local. In the future, this may
3261  // change. However, the rule that modification require active
3262  // fill will not change.
3263  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3264  (! this->isFillActive (), std::runtime_error,
3265  "You are not allowed to call this method if fill is not active. "
3266  "If fillComplete has been called, you must first call resumeFill "
3267  "before you may insert indices.");
3268  if (! this->indicesAreAllocated ()) {
3269  this->allocateIndices (GlobalIndices);
3270  }
3271  const LO lclRow = this->rowMap_->getLocalElement (gblRow);
3272  if (lclRow != Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
3273  if (debug) {
3274  if (this->hasColMap ()) {
3275  using std::endl;
3276  const map_type& colMap = * (this->colMap_);
3277  // In a debug build, keep track of the nonowned ("bad") column
3278  // indices, so that we can display them in the exception
3279  // message. In a release build, just ditch the loop early if
3280  // we encounter a nonowned column index.
3281  std::vector<GlobalOrdinal> badColInds;
3282  bool allInColMap = true;
3283  for (LO k = 0; k < numInputInds; ++k) {
3284  if (! colMap.isNodeGlobalElement (inputGblColInds[k])) {
3285  allInColMap = false;
3286  badColInds.push_back (inputGblColInds[k]);
3287  }
3288  }
3289  if (! allInColMap) {
3290  std::ostringstream os;
3291  os << "You attempted to insert entries in owned row " << gblRow
3292  << ", at the following column indices: [";
3293  for (LO k = 0; k < numInputInds; ++k) {
3294  os << inputGblColInds[k];
3295  if (k + static_cast<LO> (1) < numInputInds) {
3296  os << ",";
3297  }
3298  }
3299  os << "]." << endl << "Of those, the following indices are not in "
3300  "the column Map on this process: [";
3301  for (size_t k = 0; k < badColInds.size (); ++k) {
3302  os << badColInds[k];
3303  if (k + size_t (1) < badColInds.size ()) {
3304  os << ",";
3305  }
3306  }
3307  os << "]." << endl << "Since the matrix has a column Map already, "
3308  "it is invalid to insert entries at those locations.";
3309  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3310  (true, std::invalid_argument, os.str ());
3311  }
3312  }
3313  } // debug
3314  this->insertGlobalIndicesImpl (lclRow, inputGblColInds, numInputInds);
3315  }
3316  else { // a nonlocal row
3317  this->insertGlobalIndicesIntoNonownedRows (gblRow, inputGblColInds,
3318  numInputInds);
3319  }
3320  }
3321 
3322 
3323  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3324  void
3326  insertGlobalIndices (const GlobalOrdinal gblRow,
3327  const Teuchos::ArrayView<const GlobalOrdinal>& inputGblColInds)
3328  {
3329  this->insertGlobalIndices (gblRow, inputGblColInds.size (),
3330  inputGblColInds.getRawPtr ());
3331  }
3332 
3333 
3334  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3335  void
3337  insertGlobalIndicesFiltered (const LocalOrdinal lclRow,
3338  const GlobalOrdinal gblColInds[],
3339  const LocalOrdinal numGblColInds)
3340  {
3341  typedef LocalOrdinal LO;
3342  typedef GlobalOrdinal GO;
3343  const char tfecfFuncName[] = "insertGlobalIndicesFiltered: ";
3344 
3345  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3346  (this->isLocallyIndexed (), std::runtime_error,
3347  "Graph indices are local; use insertLocalIndices().");
3348  // This can't really be satisfied for now, because if we are
3349  // fillComplete(), then we are local. In the future, this may
3350  // change. However, the rule that modification require active
3351  // fill will not change.
3352  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3353  (! this->isFillActive (), std::runtime_error,
3354  "You are not allowed to call this method if fill is not active. "
3355  "If fillComplete has been called, you must first call resumeFill "
3356  "before you may insert indices.");
3357  if (! this->indicesAreAllocated ()) {
3358  this->allocateIndices (GlobalIndices);
3359  }
3360 
3361  Teuchos::ArrayView<const GO> gblColInds_av (gblColInds, numGblColInds);
3362  // If we have a column Map, use it to filter the entries.
3363  if (! this->colMap_.is_null ()) {
3364  const map_type& colMap = * (this->colMap_);
3365 
3366  LO curOffset = 0;
3367  while (curOffset < numGblColInds) {
3368  // Find a sequence of input indices that are in the column Map
3369  // on the calling process. Doing a sequence at a time,
3370  // instead of one at a time, amortizes some overhead.
3371  LO endOffset = curOffset;
3372  for ( ; endOffset < numGblColInds; ++endOffset) {
3373  const LO lclCol = colMap.getLocalElement (gblColInds[endOffset]);
3374  if (lclCol == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
3375  break; // first entry, in current sequence, not in the column Map
3376  }
3377  }
3378  // curOffset, endOffset: half-exclusive range of indices in
3379  // the column Map on the calling process. If endOffset ==
3380  // curOffset, the range is empty.
3381  const LO numIndInSeq = (endOffset - curOffset);
3382  if (numIndInSeq != 0) {
3383  this->insertGlobalIndicesImpl (lclRow, gblColInds + curOffset,
3384  numIndInSeq);
3385  }
3386  // Invariant before this line: Either endOffset ==
3387  // numGblColInds, or gblColInds[endOffset] is not in the
3388  // column Map on the calling process.
3389  curOffset = endOffset + 1;
3390  }
3391  }
3392  else {
3393  this->insertGlobalIndicesImpl (lclRow, gblColInds_av.getRawPtr (),
3394  gblColInds_av.size ());
3395  }
3396  }
3397 
3398  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3399  void
3401  insertGlobalIndicesIntoNonownedRows (const GlobalOrdinal gblRow,
3402  const GlobalOrdinal gblColInds[],
3403  const LocalOrdinal numGblColInds)
3404  {
3405  // This creates the std::vector if it doesn't exist yet.
3406  // std::map's operator[] does a lookup each time, so it's better
3407  // to pull nonlocals_[grow] out of the loop.
3408  std::vector<GlobalOrdinal>& nonlocalRow = this->nonlocals_[gblRow];
3409  for (LocalOrdinal k = 0; k < numGblColInds; ++k) {
3410  // FIXME (mfh 20 Jul 2017) Would be better to use a set, in
3411  // order to avoid duplicates. globalAssemble() sorts these
3412  // anyway.
3413  nonlocalRow.push_back (gblColInds[k]);
3414  }
3415  }
3416 
3417  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3418  void
3420  removeLocalIndices (LocalOrdinal lrow)
3421  {
3422  const char tfecfFuncName[] = "removeLocalIndices: ";
3423  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3424  ! isFillActive (), std::runtime_error, "requires that fill is active.");
3425  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3426  isStorageOptimized (), std::runtime_error,
3427  "cannot remove indices after optimizeStorage() has been called.");
3428  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3429  isGloballyIndexed (), std::runtime_error, "graph indices are global.");
3430  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3431  ! rowMap_->isNodeLocalElement (lrow), std::runtime_error,
3432  "Local row " << lrow << " is not in the row Map on the calling process.");
3433  if (! indicesAreAllocated ()) {
3434  allocateIndices (LocalIndices);
3435  }
3436 
3437  // FIXME (mfh 13 Aug 2014) What if they haven't been cleared on
3438  // all processes?
3439  clearGlobalConstants ();
3440 
3441  if (k_numRowEntries_.extent (0) != 0) {
3442  this->k_numRowEntries_(lrow) = 0;
3443  }
3444 #ifdef HAVE_TPETRA_DEBUG
3445  constexpr bool debug = true;
3446 #else
3447  constexpr bool debug = false;
3448 #endif // HAVE_TPETRA_DEBUG
3449 
3450  if (debug) {
3451  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3452  (getNumEntriesInLocalRow (lrow) != 0 ||
3453  ! indicesAreAllocated () ||
3454  ! isLocallyIndexed (), std::logic_error,
3455  "Violated stated post-conditions. Please contact Tpetra team.");
3456  }
3457  }
3458 
3459 
3460  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3461  void
3463  setAllIndices (const typename local_graph_type::row_map_type& rowPointers,
3464  const typename local_graph_type::entries_type::non_const_type& columnIndices)
3465  {
3466  const char tfecfFuncName[] = "setAllIndices: ";
3467  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3468  ! hasColMap () || getColMap ().is_null (), std::runtime_error,
3469  "The graph must have a column Map before you may call this method.");
3470  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
3471  static_cast<size_t> (rowPointers.size ()) != this->getNodeNumRows () + 1,
3472  std::runtime_error, "rowPointers.size() = " << rowPointers.size () <<
3473  " != this->getNodeNumRows()+1 = " << (this->getNodeNumRows () + 1) <<
3474  ".");
3475 
3476  // FIXME (mfh 07 Aug 2014) We need to relax this restriction,
3477  // since the future model will be allocation at construction, not
3478  // lazy allocation on first insert.
3479  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3480  ((this->k_lclInds1D_.extent (0) != 0 || this->k_gblInds1D_.extent (0) != 0),
3481  std::runtime_error, "You may not call this method if 1-D data "
3482  "structures are already allocated.");
3483 
3484  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3485  (this->lclInds2D_ != Teuchos::null ||
3486  this->gblInds2D_ != Teuchos::null,
3487  std::runtime_error, "You may not call this method if 2-D data "
3488  "structures are already allocated.");
3489 
3490  indicesAreAllocated_ = true;
3491  indicesAreLocal_ = true;
3492  pftype_ = StaticProfile; // if the profile wasn't static before, it sure is now.
3493  k_lclInds1D_ = columnIndices;
3494  k_rowPtrs_ = rowPointers;
3495  // Storage MUST be packed, since the interface doesn't give any
3496  // way to indicate any extra space at the end of each row.
3497  storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED;
3498 
3499  // Build the local graph.
3500  lclGraph_ = local_graph_type (k_lclInds1D_, k_rowPtrs_);
3501 
3502  // These normally get cleared out at the end of allocateIndices.
3503  // It makes sense to clear them out here, because at the end of
3504  // this method, the graph is allocated on the calling process.
3505  numAllocForAllRows_ = 0;
3506  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
3507 
3508  checkInternalState ();
3509  }
3510 
3511 
3512  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3513  void
3515  setAllIndices (const Teuchos::ArrayRCP<size_t>& rowPointers,
3516  const Teuchos::ArrayRCP<LocalOrdinal>& columnIndices)
3517  {
3518  using Kokkos::View;
3519  typedef typename local_graph_type::row_map_type row_map_type;
3520  typedef typename row_map_type::array_layout layout_type;
3521  typedef typename row_map_type::non_const_value_type row_offset_type;
3522  typedef View<size_t*, layout_type , Kokkos::HostSpace,
3523  Kokkos::MemoryUnmanaged> input_view_type;
3524  typedef typename row_map_type::non_const_type nc_row_map_type;
3525 
3526  const size_t size = static_cast<size_t> (rowPointers.size ());
3527  constexpr bool same = std::is_same<size_t, row_offset_type>::value;
3528  input_view_type ptr_in (rowPointers.getRawPtr (), size);
3529 
3530  nc_row_map_type ptr_rot ("Tpetra::CrsGraph::ptr", size);
3531 
3532  if (same) { // size_t == row_offset_type
3533  // This compile-time logic ensures that the compiler never sees
3534  // an assignment of View<row_offset_type*, ...> to View<size_t*,
3535  // ...> unless size_t == row_offset_type.
3536  input_view_type ptr_decoy (rowPointers.getRawPtr (), size); // never used
3537  Kokkos::deep_copy (Kokkos::Impl::if_c<same,
3538  nc_row_map_type,
3539  input_view_type>::select (ptr_rot, ptr_decoy),
3540  ptr_in);
3541  }
3542  else { // size_t != row_offset_type
3543  // CudaUvmSpace != HostSpace, so this will be false in that case.
3544  constexpr bool inHostMemory =
3545  std::is_same<typename row_map_type::memory_space,
3546  Kokkos::HostSpace>::value;
3547  if (inHostMemory) {
3548  // Copy (with cast from size_t to row_offset_type, with bounds
3549  // checking if necessary) to ptr_rot.
3550  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_in);
3551  }
3552  else { // Copy input row offsets to device first.
3553  //
3554  // FIXME (mfh 24 Mar 2015) If CUDA UVM, running in the host's
3555  // execution space would avoid the double copy.
3556  //
3557  View<size_t*, layout_type ,execution_space > ptr_st ("Tpetra::CrsGraph::ptr", size);
3558  Kokkos::deep_copy (ptr_st, ptr_in);
3559  // Copy on device (casting from size_t to row_offset_type,
3560  // with bounds checking if necessary) to ptr_rot. This
3561  // executes in the output View's execution space, which is the
3562  // same as execution_space.
3563  ::Tpetra::Details::copyOffsets (ptr_rot, ptr_st);
3564  }
3565  }
3566 
3567  Kokkos::View<LocalOrdinal*, layout_type , execution_space > k_ind =
3568  Kokkos::Compat::getKokkosViewDeepCopy<device_type> (columnIndices ());
3569  setAllIndices (ptr_rot, k_ind);
3570  }
3571 
3572 
3573  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3574  void
3576  getNumEntriesPerLocalRowUpperBound (Teuchos::ArrayRCP<const size_t>& boundPerLocalRow,
3577  size_t& boundForAllLocalRows,
3578  bool& boundSameForAllLocalRows) const
3579  {
3580  const char tfecfFuncName[] = "getNumEntriesPerLocalRowUpperBound: ";
3581  const char suffix[] = " Please report this bug to the Tpetra developers.";
3582 
3583  // The three output arguments. We assign them to the actual
3584  // output arguments at the end, in order to implement
3585  // transactional semantics.
3586  Teuchos::ArrayRCP<const size_t> numEntriesPerRow;
3587  size_t numEntriesForAll = 0;
3588  bool allRowsSame = true;
3589 
3590  const ptrdiff_t numRows = static_cast<ptrdiff_t> (this->getNodeNumRows ());
3591 
3592  if (this->indicesAreAllocated ()) {
3593  if (this->isStorageOptimized ()) {
3594  // left with the case that we have optimized storage. in this
3595  // case, we have to construct a list of row sizes.
3596  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3597  (this->getProfileType () != StaticProfile, std::logic_error,
3598  "The graph is not StaticProfile, but storage appears to be optimized."
3599  << suffix);
3600  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3601  (numRows != 0 && k_rowPtrs_.extent (0) == 0, std::logic_error,
3602  "The graph has " << numRows << " (> 0) row" << (numRows != 1 ? "s" : "")
3603  << " on the calling process, but the k_rowPtrs_ array has zero entries."
3604  << suffix);
3605  Teuchos::ArrayRCP<size_t> numEnt;
3606  if (numRows != 0) {
3607  numEnt = Teuchos::arcp<size_t> (numRows);
3608  }
3609 
3610  // We have to iterate through the row offsets anyway, so we
3611  // might as well check whether all rows' bounds are the same.
3612  bool allRowsReallySame = false;
3613  for (ptrdiff_t i = 0; i < numRows; ++i) {
3614  numEnt[i] = this->k_rowPtrs_(i+1) - this->k_rowPtrs_(i);
3615  if (i != 0 && numEnt[i] != numEnt[i-1]) {
3616  allRowsReallySame = false;
3617  }
3618  }
3619  if (allRowsReallySame) {
3620  if (numRows == 0) {
3621  numEntriesForAll = 0;
3622  } else {
3623  numEntriesForAll = numEnt[1] - numEnt[0];
3624  }
3625  allRowsSame = true;
3626  }
3627  else {
3628  numEntriesPerRow = numEnt; // Teuchos::arcp_const_cast<const size_t> (numEnt);
3629  allRowsSame = false; // conservatively; we don't check the array
3630  }
3631  }
3632  else if (k_numRowEntries_.extent (0) != 0) {
3633  // This is a shallow copy; the ArrayRCP wraps the View in a
3634  // custom destructor, which ensures correct deallocation if
3635  // that is the only reference to the View. Furthermore, this
3636  // View is a host View, so this doesn't assume UVM.
3637  numEntriesPerRow = Kokkos::Compat::persistingView (k_numRowEntries_);
3638  allRowsSame = false; // conservatively; we don't check the array
3639  }
3640  else {
3641  numEntriesForAll = 0;
3642  allRowsSame = true;
3643  }
3644  }
3645  else { // indices not allocated
3646  if (k_numAllocPerRow_.extent (0) != 0) {
3647  // This is a shallow copy; the ArrayRCP wraps the View in a
3648  // custom destructor, which ensures correct deallocation if
3649  // that is the only reference to the View. Furthermore, this
3650  // View is a host View, so this doesn't assume UVM.
3651  numEntriesPerRow = Kokkos::Compat::persistingView (k_numAllocPerRow_);
3652  allRowsSame = false; // conservatively; we don't check the array
3653  }
3654  else {
3655  numEntriesForAll = numAllocForAllRows_;
3656  allRowsSame = true;
3657  }
3658  }
3659 
3660  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3661  (numEntriesForAll != 0 && numEntriesPerRow.size () != 0, std::logic_error,
3662  "numEntriesForAll and numEntriesPerRow are not consistent. The former "
3663  "is nonzero (" << numEntriesForAll << "), but the latter has nonzero "
3664  "size " << numEntriesPerRow.size () << "." << suffix);
3665  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3666  (numEntriesForAll != 0 && ! allRowsSame, std::logic_error,
3667  "numEntriesForAll and allRowsSame are not consistent. The former "
3668  "is nonzero (" << numEntriesForAll << "), but the latter is false."
3669  << suffix);
3670  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3671  (numEntriesPerRow.size () != 0 && allRowsSame, std::logic_error,
3672  "numEntriesPerRow and allRowsSame are not consistent. The former has "
3673  "nonzero length " << numEntriesForAll << ", but the latter is true."
3674  << suffix);
3675 
3676  boundPerLocalRow = numEntriesPerRow;
3677  boundForAllLocalRows = numEntriesForAll;
3678  boundSameForAllLocalRows = allRowsSame;
3679  }
3680 
3681 
// globalAssemble: move the entries staged in nonlocals_ (entries that
// were inserted into rows the calling process does not own) into
// *this, then empty nonlocals_.
//
// Outline, as implemented below:
//   0. All-reduce to find out whether any process has staged rows;
//      if none does, return without doing anything else.
//   1-2. Sort and deduplicate each staged row, and build a row Map
//      over the staged rows.
//   3. Build a temporary StaticProfile graph holding the staged rows.
//   4. Export the temporary graph into *this directly if the row Map
//      is one to one; otherwise Export into a temporary one-to-one
//      graph and Import from that into *this.
//
// Throws std::runtime_error if fill is not active.
//
// NOTE(review): this method performs all-reduces over getComm(), so it
// presumably must be called by every process in that communicator --
// confirm against the class documentation.
3682  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3683  void
3685  globalAssemble ()
3686  {
3687  using Teuchos::Comm;
3688  using Teuchos::outArg;
3689  using Teuchos::RCP;
3690  using Teuchos::rcp;
3691  using Teuchos::REDUCE_MAX;
3692  using Teuchos::REDUCE_MIN;
3693  using Teuchos::reduceAll;
3694  typedef CrsGraph<LocalOrdinal, GlobalOrdinal, Node> crs_graph_type;
3695  typedef LocalOrdinal LO;
3696  typedef GlobalOrdinal GO;
3697  typedef typename Teuchos::Array<GO>::size_type size_type;
3698  const char tfecfFuncName[] = "globalAssemble: "; // for exception macro
3699 
3700  RCP<const Comm<int> > comm = getComm ();
3701 
3702  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3703  (! isFillActive (), std::runtime_error, "Fill must be active before "
3704  "you may call this method.");
3705 
3706  const size_t myNumNonlocalRows = this->nonlocals_.size ();
3707 
3708  // If no processes have nonlocal rows, then we don't have to do
3709  // anything. Checking this is probably cheaper than constructing
3710  // the Map of nonlocal rows (see below) and noticing that it has
3711  // zero global entries.
3712  {
3713  const int iHaveNonlocalRows = (myNumNonlocalRows == 0) ? 0 : 1;
3714  int someoneHasNonlocalRows = 0;
3715  reduceAll<int, int> (*comm, REDUCE_MAX, iHaveNonlocalRows,
3716  outArg (someoneHasNonlocalRows));
3717  if (someoneHasNonlocalRows == 0) {
3718  return; // no process has nonlocal rows, so nothing to do
3719  }
3720  }
3721 
3722  // 1. Create a list of the "nonlocal" rows on each process. this
3723  // requires iterating over nonlocals_, so while we do this,
3724  // deduplicate the entries and get a count for each nonlocal
3725  // row on this process.
3726  // 2. Construct a new row Map corresponding to those rows. This
3727  // Map is likely overlapping. We know that the Map is not
3728  // empty on all processes, because the above all-reduce and
3729  // return exclude that case.
3730 
3731  RCP<const map_type> nonlocalRowMap;
3732  // Keep this for CrsGraph's constructor, so we can use StaticProfile.
3733  Teuchos::Array<size_t> numEntPerNonlocalRow (myNumNonlocalRows);
3734  {
3735  Teuchos::Array<GO> myNonlocalGblRows (myNumNonlocalRows);
3736  size_type curPos = 0;
3737  for (auto mapIter = this->nonlocals_.begin ();
3738  mapIter != this->nonlocals_.end ();
3739  ++mapIter, ++curPos) {
3740  myNonlocalGblRows[curPos] = mapIter->first;
3741  std::vector<GO>& gblCols = mapIter->second; // by ref; change in place
// Sort, then drop adjacent duplicates, so that the count recorded
// below is the number of distinct column indices in this row.
3742  std::sort (gblCols.begin (), gblCols.end ());
3743  auto vecLast = std::unique (gblCols.begin (), gblCols.end ());
3744  gblCols.erase (vecLast, gblCols.end ());
3745  numEntPerNonlocalRow[curPos] = gblCols.size ();
3746  }
3747 
3748  // Currently, Map requires that its indexBase be the global min
3749  // of all its global indices. Map won't compute this for us, so
3750  // we must do it. If our process has no nonlocal rows, set the
3751  // "min" to the max possible GO value. This ensures that if
3752  // some process has at least one nonlocal row, then it will pick
3753  // that up as the min. We know that at least one process has a
3754  // nonlocal row, since the all-reduce and return at the top of
3755  // this method excluded that case.
3756  GO myMinNonlocalGblRow = std::numeric_limits<GO>::max ();
3757  {
3758  auto iter = std::min_element (myNonlocalGblRows.begin (),
3759  myNonlocalGblRows.end ());
3760  if (iter != myNonlocalGblRows.end ()) {
3761  myMinNonlocalGblRow = *iter;
3762  }
3763  }
3764  GO gblMinNonlocalGblRow = 0;
3765  reduceAll<int, GO> (*comm, REDUCE_MIN, myMinNonlocalGblRow,
3766  outArg (gblMinNonlocalGblRow));
3767  const GO indexBase = gblMinNonlocalGblRow;
3768  const global_size_t INV = Teuchos::OrdinalTraits<global_size_t>::invalid ();
3769  nonlocalRowMap = rcp (new map_type (INV, myNonlocalGblRows (), indexBase, comm));
3770  }
3771 
3772  // 3. Use the column indices for each nonlocal row, as stored in
3773  // nonlocals_, to construct a CrsGraph corresponding to
3774  // nonlocal rows. We may use StaticProfile, since we have
3775  // exact counts of the number of entries in each nonlocal row.
3776 
3777  RCP<crs_graph_type> nonlocalGraph =
3778  rcp (new crs_graph_type (nonlocalRowMap, numEntPerNonlocalRow (),
3779  StaticProfile));
3780  {
// This loop walks nonlocals_ in the same order as the loop that
// filled numEntPerNonlocalRow, so curPos indexes the matching count.
3781  size_type curPos = 0;
3782  for (auto mapIter = this->nonlocals_.begin ();
3783  mapIter != this->nonlocals_.end ();
3784  ++mapIter, ++curPos) {
3785  const GO gblRow = mapIter->first;
3786  std::vector<GO>& gblCols = mapIter->second; // by ref just to avoid copy
3787  const LO numEnt = static_cast<LO> (numEntPerNonlocalRow[curPos]);
3788  nonlocalGraph->insertGlobalIndices (gblRow, numEnt, gblCols.data ());
3789  }
3790  }
3791  // There's no need to fill-complete the nonlocals graph.
3792  // We just use it as a temporary container for the Export.
3793 
3794  // 4. If the original row Map is one to one, then we can Export
3795  // directly from nonlocalGraph into this. Otherwise, we have
3796  // to create a temporary graph with a one-to-one row Map,
3797  // Export into that, then Import from the temporary graph into
3798  // *this.
3799 
3800  auto origRowMap = this->getRowMap ();
3801  const bool origRowMapIsOneToOne = origRowMap->isOneToOne ();
3802 
3803  if (origRowMapIsOneToOne) {
3804  export_type exportToOrig (nonlocalRowMap, origRowMap);
3805  this->doExport (*nonlocalGraph, exportToOrig, Tpetra::INSERT);
3806  // We're done at this point!
3807  }
3808  else {
3809  // If you ask a Map whether it is one to one, it does some
3810  // communication and stashes intermediate results for later use
3811  // by createOneToOne. Thus, calling createOneToOne doesn't cost
3812  // much more then the original cost of calling isOneToOne.
3813  auto oneToOneRowMap = Tpetra::createOneToOne (origRowMap);
3814  export_type exportToOneToOne (nonlocalRowMap, oneToOneRowMap);
3815 
3816  // Create a temporary graph with the one-to-one row Map.
3817  //
3818  // TODO (mfh 09 Sep 2016) Estimate the number of entries in each
3819  // row, to avoid reallocation during the Export operation.
3820  crs_graph_type oneToOneGraph (oneToOneRowMap, 0);
3821  // Export from graph of nonlocals into the temp one-to-one graph.
3822  oneToOneGraph.doExport (*nonlocalGraph, exportToOneToOne, Tpetra::INSERT);
3823 
3824  // We don't need the graph of nonlocals anymore, so get rid of
3825  // it, to keep the memory high-water mark down.
3826  nonlocalGraph = Teuchos::null;
3827 
3828  // Import from the one-to-one graph to the original graph.
3829  import_type importToOrig (oneToOneRowMap, origRowMap);
3830  this->doImport (oneToOneGraph, importToOrig, Tpetra::INSERT);
3831  }
3832 
3833  // It's safe now to clear out nonlocals_, since we've already
3834  // committed side effects to *this. The standard idiom for
3835  // clearing a Container like std::map, is to swap it with an empty
3836  // Container and let the swapped Container fall out of scope.
3837  decltype (this->nonlocals_) newNonlocals;
3838  std::swap (this->nonlocals_, newNonlocals);
3839 
3840  checkInternalState ();
3841  }
3842 
3843 
3844  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3845  void
3847  resumeFill (const Teuchos::RCP<Teuchos::ParameterList>& params)
3848  {
3849  clearGlobalConstants();
3850  if (params != Teuchos::null) this->setParameterList (params);
3851  lowerTriangular_ = false;
3852  upperTriangular_ = false;
3853  // either still sorted/merged or initially sorted/merged
3854  indicesAreSorted_ = true;
3855  noRedundancies_ = true;
3856  fillComplete_ = false;
3857  }
3858 
3859 
3860  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3861  void
3863  fillComplete (const Teuchos::RCP<Teuchos::ParameterList>& params)
3864  {
3865  // If the graph already has domain and range Maps, don't clobber
3866  // them. If it doesn't, use the current row Map for both the
3867  // domain and range Maps.
3868  //
3869  // NOTE (mfh 28 Sep 2014): If the graph was constructed without a
3870  // column Map, and column indices are inserted which are not in
3871  // the row Map on any process, this will cause troubles. However,
3872  // that is not a common case for most applications that we
3873  // encounter, and checking for it might require more
3874  // communication.
3875  Teuchos::RCP<const map_type> domMap = this->getDomainMap ();
3876  if (domMap.is_null ()) {
3877  domMap = this->getRowMap ();
3878  }
3879  Teuchos::RCP<const map_type> ranMap = this->getRangeMap ();
3880  if (ranMap.is_null ()) {
3881  ranMap = this->getRowMap ();
3882  }
3883  this->fillComplete (domMap, ranMap, params);
3884  }
3885 
3886 
3887  template <class LocalOrdinal, class GlobalOrdinal, class Node>
3888  void
3890  fillComplete (const Teuchos::RCP<const map_type>& domainMap,
3891  const Teuchos::RCP<const map_type>& rangeMap,
3892  const Teuchos::RCP<Teuchos::ParameterList>& params)
3893  {
3894  const char tfecfFuncName[] = "fillComplete: ";
3895  const bool debug = ::Tpetra::Details::Behavior::debug ();
3896 
3897  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3898  (! isFillActive () || isFillComplete (), std::runtime_error,
3899  "Graph fill state must be active (isFillActive() "
3900  "must be true) before calling fillComplete().");
3901 
3902  const int numProcs = getComm ()->getSize ();
3903 
3904  //
3905  // Read and set parameters
3906  //
3907 
3908  // Does the caller want to sort remote GIDs (within those owned by
3909  // the same process) in makeColMap()?
3910  if (! params.is_null ()) {
3911  if (params->isParameter ("sort column map ghost gids")) {
3912  sortGhostsAssociatedWithEachProcessor_ =
3913  params->get<bool> ("sort column map ghost gids",
3914  sortGhostsAssociatedWithEachProcessor_);
3915  }
3916  else if (params->isParameter ("Sort column Map ghost GIDs")) {
3917  sortGhostsAssociatedWithEachProcessor_ =
3918  params->get<bool> ("Sort column Map ghost GIDs",
3919  sortGhostsAssociatedWithEachProcessor_);
3920  }
3921  }
3922 
3923  // If true, the caller promises that no process did nonlocal
3924  // changes since the last call to fillComplete.
3925  bool assertNoNonlocalInserts = false;
3926  if (! params.is_null ()) {
3927  assertNoNonlocalInserts =
3928  params->get<bool> ("No Nonlocal Changes", assertNoNonlocalInserts);
3929  }
3930 
3931  //
3932  // Allocate indices, if they haven't already been allocated
3933  //
3934  if (! indicesAreAllocated ()) {
3935  if (hasColMap ()) {
3936  // We have a column Map, so use local indices.
3937  allocateIndices (LocalIndices);
3938  } else {
3939  // We don't have a column Map, so use global indices.
3940  allocateIndices (GlobalIndices);
3941  }
3942  }
3943 
3944  //
3945  // Do global assembly, if requested and if the communicator
3946  // contains more than one process.
3947  //
3948  const bool mayNeedGlobalAssemble = ! assertNoNonlocalInserts && numProcs > 1;
3949  if (mayNeedGlobalAssemble) {
3950  // This first checks if we need to do global assembly.
3951  // The check costs a single all-reduce.
3952  globalAssemble ();
3953  }
3954  else {
3955  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3956  (numProcs > 1 && this->nonlocals_.size() > 0, std::runtime_error,
3957  "The graph's communicator contains only one process, "
3958  "but there are nonlocal entries. "
3959  "This probably means that invalid entries were added to the graph.");
3960  }
3961 
3962  // Set domain and range Map. This may clear the Import / Export
3963  // objects if the new Maps differ from any old ones.
3964  setDomainRangeMaps (domainMap, rangeMap);
3965 
3966  // If the graph does not already have a column Map (either from
3967  // the user constructor calling the version of the constructor
3968  // that takes a column Map, or from a previous fillComplete call),
3969  // then create it.
3970  Teuchos::Array<int> remotePIDs (0);
3971  const bool mustBuildColMap = ! this->hasColMap ();
3972  if (mustBuildColMap) {
3973  this->makeColMap (remotePIDs); // resized on output
3974  }
3975 
3976  // Make indices local, if they aren't already.
3977  // The method doesn't do any work if the indices are already local.
3978  const std::pair<size_t, std::string> makeIndicesLocalResult =
3979  this->makeIndicesLocal ();
3980  if (debug) { // In debug mode, print error output on all processes
3981  using ::Tpetra::Details::gathervPrint;
3982  using Teuchos::RCP;
3983  using Teuchos::REDUCE_MIN;
3984  using Teuchos::reduceAll;
3985  using Teuchos::outArg;
3986 
3987  RCP<const map_type> map = this->getMap ();
3988  RCP<const Teuchos::Comm<int> > comm;
3989  if (! map.is_null ()) {
3990  comm = map->getComm ();
3991  }
3992  if (comm.is_null ()) {
3993  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
3994  (makeIndicesLocalResult.first != 0, std::runtime_error,
3995  makeIndicesLocalResult.second);
3996  }
3997  else {
3998  const int lclSuccess = (makeIndicesLocalResult.first == 0);
3999  int gblSuccess = 0; // output argument
4000  reduceAll (*comm, REDUCE_MIN, lclSuccess, outArg (gblSuccess));
4001  if (gblSuccess != 1) {
4002  std::ostringstream os;
4003  gathervPrint (os, makeIndicesLocalResult.second, *comm);
4004  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4005  (true, std::runtime_error, os.str ());
4006  }
4007  }
4008  }
4009  else {
4010  // TODO (mfh 20 Jul 2017) Instead of throwing here, pass along
4011  // the error state to makeImportExport or
4012  // computeGlobalConstants, which may do all-reduces and thus may
4013  // have the opportunity to communicate that error state.
4014  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4015  (makeIndicesLocalResult.first != 0, std::runtime_error,
4016  makeIndicesLocalResult.second);
4017  }
4018 
4019  // If this process has no indices, then CrsGraph considers it
4020  // already trivially sorted and merged. Thus, this method need
4021  // not be called on all processes in the row Map's communicator.
4022  this->sortAndMergeAllIndices (this->isSorted (), this->isMerged ());
4023 
4024  // Make Import and Export objects, if they haven't been made
4025  // already. If we made a column Map above, reuse information from
4026  // that process to avoid communiation in the Import setup.
4027  this->makeImportExport (remotePIDs, mustBuildColMap);
4028 
4029  // Create the Kokkos::StaticCrsGraph, if it doesn't already exist.
4030  this->fillLocalGraph (params);
4031 
4032  const bool callComputeGlobalConstants = params.get () == nullptr ||
4033  params->get ("compute global constants", true);
4034  const bool computeLocalTriangularConstants = params.get () == nullptr ||
4035  params->get ("compute local triangular constants", true);
4036  if (callComputeGlobalConstants) {
4037  this->computeGlobalConstants (computeLocalTriangularConstants);
4038  }
4039  else {
4040  this->computeLocalConstants (computeLocalTriangularConstants);
4041  }
4042  this->fillComplete_ = true;
4043  this->checkInternalState ();
4044  }
4045 
4046 
4047  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4048  void
4050  expertStaticFillComplete (const Teuchos::RCP<const map_type>& domainMap,
4051  const Teuchos::RCP<const map_type>& rangeMap,
4052  const Teuchos::RCP<const import_type>& importer,
4053  const Teuchos::RCP<const export_type>& exporter,
4054  const Teuchos::RCP<Teuchos::ParameterList>& params)
4055  {
4056  const char tfecfFuncName[] = "expertStaticFillComplete: ";
4057 #ifdef HAVE_TPETRA_MMM_TIMINGS
4058  std::string label;
4059  if(!params.is_null())
4060  label = params->get("Timer Label",label);
4061  std::string prefix = std::string("Tpetra ")+ label + std::string(": ");
4062  using Teuchos::TimeMonitor;
4063  Teuchos::RCP<Teuchos::TimeMonitor> MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Setup"))));
4064 #endif
4065 
4066 
4067  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4068  domainMap.is_null () || rangeMap.is_null (),
4069  std::runtime_error, "The input domain Map and range Map must be nonnull.");
4070  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4071  pftype_ != StaticProfile, std::runtime_error, "You may not call this "
4072  "method unless the graph is StaticProfile.");
4073  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4074  isFillComplete () || ! hasColMap (), std::runtime_error, "You may not "
4075  "call this method unless the graph has a column Map.");
4076  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4077  getNodeNumRows () > 0 && k_rowPtrs_.extent (0) == 0,
4078  std::runtime_error, "The calling process has getNodeNumRows() = "
4079  << getNodeNumRows () << " > 0 rows, but the row offsets array has not "
4080  "been set.");
4081  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4082  static_cast<size_t> (k_rowPtrs_.extent (0)) != getNodeNumRows () + 1,
4083  std::runtime_error, "The row offsets array has length " <<
4084  k_rowPtrs_.extent (0) << " != getNodeNumRows()+1 = " <<
4085  (getNodeNumRows () + 1) << ".");
4086 
4087  // Note: We don't need to do the following things which are normally done in fillComplete:
4088  // allocateIndices, globalAssemble, makeColMap, makeIndicesLocal, sortAndMergeAllIndices
4089 
4090  // Constants from allocateIndices
4091  //
4092  // mfh 08 Aug 2014: numAllocForAllRows_ and k_numAllocPerRow_ go
4093  // away once the graph is allocated. expertStaticFillComplete
4094  // either presumes that the graph is allocated, or "allocates" it.
4095  //
4096  // FIXME (mfh 08 Aug 2014) The goal for the Kokkos refactor
4097  // version of CrsGraph is to allocate in the constructor, not
4098  // lazily on first insert. That will make both
4099  // numAllocForAllRows_ and k_numAllocPerRow_ obsolete.
4100  numAllocForAllRows_ = 0;
4101  k_numAllocPerRow_ = decltype (k_numAllocPerRow_) ();
4102  indicesAreAllocated_ = true;
4103 
4104  // Constants from makeIndicesLocal
4105  //
4106  // The graph has a column Map, so its indices had better be local.
4107  indicesAreLocal_ = true;
4108  indicesAreGlobal_ = false;
4109 
4110  // set domain/range map: may clear the import/export objects
4111 #ifdef HAVE_TPETRA_MMM_TIMINGS
4112  MM = Teuchos::null;
4113  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-Maps"))));
4114 #endif
4115  setDomainRangeMaps (domainMap, rangeMap);
4116 
4117  // Presume the user sorted and merged the arrays first
4118  indicesAreSorted_ = true;
4119  noRedundancies_ = true;
4120 
4121  // makeImportExport won't create a new importer/exporter if I set one here first.
4122 #ifdef HAVE_TPETRA_MMM_TIMINGS
4123  MM = Teuchos::null;
4124  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckI"))));
4125 #endif
4126 
4127  importer_ = Teuchos::null;
4128  exporter_ = Teuchos::null;
4129  if (importer != Teuchos::null) {
4130  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4131  ! importer->getSourceMap ()->isSameAs (*getDomainMap ()) ||
4132  ! importer->getTargetMap ()->isSameAs (*getColMap ()),
4133  std::invalid_argument,": importer does not match matrix maps.");
4134  importer_ = importer;
4135 
4136  }
4137 
4138 #ifdef HAVE_TPETRA_MMM_TIMINGS
4139  MM = Teuchos::null;
4140  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXcheckE"))));
4141 #endif
4142 
4143  if (exporter != Teuchos::null) {
4144  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4145  ! exporter->getSourceMap ()->isSameAs (*getRowMap ()) ||
4146  ! exporter->getTargetMap ()->isSameAs (*getRangeMap ()),
4147  std::invalid_argument,": exporter does not match matrix maps.");
4148  exporter_ = exporter;
4149  }
4150 
4151 #ifdef HAVE_TPETRA_MMM_TIMINGS
4152  MM = Teuchos::null;
4153  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-mIXmake"))));
4154 #endif
4155  Teuchos::Array<int> remotePIDs (0); // unused output argument
4156  this->makeImportExport (remotePIDs, false);
4157 
4158  // Since we have a StaticProfile, fillLocalGraph will do the right thing...
4159 #ifdef HAVE_TPETRA_MMM_TIMINGS
4160  MM = Teuchos::null;
4161  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-fLG"))));
4162 #endif
4163  this->fillLocalGraph (params);
4164 
4165  const bool callComputeGlobalConstants = params.get () == nullptr ||
4166  params->get ("compute global constants", true);
4167  const bool computeLocalTriangularConstants = params.get () == nullptr ||
4168  params->get ("compute local triangular constants", true);
4169 
4170  if (callComputeGlobalConstants) {
4171 #ifdef HAVE_TPETRA_MMM_TIMINGS
4172  MM = Teuchos::null;
4173  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (const)"))));
4174 #endif // HAVE_TPETRA_MMM_TIMINGS
4175  this->computeGlobalConstants (computeLocalTriangularConstants);
4176  }
4177  else {
4178 #ifdef HAVE_TPETRA_MMM_TIMINGS
4179  MM = Teuchos::null;
4180  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cGC (noconst)"))));
4181 #endif // HAVE_TPETRA_MMM_TIMINGS
4182  this->computeLocalConstants (computeLocalTriangularConstants);
4183  }
4184 
4185  fillComplete_ = true;
4186 
4187 #ifdef HAVE_TPETRA_MMM_TIMINGS
4188  MM = Teuchos::null;
4189  MM = Teuchos::rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("ESFC-G-cIS"))));
4190 #endif
4191  checkInternalState ();
4192  }
4193 
4194 
// fillLocalGraph: build the local compressed-sparse-row graph (lclGraph_)
// from whatever storage the graph currently uses.
//
// Three cases are handled below:
//   - Profile is not StaticProfile: column indices live in the 2-D
//     array-of-arrays lclInds2D_.  Row offsets are computed from
//     k_numRowEntries_, and the indices are copied row by row on host into
//     newly allocated 1-D storage.
//   - StaticProfile with getNodeNumEntries() != getNodeAllocationSize():
//     packed row offsets are computed from k_numRowEntries_, and the indices
//     are packed from k_lclInds1D_ by a pack_functor run in a
//     Kokkos::parallel_for.
//   - StaticProfile with those two sizes equal: k_rowPtrs_ and k_lclInds1D_
//     are used as they are.
//
// params may be null.  If it is nonnull, its Boolean "Optimize Storage" entry
// (read with default true) decides whether the packed arrays replace the
// graph's own storage.  lclGraph_ is built from the packed arrays either way.
//
// When Behavior::debug() returns true, the consistency checks below throw
// std::logic_error.  Anything thrown by getNodeAllocationSize() is caught and
// rethrown (as std::logic_error or std::runtime_error) with added context.
//
// NOTE(review): the numbering embedded in this listing skips 4197 and 4200.
// A line therefore appears to be missing after "void" (presumably the
// "CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::" qualifier) and another at
// the top of the body (presumably a using-declaration that brings
// computeOffsetsFromCounts into scope) -- confirm against the real header.
4195  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4196  void
4198  fillLocalGraph (const Teuchos::RCP<Teuchos::ParameterList>& params)
4199  {
4201  typedef decltype (k_numRowEntries_) row_entries_type;
4202  typedef typename local_graph_type::row_map_type row_map_type;
4203  typedef typename row_map_type::non_const_type non_const_row_map_type;
4204  typedef typename local_graph_type::entries_type::non_const_type lclinds_1d_type;
4205  const char tfecfFuncName[] = "fillLocalGraph (called from fillComplete or "
4206  "expertStaticFillComplete): ";
4207  const bool debug = ::Tpetra::Details::Behavior::debug ();
4208  const size_t lclNumRows = this->getNodeNumRows ();
4209 
4210  // This method's goal is to fill in the two arrays (compressed
4211  // sparse row format) that define the sparse graph's structure.
4212  //
4213  // Use the nonconst version of row_map_type for ptr_d, because
4214  // the latter is const and we need to modify ptr_d here.
4215  non_const_row_map_type ptr_d;
4216  row_map_type ptr_d_const;
4217  lclinds_1d_type ind_d;
4218 
// Storage is optimized unless a nonnull params yields false for
// "Optimize Storage" (which is read with a default of true).
4219  bool requestOptimizedStorage = true;
4220  if (! params.is_null () && ! params->get ("Optimize Storage", true)) {
4221  requestOptimizedStorage = false;
4222  }
4223  if (this->getProfileType () != StaticProfile) {
4224  // Pack 2-D storage (DynamicProfile) into 1-D packed storage.
4225  //
4226  // DynamicProfile means that the graph's column indices are
4227  // currently stored in a 2-D "unpacked" format, in the
4228  // arrays-of-arrays lclInds2D_. We allocate 1-D storage
4229  // (ind_d) and then copy from 2-D storage (lclInds2D_) into 1-D
4230  // storage (ind_d).
4231  if (debug) {
4232  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4233  (static_cast<size_t> (this->k_numRowEntries_.extent (0)) !=
4234  lclNumRows, std::logic_error, "(DynamicProfile branch) "
4235  "k_numRowEntries_.extent(0) = " << k_numRowEntries_.extent (0)
4236  << " != getNodeNumRows() = " << lclNumRows << "");
4237  }
4238 
4239  // Pack the row offsets into ptr_d, by doing a sum-scan of the
4240  // array of valid entry counts per row (k_numRowEntries_). The
4241  // pack method can handle its counts input being a host View.
4242  //
4243  // Total number of entries in the matrix on the calling
4244  // process. We will compute this in the loop below. It's
4245  // cheap to compute and useful as a sanity check.
4246  size_t lclTotalNumEntries = 0;
4247  {
4248  // Allocate the packed row offsets array.
4249  ptr_d = non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows+1);
4250  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
4251  // This function can handle that numRowEnt_h lives on host.
4252  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
4253  ptr_d_const = ptr_d;
4254  }
4255 
4256  if (debug) {
4257  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4258  (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
4259  std::logic_error, "(DynamicProfile branch) After packing ptr_d, "
4260  "ptr_d.extent(0) = " << ptr_d.extent (0) << " != "
4261  "(lclNumRows+1) = " << (lclNumRows+1) << ".");
4262  {
4263  const auto valToCheck =
4264  ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
4265  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4266  (valToCheck != lclTotalNumEntries, std::logic_error,
4267  "(DynamicProfile branch) After packing ptr_d, ptr_d(lclNumRows = "
4268  << lclNumRows << ") = " << valToCheck << " != total number of "
4269  "entries on the calling process = " << lclTotalNumEntries << ".");
4270  }
4271  }
4272 
4273  // Allocate the array of packed column indices.
4274  ind_d = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
4275  // Pack the column indices. We have to do this sequentially on
4276  // host, since lclInds2D_ is an ArrayRCP<Array<LO>>, which
4277  // doesn't work in parallel kernels (its iterators aren't even
4278  // thread safe in debug mode).
4279  {
4280  auto ptr_h = Kokkos::create_mirror_view (ptr_d);
4281  Kokkos::deep_copy (ptr_h, ptr_d); // we need the entries on host
4282  auto ind_h = Kokkos::create_mirror_view (ind_d); // will fill on host
4283 
4284  // k_numRowEntries_ is a host View already, so we can use it here.
4285  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
4286  for (size_t row = 0; row < lclNumRows; ++row) {
4287  const size_t numEnt = numRowEnt_h(row);
4288  std::copy (lclInds2D_[row].begin (),
4289  lclInds2D_[row].begin () + numEnt,
4290  ind_h.data () + ptr_h(row));
4291  }
4292  Kokkos::deep_copy (ind_d, ind_h);
4293  }
4294 
4295  if (debug) {
4296  // Sanity check of packed row offsets.
4297  if (ptr_d.extent (0) != 0) {
4298  const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
4299  const size_t valToCheck =
4300  ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
4301  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4302  (valToCheck != static_cast<size_t> (ind_d.extent (0)),
4303  std::logic_error, "(DynamicProfile branch) After packing column "
4304  "indices, ptr_d(" << (numOffsets-1) << ") = " << valToCheck
4305  << " != ind_d.extent(0) = " << ind_d.extent (0) << ".");
4306  }
4307  }
4308  }
// This test is the complement of the branch above, so every graph that did
// not take the first branch takes this one.
4309  else if (getProfileType () == StaticProfile) {
4310  // StaticProfile means that the graph's column indices are
4311  // currently stored in a 1-D format, with row offsets in
4312  // k_rowPtrs_ and local column indices in k_lclInds1D_.
4313 
4314  if (debug) {
4315  // StaticProfile also means that the graph's array of row
4316  // offsets must already be allocated.
4317  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4318  (k_rowPtrs_.extent (0) == 0, std::logic_error,
4319  "(StaticProfile branch) k_rowPtrs_ has size zero, but shouldn't");
4320  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4321  (k_rowPtrs_.extent (0) != lclNumRows + 1, std::logic_error,
4322  "(StaticProfile branch) k_rowPtrs_.extent(0) = "
4323  << k_rowPtrs_.extent (0) << " != (lclNumRows + 1) = "
4324  << (lclNumRows + 1) << ".");
// NOTE(review): the entry read below is k_rowPtrs_(numOffsets - 1), but the
// message prints the index as numOffsets.  The "numOffsets != 0" guard is
// evaluated only after that entry has already been read.
4325  {
4326  const size_t numOffsets = k_rowPtrs_.extent (0);
4327  const auto valToCheck =
4328  ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, numOffsets - 1);
4329  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4330  (numOffsets != 0 &&
4331  k_lclInds1D_.extent (0) != valToCheck,
4332  std::logic_error, "(StaticProfile branch) numOffsets = " <<
4333  numOffsets << " != 0 and k_lclInds1D_.extent(0) = " <<
4334  k_lclInds1D_.extent (0) << " != k_rowPtrs_(" << numOffsets <<
4335  ") = " << valToCheck << ".");
4336  }
4337  }
4338 
// Fetch the allocation size.  Each handler below rethrows with a message
// that names getNodeAllocationSize as the origin of the exception.
4339  size_t allocSize = 0;
4340  try {
4341  allocSize = this->getNodeAllocationSize ();
4342  }
4343  catch (std::logic_error& e) {
4344  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4345  (true, std::logic_error, "getNodeAllocationSize threw "
4346  "std::logic_error: " << e.what ());
4347  }
4348  catch (std::runtime_error& e) {
4349  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4350  (true, std::runtime_error, "getNodeAllocationSize threw "
4351  "std::runtime_error: " << e.what ());
4352  }
4353  catch (std::exception& e) {
4354  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4355  (true, std::runtime_error, "getNodeAllocationSize threw "
4356  "std::exception: " << e.what ());
4357  }
4358  catch (...) {
4359  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4360  (true, std::runtime_error, "getNodeAllocationSize threw "
4361  "an exception not a subclass of std::exception.");
4362  }
4363 
4364  if (this->getNodeNumEntries () != allocSize) {
4365  // The graph's current 1-D storage is "unpacked." This means
4366  // the row offsets may differ from what the final row offsets
4367  // should be. This could happen, for example, if the user
4368  // specified StaticProfile in the constructor and set an upper
4369  // bound on the number of entries in each row, but didn't fill
4370  // all those entries.
4371 
4372  if (debug) {
4373  if (k_rowPtrs_.extent (0) != 0) {
4374  const size_t numOffsets =
4375  static_cast<size_t> (k_rowPtrs_.extent (0));
4376  const auto valToCheck =
4377  ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, numOffsets - 1);
4378  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4379  (valToCheck != static_cast<size_t> (k_lclInds1D_.extent (0)),
4380  std::logic_error, "(StaticProfile unpacked branch) Before "
4381  "allocating or packing, k_rowPtrs_(" << (numOffsets-1) << ") = "
4382  << valToCheck << " != k_lclInds1D_.extent(0) = "
4383  << k_lclInds1D_.extent (0) << ".");
4384  }
4385  }
4386 
4387  // Pack the row offsets into ptr_d, by doing a sum-scan of the
4388  // array of valid entry counts per row (k_numRowEntries_).
4389 
4390  // Total number of entries in the matrix on the calling
4391  // process. We will compute this in the loop below. It's
4392  // cheap to compute and useful as a sanity check.
4393  size_t lclTotalNumEntries = 0;
4394  {
4395  // Allocate the packed row offsets array.
4396  ptr_d = non_const_row_map_type ("Tpetra::CrsGraph::ptr", lclNumRows + 1);
4397  ptr_d_const = ptr_d;
4398 
4399  // It's ok that k_numRowEntries_ is a host View; the
4400  // function can handle this.
4401  typename row_entries_type::const_type numRowEnt_h = k_numRowEntries_;
4402  if (debug) {
4403  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4404  (static_cast<size_t> (numRowEnt_h.extent (0)) != lclNumRows,
4405  std::logic_error, "(StaticProfile unpacked branch) "
4406  "numRowEnt_h.extent(0) = " << numRowEnt_h.extent (0)
4407  << " != getNodeNumRows() = " << lclNumRows << "");
4408  }
4409 
4410  lclTotalNumEntries = computeOffsetsFromCounts (ptr_d, numRowEnt_h);
4411 
4412  if (debug) {
4413  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4414  (static_cast<size_t> (ptr_d.extent (0)) != lclNumRows + 1,
4415  std::logic_error, "(StaticProfile unpacked branch) After "
4416  "allocating ptr_d, ptr_d.extent(0) = " << ptr_d.extent (0)
4417  << " != lclNumRows+1 = " << (lclNumRows+1) << ".");
4418  {
4419  const auto valToCheck =
4420  ::Tpetra::Details::getEntryOnHost (ptr_d, lclNumRows);
4421  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4422  (valToCheck != lclTotalNumEntries, std::logic_error,
4423  "Tpetra::CrsGraph::fillLocalGraph: In StaticProfile unpacked "
4424  "branch, after filling ptr_d, ptr_d(lclNumRows=" << lclNumRows
4425  << ") = " << valToCheck << " != total number of entries on "
4426  "the calling process = " << lclTotalNumEntries << ".");
4427  }
4428  }
4429  }
4430 
4431  // Allocate the array of packed column indices.
4432  ind_d = lclinds_1d_type ("Tpetra::CrsGraph::ind", lclTotalNumEntries);
4433 
4434  // k_rowPtrs_ and k_lclInds1D_ are currently unpacked. Pack
4435  // them, using the packed row offsets array ptr_d that we
4436  // created above.
4437  //
4438  // FIXME (mfh 08 Aug 2014) If "Optimize Storage" is false (in
4439  // CrsMatrix?), we need to keep around the unpacked row
4440  // offsets and column indices.
4441 
4442  // Pack the column indices from unpacked k_lclInds1D_ into
4443  // packed ind_d. We will replace k_lclInds1D_ below.
4444  typedef pack_functor<
4445  typename local_graph_type::entries_type::non_const_type,
4446  row_map_type> inds_packer_type;
4447  inds_packer_type f (ind_d, k_lclInds1D_, ptr_d, k_rowPtrs_);
4448  {
4449  typedef typename decltype (ind_d)::execution_space exec_space;
4450  typedef Kokkos::RangePolicy<exec_space, LocalOrdinal> range_type;
4451  Kokkos::parallel_for (range_type (0, lclNumRows), f);
4452  }
4453 
// NOTE(review): the messages below are labelled '"Optimize Storage"=true
// branch', but this branch is selected by getNodeNumEntries() != allocSize,
// not by the "Optimize Storage" parameter.
4454  if (debug) {
4455  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4456  (ptr_d.extent (0) == 0, std::logic_error, "(StaticProfile "
4457  "\"Optimize Storage\"=true branch) After packing, "
4458  "ptr_d.extent(0) = 0. This probably means k_rowPtrs_ was "
4459  "never allocated.");
4460  if (ptr_d.extent (0) != 0) {
4461  const size_t numOffsets = static_cast<size_t> (ptr_d.extent (0));
4462  const auto valToCheck =
4463  ::Tpetra::Details::getEntryOnHost (ptr_d, numOffsets - 1);
4464  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4465  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
4466  std::logic_error, "(StaticProfile \"Optimize Storage\"=true "
4467  "branch) After packing, ptr_d(" << (numOffsets-1) << ") = "
4468  << valToCheck << " != ind_d.extent(0) = "
4469  << ind_d.extent (0) << ".");
4470  }
4471  }
4472  }
4473  else { // We don't have to pack, so just set the pointers.
4474  ptr_d_const = k_rowPtrs_;
4475  ind_d = k_lclInds1D_;
4476 
4477  if (debug) {
4478  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4479  (ptr_d_const.extent (0) == 0, std::logic_error, "(StaticProfile "
4480  "\"Optimize Storage\"=false branch) ptr_d_const.extent(0) = 0. "
4481  "This probably means that k_rowPtrs_ was never allocated.");
4482  if (ptr_d_const.extent (0) != 0) {
4483  const size_t numOffsets =
4484  static_cast<size_t> (ptr_d_const.extent (0));
4485  const size_t valToCheck =
4486  ::Tpetra::Details::getEntryOnHost (ptr_d_const, numOffsets - 1);
4487  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4488  (valToCheck != static_cast<size_t> (ind_d.extent (0)),
4489  std::logic_error, "(StaticProfile \"Optimize Storage\"=false "
4490  "branch) ptr_d_const(" << (numOffsets-1) << ") = " << valToCheck
4491  << " != ind_d.extent(0) = " << ind_d.extent (0) << ".");
4492  }
4493  }
4494  }
4495  }
4496 
// Final check, common to every branch: the offsets array must have
// lclNumRows + 1 entries and its last entry must equal the number of packed
// column indices.
4497  if (debug) {
4498  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4499  (static_cast<size_t> (ptr_d_const.extent (0)) != lclNumRows + 1,
4500  std::logic_error, "After packing, ptr_d_const.extent(0) = " <<
4501  ptr_d_const.extent (0) << " != lclNumRows+1 = " << (lclNumRows+1)
4502  << ".");
4503  if (ptr_d_const.extent (0) != 0) {
4504  const size_t numOffsets = static_cast<size_t> (ptr_d_const.extent (0));
4505  const auto valToCheck =
4506  ::Tpetra::Details::getEntryOnHost (ptr_d_const, numOffsets - 1);
4507  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
4508  (static_cast<size_t> (valToCheck) != ind_d.extent (0),
4509  std::logic_error, "After packing, ptr_d_const(" << (numOffsets-1)
4510  << ") = " << valToCheck << " != ind_d.extent(0) = "
4511  << ind_d.extent (0) << ".");
4512  }
4513  }
4514 
// When requestOptimizedStorage is false, this function leaves the graph's
// own storage members (lclInds2D_, k_numRowEntries_, k_rowPtrs_,
// k_lclInds1D_, pftype_, storageStatus_) untouched; only lclGraph_ below
// refers to the packed arrays.
4515  if (requestOptimizedStorage) {
4516  // With optimized storage, we don't need to store the 2-D column
4517  // indices array-of-arrays, or the array of row entry counts.
4518 
4519  // Free graph data structures that are only needed for 2-D or
4520  // unpacked 1-D storage.
4521  lclInds2D_ = Teuchos::null;
4522  k_numRowEntries_ = row_entries_type ();
4523 
4524  // Keep the new 1-D packed allocations.
4525  k_rowPtrs_ = ptr_d_const;
4526  k_lclInds1D_ = ind_d;
4527 
4528  // The graph is definitely StaticProfile now, whether or not it
4529  // was before.
4530  pftype_ = StaticProfile;
4531  storageStatus_ = ::Tpetra::Details::STORAGE_1D_PACKED;
4532  }
4533 
4534  // FIXME (mfh 28 Aug 2014) "Local Graph" sublist no longer used.
4535 
4536  // Build the local graph.
4537  lclGraph_ = local_graph_type (ind_d, ptr_d_const);
4538  }
4539 
4540  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4541  void
4543  replaceColMap (const Teuchos::RCP<const map_type>& newColMap)
4544  {
4545  // NOTE: This safety check matches the code, but not the documentation of Crsgraph
4546  //
4547  // FIXME (mfh 18 Aug 2014) This will break if the calling process
4548  // has no entries, because in that case, currently it is neither
4549  // locally nor globally indexed. This will change once we get rid
4550  // of lazy allocation (so that the constructor allocates indices
4551  // and therefore commits to local vs. global).
4552  const char tfecfFuncName[] = "replaceColMap: ";
4553  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4554  isLocallyIndexed () || isGloballyIndexed (), std::runtime_error,
4555  "Requires matching maps and non-static graph.");
4556  colMap_ = newColMap;
4557  }
4558 
4559  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4560  void
4562  reindexColumns (const Teuchos::RCP<const map_type>& newColMap,
4563  const Teuchos::RCP<const import_type>& newImport,
4564  const bool sortIndicesInEachRow)
4565  {
4566  using Teuchos::REDUCE_MIN;
4567  using Teuchos::reduceAll;
4568  using Teuchos::RCP;
4569  typedef GlobalOrdinal GO;
4570  typedef LocalOrdinal LO;
4571  typedef typename local_graph_type::entries_type::non_const_type col_inds_type;
4572  const char tfecfFuncName[] = "reindexColumns: ";
4573 
4574  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4575  isFillComplete (), std::runtime_error, "The graph is fill complete "
4576  "(isFillComplete() returns true). You must call resumeFill() before "
4577  "you may call this method.");
4578 
4579  // mfh 19 Aug 2014: This method does NOT redistribute data; it
4580  // doesn't claim to do the work of an Import or Export. This
4581  // means that for all processes, the calling process MUST own all
4582  // column indices, in both the old column Map (if it exists) and
4583  // the new column Map. We check this via an all-reduce.
4584  //
4585  // Some processes may be globally indexed, others may be locally
4586  // indexed, and others (that have no graph entries) may be
4587  // neither. This method will NOT change the graph's current
4588  // state. If it's locally indexed, it will stay that way, and
4589  // vice versa. It would easy to add an option to convert indices
4590  // from global to local, so as to save a global-to-local
4591  // conversion pass. However, we don't do this here. The intended
4592  // typical use case is that the graph already has a column Map and
4593  // is locally indexed, and this is the case for which we optimize.
4594 
4595  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
4596 
4597  // Attempt to convert indices to the new column Map's version of
4598  // local. This will fail if on the calling process, the graph has
4599  // indices that are not on that process in the new column Map.
4600  // After the local conversion attempt, we will do an all-reduce to
4601  // see if any processes failed.
4602 
4603  // If this is false, then either the graph contains a column index
4604  // which is invalid in the CURRENT column Map, or the graph is
4605  // locally indexed but currently has no column Map. In either
4606  // case, there is no way to convert the current local indices into
4607  // global indices, so that we can convert them into the new column
4608  // Map's local indices. It's possible for this to be true on some
4609  // processes but not others, due to replaceColMap.
4610  bool allCurColIndsValid = true;
4611  // On the calling process, are all valid current column indices
4612  // also in the new column Map on the calling process? In other
4613  // words, does local reindexing suffice, or should the user have
4614  // done an Import or Export instead?
4615  bool localSuffices = true;
4616 
4617  // Final arrays for the local indices. We will allocate exactly
4618  // one of these ONLY if the graph is locally indexed on the
4619  // calling process, and ONLY if the graph has one or more entries
4620  // (is not empty) on the calling process. In that case, we
4621  // allocate the first (1-D storage) if the graph has a static
4622  // profile, else we allocate the second (2-D storage).
4623  typename local_graph_type::entries_type::non_const_type newLclInds1D;
4624  Teuchos::ArrayRCP<Teuchos::Array<LO> > newLclInds2D;
4625 
4626  // If indices aren't allocated, that means the calling process
4627  // owns no entries in the graph. Thus, there is nothing to
4628  // convert, and it trivially succeeds locally.
4629  if (indicesAreAllocated ()) {
4630  if (isLocallyIndexed ()) {
4631  if (hasColMap ()) { // locally indexed, and currently has a column Map
4632  const map_type& oldColMap = * (getColMap ());
4633  if (pftype_ == StaticProfile) {
4634  // Allocate storage for the new local indices.
4635  const size_t allocSize = this->getNodeAllocationSize ();
4636  newLclInds1D = col_inds_type ("Tpetra::CrsGraph::ind", allocSize);
4637  // Attempt to convert the new indices locally.
4638  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4639  const RowInfo rowInfo = this->getRowInfo (lclRow);
4640  const size_t beg = rowInfo.offset1D;
4641  const size_t end = beg + rowInfo.numEntries;
4642  for (size_t k = beg; k < end; ++k) {
4643  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4644  // use a DualView instead.
4645  const LO oldLclCol = k_lclInds1D_(k);
4646  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4647  allCurColIndsValid = false;
4648  break; // Stop at the first invalid index
4649  }
4650  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4651 
4652  // The above conversion MUST succeed. Otherwise, the
4653  // current local index is invalid, which means that
4654  // the graph was constructed incorrectly.
4655  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4656  allCurColIndsValid = false;
4657  break; // Stop at the first invalid index
4658  }
4659  else {
4660  const LO newLclCol = newColMap->getLocalElement (gblCol);
4661  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4662  localSuffices = false;
4663  break; // Stop at the first invalid index
4664  }
4665  // FIXME (mfh 21 Aug 2014) This assumes UVM. Should
4666  // use a DualView instead.
4667  newLclInds1D(k) = newLclCol;
4668  }
4669  } // for each entry in the current row
4670  } // for each locally owned row
4671  }
4672  else { // pftype_ == DynamicProfile
4673  // Allocate storage for the new local indices. We only
4674  // allocate the outer array here; we will allocate the
4675  // inner arrays below.
4676  newLclInds2D = Teuchos::arcp<Teuchos::Array<LO> > (lclNumRows);
4677 
4678  // Attempt to convert the new indices locally.
4679  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4680  const RowInfo rowInfo = this->getRowInfo (lclRow);
4681  newLclInds2D.resize (rowInfo.allocSize);
4682 
4683  Teuchos::ArrayView<const LO> oldLclRowView = getLocalView (rowInfo);
4684  Teuchos::ArrayView<LO> newLclRowView = (newLclInds2D[lclRow]) ();
4685 
4686  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4687  const LO oldLclCol = oldLclRowView[k];
4688  if (oldLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4689  allCurColIndsValid = false;
4690  break; // Stop at the first invalid index
4691  }
4692  const GO gblCol = oldColMap.getGlobalElement (oldLclCol);
4693 
4694  // The above conversion MUST succeed. Otherwise, the
4695  // local index is invalid and the graph is wrong.
4696  if (gblCol == Teuchos::OrdinalTraits<GO>::invalid ()) {
4697  allCurColIndsValid = false;
4698  break; // Stop at the first invalid index
4699  }
4700  else {
4701  const LO newLclCol = newColMap->getLocalElement (gblCol);
4702  if (newLclCol == Teuchos::OrdinalTraits<LO>::invalid ()) {
4703  localSuffices = false;
4704  break; // Stop at the first invalid index.
4705  }
4706  newLclRowView[k] = newLclCol;
4707  }
4708  } // for each entry in the current row
4709  } // for each locally owned row
4710  } // pftype_
4711  }
4712  else { // locally indexed, but no column Map
4713  // This case is only possible if replaceColMap() was called
4714  // with a null argument on the calling process. It's
4715  // possible, but it means that this method can't possibly
4716  // succeed, since we have no way of knowing how to convert
4717  // the current local indices to global indices.
4718  allCurColIndsValid = false;
4719  }
4720  }
4721  else { // globally indexed
4722  // If the graph is globally indexed, we don't need to save
4723  // local indices, but we _do_ need to know whether the current
4724  // global indices are valid in the new column Map. We may
4725  // need to do a getRemoteIndexList call to find this out.
4726  //
4727  // In this case, it doesn't matter whether the graph currently
4728  // has a column Map. We don't need the old column Map to
4729  // convert from global indices to the _new_ column Map's local
4730  // indices. Furthermore, we can use the same code, whether
4731  // the graph is static or dynamic profile.
4732 
4733  // Test whether the current global indices are in the new
4734  // column Map on the calling process.
4735  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
4736  const RowInfo rowInfo = this->getRowInfo (lclRow);
4737  Teuchos::ArrayView<const GO> oldGblRowView = getGlobalView (rowInfo);
4738  for (size_t k = 0; k < rowInfo.numEntries; ++k) {
4739  const GO gblCol = oldGblRowView[k];
4740  if (! newColMap->isNodeGlobalElement (gblCol)) {
4741  localSuffices = false;
4742  break; // Stop at the first invalid index
4743  }
4744  } // for each entry in the current row
4745  } // for each locally owned row
4746  } // locally or globally indexed
4747  } // whether indices are allocated
4748 
4749  // Do an all-reduce to check both possible error conditions.
4750  int lclSuccess[2];
4751  lclSuccess[0] = allCurColIndsValid ? 1 : 0;
4752  lclSuccess[1] = localSuffices ? 1 : 0;
4753  int gblSuccess[2];
4754  gblSuccess[0] = 0;
4755  gblSuccess[1] = 0;
4756  RCP<const Teuchos::Comm<int> > comm =
4757  getRowMap ().is_null () ? Teuchos::null : getRowMap ()->getComm ();
4758  if (! comm.is_null ()) {
4759  reduceAll<int, int> (*comm, REDUCE_MIN, 2, lclSuccess, gblSuccess);
4760  }
4761 
4762  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4763  gblSuccess[0] == 0, std::runtime_error, "It is not possible to continue."
4764  " The most likely reason is that the graph is locally indexed, but the "
4765  "column Map is missing (null) on some processes, due to a previous call "
4766  "to replaceColMap().");
4767 
4768  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(
4769  gblSuccess[1] == 0, std::runtime_error, "On some process, the graph "
4770  "contains column indices that are in the old column Map, but not in the "
4771  "new column Map (on that process). This method does NOT redistribute "
4772  "data; it does not claim to do the work of an Import or Export operation."
4773  " This means that for all processess, the calling process MUST own all "
4774  "column indices, in both the old column Map and the new column Map. In "
4775  "this case, you will need to do an Import or Export operation to "
4776  "redistribute data.");
4777 
4778  // Commit the results.
4779  if (isLocallyIndexed ()) {
4780  if (pftype_ == StaticProfile) {
4781  k_lclInds1D_ = newLclInds1D;
4782  } else { // dynamic profile
4783  lclInds2D_ = newLclInds2D;
4784  }
4785  // We've reindexed, so we don't know if the indices are sorted.
4786  //
4787  // FIXME (mfh 17 Sep 2014) It could make sense to check this,
4788  // since we're already going through all the indices above. We
4789  // could also sort each row in place; that way, we would only
4790  // have to make one pass over the rows.
4791  indicesAreSorted_ = false;
4792  if (sortIndicesInEachRow) {
4793  // NOTE (mfh 17 Sep 2014) The graph must be locally indexed in
4794  // order to call this method.
4795  //
4796  // FIXME (mfh 17 Sep 2014) This violates the strong exception
4797  // guarantee. It would be better to sort the new index arrays
4798  // before committing them.
4799  const bool sorted = false; // need to resort
4800  const bool merged = true; // no need to merge, since no dups
4801  this->sortAndMergeAllIndices (sorted, merged);
4802  }
4803  }
4804  colMap_ = newColMap;
4805 
4806  if (newImport.is_null ()) {
4807  // FIXME (mfh 19 Aug 2014) Should use the above all-reduce to
4808  // check whether the input Import is null on any process.
4809  //
4810  // If the domain Map hasn't been set yet, we can't compute a new
4811  // Import object. Leave it what it is; it should be null, but
4812  // it doesn't matter. If the domain Map _has_ been set, then
4813  // compute a new Import object if necessary.
4814  if (! domainMap_.is_null ()) {
4815  if (! domainMap_->isSameAs (* newColMap)) {
4816  importer_ = Teuchos::rcp (new import_type (domainMap_, newColMap));
4817  } else {
4818  importer_ = Teuchos::null; // don't need an Import
4819  }
4820  }
4821  } else {
4822  // The caller gave us an Import object. Assume that it's valid.
4823  importer_ = newImport;
4824  }
4825  }
4826 
4827 
4828  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4829  void
4831  replaceDomainMapAndImporter (const Teuchos::RCP<const map_type>& newDomainMap,
4832  const Teuchos::RCP<const import_type>& newImporter)
4833  {
4834  const char prefix[] = "Tpetra::CrsGraph::replaceDomainMapAndImporter: ";
4835  TEUCHOS_TEST_FOR_EXCEPTION(
4836  colMap_.is_null (), std::invalid_argument, prefix << "You may not call "
4837  "this method unless the graph already has a column Map.");
4838  TEUCHOS_TEST_FOR_EXCEPTION(
4839  newDomainMap.is_null (), std::invalid_argument,
4840  prefix << "The new domain Map must be nonnull.");
4841 
4842  const bool debug = ::Tpetra::Details::Behavior::debug ();
4843  if (debug) {
4844  if (newImporter.is_null ()) {
4845  // It's not a good idea to put expensive operations in a macro
4846  // clause, even if they are side effect - free, because macros
4847  // don't promise that they won't evaluate their arguments more
4848  // than once. It's polite for them to do so, but not required.
4849  const bool colSameAsDom = colMap_->isSameAs (*newDomainMap);
4850  TEUCHOS_TEST_FOR_EXCEPTION
4851  (colSameAsDom, std::invalid_argument, "If the new Import is null, "
4852  "then the new domain Map must be the same as the current column Map.");
4853  }
4854  else {
4855  const bool colSameAsTgt =
4856  colMap_->isSameAs (* (newImporter->getTargetMap ()));
4857  const bool newDomSameAsSrc =
4858  newDomainMap->isSameAs (* (newImporter->getSourceMap ()));
4859  TEUCHOS_TEST_FOR_EXCEPTION
4860  (! colSameAsTgt || ! newDomSameAsSrc, std::invalid_argument, "If the "
4861  "new Import is nonnull, then the current column Map must be the same "
4862  "as the new Import's target Map, and the new domain Map must be the "
4863  "same as the new Import's source Map.");
4864  }
4865  }
4866 
4867  domainMap_ = newDomainMap;
4868  importer_ = Teuchos::rcp_const_cast<import_type> (newImporter);
4869  }
4870 
4871  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4874  getLocalGraph () const
4875  {
4876  return lclGraph_;
4877  }
4878 
4879  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4880  void
4882  computeGlobalConstants (const bool computeLocalTriangularConstants)
4883  {
4884  using ::Tpetra::Details::ProfilingRegion;
4885  using Teuchos::ArrayView;
4886  using Teuchos::outArg;
4887  using Teuchos::reduceAll;
4888  typedef global_size_t GST;
4889 
4890  ProfilingRegion regionCGC ("Tpetra::CrsGraph::computeGlobalConstants");
4891 
4892  this->computeLocalConstants (computeLocalTriangularConstants);
4893 
4894  // Compute global constants from local constants. Processes that
4895  // already have local constants still participate in the
4896  // all-reduces, using their previously computed values.
4897  if (! this->haveGlobalConstants_) {
4898  const Teuchos::Comm<int>& comm = * (this->getComm ());
4899  // Promote all the nodeNum* and nodeMaxNum* quantities from
4900  // size_t to global_size_t, when doing the all-reduces for
4901  // globalNum* / globalMaxNum* results.
4902  //
4903  // FIXME (mfh 07 May 2013) Unfortunately, we either have to do
4904  // this in two all-reduces (one for the sum and the other for
4905  // the max), or use a custom MPI_Op that combines the sum and
4906  // the max. The latter might even be slower than two
4907  // all-reduces on modern network hardware. It would also be a
4908  // good idea to use nonblocking all-reduces (MPI 3), so that we
4909  // don't have to wait around for the first one to finish before
4910  // starting the second one.
4911  GST lcl[2], gbl[2];
4912  lcl[0] = static_cast<GST> (this->getNodeNumEntries ());
4913 
4914  // mfh 03 May 2018: nodeNumDiags_ is invalid if
4915  // computeLocalTriangularConstants is false, but there's no
4916  // practical network latency difference between an all-reduce of
4917  // length 1 and an all-reduce of length 2, so it's not worth
4918  // distinguishing between the two. However, we do want to avoid
4919  // integer overflow, so we'll just set the input local sum to
4920  // zero in that case.
4921  lcl[1] = computeLocalTriangularConstants ?
4922  static_cast<GST> (this->nodeNumDiags_) :
4923  static_cast<GST> (0);
4924 
4925  reduceAll<int,GST> (comm, Teuchos::REDUCE_SUM, 2, lcl, gbl);
4926  this->globalNumEntries_ = gbl[0];
4927 
4928  // mfh 03 May 2018: If not computing local triangular
4929  // properties, users want this to be invalid, not just zero.
4930  // This will help with debugging.
4931  this->globalNumDiags_ = computeLocalTriangularConstants ?
4932  gbl[1] :
4933  Teuchos::OrdinalTraits<GST>::invalid ();
4934 
4935  const GST lclMaxNumRowEnt = static_cast<GST> (this->nodeMaxNumRowEntries_);
4936  reduceAll<int, GST> (comm, Teuchos::REDUCE_MAX, lclMaxNumRowEnt,
4937  outArg (this->globalMaxNumRowEntries_));
4938  this->haveGlobalConstants_ = true;
4939  }
4940  }
4941 
4942 
4943  template <class LocalOrdinal, class GlobalOrdinal, class Node>
4944  void
4946  computeLocalConstants (const bool computeLocalTriangularConstants)
4947  {
4949  using ::Tpetra::Details::ProfilingRegion;
4950 
4951  ProfilingRegion regionCLC ("Tpetra::CrsGraph::computeLocalConstants");
4952  if (this->haveLocalConstants_) {
4953  return;
4954  }
4955 
4956  // Reset local properties
4957  this->lowerTriangular_ = false;
4958  this->upperTriangular_ = false;
4959  this->nodeMaxNumRowEntries_ = Teuchos::OrdinalTraits<size_t>::invalid ();
4960  this->nodeNumDiags_ = Teuchos::OrdinalTraits<size_t>::invalid ();
4961 
4962  if (computeLocalTriangularConstants) {
4963  const bool hasRowAndColumnMaps =
4964  this->rowMap_.get () != nullptr && this->colMap_.get () != nullptr;
4965  if (hasRowAndColumnMaps) {
4966  auto lclRowMap = this->rowMap_->getLocalMap ();
4967  auto lclColMap = this->colMap_->getLocalMap ();
4968 
4969  // Make sure that the GPU can see any updates made on host.
4970  // This code only reads the local graph, so we don't need a
4971  // fence afterwards.
4972  execution_space().fence ();
4973 
4974  // mfh 01 May 2018: See GitHub Issue #2658.
4975  constexpr bool ignoreMapsForTriStruct = true;
4976  auto result =
4977  determineLocalTriangularStructure (this->lclGraph_, lclRowMap,
4978  lclColMap, ignoreMapsForTriStruct);
4979  this->lowerTriangular_ = result.couldBeLowerTriangular;
4980  this->upperTriangular_ = result.couldBeUpperTriangular;
4981  this->nodeMaxNumRowEntries_ = result.maxNumRowEnt;
4982  this->nodeNumDiags_ = result.diagCount;
4983  }
4984  else {
4985  this->nodeMaxNumRowEntries_ = 0;
4986  this->nodeNumDiags_ = 0;
4987  }
4988  }
4989  else {
4990  using LO = local_ordinal_type;
4991  // Make sure that the GPU can see any updates made on host.
4992  // This code only reads the local graph, so we don't need a
4993  // fence afterwards.
4994  execution_space().fence ();
4995 
4996  auto ptr = this->lclGraph_.row_map;
4997  const LO lclNumRows = ptr.extent(0) == 0 ?
4998  static_cast<LO> (0) :
4999  (static_cast<LO> (ptr.extent(0)) - static_cast<LO> (1));
5000 
5001  const LO lclMaxNumRowEnt =
5002  ::Tpetra::Details::maxDifference ("Tpetra::CrsGraph: nodeMaxNumRowEntries",
5003  ptr, lclNumRows);
5004  this->nodeMaxNumRowEntries_ = static_cast<size_t> (lclMaxNumRowEnt);
5005  }
5006  this->haveLocalConstants_ = true;
5007  }
5008 
5009 
5010  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5011  std::pair<size_t, std::string>
5014  {
5015  using ::Tpetra::Details::ProfilingRegion;
5016  using Teuchos::arcp;
5017  using Teuchos::Array;
5018  using std::endl;
5019  typedef LocalOrdinal LO;
5020  typedef GlobalOrdinal GO;
5021  typedef device_type DT;
5022  typedef typename local_graph_type::row_map_type::non_const_value_type offset_type;
5023  typedef decltype (k_numRowEntries_) row_entries_type;
5024  typedef typename row_entries_type::non_const_value_type num_ent_type;
5025  typedef typename local_graph_type::entries_type::non_const_type
5026  lcl_col_inds_type;
5027  typedef Kokkos::View<GO*, typename lcl_col_inds_type::array_layout,
5028  device_type> gbl_col_inds_type;
5029  const char tfecfFuncName[] = "makeIndicesLocal: ";
5030  ProfilingRegion regionMakeIndicesLocal ("Tpetra::CrsGraph::makeIndicesLocal");
5031 
5032  // These are somewhat global properties, so it's safe to have
5033  // exception checks for them, rather than returning an error code.
5034  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5035  (! this->hasColMap (), std::logic_error, "The graph does not have a "
5036  "column Map yet. This method should never be called in that case. "
5037  "Please report this bug to the Tpetra developers.");
5038  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5039  (this->getColMap ().is_null (), std::logic_error, "The graph claims "
5040  "that it has a column Map, because hasColMap() returns true. However, "
5041  "the result of getColMap() is null. This should never happen. Please "
5042  "report this bug to the Tpetra developers.");
5043 
5044  // Return value 1: The number of column indices (counting
5045  // duplicates) that could not be converted to local indices,
5046  // because they were not in the column Map on the calling process.
5047  size_t lclNumErrs = 0;
5048  std::ostringstream errStrm; // for return value 2 (error string)
5049 
5050  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
5051  const map_type& colMap = * (this->getColMap ());
5052 
5053  if (this->isGloballyIndexed () && lclNumRows != 0) {
5054  // This is a host-accessible View.
5055  typename row_entries_type::const_type h_numRowEnt =
5056  this->k_numRowEntries_;
5057 
5058  // Allocate space for local indices.
5059  if (this->getProfileType () == StaticProfile) {
5060  // If GO and LO are the same size, we can reuse the existing
5061  // array of 1-D index storage to convert column indices from
5062  // GO to LO. Otherwise, we'll just allocate a new buffer.
5063  constexpr bool LO_GO_same = std::is_same<LO, GO>::value;
5064  if (LO_GO_same) {
5065  // This prevents a build error (illegal assignment) if
5066  // LO_GO_same is _not_ true. Only the first branch
5067  // (returning k_gblInds1D_) should ever get taken.
5068  k_lclInds1D_ = Kokkos::Impl::if_c<LO_GO_same,
5070  lcl_col_inds_type>::select (k_gblInds1D_, k_lclInds1D_);
5071  }
5072  else {
5073  if (k_rowPtrs_.extent (0) == 0) {
5074  errStrm << "k_rowPtrs_.extent(0) == 0. This should never "
5075  "happen here. Please report this bug to the Tpetra developers."
5076  << endl;
5077  // Need to return early.
5078  return std::make_pair (Tpetra::Details::OrdinalTraits<size_t>::invalid (),
5079  errStrm.str ());
5080  }
5081  const auto numEnt = ::Tpetra::Details::getEntryOnHost (k_rowPtrs_, lclNumRows);
5082 
5083  // mfh 17 Dec 2016: We don't need initial zero-fill of
5084  // k_lclInds1D_, because we will fill it below anyway.
5085  // AllowPadding would only help for aligned access (e.g.,
5086  // for vectorization) if we also were to pad each row to the
5087  // same alignment, so we'll skip AllowPadding for now.
5088 
5089  // using Kokkos::AllowPadding;
5090  using Kokkos::view_alloc;
5091  using Kokkos::WithoutInitializing;
5092 
5093  // When giving the label as an argument to
5094  // Kokkos::view_alloc, the label must be a string and not a
5095  // char*, else the code won't compile. This is because
5096  // view_alloc also allows a raw pointer as its first
5097  // argument. See
5098  // https://github.com/kokkos/kokkos/issues/434. This is a
5099  // large allocation typically, so the overhead of creating
5100  // an std::string is minor.
5101  const std::string label ("Tpetra::CrsGraph::lclind");
5102  k_lclInds1D_ =
5103  lcl_col_inds_type (view_alloc (label, WithoutInitializing), numEnt);
5104  }
5105 
5106  auto lclColMap = colMap.getLocalMap ();
5107  // This is a "device mirror" of the host View h_numRowEnt.
5108  //
5109  // NOTE (mfh 27 Sep 2016) Currently, the right way to get a
5110  // Device instance is to use its default constructor. See the
5111  // following Kokkos issue:
5112  //
5113  // https://github.com/kokkos/kokkos/issues/442
5114  auto k_numRowEnt = Kokkos::create_mirror_view (device_type (), h_numRowEnt);
5115 
5117  lclNumErrs =
5118  convertColumnIndicesFromGlobalToLocal<LO, GO, DT, offset_type, num_ent_type> (k_lclInds1D_,
5119  k_gblInds1D_,
5120  k_rowPtrs_,
5121  lclColMap,
5122  k_numRowEnt);
5123  if (lclNumErrs != 0) {
5124  const int myRank = [this] () {
5125  auto map = this->getMap ();
5126  if (map.is_null ()) {
5127  return 0;
5128  }
5129  else {
5130  auto comm = map->getComm ();
5131  return comm.is_null () ? 0 : comm->getRank ();
5132  }
5133  } ();
5134  const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
5135  errStrm << "(Process " << myRank << ") When converting column "
5136  "indices from global to local, we encountered " << lclNumErrs
5137  << " ind" << (pluralNumErrs ? "ices" : "ex")
5138  << " that do" << (pluralNumErrs ? "es" : "")
5139  << " not live in the column Map on this process." << endl;
5140  }
5141 
5142  // We've converted column indices from global to local, so we
5143  // can deallocate the global column indices (which we know are
5144  // in 1-D storage, because the graph has static profile).
5145  k_gblInds1D_ = gbl_col_inds_type ();
5146  }
5147  else { // the graph has dynamic profile (2-D index storage)
5148  // Avoid any drama with *this capture, by extracting the
5149  // variables that the thread-parallel loop will need below.
5150  // This is just a shallow copy.
5151  Teuchos::ArrayRCP<Teuchos::Array<LO> > lclInds2D (lclNumRows);
5152  Teuchos::ArrayRCP<Teuchos::Array<GO> > gblInds2D = this->gblInds2D_;
5153 
5154  // We must use a host thread parallelization here, because
5155  // Teuchos::ArrayRCP does not work in CUDA.
5156  typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
5157  host_execution_space;
5158  typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
5159  Kokkos::parallel_reduce (
5160  "Tpetra::CrsGraph::makeIndicesLocal (DynamicProfile)",
5161  range_type (0, lclNumRows),
5162  [&gblInds2D, &h_numRowEnt, &lclInds2D, &colMap] (const LO& lclRow, size_t& numErrs) {
5163  const GO* const curGblInds = gblInds2D[lclRow].getRawPtr ();
5164  // NOTE (mfh 26 Jun 2016) It's always legal to cast the
5165  // number of entries in a row to LO, as long as the row
5166  // doesn't have too many duplicate entries.
5167  const LO rna = static_cast<LO> (gblInds2D[lclRow].size ());
5168  const LO numEnt = static_cast<LO> (h_numRowEnt(lclRow));
5169  lclInds2D[lclRow].resize (rna); // purely thread-local, so safe
5170  LO* const curLclInds = lclInds2D[lclRow].getRawPtr ();
5171  for (LO j = 0; j < numEnt; ++j) {
5172  const GO gid = curGblInds[j];
5173  const LO lid = colMap.getLocalElement (gid);
5174  curLclInds[j] = lid;
5175  if (lid == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
5176  ++numErrs;
5177  }
5178  }
5179  }, lclNumErrs);
5180 
5181  this->lclInds2D_ = lclInds2D; // "commit" the result
5182 
5183  // If we detected an error in the above loop, go back and find
5184  // the global column indices not in the column Map on the
5185  // calling process.
5186  if (lclNumErrs != 0) {
5187  const int myRank = [this] () {
5188  auto map = this->getMap ();
5189  if (map.is_null ()) {
5190  return 0;
5191  }
5192  else {
5193  auto comm = map->getComm ();
5194  return comm.is_null () ? 0 : comm->getRank ();
5195  }
5196  } ();
5197 
5198  // If there are too many errors, don't bother printing them.
5199  size_t tooManyErrsToPrint = ::Tpetra::Details::Behavior::verbosePrintCountThreshold();
5200  if (lclNumErrs > tooManyErrsToPrint) {
5201  errStrm << "(Process " << myRank << ") When converting column "
5202  "indices from global to local, we encountered " << lclNumErrs
5203  << " indices that do not live in the column Map on this "
5204  "process. That's exceeds the allowable number to print."
5205  << "This limit is controllable by TPETRA_VERBOSE_PRINT_COUNT_THRESHOLD." << endl;
5206  }
5207  else {
5208  // Map from local row index, to any global column indices
5209  // that do not live in the column Map on the calling process.
5210  std::map<LO, std::vector<GO> > badColInds;
5211  // List of local rows lclRow for which h_numRowEnt[lclRow]
5212  // > gblInds2D_[lclRow].size().
5213  std::vector<LO> badLclRows;
5214 
5215  for (LO lclRow = 0; lclRow < lclNumRows; ++lclRow) {
5216  const size_t numEnt = static_cast<size_t> (h_numRowEnt[lclRow]);
5217 
5218  Teuchos::ArrayView<const GO> curGblInds = gblInds2D_[lclRow] ();
5219  if (numEnt > static_cast<size_t> (curGblInds.size ())) {
5220  badLclRows.push_back (lclRow);
5221  }
5222  else {
5223  for (size_t j = 0; j < numEnt; ++j) {
5224  const GO gid = curGblInds[j];
5225  const LO lid = colMap.getLocalElement (gid);
5226  if (lid == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
5227  badColInds[lclRow].push_back (gid);
5228  }
5229  }
5230  }
5231  }
5232 
5233  const bool pluralNumErrs = (lclNumErrs != static_cast<size_t> (1));
5234  errStrm << "(Process " << myRank << ") When converting column "
5235  "indices from global to local, we encountered " << lclNumErrs
5236  << " ind" << (pluralNumErrs ? "ices" : "ex") << " that "
5237  "do" << (pluralNumErrs ? "es" : "")
5238  << " not live in the column Map on this process." << endl
5239  << "(Process " << myRank << ") Here are the bad global "
5240  "indices, listed by local row: " << endl;
5241  for (auto && eachPair : badColInds) {
5242  const LO lclRow = eachPair.first;
5243  const GO gblRow = rowMap_->getGlobalElement (lclRow);
5244  errStrm << "(Process " << myRank << ") Local row " << lclRow
5245  << " (global row " << gblRow << "): [";
5246  const size_t numBad = eachPair.second.size ();
5247  for (size_t k = 0; k < numBad; ++k) {
5248  errStrm << eachPair.second[k];
5249  if (k + size_t (1) < numBad) {
5250  errStrm << ",";
5251  }
5252  }
5253  errStrm << "]" << endl;
5254  }
5255 
5256  if (badLclRows.size () != 0) {
5257  if (lclNumErrs == 0) {
5258  // We really want lclNumErrs to be just the count of
5259  // bad column indices, but lclNumErrs != 0 also
5260  // doubles as a generic indication of error.
5261  lclNumErrs = badLclRows.size ();
5262  }
5263 
5264  errStrm << "(Process " << myRank << ") When converting column "
5265  "indices from global to local, we (also) encountered the "
5266  "following local rows lclRow on this process for which "
5267  "h_numRowEnt[lclRow] > gblInds2D_[lclRow].size(). This "
5268  "likely indicates a bug in Tpetra." << endl
5269  << "(Process " << myRank << ") [";
5270  const size_t numBad = badLclRows.size ();
5271  for (size_t k = 0; k < numBad; ++k) {
5272  const LO lclRow = badLclRows[k];
5273  errStrm << "{lclRow: " << lclRow
5274  << "h_numRowEnt[lclRow]: " << h_numRowEnt[lclRow]
5275  << "gblInds2D_[lclRow].size(): "
5276  << gblInds2D_[lclRow].size () << "}";
5277  if (k + size_t (1) < numBad) {
5278  errStrm << ", ";
5279  }
5280  }
5281  errStrm << "]" << endl;
5282  }
5283  }
5284  }
5285 
5286  this->gblInds2D_ = Teuchos::null;
5287  }
5288  } // globallyIndexed() && lclNumRows > 0
5289 
5290  this->lclGraph_ = local_graph_type (this->k_lclInds1D_, this->k_rowPtrs_);
5291  this->indicesAreLocal_ = true;
5292  this->indicesAreGlobal_ = false;
5293  this->checkInternalState ();
5294 
5295  return std::make_pair (lclNumErrs, errStrm.str ());
5296  }
5297 
5298 
5299  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5300  void
5302  makeColMap (Teuchos::Array<int>& remotePIDs)
5303  {
5304  using ::Tpetra::Details::ProfilingRegion;
5305  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::makeColMap");
5306  const bool debug = ::Tpetra::Details::Behavior::debug ();
5307 
5308  // this->colMap_ should be null at this point, but we accept the
5309  // future possibility that it might not be (esp. if we decide
5310  // later to support graph structure changes after first
5311  // fillComplete, which CrsGraph does not currently (as of 12 Feb
5312  // 2017) support).
5313  Teuchos::RCP<const map_type> colMap = this->colMap_;
5314  const bool sortEachProcsGids =
5315  this->sortGhostsAssociatedWithEachProcessor_;
5316 
5317  // FIXME (mfh 12 Feb 2017) ::Tpetra::Details::makeColMap returns a
5318  // per-process error code. If an error does occur on a process,
5319  // ::Tpetra::Details::makeColMap does NOT promise that all processes will
5320  // notice that error. This is the caller's responsibility. For
5321  // now, we only propagate (to all processes) and report the error
5322  // in debug mode. In the future, we need to add the local/global
5323  // error handling scheme used in BlockCrsMatrix to this class.
5324  if (debug) {
5325  using Teuchos::outArg;
5326  using Teuchos::REDUCE_MIN;
5327  using Teuchos::reduceAll;
5328  const char tfecfFuncName[] = "makeColMap: ";
5329 
5330  std::ostringstream errStrm;
5331  const int lclErrCode =
5332  ::Tpetra::Details::makeColMap (colMap, remotePIDs, this->getDomainMap (),
5333  *this, sortEachProcsGids, &errStrm);
5334  auto comm = this->getComm ();
5335  if (! comm.is_null ()) {
5336  const int lclSuccess = (lclErrCode == 0) ? 1 : 0;
5337  int gblSuccess = 0; // output argument
5338  reduceAll<int, int> (*comm, REDUCE_MIN, lclSuccess,
5339  outArg (gblSuccess));
5340  if (gblSuccess != 1) {
5341  std::ostringstream os;
5342  Tpetra::Details::gathervPrint (os, errStrm.str (), *comm);
5343  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5344  (true, std::runtime_error, "An error happened on at least one "
5345  "(MPI) process in the CrsGraph's communicator. Here are all "
5346  "processes' error messages:" << std::endl << os.str ());
5347  }
5348  }
5349  }
5350  else {
5351  (void) ::Tpetra::Details::makeColMap (colMap, remotePIDs, this->getDomainMap (),
5352  *this, sortEachProcsGids, nullptr);
5353  }
5354  // See above. We want to admit the possibility of makeColMap
5355  // actually revising an existing column Map, even though that
5356  // doesn't currently (as of 10 May 2017) happen.
5357  this->colMap_ = colMap;
5358 
5359  checkInternalState ();
5360  }
5361 
5362 
5363  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5364  void
5366  sortAndMergeAllIndices (const bool sorted, const bool merged)
5367  {
5368  using ::Tpetra::Details::ProfilingRegion;
5369  typedef LocalOrdinal LO;
5370  typedef typename Kokkos::View<LO*, device_type>::HostMirror::execution_space
5371  host_execution_space;
5372  typedef Kokkos::RangePolicy<host_execution_space, LO> range_type;
5373  const char tfecfFuncName[] = "sortAndMergeAllIndices: ";
5374  ProfilingRegion regionSortAndMerge ("Tpetra::CrsGraph::sortAndMergeAllIndices");
5375 
5376  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5377  (this->isGloballyIndexed (), std::logic_error,
5378  "This method may only be called after makeIndicesLocal." );
5379 
5380  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5381  (! merged && this->isStorageOptimized (), std::logic_error,
5382  "The graph is already storage optimized, so we shouldn't be merging any "
5383  "indices. Please report this bug to the Tpetra developers.");
5384 
5385  if (! sorted || ! merged) {
5386  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
5387  size_t totalNumDups = 0;
5388  // FIXME (mfh 08 May 2017) This may assume CUDA UVM.
5389  Kokkos::parallel_reduce (range_type (0, lclNumRows),
5390  [this, sorted, merged] (const LO& lclRow, size_t& numDups) {
5391  const RowInfo rowInfo = this->getRowInfo (lclRow);
5392  numDups += this->sortAndMergeRowIndices (rowInfo, sorted, merged);
5393  }, totalNumDups);
5394  this->indicesAreSorted_ = true; // we just sorted every row
5395  this->noRedundancies_ = true; // we just merged every row
5396  }
5397  }
5398 
5399 
5400  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5401  void
5403  makeImportExport (Teuchos::Array<int>& remotePIDs,
5404  const bool useRemotePIDs)
5405  {
5406  using ::Tpetra::Details::ProfilingRegion;
5407  using Teuchos::ParameterList;
5408  using Teuchos::RCP;
5409  using Teuchos::rcp;
5410  const char tfecfFuncName[] = "makeImportExport: ";
5411  ProfilingRegion regionMIE ("Tpetra::CrsGraph::makeImportExport");
5412 
5413  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5414  (! this->hasColMap (), std::logic_error,
5415  "This method may not be called unless the graph has a column Map.");
5416  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
5417 
5418  // Don't do any checks to see if we need to create the Import, if
5419  // it exists already.
5420  //
5421  // FIXME (mfh 25 Mar 2013) This will become incorrect if we
5422  // change CrsGraph in the future to allow changing the column
5423  // Map after fillComplete. For now, the column Map is fixed
5424  // after the first fillComplete call.
5425  if (importer_.is_null ()) {
5426  // Create the Import instance if necessary.
5427  if (domainMap_ != colMap_ && (! domainMap_->isSameAs (*colMap_))) {
5428  if (params.is_null () || ! params->isSublist ("Import")) {
5429  if (useRemotePIDs) {
5430  importer_ = rcp (new import_type (domainMap_, colMap_, remotePIDs));
5431  }
5432  else {
5433  importer_ = rcp (new import_type (domainMap_, colMap_));
5434  }
5435  }
5436  else {
5437  RCP<ParameterList> importSublist = sublist (params, "Import", true);
5438  if (useRemotePIDs) {
5439  RCP<import_type> newImp =
5440  rcp (new import_type (domainMap_, colMap_, remotePIDs,
5441  importSublist));
5442  importer_ = newImp;
5443  }
5444  else {
5445  importer_ = rcp (new import_type (domainMap_, colMap_, importSublist));
5446  }
5447  }
5448  }
5449  }
5450 
5451  // Don't do any checks to see if we need to create the Export, if
5452  // it exists already.
5453  if (exporter_.is_null ()) {
5454  // Create the Export instance if necessary.
5455  if (rangeMap_ != rowMap_ && ! rangeMap_->isSameAs (*rowMap_)) {
5456  if (params.is_null () || ! params->isSublist ("Export")) {
5457  exporter_ = rcp (new export_type (rowMap_, rangeMap_));
5458  }
5459  else {
5460  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
5461  exporter_ = rcp (new export_type (rowMap_, rangeMap_, exportSublist));
5462  }
5463  }
5464  }
5465  }
5466 
5467 
5468  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5469  std::string
5471  description () const
5472  {
5473  std::ostringstream oss;
5474  oss << dist_object_type::description ();
5475  if (isFillComplete ()) {
5476  oss << "{status = fill complete"
5477  << ", global rows = " << getGlobalNumRows()
5478  << ", global cols = " << getGlobalNumCols()
5479  << ", global num entries = " << getGlobalNumEntries()
5480  << "}";
5481  }
5482  else {
5483  oss << "{status = fill not complete"
5484  << ", global rows = " << getGlobalNumRows()
5485  << "}";
5486  }
5487  return oss.str();
5488  }
5489 
5490 
5491  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5492  void
5494  describe (Teuchos::FancyOStream &out,
5495  const Teuchos::EVerbosityLevel verbLevel) const
5496  {
5497  using Teuchos::ArrayView;
5498  using Teuchos::Comm;
5499  using Teuchos::RCP;
5500  using Teuchos::VERB_DEFAULT;
5501  using Teuchos::VERB_NONE;
5502  using Teuchos::VERB_LOW;
5503  using Teuchos::VERB_MEDIUM;
5504  using Teuchos::VERB_HIGH;
5505  using Teuchos::VERB_EXTREME;
5506  using std::endl;
5507  using std::setw;
5508 
5509  Teuchos::EVerbosityLevel vl = verbLevel;
5510  if (vl == VERB_DEFAULT) vl = VERB_LOW;
5511  RCP<const Comm<int> > comm = this->getComm();
5512  const int myImageID = comm->getRank(),
5513  numImages = comm->getSize();
5514  size_t width = 1;
5515  for (size_t dec=10; dec<getGlobalNumRows(); dec *= 10) {
5516  ++width;
5517  }
5518  width = std::max<size_t> (width, static_cast<size_t> (11)) + 2;
5519  Teuchos::OSTab tab (out);
5520  // none: print nothing
5521  // low: print O(1) info from node 0
5522  // medium: print O(P) info, num entries per node
5523  // high: print O(N) info, num entries per row
5524  // extreme: print O(NNZ) info: print graph indices
5525  //
5526  // for medium and higher, print constituent objects at specified verbLevel
5527  if (vl != VERB_NONE) {
5528  if (myImageID == 0) out << this->description() << std::endl;
5529  // O(1) globals, minus what was already printed by description()
5530  if (isFillComplete() && myImageID == 0) {
5531  out << "Global number of diagonals = " << globalNumDiags_ << std::endl;
5532  out << "Global max number of row entries = " << globalMaxNumRowEntries_ << std::endl;
5533  }
5534  // constituent objects
5535  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5536  if (myImageID == 0) out << "\nRow map: " << std::endl;
5537  rowMap_->describe(out,vl);
5538  if (colMap_ != Teuchos::null) {
5539  if (myImageID == 0) out << "\nColumn map: " << std::endl;
5540  colMap_->describe(out,vl);
5541  }
5542  if (domainMap_ != Teuchos::null) {
5543  if (myImageID == 0) out << "\nDomain map: " << std::endl;
5544  domainMap_->describe(out,vl);
5545  }
5546  if (rangeMap_ != Teuchos::null) {
5547  if (myImageID == 0) out << "\nRange map: " << std::endl;
5548  rangeMap_->describe(out,vl);
5549  }
5550  }
5551  // O(P) data
5552  if (vl == VERB_MEDIUM || vl == VERB_HIGH || vl == VERB_EXTREME) {
5553  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5554  if (myImageID == imageCtr) {
5555  out << "Node ID = " << imageCtr << std::endl
5556  << "Node number of entries = " << this->getNodeNumEntries () << std::endl
5557  << "Node number of diagonals = " << nodeNumDiags_ << std::endl
5558  << "Node max number of entries = " << nodeMaxNumRowEntries_ << std::endl;
5559  if (! indicesAreAllocated ()) {
5560  out << "Indices are not allocated." << std::endl;
5561  }
5562  }
5563  comm->barrier();
5564  comm->barrier();
5565  comm->barrier();
5566  }
5567  }
5568  // O(N) and O(NNZ) data
5569  if (vl == VERB_HIGH || vl == VERB_EXTREME) {
5570  for (int imageCtr = 0; imageCtr < numImages; ++imageCtr) {
5571  if (myImageID == imageCtr) {
5572  out << std::setw(width) << "Node ID"
5573  << std::setw(width) << "Global Row"
5574  << std::setw(width) << "Num Entries";
5575  if (vl == VERB_EXTREME) {
5576  out << " Entries";
5577  }
5578  out << std::endl;
5579  const LocalOrdinal lclNumRows =
5580  static_cast<LocalOrdinal> (this->getNodeNumRows ());
5581  for (LocalOrdinal r=0; r < lclNumRows; ++r) {
5582  const RowInfo rowinfo = this->getRowInfo (r);
5583  GlobalOrdinal gid = rowMap_->getGlobalElement(r);
5584  out << std::setw(width) << myImageID
5585  << std::setw(width) << gid
5586  << std::setw(width) << rowinfo.numEntries;
5587  if (vl == VERB_EXTREME) {
5588  out << " ";
5589  if (isGloballyIndexed()) {
5590  ArrayView<const GlobalOrdinal> rowview = getGlobalView(rowinfo);
5591  for (size_t j=0; j < rowinfo.numEntries; ++j) out << rowview[j] << " ";
5592  }
5593  else if (isLocallyIndexed()) {
5594  ArrayView<const LocalOrdinal> rowview = getLocalView(rowinfo);
5595  for (size_t j=0; j < rowinfo.numEntries; ++j) out << colMap_->getGlobalElement(rowview[j]) << " ";
5596  }
5597  }
5598  out << std::endl;
5599  }
5600  }
5601  comm->barrier();
5602  comm->barrier();
5603  comm->barrier();
5604  }
5605  }
5606  }
5607  }
5608 
5609 
5610  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5611  bool
5613  checkSizes (const SrcDistObject& /* source */)
5614  {
5615  // It's not clear what kind of compatibility checks on sizes can
5616  // be performed here. Epetra_CrsGraph doesn't check any sizes for
5617  // compatibility.
5618  return true;
5619  }
5620 
5621  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5622  void
5624 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
5625  copyAndPermuteNew
5626 #else // TPETRA_ENABLE_DEPRECATED_CODE
5627  copyAndPermute
5628 #endif // TPETRA_ENABLE_DEPRECATED_CODE
5629  (const SrcDistObject& source,
5630  const size_t numSameIDs,
5631  const Kokkos::DualView<const local_ordinal_type*,
5632  buffer_device_type>& permuteToLIDs,
5633  const Kokkos::DualView<const local_ordinal_type*,
5634  buffer_device_type>& permuteFromLIDs)
5635  {
5636  using std::endl;
5637  using LO = local_ordinal_type;
5638  using GO = global_ordinal_type;
5639  using this_type = CrsGraph<LO, GO, node_type>;
5640  using row_graph_type = RowGraph<LO, GO, node_type>;
5641  const char tfecfFuncName[] = "copyAndPermute: ";
5642  const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph");
5643 
5644  std::unique_ptr<std::string> prefix;
5645  if (debug) {
5646  std::ostringstream os;
5647  const int myRank = this->getMap ()->getComm ()->getRank ();
5648  os << "Proc " << myRank << ": Tpetra::CrsGraph::copyAndPermute: ";
5649  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
5650  os << endl;
5651  std::cerr << os.str ();
5652  }
5653 
5654  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5655  (permuteToLIDs.extent (0) != permuteFromLIDs.extent (0),
5656  std::runtime_error, "permuteToLIDs.extent(0) = "
5657  << permuteToLIDs.extent (0) << " != permuteFromLIDs.extent(0) = "
5658  << permuteFromLIDs.extent (0) << ".");
5659 
5660  // We know from checkSizes that the source object is a
5661  // row_graph_type, so we don't need to check again.
5662  const row_graph_type& srcRowGraph =
5663  dynamic_cast<const row_graph_type&> (source);
5664 
5665  if (this->getProfileType () == StaticProfile) {
5666  if (debug) {
5667  std::ostringstream os;
5668  os << *prefix << "Target is StaticProfile; do CRS padding" << endl;
5669  std::cerr << os.str ();
5670  }
5671  auto padding =
5672  computeCrsPadding (srcRowGraph, numSameIDs, permuteToLIDs, permuteFromLIDs);
5673  this->applyCrsPadding(padding);
5674  }
5675  else if (debug) {
5676  std::ostringstream os;
5677  os << *prefix << "Target is DynamicProfile" << endl;
5678  std::cerr << os.str ();
5679  }
5680 
5681  // If the source object is actually a CrsGraph, we can use view
5682  // mode instead of copy mode to access the entries in each row,
5683  // if the graph is not fill complete.
5684  const this_type* srcCrsGraph = dynamic_cast<const this_type*> (&source);
5685 
5686  const map_type& srcRowMap = * (srcRowGraph.getRowMap ());
5687  const map_type& tgtRowMap = * (this->getRowMap ());
5688  const bool src_filled = srcRowGraph.isFillComplete ();
5689  Teuchos::Array<GO> row_copy;
5690  LO myid = 0;
5691 
5692  //
5693  // "Copy" part of "copy and permute."
5694  //
5695  if (src_filled || srcCrsGraph == nullptr) {
5696  if (debug) {
5697  std::ostringstream os;
5698  os << *prefix << "src_filled || srcCrsGraph == nullptr" << endl;
5699  std::cerr << os.str ();
5700  }
5701  // If the source graph is fill complete, we can't use view mode,
5702  // because the data might be stored in a different format not
5703  // compatible with the expectations of view mode. Also, if the
5704  // source graph is not a CrsGraph, we can't use view mode,
5705  // because RowGraph only provides copy mode access to the data.
5706  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5707  const GO gid = srcRowMap.getGlobalElement (myid);
5708  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (gid);
5709  row_copy.resize (row_length);
5710  size_t check_row_length = 0;
5711  srcRowGraph.getGlobalRowCopy (gid, row_copy (), check_row_length);
5712  this->insertGlobalIndices (gid, row_copy ());
5713  }
5714  } else {
5715  if (debug) {
5716  std::ostringstream os;
5717  os << *prefix << "! src_filled && srcCrsGraph != nullptr" << endl;
5718  std::cerr << os.str ();
5719  }
5720  for (size_t i = 0; i < numSameIDs; ++i, ++myid) {
5721  const GO gid = srcRowMap.getGlobalElement (myid);
5722  Teuchos::ArrayView<const GO> row;
5723  srcCrsGraph->getGlobalRowView (gid, row);
5724  this->insertGlobalIndices (gid, row);
5725  }
5726  }
5727 
5728  //
5729  // "Permute" part of "copy and permute."
5730  //
5731  auto permuteToLIDs_h = permuteToLIDs.view_host ();
5732  auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
5733 
5734  if (src_filled || srcCrsGraph == nullptr) {
5735  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5736  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
5737  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
5738  size_t row_length = srcRowGraph.getNumEntriesInGlobalRow (srcgid);
5739  row_copy.resize (row_length);
5740  size_t check_row_length = 0;
5741  srcRowGraph.getGlobalRowCopy (srcgid, row_copy (), check_row_length);
5742  this->insertGlobalIndices (mygid, row_copy ());
5743  }
5744  } else {
5745  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5746  const GO mygid = tgtRowMap.getGlobalElement (permuteToLIDs_h[i]);
5747  const GO srcgid = srcRowMap.getGlobalElement (permuteFromLIDs_h[i]);
5748  Teuchos::ArrayView<const GO> row;
5749  srcCrsGraph->getGlobalRowView (srcgid, row);
5750  this->insertGlobalIndices (mygid, row);
5751  }
5752  }
5753 
5754  if (debug) {
5755  std::ostringstream os;
5756  os << *prefix << "Done" << endl;
5757  std::cerr << os.str ();
5758  }
5759  }
5760 
5761  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5762  void
5763  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5764  applyCrsPadding(const Kokkos::UnorderedMap<LocalOrdinal, size_t, device_type>& padding)
5765  {
5766  // const char tfecfFuncName[] = "applyCrsPadding";
5767  using execution_space = typename device_type::execution_space;
5768  using row_ptrs_type = typename local_graph_type::row_map_type::non_const_type;
5769  using indices_type = t_GlobalOrdinal_1D;
5770  using local_indices_type = typename local_graph_type::entries_type::non_const_type;
5771  using range_policy = Kokkos::RangePolicy<execution_space, Kokkos::IndexType<LocalOrdinal>>;
5773 
5774  if (padding.size() == 0)
5775  return;
5776 
5777  // Assume global indexing we don't have any indices yet
5778  if (! this->indicesAreAllocated()) {
5779  allocateIndices(GlobalIndices);
5780  }
5781 
5782  // Making copies here because k_rowPtrs_ has a const type. Otherwise, we
5783  // would use it directly.
5784 
5785  row_ptrs_type row_ptrs_beg("row_ptrs_beg", this->k_rowPtrs_.extent(0));
5786  Kokkos::deep_copy(row_ptrs_beg, this->k_rowPtrs_);
5787 
5788  const size_t N = (row_ptrs_beg.extent(0) == 0 ? 0 : row_ptrs_beg.extent(0) - 1);
5789  row_ptrs_type row_ptrs_end("row_ptrs_end", N);
5790 
5791  bool refill_num_row_entries = false;
5792  if (this->k_numRowEntries_.extent(0) > 0) {
5793  // Case 1: Unpacked storage
5794  refill_num_row_entries = true;
5795  auto num_row_entries = this->k_numRowEntries_;
5796  Kokkos::parallel_for("Fill end row pointers", range_policy(0, N),
5797  KOKKOS_LAMBDA(const size_t i){
5798  row_ptrs_end(i) = row_ptrs_beg(i) + num_row_entries(i);
5799  }
5800  );
5801 
5802  } else {
5803  // mfh If packed storage, don't need row_ptrs_end to be separate allocation;
5804  // could just have it alias row_ptrs_beg+1.
5805  // Case 2: Packed storage
5806  Kokkos::parallel_for("Fill end row pointers", range_policy(0, N),
5807  KOKKOS_LAMBDA(const size_t i){
5808  row_ptrs_end(i) = row_ptrs_beg(i+1);
5809  }
5810  );
5811  }
5812 
5813  if(this->isGloballyIndexed()) {
5814  indices_type indices("indices", this->k_gblInds1D_.extent(0));
5815  Kokkos::deep_copy(indices, this->k_gblInds1D_);
5816  using padding_type = Kokkos::UnorderedMap<LocalOrdinal, size_t, device_type>;
5817  padCrsArrays<row_ptrs_type,indices_type,padding_type>(row_ptrs_beg, row_ptrs_end, indices, padding);
5818  this->k_gblInds1D_ = indices;
5819  }
5820  else {
5821  local_indices_type indices("indices", this->k_lclInds1D_.extent(0));
5822  Kokkos::deep_copy(indices, this->k_lclInds1D_);
5823  using padding_type = Kokkos::UnorderedMap<LocalOrdinal, size_t, device_type>;
5824  padCrsArrays<row_ptrs_type,local_indices_type,padding_type>(row_ptrs_beg, row_ptrs_end, indices, padding);
5825  this->k_lclInds1D_ = indices;
5826  }
5827 
5828  if (refill_num_row_entries) {
5829  auto num_row_entries = this->k_numRowEntries_;
5830  Kokkos::parallel_for("Fill num entries", range_policy(0, N),
5831  KOKKOS_LAMBDA(const size_t i){
5832  num_row_entries(i) = row_ptrs_end(i) - row_ptrs_beg(i);
5833  }
5834  );
5835  }
5836  this->k_rowPtrs_ = row_ptrs_beg;
5837  }
5838 
5839  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5840  Kokkos::UnorderedMap<LocalOrdinal, size_t, typename Node::device_type>
5841  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5842  computeCrsPadding (const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5843  const size_t numSameIDs,
5844  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
5845  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs) const
5846  {
5847  using LO = LocalOrdinal;
5848  using padding_type = Kokkos::UnorderedMap<LO, size_t, device_type>;
5849  padding_type padding (numSameIDs + permuteFromLIDs.extent (0));
5850 
5851  computeCrsPaddingForSameIDs(padding, source, numSameIDs, false);
5852  computeCrsPaddingForPermutedIDs(padding, source, permuteToLIDs, permuteFromLIDs, false);
5853 
5854  Kokkos::fence (); // Make sure device sees changes made by host
5855  TEUCHOS_TEST_FOR_EXCEPTION
5856  (padding.failed_insert(), std::runtime_error,
5857  "failed to insert one or more indices in to padding map");
5858 
5859  return padding;
5860  }
5861 
5862  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5863  void
5864  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5865  computeCrsPaddingForSameIDs (Kokkos::UnorderedMap<LocalOrdinal, size_t, typename Node::device_type>& padding,
5866  const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5867  const size_t numSameIDs,
5868  const bool padAll) const
5869  {
5870  using LO = LocalOrdinal;
5871  using GO = GlobalOrdinal;
5872  const char tfecfFuncName[] = "computeCrsPaddingForSameIds: ";
5873 
5874  Kokkos::fence ();
5875 
5876  using insert_result =
5877  typename Kokkos::UnorderedMap<LocalOrdinal, size_t, typename Node::device_type>::insert_result;
5878 
5879  // Compute extra capacity needed to accommodate incoming data
5880  const map_type& src_row_map = * (source.getRowMap ());
5881  for (LO tgt_lid = 0; tgt_lid < static_cast<LO> (numSameIDs); ++tgt_lid) {
5882  const GO src_gid = src_row_map.getGlobalElement(tgt_lid);
5883  auto num_src_entries = source.getNumEntriesInGlobalRow(src_gid);
5884 
5885  if (num_src_entries == 0)
5886  continue;
5887 
5888  insert_result result;
5889  const GO tgt_gid = rowMap_->getGlobalElement(tgt_lid);
5890  if (padAll) {
5891  result = padding.insert(tgt_lid, num_src_entries);
5892  }
5893  else {
5894  size_t check_row_length = 0;
5895  std::vector<GO> src_row_inds(num_src_entries);
5896  Teuchos::ArrayView<GO> src_row_inds_view(src_row_inds.data(), src_row_inds.size());
5897  source.getGlobalRowCopy(src_gid, src_row_inds_view, check_row_length);
5898 
5899  auto num_tgt_entries = this->getNumEntriesInGlobalRow(tgt_gid);
5900  std::vector<GO> tgt_row_inds(num_tgt_entries);
5901  Teuchos::ArrayView<GO> tgt_row_inds_view(tgt_row_inds.data(), tgt_row_inds.size());
5902  this->getGlobalRowCopy(tgt_gid, tgt_row_inds_view, check_row_length);
5903 
5904  size_t how_much_padding = 0;
5905  for (auto src_row_ind : src_row_inds) {
5906  if (std::find(tgt_row_inds.begin(), tgt_row_inds.end(), src_row_ind) == tgt_row_inds.end()) {
5907  // The target row does not have space for
5908  how_much_padding++;
5909  }
5910  }
5911  result = padding.insert (tgt_lid, how_much_padding);
5912  }
5913 
5914  // FIXME (mfh 09 Apr 2019) Kokkos::UnorderedMap is allowed to fail even if
5915  // the user did nothing wrong. We should actually have a retry option. I
5916  // just copied this code over from computeCrsPadding.
5917  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5918  (result.failed(), std::runtime_error,
5919  "unable to insert padding for LID " << tgt_lid);
5920  }
5921  }
5922 
5923  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5924  void
5925  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5926  computeCrsPaddingForPermutedIDs (Kokkos::UnorderedMap<LocalOrdinal, size_t, typename Node::device_type>& padding,
5927  const RowGraph<LocalOrdinal,GlobalOrdinal,Node>& source,
5928  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteToLIDs,
5929  const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& permuteFromLIDs,
5930  const bool padAll) const
5931  {
5932  using LO = LocalOrdinal;
5933  using GO = GlobalOrdinal;
5934  const char tfecfFuncName[] = "computeCrsPaddingForPermutedIds: ";
5935  Kokkos::fence ();
5936 
5937  const map_type& src_row_map = * (source.getRowMap ());
5938 
5939  using insert_result =
5940  typename Kokkos::UnorderedMap<LocalOrdinal, size_t, typename Node::device_type>::insert_result;
5941  auto permuteToLIDs_h = permuteToLIDs.view_host ();
5942  auto permuteFromLIDs_h = permuteFromLIDs.view_host ();
5943  for (LO i = 0; i < static_cast<LO> (permuteToLIDs_h.extent (0)); ++i) {
5944  const GO src_gid = src_row_map.getGlobalElement(permuteFromLIDs_h[i]);
5945  auto num_src_entries = source.getNumEntriesInGlobalRow(src_gid);
5946 
5947  if (num_src_entries == 0)
5948  continue;
5949 
5950  insert_result result;
5951  const LO tgt_lid = permuteToLIDs_h[i];
5952  if (padAll)
5953  {
5954  result = padding.insert (tgt_lid, num_src_entries);
5955  }
5956  else {
5957  size_t check_row_length = 0;
5958  std::vector<GO> src_row_inds(num_src_entries);
5959  Teuchos::ArrayView<GO> src_row_inds_view(src_row_inds.data(), src_row_inds.size());
5960  source.getGlobalRowCopy(src_gid, src_row_inds_view, check_row_length);
5961 
5962  const GO tgt_gid = rowMap_->getGlobalElement (tgt_lid);
5963  auto num_tgt_entries = this->getNumEntriesInGlobalRow(tgt_gid);
5964  std::vector<GO> tgt_row_inds(num_tgt_entries);
5965  Teuchos::ArrayView<GO> tgt_row_inds_view(tgt_row_inds.data(), tgt_row_inds.size());
5966  this->getGlobalRowCopy(tgt_gid, tgt_row_inds_view, check_row_length);
5967 
5968  size_t how_much_padding = 0;
5969  for (auto src_row_ind : src_row_inds) {
5970  if (std::find(tgt_row_inds.begin(), tgt_row_inds.end(), src_row_ind) == tgt_row_inds.end()) {
5971  // The target row does not have space for
5972  how_much_padding++;
5973  }
5974  }
5975  result = padding.insert (tgt_lid, how_much_padding);
5976  }
5977  // FIXME (mfh 09 Apr 2019) Kokkos::UnorderedMap is allowed to
5978  // fail even if the user did nothing wrong. We should actually
5979  // have a retry option. I just copied this code over from
5980  // computeCrsPadding.
5981  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
5982  (result.failed(), std::runtime_error,
5983  "unable to insert padding for LID " << tgt_lid);
5984  }
5985 
5986  }
5987 
5988  template <class LocalOrdinal, class GlobalOrdinal, class Node>
5989  Kokkos::UnorderedMap<LocalOrdinal, size_t, typename Node::device_type>
5990  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
5991  computeCrsPadding (const Kokkos::DualView<const local_ordinal_type*,
5992  buffer_device_type>& importLIDs,
5993  Kokkos::DualView<size_t*, buffer_device_type> numPacketsPerLID) const
5994  {
5995  const char tfecfFuncName[] = "computeCrsPadding: ";
5996 
5997  // Creating padding for each new incoming index
5998  Kokkos::fence (); // Make sure device sees changes made by host
5999  using padding_type = Kokkos::UnorderedMap<local_ordinal_type, size_t, device_type>;
6000  padding_type padding (importLIDs.extent (0));
6001  auto numEnt = static_cast<size_t> (importLIDs.extent (0));
6002 
6003  auto importLIDs_h = importLIDs.view_host ();
6004  if (numPacketsPerLID.need_sync_host ()) {
6005  numPacketsPerLID.sync_host ();
6006  }
6007  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6008 
6009  // without unpacking the import/export buffer, we don't know how many of the
6010  // numPacketsPerLID[i] LIDs exist in the target. Below, it is assumed that
6011  // none do, and padding is requested for all.
6012  for (size_t i = 0; i < numEnt; ++i) {
6013  auto result = padding.insert (importLIDs_h[i], numPacketsPerLID_h[i]);
6014  // FIXME (mfh 09 Apr 2019) See note in other computeCrsPaddingoverload.
6015  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6016  (result.failed(), std::runtime_error,
6017  "unable to insert padding for LID " << importLIDs_h[i]);
6018  }
6019 
6020  TEUCHOS_TEST_FOR_EXCEPTION
6021  (padding.failed_insert(), std::runtime_error,
6022  "failed to insert one or more indices in to padding map");
6023  return padding;
6024  }
6025 
6026  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6027  void
6028  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6029 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
6030  packAndPrepareNew
6031 #else // TPETRA_ENABLE_DEPRECATED_CODE
6032  packAndPrepare
6033 #endif // TPETRA_ENABLE_DEPRECATED_CODE
6034  (const SrcDistObject& source,
6035  const Kokkos::DualView<const local_ordinal_type*,
6036  buffer_device_type>& exportLIDs,
6037  Kokkos::DualView<packet_type*,
6038  buffer_device_type>& exports,
6039  Kokkos::DualView<size_t*,
6040  buffer_device_type> numPacketsPerLID,
6041  size_t& constantNumPackets,
6042  Distributor& distor)
6043  {
6045  using GO = global_ordinal_type;
6046  using std::endl;
6047  using crs_graph_type =
6048  CrsGraph<local_ordinal_type, global_ordinal_type, node_type>;
6049  using row_graph_type =
6050  RowGraph<local_ordinal_type, global_ordinal_type, node_type>;
6051  const char tfecfFuncName[] = "packAndPrepare: ";
6052  ProfilingRegion region_papn ("Tpetra::CrsGraph::packAndPrepare");
6053 
6054  const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph");
6055  std::unique_ptr<std::string> prefix;
6056  if (debug) {
6057  std::ostringstream os;
6058  const int myRank = this->getMap ()->getComm ()->getRank ();
6059  os << "Proc " << myRank << ": Tpetra::CrsGraph::packAndPrepare: ";
6060  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
6061  os << "Start" << endl;
6062  std::cerr << os.str ();
6063  }
6064 
6065  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6066  (exportLIDs.extent (0) != numPacketsPerLID.extent (0),
6067  std::runtime_error,
6068  "exportLIDs.extent(0) = " << exportLIDs.extent (0)
6069  << " != numPacketsPerLID.extent(0) = " << numPacketsPerLID.extent (0)
6070  << ".");
6071  const row_graph_type* srcRowGraphPtr =
6072  dynamic_cast<const row_graph_type*> (&source);
6073  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6074  (srcRowGraphPtr == nullptr, std::invalid_argument, "Source of an Export "
6075  "or Import operation to a CrsGraph must be a RowGraph with the same "
6076  "template parameters.");
6077  // We don't check whether src_graph has had fillComplete called,
6078  // because it doesn't matter whether the *source* graph has been
6079  // fillComplete'd. The target graph can not be fillComplete'd yet.
6080  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6081  (this->isFillComplete (), std::runtime_error,
6082  "The target graph of an Import or Export must not be fill complete.");
6083 
6084  const crs_graph_type* srcCrsGraphPtr =
6085  dynamic_cast<const crs_graph_type*> (&source);
6086 
6087  if (srcCrsGraphPtr == nullptr) {
6088  using Teuchos::ArrayView;
6089  using LO = local_ordinal_type;
6090 
6091  if (debug) {
6092  std::ostringstream os;
6093  os << *prefix << "Source is a RowGraph but not a CrsGraph" << endl;
6094  std::cerr << os.str ();
6095  }
6096  // RowGraph::pack serves the "old" DistObject interface. It
6097  // takes Teuchos::ArrayView and Teuchos::Array&. The latter
6098  // entails deep-copying the exports buffer on output. RowGraph
6099  // is a convenience interface when not a CrsGraph, so we accept
6100  // the performance hit.
6101  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6102  auto exportLIDs_h = exportLIDs.view_host ();
6103  ArrayView<const LO> exportLIDs_av (exportLIDs_h.data (),
6104  exportLIDs_h.extent (0));
6105  Teuchos::Array<GO> exports_a;
6106 
6107  numPacketsPerLID.clear_sync_state ();
6108  numPacketsPerLID.modify_host ();
6109  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6110  ArrayView<size_t> numPacketsPerLID_av (numPacketsPerLID_h.data (),
6111  numPacketsPerLID_h.extent (0));
6112  srcRowGraphPtr->pack (exportLIDs_av, exports_a, numPacketsPerLID_av,
6113  constantNumPackets, distor);
6114  const size_t newSize = static_cast<size_t> (exports_a.size ());
6115  if (static_cast<size_t> (exports.extent (0)) != newSize) {
6116  using exports_dv_type = Kokkos::DualView<packet_type*, buffer_device_type>;
6117  exports = exports_dv_type ("exports", newSize);
6118  }
6119  Kokkos::View<const packet_type*, Kokkos::HostSpace,
6120  Kokkos::MemoryUnmanaged> exports_a_h (exports_a.getRawPtr (), newSize);
6121  exports.clear_sync_state ();
6122  exports.modify_host ();
6123  Kokkos::deep_copy (exports.view_host (), exports_a_h);
6124  }
6125  // packCrsGraphNew requires a valid localGraph.
6126  else if (! getColMap ().is_null () &&
6127  (lclGraph_.row_map.extent (0) != 0 ||
6128  getRowMap ()->getNodeNumElements () == 0)) {
6129  if (debug) {
6130  std::ostringstream os;
6131  os << *prefix << "packCrsGraphNew path" << endl;
6132  std::cerr << os.str ();
6133  }
6134  using export_pids_type =
6135  Kokkos::DualView<const int*, buffer_device_type>;
6136  export_pids_type exportPIDs; // not filling it; needed for syntax
6137  using LO = local_ordinal_type;
6138  using NT = node_type;
6140  packCrsGraphNew<LO,GO,NT> (*srcCrsGraphPtr, exportLIDs, exportPIDs,
6141  exports, numPacketsPerLID,
6142  constantNumPackets, false, distor);
6143  }
6144  else {
6145  srcCrsGraphPtr->packFillActiveNew (exportLIDs, exports, numPacketsPerLID,
6146  constantNumPackets, distor);
6147  }
6148 
6149  if (debug) {
6150  std::ostringstream os;
6151  os << *prefix << "Done" << endl;
6152  std::cerr << os.str ();
6153  }
6154  }
6155 
6156  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6157  void
6159  pack (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
6160  Teuchos::Array<GlobalOrdinal>& exports,
6161  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
6162  size_t& constantNumPackets,
6163  Distributor& distor) const
6164  {
6165  auto col_map = this->getColMap();
6166  // packCrsGraph requires a valid localGraph.
6167  if( !col_map.is_null() && (lclGraph_.row_map.extent(0) != 0 || getRowMap()->getNodeNumElements() ==0)) {
6169  packCrsGraph<LocalOrdinal,GlobalOrdinal,Node>(*this, exports, numPacketsPerLID,
6170  exportLIDs, constantNumPackets, distor);
6171  }
6172  else {
6173  this->packFillActive(exportLIDs, exports, numPacketsPerLID,
6174  constantNumPackets, distor);
6175  }
6176  }
6177 
6178  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6179  void
6181  packFillActive (const Teuchos::ArrayView<const LocalOrdinal>& exportLIDs,
6182  Teuchos::Array<GlobalOrdinal>& exports,
6183  const Teuchos::ArrayView<size_t>& numPacketsPerLID,
6184  size_t& constantNumPackets,
6185  Distributor& /* distor */) const
6186  {
6187  typedef LocalOrdinal LO;
6188  typedef GlobalOrdinal GO;
6189  typedef typename Kokkos::View<size_t*,
6190  device_type>::HostMirror::execution_space host_execution_space;
6191  typedef typename device_type::execution_space device_execution_space;
6192  const char tfecfFuncName[] = "packFillActive: ";
6193  const bool debug = ::Tpetra::Details::Behavior::debug("CrsGraph::pack");
6194  const int myRank = debug ? this->getMap ()->getComm ()->getRank () : 0;
6195 
6196  const auto numExportLIDs = exportLIDs.size ();
6197  if (debug) {
6198  std::ostringstream os;
6199  os << "Proc " << myRank << ": CrsGraph::pack: numExportLIDs = "
6200  << numExportLIDs << std::endl;
6201  std::cerr << os.str ();
6202  }
6203  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6204  (numExportLIDs != numPacketsPerLID.size (), std::runtime_error,
6205  "exportLIDs.size() = " << numExportLIDs << " != numPacketsPerLID.size()"
6206  " = " << numPacketsPerLID.size () << ".");
6207 
6208  // We may be accessing UVM data on host below, so ensure that the
6209  // device is done accessing it.
6210  device_execution_space().fence ();
6211 
6212  const map_type& rowMap = * (this->getRowMap ());
6213  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
6214  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6215  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
6216  "This graph claims to be locally indexed, but its column Map is nullptr. "
6217  "This should never happen. Please report this bug to the Tpetra "
6218  "developers.");
6219 
6220  // We may pack different amounts of data for different rows.
6221  constantNumPackets = 0;
6222 
6223  // mfh 20 Sep 2017: Teuchos::ArrayView isn't thread safe (well,
6224  // it might be now, but we might as well be safe).
6225  size_t* const numPacketsPerLID_raw = numPacketsPerLID.getRawPtr ();
6226  const LO* const exportLIDs_raw = exportLIDs.getRawPtr ();
6227 
6228  // Count the total number of packets (column indices, in the case
6229  // of a CrsGraph) to pack. While doing so, set
6230  // numPacketsPerLID[i] to the number of entries owned by the
6231  // calling process in (local) row exportLIDs[i] of the graph, that
6232  // the caller wants us to send out.
6233  Kokkos::RangePolicy<host_execution_space, LO> inputRange (0, numExportLIDs);
6234  size_t totalNumPackets = 0;
6235  size_t errCount = 0;
6236  // lambdas turn what they capture const, so we can't
6237  // atomic_add(&errCount,1). Instead, we need a View to modify.
6238  typedef Kokkos::Device<host_execution_space, Kokkos::HostSpace>
6239  host_device_type;
6240  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
6241  constexpr size_t ONE = 1;
6242 
6243  Kokkos::parallel_reduce ("Tpetra::CrsGraph::pack: totalNumPackets",
6244  inputRange,
6245  [=] (const LO& i, size_t& curTotalNumPackets) {
6246  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
6247  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
6248  Kokkos::atomic_add (&errCountView(), ONE);
6249  numPacketsPerLID_raw[i] = 0;
6250  }
6251  else {
6252  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
6253  numPacketsPerLID_raw[i] = numEnt;
6254  curTotalNumPackets += numEnt;
6255  }
6256  },
6257  totalNumPackets);
6258 
6259  if (debug) {
6260  std::ostringstream os;
6261  os << "Proc " << myRank << ": CrsGraph::pack: "
6262  << "totalNumPackets = " << totalNumPackets << std::endl;
6263  std::cerr << os.str ();
6264  }
6265  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6266  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
6267  "one or more errors! errCount = " << errCount
6268  << ", totalNumPackets = " << totalNumPackets << ".");
6269  errCount = 0;
6270 
6271  // Allocate space for all the column indices to pack.
6272  exports.resize (totalNumPackets);
6273 
6274  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6275  (! this->supportsRowViews (), std::logic_error,
6276  "this->supportsRowViews() returns false; this should never happen. "
6277  "Please report this bug to the Tpetra developers.");
6278 
6279  // Loop again over the rows to export, and pack rows of indices
6280  // into the output buffer.
6281 
6282  if (debug) {
6283  std::ostringstream os;
6284  os << "Proc " << myRank << ": CrsGraph::pack: pack into exports" << std::endl;
6285  std::cerr << os.str ();
6286  }
6287 
6288  // Teuchos::ArrayView may not be thread safe, or may not be
6289  // efficiently thread safe. Better to use the raw pointer.
6290  GO* const exports_raw = exports.getRawPtr ();
6291  errCount = 0;
6292  Kokkos::parallel_scan ("Tpetra::CrsGraph::pack: pack from views",
6293  inputRange,
6294  [=] (const LO& i, size_t& exportsOffset, const bool final) {
6295  const size_t curOffset = exportsOffset;
6296  const GO gblRow = rowMap.getGlobalElement (exportLIDs_raw[i]);
6297  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
6298 
6299  if (rowInfo.localRow == Tpetra::Details::OrdinalTraits<size_t>::invalid ()) {
6300  if (debug) {
6301  std::ostringstream os;
6302  os << "Proc " << myRank << ": INVALID rowInfo: "
6303  << "i = " << i << ", lclRow = " << exportLIDs_raw[i] << std::endl;
6304  std::cerr << os.str ();
6305  }
6306  Kokkos::atomic_add (&errCountView(), ONE);
6307  }
6308  else if (curOffset + rowInfo.numEntries > totalNumPackets) {
6309  if (debug) {
6310  std::ostringstream os;
6311  os << "Proc " << myRank << ": UH OH! For i=" << i << ", lclRow="
6312  << exportLIDs_raw[i] << ", gblRow=" << gblRow << ", curOffset "
6313  "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
6314  << ") > totalNumPackets (= " << totalNumPackets << ")."
6315  << std::endl;
6316  std::cerr << os.str ();
6317  }
6318  Kokkos::atomic_add (&errCountView(), ONE);
6319  }
6320  else {
6321  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
6322  if (this->isLocallyIndexed ()) {
6323  const LO* lclColInds = nullptr;
6324  LO capacity = 0;
6325  const LO errCode =
6326  this->getLocalViewRawConst (lclColInds, capacity, rowInfo);
6327  if (errCode == 0) {
6328  if (final) {
6329  for (LO k = 0; k < numEnt; ++k) {
6330  const LO lclColInd = lclColInds[k];
6331  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
6332  // Pack it, even if it's wrong. Let the receiving
6333  // process deal with it. Otherwise, we'll miss out
6334  // on any correct data.
6335  exports_raw[curOffset + k] = gblColInd;
6336  } // for each entry in the row
6337  } // final pass?
6338  exportsOffset = curOffset + numEnt;
6339  }
6340  else { // error in getting local row view
6341  Kokkos::atomic_add (&errCountView(), ONE);
6342  }
6343  }
6344  else if (this->isGloballyIndexed ()) {
6345  const GO* gblColInds = nullptr;
6346  LO capacity = 0;
6347  const LO errCode =
6348  this->getGlobalViewRawConst (gblColInds, capacity, rowInfo);
6349  if (errCode == 0) {
6350  if (final) {
6351  for (LO k = 0; k < numEnt; ++k) {
6352  const GO gblColInd = gblColInds[k];
6353  // Pack it, even if it's wrong. Let the receiving
6354  // process deal with it. Otherwise, we'll miss out
6355  // on any correct data.
6356  exports_raw[curOffset + k] = gblColInd;
6357  } // for each entry in the row
6358  } // final pass?
6359  exportsOffset = curOffset + numEnt;
6360  }
6361  else { // error in getting global row view
6362  Kokkos::atomic_add (&errCountView(), ONE);
6363  }
6364  }
6365  // If neither globally nor locally indexed, then the graph
6366  // has no entries in this row (or indeed, in any row on this
6367  // process) to pack.
6368  }
6369  });
6370 
6371  // We may have accessed UVM data on host above, so ensure that the
6372  // device sees these changes.
6373  device_execution_space().fence ();
6374 
6375  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6376  (errCount != 0, std::logic_error, "Packing encountered "
6377  "one or more errors! errCount = " << errCount
6378  << ", totalNumPackets = " << totalNumPackets << ".");
6379  }
6380 
// Pack the column indices of the rows named by exportLIDs into the
// exports buffer, working entirely on host, for a graph whose fill is
// still active.
//
// exportLIDs: local row indices to pack.  Only its host view is read,
//   and the method asserts that it does not need a sync to host.
// exports: output buffer.  Reallocated if it has fewer than
//   totalNumPackets entries, then marked as modified on host and
//   filled with global column indices, one row after another in
//   exportLIDs order.
// numPacketsPerLID: output.  Entry i is set on host to the number of
//   entries in row exportLIDs[i] (0 if that row is not in the row Map).
// constantNumPackets: output.  Always set to 0, since different rows
//   may pack different numbers of entries.
// distor: not referenced by this method.
//
// Throws std::runtime_error if exportLIDs and numPacketsPerLID have
// different lengths.  Throws std::logic_error if the graph is locally
// indexed but has no column Map, if the counting pass finds a row that
// is not in the row Map, or if supportsRowViews() returns false.
// Errors found by the packing pass are only counted (and printed in
// debug mode); the corresponding exception is commented out below.
6381  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6382  void
6383  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6384  packFillActiveNew (const Kokkos::DualView<const local_ordinal_type*,
6385  buffer_device_type>& exportLIDs,
6386  Kokkos::DualView<packet_type*,
6387  buffer_device_type>& exports,
6388  Kokkos::DualView<size_t*,
6389  buffer_device_type> numPacketsPerLID,
6390  size_t& constantNumPackets,
6391  Distributor& distor) const
6392  {
6393  using std::endl;
6394  using LO = local_ordinal_type;
6395  using GO = global_ordinal_type;
6396  using host_execution_space = typename Kokkos::View<size_t*,
6397  device_type>::HostMirror::execution_space;
6398  using host_device_type =
6399  Kokkos::Device<host_execution_space, Kokkos::HostSpace>;
6400  using device_execution_space = typename device_type::execution_space;
6401  using exports_dv_type =
6402  Kokkos::DualView<packet_type*, buffer_device_type>;
6403  const char tfecfFuncName[] = "packFillActiveNew: ";
6404  const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph");
// The rank is only needed for debug output, so only look it up in
// debug mode.
6405  const int myRank = debug ? this->getMap ()->getComm ()->getRank () : 0;
6406 
// Prefix for debug output lines; only allocated in debug mode.
6407  std::unique_ptr<std::string> prefix;
6408  if (debug) {
6409  std::ostringstream os;
6410  os << "Proc " << myRank << ": Tpetra::CrsGraph::packFillActiveNew: ";
6411  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
6412  os << "Start" << endl;
6413  std::cerr << os.str ();
6414  }
6415 
6416  const auto numExportLIDs = exportLIDs.extent (0);
6417  if (debug) {
6418  std::ostringstream os;
6419  os << *prefix << "numExportLIDs: " << numExportLIDs
6420  << ", numPacketsPerLID.extent(0): " << numPacketsPerLID.extent (0)
6421  << endl;
6422  std::cerr << os.str ();
6423  }
6424  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6425  (numExportLIDs != numPacketsPerLID.extent (0), std::runtime_error,
6426  "exportLIDs.extent(0) = " << numExportLIDs
6427  << " != numPacketsPerLID.extent(0) = "
6428  << numPacketsPerLID.extent (0) << ".");
// exportLIDs is const here, so it cannot be synced; require that the
// host copy is already current.
6429  TEUCHOS_ASSERT( ! exportLIDs.need_sync_host () );
6430  auto exportLIDs_h = exportLIDs.view_host ();
6431 
6432  // We may be accessing UVM data on host below, so ensure that the
6433  // device is done accessing it.
6434  device_execution_space().fence ();
6435 
6436  const map_type& rowMap = * (this->getRowMap ());
6437  const map_type* const colMapPtr = this->colMap_.getRawPtr ();
6438  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6439  (this->isLocallyIndexed () && colMapPtr == nullptr, std::logic_error,
6440  "This graph claims to be locally indexed, but its column Map is nullptr. "
6441  "This should never happen. Please report this bug to the Tpetra "
6442  "developers.");
6443 
6444  // We may pack different amounts of data for different rows.
6445  constantNumPackets = 0;
6446 
// Every entry of numPacketsPerLID is overwritten on host below, so
// discard any previous sync state rather than syncing first.
6447  numPacketsPerLID.clear_sync_state ();
6448  numPacketsPerLID.modify_host ();
6449  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6450 
6451  // Count the total number of packets (column indices, in the case
6452  // of a CrsGraph) to pack. While doing so, set
6453  // numPacketsPerLID[i] to the number of entries owned by the
6454  // calling process in (local) row exportLIDs[i] of the graph, that
6455  // the caller wants us to send out.
6456  using range_type = Kokkos::RangePolicy<host_execution_space, LO>;
6457  range_type inputRange (0, numExportLIDs);
6458  size_t totalNumPackets = 0;
6459  size_t errCount = 0;
6460  // lambdas turn what they capture const, so we can't
6461  // atomic_add(&errCount,1). Instead, we need a View to modify.
// errCountView wraps the address of errCount, so increments made
// through the View inside the kernels are visible in errCount.
6462  Kokkos::View<size_t, host_device_type> errCountView (&errCount);
6463  constexpr size_t ONE = 1;
6464 
6465  if (debug) {
6466  std::ostringstream os;
6467  os << *prefix << "Compute totalNumPackets" << endl;
6468  std::cerr << os.str ();
6469  }
6470 
// First pass (host parallel_reduce): fill numPacketsPerLID_h and sum
// the per-row entry counts into totalNumPackets.  A row that is not
// in the row Map gets count 0 and bumps the error counter.
6471  Kokkos::parallel_reduce
6472  ("Tpetra::CrsGraph::pack: totalNumPackets",
6473  inputRange,
6474  [=] (const LO i, size_t& curTotalNumPackets) {
6475  const LO lclRow = exportLIDs_h[i];
6476  const GO gblRow = rowMap.getGlobalElement (lclRow);
6477  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
6478  if (debug) {
6479  std::ostringstream os;
6480  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6481  << lclRow << " not in row Map on this process" << endl;
6482  std::cerr << os.str ();
6483  }
6484  Kokkos::atomic_add (&errCountView(), ONE);
6485  numPacketsPerLID_h(i) = 0;
6486  }
6487  else {
6488  const size_t numEnt = this->getNumEntriesInGlobalRow (gblRow);
6489  numPacketsPerLID_h(i) = numEnt;
6490  curTotalNumPackets += numEnt;
6491  }
6492  },
6493  totalNumPackets);
6494 
6495  if (debug) {
6496  std::ostringstream os;
6497  os << *prefix << "totalNumPackets: " << totalNumPackets
6498  << ", errCount: " << errCount << endl;
6499  std::cerr << os.str ();
6500  }
6501  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6502  (errCount != 0, std::logic_error, "totalNumPackets count encountered "
6503  "one or more errors! totalNumPackets: " << totalNumPackets
6504  << ", errCount: " << errCount << ".");
6505 
6506  // Allocate space for all the column indices to pack.
// The buffer is only replaced when it is too small; an existing
// larger buffer is reused as is.
6507  if (static_cast<size_t> (exports.extent (0)) < totalNumPackets) {
6508  // FIXME (mfh 09 Apr 2019) Create without initializing.
6509  exports = exports_dv_type ("exports", totalNumPackets);
6510  }
6511 
6512  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6513  (! this->supportsRowViews (), std::logic_error,
6514  "this->supportsRowViews() returns false; this should never happen. "
6515  "Please report this bug to the Tpetra developers.");
6516 
6517  // Loop again over the rows to export, and pack rows of indices
6518  // into the output buffer.
6519 
6520  if (debug) {
6521  std::ostringstream os;
6522  os << *prefix << "Pack into exports buffer" << endl;
6523  std::cerr << os.str ();
6524  }
6525 
// exports is written on host below; discard its previous sync state
// and mark the host copy as the modified one.
6526  exports.clear_sync_state ();
6527  exports.modify_host ();
6528  auto exports_h = exports.view_host ();
6529 
6530  // The graph may store its data in UVM memory, so make sure that
6531  // any device kernels are done modifying the graph's data before
6532  // reading the data.
6533  device_execution_space().fence ();
6534 
// Reset the error counter (errCountView still aliases errCount)
// before the second pass.
6535  errCount = 0;
// Second pass (host parallel_scan): the scan's running total is the
// offset into exports at which row i starts.  The lambda adds the
// row's entry count to the running total on every call, but writes
// into exports_h only when `final` is true.  On any error the lambda
// returns (or falls through) without advancing the offset.
6536  Kokkos::parallel_scan
6537  ("Tpetra::CrsGraph::packFillActiveNew: Pack exports",
6538  inputRange,
6539  [=] (const LO i, size_t& exportsOffset, const bool final) {
// Value of the running total on entry; used as this row's starting
// position in exports_h.
6540  const size_t curOffset = exportsOffset;
6541  const LO lclRow = exportLIDs_h(i);
6542  const GO gblRow = rowMap.getGlobalElement (lclRow);
6543  if (gblRow == Details::OrdinalTraits<GO>::invalid ()) {
6544  if (debug) {
6545  std::ostringstream os;
6546  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6547  << lclRow << " not in row Map on this process" << endl;
6548  std::cerr << os.str ();
6549  }
6550  Kokkos::atomic_add (&errCountView(), ONE);
6551  return;
6552  }
6553 
6554  const RowInfo rowInfo = this->getRowInfoFromGlobalRowIndex (gblRow);
6555  if (rowInfo.localRow == Details::OrdinalTraits<size_t>::invalid ()) {
6556  if (debug) {
6557  std::ostringstream os;
6558  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6559  << lclRow << ", gblRow=" << gblRow << ": invalid rowInfo"
6560  << endl;
6561  std::cerr << os.str ();
6562  }
6563  Kokkos::atomic_add (&errCountView(), ONE);
6564  return;
6565  }
6566 
// Guard against writing past the number of packets counted in the
// first pass.
6567  if (curOffset + rowInfo.numEntries > totalNumPackets) {
6568  if (debug) {
6569  std::ostringstream os;
6570  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6571  << lclRow << ", gblRow=" << gblRow << ", curOffset "
6572  "(= " << curOffset << ") + numEnt (= " << rowInfo.numEntries
6573  << ") > totalNumPackets (= " << totalNumPackets << ")."
6574  << endl;
6575  std::cerr << os.str ();
6576  }
6577  Kokkos::atomic_add (&errCountView(), ONE);
6578  return;
6579  }
6580 
6581  const LO numEnt = static_cast<LO> (rowInfo.numEntries);
// Locally indexed: convert each local column index to a global index
// through the column Map before packing.
6582  if (this->isLocallyIndexed ()) {
6583  const LO* lclColInds = nullptr;
6584  LO capacity = 0;
6585  const LO errCode =
6586  this->getLocalViewRawConst (lclColInds, capacity, rowInfo);
6587  if (errCode == 0) {
6588  if (final) {
6589  for (LO k = 0; k < numEnt; ++k) {
6590  const LO lclColInd = lclColInds[k];
6591  const GO gblColInd = colMapPtr->getGlobalElement (lclColInd);
6592  // Pack it, even if it's wrong. Let the receiving
6593  // process deal with it. Otherwise, we'll miss out
6594  // on any correct data.
6595  exports_h(curOffset + k) = gblColInd;
6596  } // for each entry in the row
6597  } // final pass?
6598  exportsOffset = curOffset + numEnt;
6599  }
6600  else { // error in getting local row view
6601  if (debug) {
6602  std::ostringstream os;
6603  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6604  << lclRow << ", gblRow=" << gblRow << ": "
6605  "getLocalViewRawConst returned nonzero error code "
6606  << errCode << endl;
6607  std::cerr << os.str ();
6608  }
6609  Kokkos::atomic_add (&errCountView(), ONE);
6610  }
6611  }
// Globally indexed: the stored indices are already global, so copy
// them directly.
6612  else if (this->isGloballyIndexed ()) {
6613  const GO* gblColInds = nullptr;
6614  LO capacity = 0;
6615  const LO errCode =
6616  this->getGlobalViewRawConst (gblColInds, capacity, rowInfo);
6617  if (errCode == 0) {
6618  if (final) {
6619  for (LO k = 0; k < numEnt; ++k) {
6620  const GO gblColInd = gblColInds[k];
6621  // Pack it, even if it's wrong. Let the receiving
6622  // process deal with it. Otherwise, we'll miss out
6623  // on any correct data.
6624  exports_h(curOffset + k) = gblColInd;
6625  } // for each entry in the row
6626  } // final pass?
6627  exportsOffset = curOffset + numEnt;
6628  }
6629  else { // error in getting global row view
6630  if (debug) {
6631  std::ostringstream os;
6632  os << "Proc " << myRank << ": For i=" << i << ", lclRow="
6633  << lclRow << ", gblRow=" << gblRow << ": "
6634  "getGlobalViewRawConst returned nonzero error code "
6635  << errCode << endl;
6636  std::cerr << os.str ();
6637  }
6638  Kokkos::atomic_add (&errCountView(), ONE);
6639  }
6640  }
6641  // If neither globally nor locally indexed, then the graph
6642  // has no entries in this row (or indeed, in any row on this
6643  // process) to pack.
6644  });
6645 
// NOTE(review): the error check for the packing pass is disabled
// here, so a nonzero errCount from the scan is only reported through
// the debug output below.  The reason it was disabled is not visible
// in this file -- confirm before re-enabling.
6646  // TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6647  // (errCount != 0, std::logic_error, "Packing encountered "
6648  // "one or more errors! errCount = " << errCount
6649  // << ", totalNumPackets = " << totalNumPackets << ".");
6650 
6651  if (debug) {
6652  std::ostringstream os;
6653  os << *prefix << "errCount = " << errCount << "; Done" << endl;
6654  std::cerr << os.str ();
6655  }
6656  }
6657 
6658  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6659  void
6660  CrsGraph<LocalOrdinal, GlobalOrdinal, Node>::
6661 #ifdef TPETRA_ENABLE_DEPRECATED_CODE
6662  unpackAndCombineNew
6663 #else // TPETRA_ENABLE_DEPRECATED_CODE
6665 #endif // TPETRA_ENABLE_DEPRECATED_CODE
6666  (const Kokkos::DualView<const local_ordinal_type*,
6667  buffer_device_type>& importLIDs,
6668  Kokkos::DualView<packet_type*,
6669  buffer_device_type> imports,
6670  Kokkos::DualView<size_t*,
6671  buffer_device_type> numPacketsPerLID,
6672  const size_t /* constantNumPackets */,
6673  Distributor& /* distor */,
6674  const CombineMode /* combineMode */ )
6675  {
6676  using std::endl;
6677  using LO = local_ordinal_type;
6678  using GO = global_ordinal_type;
6679  const char tfecfFuncName[] = "unpackAndCombine: ";
6680  const bool debug = ::Tpetra::Details::Behavior::debug ("CrsGraph");
6681 
6682  std::unique_ptr<std::string> prefix;
6683  if (debug) {
6684  std::ostringstream os;
6685  const int myRank = this->getMap ()->getComm ()->getRank ();
6686  os << "Proc " << myRank << ": Tpetra::CrsGraph::unpackAndCombine: ";
6687  prefix = std::unique_ptr<std::string> (new std::string (os.str ()));
6688  os << endl;
6689  std::cerr << os.str ();
6690  }
6691 
6692  if (this->getProfileType () == StaticProfile) {
6693  auto padding = computeCrsPadding (importLIDs, numPacketsPerLID);
6694  applyCrsPadding(padding);
6695  }
6696  // FIXME (mfh 02 Apr 2012) REPLACE combine mode has a perfectly
6697  // reasonable meaning, whether or not the matrix is fill complete.
6698  // It's just more work to implement.
6699 
6700  // We are not checking the value of the CombineMode input
6701  // argument. For CrsGraph, we only support import/export
6702  // operations if fillComplete has not yet been called. Any
6703  // incoming column-indices are inserted into the target graph. In
6704  // this context, CombineMode values of ADD vs INSERT are
6705  // equivalent. What is the meaning of REPLACE for CrsGraph? If a
6706  // duplicate column-index is inserted, it will be compressed out
6707  // when fillComplete is called.
6708  //
6709  // Note: I think REPLACE means that an existing row is replaced by
6710  // the imported row, i.e., the existing indices are cleared. CGB,
6711  // 6/17/2010
6712 
6713  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6714  (importLIDs.extent (0) != numPacketsPerLID.extent (0),
6715  std::runtime_error, "importLIDs.extent(0) = "
6716  << importLIDs.extent (0) << " != numPacketsPerLID.extent(0) = "
6717  << numPacketsPerLID.extent (0) << ".");
6718  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6719  (isFillComplete (), std::runtime_error,
6720  "Import or Export operations are not allowed on the destination "
6721  "CrsGraph if it is fill complete.");
6722 
6723  const size_t numImportLIDs = static_cast<size_t> (importLIDs.extent (0));
6724  if (numPacketsPerLID.need_sync_host ()) {
6725  numPacketsPerLID.sync_host ();
6726  }
6727  auto numPacketsPerLID_h = numPacketsPerLID.view_host ();
6728 
6729  // If we're inserting in local indices, let's pre-allocate
6730  Teuchos::Array<LO> lclColInds;
6731  if (this->isLocallyIndexed ()) {
6732  size_t maxNumInserts = 0;
6733  for (size_t i = 0; i < numImportLIDs; ++i) {
6734  maxNumInserts = std::max (maxNumInserts, numPacketsPerLID_h[i]);
6735  }
6736  lclColInds.resize (maxNumInserts);
6737  }
6738 
6739  auto importLIDs_h = importLIDs.view_host ();
6740  if (imports.need_sync_host ()) {
6741  imports.sync_host ();
6742  }
6743  auto imports_h = imports.view_host ();
6744 
6745  const map_type& rowMap = * (this->rowMap_);
6746  size_t importsOffset = 0;
6747  for (size_t i = 0; i < numImportLIDs; ++i) {
6748  const LO lclRow = importLIDs_h[i];
6749  const GO gblRow = rowMap.getGlobalElement (lclRow);
6750  const LO numEnt = numPacketsPerLID_h[i];
6751  const GO* const gblColInds = (numEnt == 0) ? nullptr :
6752  &imports_h[importsOffset];
6753  if (! this->isLocallyIndexed ()) {
6754  if (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid ()) {
6755  // This row is not in the row Map on the calling process.
6756  this->insertGlobalIndicesIntoNonownedRows (gblRow, gblColInds, numEnt);
6757  }
6758  else {
6759  this->insertGlobalIndicesFiltered (lclRow, gblColInds, numEnt);
6760  }
6761  }
6762  else {
6763  for (LO j = 0; j < numEnt; j++) {
6764  lclColInds[j] = this->colMap_->getLocalElement (gblColInds[j]);
6765  }
6766  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6767  (gblRow == Tpetra::Details::OrdinalTraits<GO>::invalid (),
6768  std::runtime_error,
6769  "cannot insert into unowned rows if isLocallyIndexed().");
6770  this->insertLocalIndices (lclRow, numEnt, lclColInds.data ());
6771  }
6772  importsOffset += numEnt;
6773  }
6774 
6775 
6776  if (debug) {
6777  std::ostringstream os;
6778  os << *prefix << "Done" << endl;
6779  std::cerr << os.str ();
6780  }
6781  }
6782 
6783  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6784  void
6786  removeEmptyProcessesInPlace (const Teuchos::RCP<const map_type>& newMap)
6787  {
6788  using Teuchos::Comm;
6789  using Teuchos::null;
6790  using Teuchos::ParameterList;
6791  using Teuchos::RCP;
6792 
6793  // We'll set all the state "transactionally," so that this method
6794  // satisfies the strong exception guarantee. This object's state
6795  // won't be modified until the end of this method.
6796  RCP<const map_type> rowMap, domainMap, rangeMap, colMap;
6797  RCP<import_type> importer;
6798  RCP<export_type> exporter;
6799 
6800  rowMap = newMap;
6801  RCP<const Comm<int> > newComm =
6802  (newMap.is_null ()) ? null : newMap->getComm ();
6803 
6804  if (! domainMap_.is_null ()) {
6805  if (domainMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6806  // Common case: original domain and row Maps are identical.
6807  // In that case, we need only replace the original domain Map
6808  // with the new Map. This ensures that the new domain and row
6809  // Maps _stay_ identical.
6810  domainMap = newMap;
6811  } else {
6812  domainMap = domainMap_->replaceCommWithSubset (newComm);
6813  }
6814  }
6815  if (! rangeMap_.is_null ()) {
6816  if (rangeMap_.getRawPtr () == rowMap_.getRawPtr ()) {
6817  // Common case: original range and row Maps are identical. In
6818  // that case, we need only replace the original range Map with
6819  // the new Map. This ensures that the new range and row Maps
6820  // _stay_ identical.
6821  rangeMap = newMap;
6822  } else {
6823  rangeMap = rangeMap_->replaceCommWithSubset (newComm);
6824  }
6825  }
6826  if (! colMap.is_null ()) {
6827  colMap = colMap_->replaceCommWithSubset (newComm);
6828  }
6829 
6830  // (Re)create the Export and / or Import if necessary.
6831  if (! newComm.is_null ()) {
6832  RCP<ParameterList> params = this->getNonconstParameterList (); // could be null
6833  //
6834  // The operations below are collective on the new communicator.
6835  //
6836  // (Re)create the Export object if necessary. If I haven't
6837  // called fillComplete yet, I don't have a rangeMap, so I must
6838  // first check if the _original_ rangeMap is not null. Ditto
6839  // for the Import object and the domain Map.
6840  if (! rangeMap_.is_null () &&
6841  rangeMap != rowMap &&
6842  ! rangeMap->isSameAs (*rowMap)) {
6843  if (params.is_null () || ! params->isSublist ("Export")) {
6844  exporter = rcp (new export_type (rowMap, rangeMap));
6845  }
6846  else {
6847  RCP<ParameterList> exportSublist = sublist (params, "Export", true);
6848  exporter = rcp (new export_type (rowMap, rangeMap, exportSublist));
6849  }
6850  }
6851  // (Re)create the Import object if necessary.
6852  if (! domainMap_.is_null () &&
6853  domainMap != colMap &&
6854  ! domainMap->isSameAs (*colMap)) {
6855  if (params.is_null () || ! params->isSublist ("Import")) {
6856  importer = rcp (new import_type (domainMap, colMap));
6857  } else {
6858  RCP<ParameterList> importSublist = sublist (params, "Import", true);
6859  importer = rcp (new import_type (domainMap, colMap, importSublist));
6860  }
6861  }
6862  } // if newComm is not null
6863 
6864  // Defer side effects until the end. If no destructors throw
6865  // exceptions (they shouldn't anyway), then this method satisfies
6866  // the strong exception guarantee.
6867  exporter_ = exporter;
6868  importer_ = importer;
6869  rowMap_ = rowMap;
6870  // mfh 31 Mar 2013: DistObject's map_ is the row Map of a CrsGraph
6871  // or CrsMatrix. CrsGraph keeps a redundant pointer (rowMap_) to
6872  // the same object. We might want to get rid of this redundant
6873  // pointer sometime, but for now, we'll leave it alone and just
6874  // set map_ to the same object.
6875  this->map_ = rowMap;
6876  domainMap_ = domainMap;
6877  rangeMap_ = rangeMap;
6878  colMap_ = colMap;
6879  }
6880 
6881  template <class LocalOrdinal, class GlobalOrdinal, class Node>
6882  void
6884  getLocalDiagOffsets (const Kokkos::View<size_t*, device_type, Kokkos::MemoryUnmanaged>& offsets) const
6885  {
6886  typedef LocalOrdinal LO;
6887  typedef GlobalOrdinal GO;
6888  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
6889  const bool debug = ::Tpetra::Details::Behavior::debug ();
6890 
6891  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6892  (! hasColMap (), std::runtime_error, "The graph must have a column Map.");
6893  const LO lclNumRows = static_cast<LO> (this->getNodeNumRows ());
6894  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
6895  (static_cast<LO> (offsets.extent (0)) < lclNumRows,
6896  std::invalid_argument, "offsets.extent(0) = " <<
6897  offsets.extent (0) << " < getNodeNumRows() = " << lclNumRows << ".");
6898 
6899  const map_type& rowMap = * (this->getRowMap ());
6900  const map_type& colMap = * (this->getColMap ());
6901 
6902  // We only use these in debug mode, but since debug mode is a
6903  // run-time option, they need to exist here. That's why we create
6904  // the vector with explicit size zero, to avoid overhead if debug
6905  // mode is off.
6906  bool allRowMapDiagEntriesInColMap = true;
6907  bool allDiagEntriesFound = true;
6908  bool allOffsetsCorrect = true;
6909  bool noOtherWeirdness = true;
6910  using wrong_offsets_type = std::vector<std::pair<LO, size_t> >;
6911  wrong_offsets_type wrongOffsets (0);
6912 
6913  // mfh 12 Mar 2016: LocalMap works on (CUDA) device. It has just
6914  // the subset of Map functionality that we need below.
6915  auto lclRowMap = rowMap.getLocalMap ();
6916  auto lclColMap = colMap.getLocalMap ();
6917 
6918  // FIXME (mfh 16 Dec 2015) It's easy to thread-parallelize this
6919  // setup, at least on the host. For CUDA, we have to use LocalMap
6920  // (that comes from each of the two Maps).
6921 
6922  const bool sorted = this->isSorted ();
6923  if (isFillComplete ()) {
6924  auto lclGraph = this->getLocalGraph ();
6925  ::Tpetra::Details::getGraphDiagOffsets (offsets, lclRowMap, lclColMap,
6926  lclGraph.row_map,
6927  lclGraph.entries, sorted);
6928  }
6929  else {
6930  // NOTE (mfh 22 Feb 2017): We have to run this code on host,
6931  // since the graph is not fill complete. The previous version
6932  // of this code assumed UVM; this version does not.
6933  auto offsets_h = Kokkos::create_mirror_view (offsets);
6934 
6935  for (LO lclRowInd = 0; lclRowInd < lclNumRows; ++lclRowInd) {
6936  // Find the diagonal entry. Since the row Map and column Map
6937  // may differ, we have to compare global row and column
6938  // indices, not local.
6939  const GO gblRowInd = lclRowMap.getGlobalElement (lclRowInd);
6940  const GO gblColInd = gblRowInd;
6941  const LO lclColInd = lclColMap.getLocalElement (gblColInd);
6942 
6943  if (lclColInd == Tpetra::Details::OrdinalTraits<LO>::invalid ()) {
6944  allRowMapDiagEntriesInColMap = false;
6945  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6946  }
6947  else {
6948  const RowInfo rowInfo = this->getRowInfo (lclRowInd);
6949  if (static_cast<LO> (rowInfo.localRow) == lclRowInd &&
6950  rowInfo.numEntries > 0) {
6951 
6952  auto colInds = this->getLocalKokkosRowView (rowInfo);
6953  const size_t hint = 0; // not needed for this algorithm
6954  const size_t offset =
6955  KokkosSparse::findRelOffset (colInds, rowInfo.numEntries,
6956  lclColInd, hint, sorted);
6957  offsets_h(lclRowInd) = offset;
6958 
6959  if (debug) {
6960  // Now that we have what we think is an offset, make sure
6961  // that it really does point to the diagonal entry. Offsets
6962  // are _relative_ to each row, not absolute (for the whole
6963  // (local) graph).
6964  Teuchos::ArrayView<const LO> lclColInds;
6965  try {
6966  this->getLocalRowView (lclRowInd, lclColInds);
6967  }
6968  catch (...) {
6969  noOtherWeirdness = false;
6970  }
6971  // Don't continue with error checking if the above failed.
6972  if (noOtherWeirdness) {
6973  const size_t numEnt = lclColInds.size ();
6974  if (offset >= numEnt) {
6975  // Offsets are relative to each row, so this means that
6976  // the offset is out of bounds.
6977  allOffsetsCorrect = false;
6978  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6979  } else {
6980  const LO actualLclColInd = lclColInds[offset];
6981  const GO actualGblColInd = lclColMap.getGlobalElement (actualLclColInd);
6982  if (actualGblColInd != gblColInd) {
6983  allOffsetsCorrect = false;
6984  wrongOffsets.push_back (std::make_pair (lclRowInd, offset));
6985  }
6986  }
6987  }
6988  } // debug
6989  }
6990  else { // either row is empty, or something went wrong w/ getRowInfo()
6991  offsets_h(lclRowInd) = Tpetra::Details::OrdinalTraits<size_t>::invalid ();
6992  allDiagEntriesFound = false;
6993  }
6994  } // whether lclColInd is a valid local column index
6995  } // for each local row
6996 
6997  Kokkos::deep_copy (offsets, offsets_h);
6998  } // whether the graph is fill complete
6999 
7000  if (debug) {
7001  if (wrongOffsets.size () != 0) {
7002  std::ostringstream os;
7003  os << "Proc " << this->getComm ()->getRank () << ": Wrong offsets: [";
7004  for (size_t k = 0; k < wrongOffsets.size (); ++k) {
7005  os << "(" << wrongOffsets[k].first << ","
7006  << wrongOffsets[k].second << ")";
7007  if (k + 1 < wrongOffsets.size ()) {
7008  os << ", ";
7009  }
7010  }
7011  os << "]" << std::endl;
7012  std::cerr << os.str ();
7013  }
7014  } // debug
7015 
7016  if (debug) {
7017  using Teuchos::reduceAll;
7018  using std::endl;
7019  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
7020  const bool localSuccess =
7021  allRowMapDiagEntriesInColMap && allDiagEntriesFound && allOffsetsCorrect;
7022  const int numResults = 5;
7023  int lclResults[5];
7024  lclResults[0] = allRowMapDiagEntriesInColMap ? 1 : 0;
7025  lclResults[1] = allDiagEntriesFound ? 1 : 0;
7026  lclResults[2] = allOffsetsCorrect ? 1 : 0;
7027  lclResults[3] = noOtherWeirdness ? 1 : 0;
7028  // min-all-reduce will compute least rank of all the processes
7029  // that didn't succeed.
7030  lclResults[4] = ! localSuccess ? comm->getRank () : comm->getSize ();
7031 
7032  int gblResults[5];
7033  gblResults[0] = 0;
7034  gblResults[1] = 0;
7035  gblResults[2] = 0;
7036  gblResults[3] = 0;
7037  gblResults[4] = 0;
7038  reduceAll<int, int> (*comm, Teuchos::REDUCE_MIN,
7039  numResults, lclResults, gblResults);
7040 
7041  if (gblResults[0] != 1 || gblResults[1] != 1 || gblResults[2] != 1
7042  || gblResults[3] != 1) {
7043  std::ostringstream os; // build error message
7044  os << "Issue(s) that we noticed (on Process " << gblResults[4] << ", "
7045  "possibly among others): " << endl;
7046  if (gblResults[0] == 0) {
7047  os << " - The column Map does not contain at least one diagonal entry "
7048  "of the graph." << endl;
7049  }
7050  if (gblResults[1] == 0) {
7051  os << " - On one or more processes, some row does not contain a "
7052  "diagonal entry." << endl;
7053  }
7054  if (gblResults[2] == 0) {
7055  os << " - On one or more processes, some offsets are incorrect."
7056  << endl;
7057  }
7058  if (gblResults[3] == 0) {
7059  os << " - One or more processes had some other error."
7060  << endl;
7061  }
7062  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::runtime_error, os.str());
7063  }
7064  } // debug
7065  }
7066 
7067  namespace { // (anonymous)
7068 
7069  // mfh 21 Jan 2016: This is useful for getLocalDiagOffsets (see
7070  // below). The point is to avoid the deep copy between the input
7071  // Teuchos::ArrayRCP and the internally used Kokkos::View. We
7072  // can't use UVM to avoid the deep copy with CUDA, because the
7073  // ArrayRCP is a host pointer, while the input to the graph's
7074  // getLocalDiagOffsets method is a device pointer. Assigning a
7075  // host pointer to a device pointer is incorrect unless the host
7076  // pointer points to host pinned memory. The goal is to get rid
7077  // of the Teuchos::ArrayRCP overload anyway, so we accept the deep
7078  // copy for backwards compatibility.
7079  //
7080  // We have to use template magic because
7081  // "staticGraph_->getLocalDiagOffsets(offsetsHosts)" won't compile
7082  // if device_type::memory_space is not Kokkos::HostSpace (as is
7083  // the case with CUDA).
7084 
7085  template<class DeviceType,
7086  const bool memSpaceIsHostSpace =
7087  std::is_same<typename DeviceType::memory_space,
7088  Kokkos::HostSpace>::value>
7089  struct HelpGetLocalDiagOffsets {};
7090 
7091  template<class DeviceType>
7092  struct HelpGetLocalDiagOffsets<DeviceType, true> {
7093  typedef DeviceType device_type;
7094  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
7095  Kokkos::MemoryUnmanaged> device_offsets_type;
7096  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
7097  Kokkos::MemoryUnmanaged> host_offsets_type;
7098 
7099  static device_offsets_type
7100  getDeviceOffsets (const host_offsets_type& hostOffsets)
7101  {
7102  // Host and device are the same; no need to allocate a
7103  // temporary device View.
7104  return hostOffsets;
7105  }
7106 
7107  static void
7108  copyBackIfNeeded (const host_offsets_type& /* hostOffsets */,
7109  const device_offsets_type& /* deviceOffsets */)
7110  { /* copy back not needed; host and device are the same */ }
7111  };
7112 
7113  template<class DeviceType>
7114  struct HelpGetLocalDiagOffsets<DeviceType, false> {
7115  typedef DeviceType device_type;
7116  // We have to do a deep copy, since host memory space != device
7117  // memory space. Thus, the device View is managed (we need to
7118  // allocate a temporary device View).
7119  typedef Kokkos::View<size_t*, device_type> device_offsets_type;
7120  typedef Kokkos::View<size_t*, Kokkos::HostSpace,
7121  Kokkos::MemoryUnmanaged> host_offsets_type;
7122 
7123  static device_offsets_type
7124  getDeviceOffsets (const host_offsets_type& hostOffsets)
7125  {
7126  // Host memory space != device memory space, so we must
7127  // allocate a temporary device View for the graph.
7128  return device_offsets_type ("offsets", hostOffsets.extent (0));
7129  }
7130 
7131  static void
7132  copyBackIfNeeded (const host_offsets_type& hostOffsets,
7133  const device_offsets_type& deviceOffsets)
7134  {
7135  Kokkos::deep_copy (hostOffsets, deviceOffsets);
7136  }
7137  };
7138  } // namespace (anonymous)
7139 
7140 
7141  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7142  void
7144  getLocalDiagOffsets (Teuchos::ArrayRCP<size_t>& offsets) const
7145  {
7146  typedef LocalOrdinal LO;
7147  const char tfecfFuncName[] = "getLocalDiagOffsets: ";
7148  TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC
7149  (! this->hasColMap (), std::runtime_error,
7150  "The graph does not yet have a column Map.");
7151  const LO myNumRows = static_cast<LO> (this->getNodeNumRows ());
7152  if (static_cast<LO> (offsets.size ()) != myNumRows) {
7153  // NOTE (mfh 21 Jan 2016) This means that the method does not
7154  // satisfy the strong exception guarantee (no side effects
7155  // unless successful).
7156  offsets.resize (myNumRows);
7157  }
7158 
7159  // mfh 21 Jan 2016: This method unfortunately takes a
7160  // Teuchos::ArrayRCP, which is host memory. The graph wants a
7161  // device pointer. We can't access host memory from the device;
7162  // that's the wrong direction for UVM. (It's the right direction
7163  // for inefficient host pinned memory, but we don't want to use
7164  // that here.) Thus, if device memory space != host memory space,
7165  // we allocate and use a temporary device View to get the offsets.
7166  // If the two spaces are equal, the template magic makes the deep
7167  // copy go away.
7168  typedef HelpGetLocalDiagOffsets<device_type> helper_type;
7169  typedef typename helper_type::host_offsets_type host_offsets_type;
7170  // Unmanaged host View that views the output array.
7171  host_offsets_type hostOffsets (offsets.getRawPtr (), myNumRows);
7172  // Allocate temp device View if host != device, else reuse host array.
7173  auto deviceOffsets = helper_type::getDeviceOffsets (hostOffsets);
7174  // NOT recursion; this calls the overload that takes a device View.
7175  this->getLocalDiagOffsets (deviceOffsets);
7176  helper_type::copyBackIfNeeded (hostOffsets, deviceOffsets);
7177  }
7178 
7179  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7180  bool
7182  supportsRowViews () const {
7183  return true;
7184  }
7185 
7186  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7187  void
7190  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& rowTransfer,
7191  const Teuchos::RCP<const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node> > & domainTransfer,
7192  const Teuchos::RCP<const map_type>& domainMap,
7193  const Teuchos::RCP<const map_type>& rangeMap,
7194  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7195  {
7200  using Teuchos::ArrayRCP;
7201  using Teuchos::ArrayView;
7202  using Teuchos::Comm;
7203  using Teuchos::ParameterList;
7204  using Teuchos::rcp;
7205  using Teuchos::RCP;
7206 #ifdef HAVE_TPETRA_MMM_TIMINGS
7207  using std::string;
7208  using Teuchos::TimeMonitor;
7209 #endif
7210 
7211  using LO = LocalOrdinal;
7212  using GO = GlobalOrdinal;
7213  using NT = node_type;
7214  using this_type = CrsGraph<LO, GO, NT>;
7215  using ivector_type = Vector<int, LO, GO, NT>;
7216  using packet_type = typename this_type::packet_type;
7217 
7218  const char* prefix = "Tpetra::CrsGraph::transferAndFillComplete: ";
7219 
7220 #ifdef HAVE_TPETRA_MMM_TIMINGS
7221  string label;
7222  if(!params.is_null()) label = params->get("Timer Label", label);
7223  string prefix2 = string("Tpetra ")+ label + std::string(": CrsGraph TAFC ");
7224  RCP<TimeMonitor> MM =
7225  rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Pack-1"))));
7226 #endif
7227 
7228  // Make sure that the input argument rowTransfer is either an
7229  // Import or an Export. Import and Export are the only two
7230  // subclasses of Transfer that we defined, but users might
7231  // (unwisely, for now at least) decide to implement their own
7232  // subclasses. Exclude this possibility.
7233  const import_type* xferAsImport = dynamic_cast<const import_type*>(&rowTransfer);
7234  const export_type* xferAsExport = dynamic_cast<const export_type*>(&rowTransfer);
7235  TEUCHOS_TEST_FOR_EXCEPTION(
7236  xferAsImport == nullptr && xferAsExport == nullptr, std::invalid_argument,
7237  prefix << "The 'rowTransfer' input argument must be either an Import or "
7238  "an Export, and its template parameters must match the corresponding "
7239  "template parameters of the CrsGraph.");
7240 
7241  // Make sure that the input argument domainTransfer is either an
7242  // Import or an Export. Import and Export are the only two
7243  // subclasses of Transfer that we defined, but users might
7244  // (unwisely, for now at least) decide to implement their own
7245  // subclasses. Exclude this possibility.
7246  Teuchos::RCP<const import_type> xferDomainAsImport =
7247  Teuchos::rcp_dynamic_cast<const import_type>(domainTransfer);
7248  Teuchos::RCP<const export_type> xferDomainAsExport =
7249  Teuchos::rcp_dynamic_cast<const export_type>(domainTransfer);
7250 
7251  if(! domainTransfer.is_null()) {
7252 
7253  TEUCHOS_TEST_FOR_EXCEPTION(
7254  (xferDomainAsImport.is_null() && xferDomainAsExport.is_null()), std::invalid_argument,
7255  prefix << "The 'domainTransfer' input argument must be either an "
7256  "Import or an Export, and its template parameters must match the "
7257  "corresponding template parameters of the CrsGraph.");
7258 
7259  TEUCHOS_TEST_FOR_EXCEPTION(
7260  ( xferAsImport != nullptr || ! xferDomainAsImport.is_null() ) &&
7261  (( xferAsImport != nullptr && xferDomainAsImport.is_null() ) ||
7262  ( xferAsImport == nullptr && ! xferDomainAsImport.is_null() )), std::invalid_argument,
7263  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
7264  "must be of the same type (either Import or Export).");
7265 
7266  TEUCHOS_TEST_FOR_EXCEPTION(
7267  ( xferAsExport != nullptr || ! xferDomainAsExport.is_null() ) &&
7268  (( xferAsExport != nullptr && xferDomainAsExport.is_null() ) ||
7269  ( xferAsExport == nullptr && ! xferDomainAsExport.is_null() )), std::invalid_argument,
7270  prefix << "The 'rowTransfer' and 'domainTransfer' input arguments "
7271  "must be of the same type (either Import or Export).");
7272 
7273  } // domainTransfer != null
7274 
7275 
7276  // FIXME (mfh 15 May 2014) Wouldn't communication still be needed,
7277  // if the source Map is not distributed but the target Map is?
7278  const bool communication_needed = rowTransfer.getSourceMap()->isDistributed();
7279 
7280  //
7281  // Get the caller's parameters
7282  //
7283 
7284  bool reverseMode = false; // Are we in reverse mode?
7285  bool restrictComm = false; // Do we need to restrict the communicator?
7286  RCP<ParameterList> graphparams; // parameters for the destination graph
7287  if (! params.is_null()) {
7288  reverseMode = params->get("Reverse Mode", reverseMode);
7289  restrictComm = params->get("Restrict Communicator", restrictComm);
7290  graphparams = sublist(params, "CrsGraph");
7291  }
7292 
7293  // Get the new domain and range Maps. We need some of them for error
7294  // checking, now that we have the reverseMode parameter.
7295  RCP<const map_type> MyRowMap = reverseMode ?
7296  rowTransfer.getSourceMap() : rowTransfer.getTargetMap();
7297  RCP<const map_type> MyColMap; // create this below
7298  RCP<const map_type> MyDomainMap = ! domainMap.is_null() ? domainMap : getDomainMap();
7299  RCP<const map_type> MyRangeMap = ! rangeMap.is_null() ? rangeMap : getRangeMap();
7300  RCP<const map_type> BaseRowMap = MyRowMap;
7301  RCP<const map_type> BaseDomainMap = MyDomainMap;
7302 
7303  // If the user gave us a nonnull destGraph, then check whether it's
7304  // "pristine." That means that it has no entries.
7305  //
7306  // FIXME (mfh 15 May 2014) If this is not true on all processes,
7307  // then this exception test may hang. It would be better to
7308  // forward an error flag to the next communication phase.
7309  if (! destGraph.is_null()) {
7310  // FIXME (mfh 15 May 2014): The Epetra idiom for checking
7311  // whether a graph or matrix has no entries on the calling
7312  // process, is that it is neither locally nor globally indexed.
7313  // This may change eventually with the Kokkos refactor version
7314  // of Tpetra, so it would be better just to check the quantity
7315  // of interest directly. Note that with the Kokkos refactor
7316  // version of Tpetra, asking for the total number of entries in
7317  // a graph or matrix that is not fill complete might require
7318  // computation (kernel launch), since it is not thread scalable
7319  // to update a count every time an entry is inserted.
7320  const bool NewFlag =
7321  ! destGraph->isLocallyIndexed() && ! destGraph->isGloballyIndexed();
7322  TEUCHOS_TEST_FOR_EXCEPTION(! NewFlag, std::invalid_argument,
7323  prefix << "The input argument 'destGraph' is only allowed to be nonnull, "
7324  "if its graph is empty (neither locally nor globally indexed).");
7325 
7326  // FIXME (mfh 15 May 2014) At some point, we want to change
7327  // graphs and matrices so that their DistObject Map
7328  // (this->getMap()) may differ from their row Map. This will
7329  // make redistribution for 2-D distributions more efficient. I
7330  // hesitate to change this check, because I'm not sure how much
7331  // the code here depends on getMap() and getRowMap() being the
7332  // same.
7333  TEUCHOS_TEST_FOR_EXCEPTION(
7334  ! destGraph->getRowMap()->isSameAs(*MyRowMap), std::invalid_argument,
7335  prefix << "The (row) Map of the input argument 'destGraph' is not the "
7336  "same as the (row) Map specified by the input argument 'rowTransfer'.");
7337 
7338  TEUCHOS_TEST_FOR_EXCEPTION(
7339  ! destGraph->checkSizes(*this), std::invalid_argument,
7340  prefix << "You provided a nonnull destination graph, but checkSizes() "
7341  "indicates that it is not a legal legal target for redistribution from "
7342  "the source graph (*this). This may mean that they do not have the "
7343  "same dimensions.");
7344  }
7345 
7346  // If forward mode (the default), then *this's (row) Map must be
7347  // the same as the source Map of the Transfer. If reverse mode,
7348  // then *this's (row) Map must be the same as the target Map of
7349  // the Transfer.
7350  //
7351  // FIXME (mfh 15 May 2014) At some point, we want to change graphs
7352  // and matrices so that their DistObject Map (this->getMap()) may
7353  // differ from their row Map. This will make redistribution for
7354  // 2-D distributions more efficient. I hesitate to change this
7355  // check, because I'm not sure how much the code here depends on
7356  // getMap() and getRowMap() being the same.
7357  TEUCHOS_TEST_FOR_EXCEPTION(
7358  ! (reverseMode || getRowMap()->isSameAs(*rowTransfer.getSourceMap())),
7359  std::invalid_argument, prefix <<
7360  "rowTransfer->getSourceMap() must match this->getRowMap() in forward mode.");
7361 
7362  TEUCHOS_TEST_FOR_EXCEPTION(
7363  ! (! reverseMode || getRowMap()->isSameAs(*rowTransfer.getTargetMap())),
7364  std::invalid_argument, prefix <<
7365  "rowTransfer->getTargetMap() must match this->getRowMap() in reverse mode.");
7366 
7367  // checks for domainTransfer
7368  TEUCHOS_TEST_FOR_EXCEPTION(
7369  ! xferDomainAsImport.is_null() && ! xferDomainAsImport->getTargetMap()->isSameAs(*domainMap),
7370  std::invalid_argument,
7371  prefix << "The target map of the 'domainTransfer' input argument must be "
7372  "the same as the rebalanced domain map 'domainMap'");
7373 
7374  TEUCHOS_TEST_FOR_EXCEPTION(
7375  ! xferDomainAsExport.is_null() && ! xferDomainAsExport->getSourceMap()->isSameAs(*domainMap),
7376  std::invalid_argument,
7377  prefix << "The source map of the 'domainTransfer' input argument must be "
7378  "the same as the rebalanced domain map 'domainMap'");
7379 
7380  // The basic algorithm here is:
7381  //
7382  // 1. Call the moral equivalent of "distor.do" to handle the import.
7383  // 2. Copy all the Imported and Copy/Permuted data into the raw
7384  // CrsGraph pointers, still using GIDs.
7385  // 3. Call an optimized version of MakeColMap that avoids the
7386  // Directory lookups (since the importer knows who owns all the
7387  // GIDs) AND reindexes to LIDs.
7388  // 4. Call expertStaticFillComplete()
7389 
7390  // Get information from the Importer
7391  const size_t NumSameIDs = rowTransfer.getNumSameIDs();
7392  ArrayView<const LO> ExportLIDs = reverseMode ?
7393  rowTransfer.getRemoteLIDs() : rowTransfer.getExportLIDs();
7394  ArrayView<const LO> RemoteLIDs = reverseMode ?
7395  rowTransfer.getExportLIDs() : rowTransfer.getRemoteLIDs();
7396  ArrayView<const LO> PermuteToLIDs = reverseMode ?
7397  rowTransfer.getPermuteFromLIDs() : rowTransfer.getPermuteToLIDs();
7398  ArrayView<const LO> PermuteFromLIDs = reverseMode ?
7399  rowTransfer.getPermuteToLIDs() : rowTransfer.getPermuteFromLIDs();
7400  Distributor& Distor = rowTransfer.getDistributor();
7401 
7402  // Owning PIDs
7403  Teuchos::Array<int> SourcePids;
7404  Teuchos::Array<int> TargetPids;
7405  int MyPID = getComm()->getRank();
7406 
7407  // Temp variables for sub-communicators
7408  RCP<const map_type> ReducedRowMap, ReducedColMap,
7409  ReducedDomainMap, ReducedRangeMap;
7410  RCP<const Comm<int> > ReducedComm;
7411 
7412  // If the user gave us a null destGraph, then construct the new
7413  // destination graph. We will replace its column Map later.
7414  if (destGraph.is_null()) {
7415  destGraph = rcp(new this_type(MyRowMap, 0, StaticProfile, graphparams));
7416  }
7417 
7418  /***************************************************/
7419  /***** 1) First communicator restriction phase ****/
7420  /***************************************************/
7421  if (restrictComm) {
7422  ReducedRowMap = MyRowMap->removeEmptyProcesses();
7423  ReducedComm = ReducedRowMap.is_null() ?
7424  Teuchos::null :
7425  ReducedRowMap->getComm();
7426  destGraph->removeEmptyProcessesInPlace(ReducedRowMap);
7427 
7428  ReducedDomainMap = MyRowMap.getRawPtr() == MyDomainMap.getRawPtr() ?
7429  ReducedRowMap :
7430  MyDomainMap->replaceCommWithSubset(ReducedComm);
7431  ReducedRangeMap = MyRowMap.getRawPtr() == MyRangeMap.getRawPtr() ?
7432  ReducedRowMap :
7433  MyRangeMap->replaceCommWithSubset(ReducedComm);
7434 
7435  // Reset the "my" maps
7436  MyRowMap = ReducedRowMap;
7437  MyDomainMap = ReducedDomainMap;
7438  MyRangeMap = ReducedRangeMap;
7439 
7440  // Update my PID, if we've restricted the communicator
7441  if (! ReducedComm.is_null()) {
7442  MyPID = ReducedComm->getRank();
7443  }
7444  else {
7445  MyPID = -2; // For debugging
7446  }
7447  }
7448  else {
7449  ReducedComm = MyRowMap->getComm();
7450  }
7451 
7452  /***************************************************/
7453  /***** 2) From Tpera::DistObject::doTransfer() ****/
7454  /***************************************************/
7455 #ifdef HAVE_TPETRA_MMM_TIMINGS
7456  MM = Teuchos::null;
7457  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ImportSetup"))));
7458 #endif
7459  // Get the owning PIDs
7460  RCP<const import_type> MyImporter = getImporter();
7461 
7462  // check whether domain maps of source graph and base domain map is the same
7463  bool bSameDomainMap = BaseDomainMap->isSameAs(*getDomainMap());
7464 
7465  if (! restrictComm && ! MyImporter.is_null() && bSameDomainMap ) {
7466  // Same domain map as source graph
7467  //
7468  // NOTE: This won't work for restrictComm (because the Import
7469  // doesn't know the restricted PIDs), though writing an
7470  // optimized version for that case would be easy (Import an
7471  // IntVector of the new PIDs). Might want to add this later.
7472  Import_Util::getPids(*MyImporter, SourcePids, false);
7473  }
7474  else if (restrictComm && ! MyImporter.is_null() && bSameDomainMap) {
7475  // Same domain map as source graph (restricted communicator)
7476  // We need one import from the domain to the column map
7477  ivector_type SourceDomain_pids(getDomainMap(),true);
7478  ivector_type SourceCol_pids(getColMap());
7479  // SourceDomain_pids contains the restricted pids
7480  SourceDomain_pids.putScalar(MyPID);
7481 
7482  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
7483  SourcePids.resize(getColMap()->getNodeNumElements());
7484  SourceCol_pids.get1dCopy(SourcePids());
7485  }
7486  else if (MyImporter.is_null() && bSameDomainMap) {
7487  // Graph has no off-process entries
7488  SourcePids.resize(getColMap()->getNodeNumElements());
7489  SourcePids.assign(getColMap()->getNodeNumElements(), MyPID);
7490  }
7491  else if ( ! MyImporter.is_null() &&
7492  ! domainTransfer.is_null() ) {
7493  // general implementation for rectangular matrices with
7494  // domain map different than SourceGraph domain map.
7495  // User has to provide a DomainTransfer object. We need
7496  // to communications (import/export)
7497 
7498  // TargetDomain_pids lives on the rebalanced new domain map
7499  ivector_type TargetDomain_pids(domainMap);
7500  TargetDomain_pids.putScalar(MyPID);
7501 
7502  // SourceDomain_pids lives on the non-rebalanced old domain map
7503  ivector_type SourceDomain_pids(getDomainMap());
7504 
7505  // SourceCol_pids lives on the non-rebalanced old column map
7506  ivector_type SourceCol_pids(getColMap());
7507 
7508  if (! reverseMode && ! xferDomainAsImport.is_null() ) {
7509  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsImport, INSERT);
7510  }
7511  else if (reverseMode && ! xferDomainAsExport.is_null() ) {
7512  SourceDomain_pids.doExport(TargetDomain_pids, *xferDomainAsExport, INSERT);
7513  }
7514  else if (! reverseMode && ! xferDomainAsExport.is_null() ) {
7515  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsExport, INSERT);
7516  }
7517  else if (reverseMode && ! xferDomainAsImport.is_null() ) {
7518  SourceDomain_pids.doImport(TargetDomain_pids, *xferDomainAsImport, INSERT);
7519  }
7520  else {
7521  TEUCHOS_TEST_FOR_EXCEPTION(
7522  true, std::logic_error,
7523  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7524  }
7525  SourceCol_pids.doImport(SourceDomain_pids, *MyImporter, INSERT);
7526  SourcePids.resize(getColMap()->getNodeNumElements());
7527  SourceCol_pids.get1dCopy(SourcePids());
7528  }
7529  else if (BaseDomainMap->isSameAs(*BaseRowMap) &&
7530  getDomainMap()->isSameAs(*getRowMap())) {
7531  // We can use the rowTransfer + SourceGraph's Import to find out who owns what.
7532  ivector_type TargetRow_pids(domainMap);
7533  ivector_type SourceRow_pids(getRowMap());
7534  ivector_type SourceCol_pids(getColMap());
7535 
7536  TargetRow_pids.putScalar(MyPID);
7537  if (! reverseMode && xferAsImport != nullptr) {
7538  SourceRow_pids.doExport(TargetRow_pids, *xferAsImport, INSERT);
7539  }
7540  else if (reverseMode && xferAsExport != nullptr) {
7541  SourceRow_pids.doExport(TargetRow_pids, *xferAsExport, INSERT);
7542  }
7543  else if (! reverseMode && xferAsExport != nullptr) {
7544  SourceRow_pids.doImport(TargetRow_pids, *xferAsExport, INSERT);
7545  }
7546  else if (reverseMode && xferAsImport != nullptr) {
7547  SourceRow_pids.doImport(TargetRow_pids, *xferAsImport, INSERT);
7548  }
7549  else {
7550  TEUCHOS_TEST_FOR_EXCEPTION(
7551  true, std::logic_error,
7552  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7553  }
7554  SourceCol_pids.doImport(SourceRow_pids, *MyImporter, INSERT);
7555  SourcePids.resize(getColMap()->getNodeNumElements());
7556  SourceCol_pids.get1dCopy(SourcePids());
7557  }
7558  else {
7559  TEUCHOS_TEST_FOR_EXCEPTION(
7560  true, std::invalid_argument,
7561  prefix << "This method only allows either domainMap == getDomainMap(), "
7562  "or (domainMap == rowTransfer.getTargetMap() and getDomainMap() == getRowMap()).");
7563  }
7564 
7565  // Tpetra-specific stuff
7566  size_t constantNumPackets = destGraph->constantNumberOfPackets();
7567  if (constantNumPackets == 0) {
7568  destGraph->reallocArraysForNumPacketsPerLid(ExportLIDs.size(),
7569  RemoteLIDs.size());
7570  }
7571  else {
7572  // There are a constant number of packets per element. We
7573  // already know (from the number of "remote" (incoming)
7574  // elements) how many incoming elements we expect, so we can
7575  // resize the buffer accordingly.
7576  const size_t rbufLen = RemoteLIDs.size() * constantNumPackets;
7577  destGraph->reallocImportsIfNeeded(rbufLen, false, nullptr);
7578  }
7579 
7580  {
7581  // packAndPrepare* methods modify numExportPacketsPerLID_.
7582  destGraph->numExportPacketsPerLID_.modify_host();
7583  Teuchos::ArrayView<size_t> numExportPacketsPerLID =
7584  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7585 
7586  // Pack & Prepare w/ owning PIDs
7587  packCrsGraphWithOwningPIDs(*this, destGraph->exports_,
7588  numExportPacketsPerLID, ExportLIDs,
7589  SourcePids, constantNumPackets, Distor);
7590  }
7591 
7592  // Do the exchange of remote data.
7593 #ifdef HAVE_TPETRA_MMM_TIMINGS
7594  MM = Teuchos::null;
7595  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Transfer"))));
7596 #endif
7597 
7598  if (communication_needed) {
7599  if (reverseMode) {
7600  if (constantNumPackets == 0) { // variable number of packets per LID
7601  // Make sure that host has the latest version, since we're
7602  // using the version on host. If host has the latest
7603  // version, syncing to host does nothing.
7604  destGraph->numExportPacketsPerLID_.sync_host();
7605  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7606  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7607  destGraph->numImportPacketsPerLID_.sync_host();
7608  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7609  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7610  Distor.doReversePostsAndWaits(numExportPacketsPerLID, 1,
7611  numImportPacketsPerLID);
7612  size_t totalImportPackets = 0;
7613  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7614  totalImportPackets += numImportPacketsPerLID[i];
7615  }
7616 
7617  // Reallocation MUST go before setting the modified flag,
7618  // because it may clear out the flags.
7619  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7620  destGraph->imports_.modify_host();
7621  Teuchos::ArrayView<packet_type> hostImports =
7622  getArrayViewFromDualView(destGraph->imports_);
7623  // This is a legacy host pack/unpack path, so use the host
7624  // version of exports_.
7625  destGraph->exports_.sync_host();
7626  Teuchos::ArrayView<const packet_type> hostExports =
7627  getArrayViewFromDualView(destGraph->exports_);
7628  Distor.doReversePostsAndWaits(hostExports,
7629  numExportPacketsPerLID,
7630  hostImports,
7631  numImportPacketsPerLID);
7632  }
7633  else { // constant number of packets per LI
7634  destGraph->imports_.modify_host();
7635  Teuchos::ArrayView<packet_type> hostImports =
7636  getArrayViewFromDualView(destGraph->imports_);
7637  // This is a legacy host pack/unpack path, so use the host
7638  // version of exports_.
7639  destGraph->exports_.sync_host();
7640  Teuchos::ArrayView<const packet_type> hostExports =
7641  getArrayViewFromDualView(destGraph->exports_);
7642  Distor.doReversePostsAndWaits(hostExports,
7643  constantNumPackets,
7644  hostImports);
7645  }
7646  }
7647  else { // forward mode (the default)
7648  if (constantNumPackets == 0) { // variable number of packets per LID
7649  // Make sure that host has the latest version, since we're
7650  // using the version on host. If host has the latest
7651  // version, syncing to host does nothing.
7652  destGraph->numExportPacketsPerLID_.sync_host();
7653  Teuchos::ArrayView<const size_t> numExportPacketsPerLID =
7654  getArrayViewFromDualView(destGraph->numExportPacketsPerLID_);
7655  destGraph->numImportPacketsPerLID_.sync_host();
7656  Teuchos::ArrayView<size_t> numImportPacketsPerLID =
7657  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7658  Distor.doPostsAndWaits(numExportPacketsPerLID, 1,
7659  numImportPacketsPerLID);
7660  size_t totalImportPackets = 0;
7661  for (Array_size_type i = 0; i < numImportPacketsPerLID.size(); ++i) {
7662  totalImportPackets += numImportPacketsPerLID[i];
7663  }
7664 
7665  // Reallocation MUST go before setting the modified flag,
7666  // because it may clear out the flags.
7667  destGraph->reallocImportsIfNeeded(totalImportPackets, false, nullptr);
7668  destGraph->imports_.modify_host();
7669  Teuchos::ArrayView<packet_type> hostImports =
7670  getArrayViewFromDualView(destGraph->imports_);
7671  // This is a legacy host pack/unpack path, so use the host
7672  // version of exports_.
7673  destGraph->exports_.sync_host();
7674  Teuchos::ArrayView<const packet_type> hostExports =
7675  getArrayViewFromDualView(destGraph->exports_);
7676  Distor.doPostsAndWaits(hostExports,
7677  numExportPacketsPerLID,
7678  hostImports,
7679  numImportPacketsPerLID);
7680  }
7681  else { // constant number of packets per LID
7682  destGraph->imports_.modify_host();
7683  Teuchos::ArrayView<packet_type> hostImports =
7684  getArrayViewFromDualView(destGraph->imports_);
7685  // This is a legacy host pack/unpack path, so use the host
7686  // version of exports_.
7687  destGraph->exports_.sync_host();
7688  Teuchos::ArrayView<const packet_type> hostExports =
7689  getArrayViewFromDualView(destGraph->exports_);
7690  Distor.doPostsAndWaits(hostExports,
7691  constantNumPackets,
7692  hostImports);
7693  }
7694  }
7695  }
7696 
7697  /*********************************************************************/
7698  /**** 3) Copy all of the Same/Permute/Remote data into CSR_arrays ****/
7699  /*********************************************************************/
7700 
7701 #ifdef HAVE_TPETRA_MMM_TIMINGS
7702  MM = Teuchos::null;
7703  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-1"))));
7704 #endif
7705 
7706  // Backwards compatibility measure. We'll use this again below.
7707  destGraph->numImportPacketsPerLID_.sync_host();
7708  Teuchos::ArrayView<const size_t> numImportPacketsPerLID =
7709  getArrayViewFromDualView(destGraph->numImportPacketsPerLID_);
7710  destGraph->imports_.sync_host();
7711  Teuchos::ArrayView<const packet_type> hostImports =
7712  getArrayViewFromDualView(destGraph->imports_);
7713  size_t mynnz =
7714  unpackAndCombineWithOwningPIDsCount(*this, RemoteLIDs, hostImports,
7715  numImportPacketsPerLID,
7716  constantNumPackets, Distor, INSERT,
7717  NumSameIDs, PermuteToLIDs, PermuteFromLIDs);
7718  size_t N = BaseRowMap->getNodeNumElements();
7719 
7720  // Allocations
7721  ArrayRCP<size_t> CSR_rowptr(N+1);
7722  ArrayRCP<GO> CSR_colind_GID;
7723  ArrayRCP<LO> CSR_colind_LID;
7724  CSR_colind_GID.resize(mynnz);
7725 
7726  // If LO and GO are the same, we can reuse memory when
7727  // converting the column indices from global to local indices.
7728  if (typeid(LO) == typeid(GO)) {
7729  CSR_colind_LID = Teuchos::arcp_reinterpret_cast<LO>(CSR_colind_GID);
7730  }
7731  else {
7732  CSR_colind_LID.resize(mynnz);
7733  }
7734 
7735  // FIXME (mfh 15 May 2014) Why can't we abstract this out as an
7736  // unpackAndCombine method on a "CrsArrays" object? This passing
7737  // in a huge list of arrays is icky. Can't we have a bit of an
7738  // abstraction? Implementing a concrete DistObject subclass only
7739  // takes five methods.
7740  unpackAndCombineIntoCrsArrays(*this, RemoteLIDs, hostImports,
7741  numImportPacketsPerLID, constantNumPackets,
7742  Distor, INSERT, NumSameIDs, PermuteToLIDs,
7743  PermuteFromLIDs, N, mynnz, MyPID,
7744  CSR_rowptr(), CSR_colind_GID(),
7745  SourcePids(), TargetPids);
7746 
7747  /**************************************************************/
7748  /**** 4) Call Optimized MakeColMap w/ no Directory Lookups ****/
7749  /**************************************************************/
7750 #ifdef HAVE_TPETRA_MMM_TIMINGS
7751  MM = Teuchos::null;
7752  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("Unpack-2"))));
7753 #endif
7754  // Call an optimized version of makeColMap that avoids the
7755  // Directory lookups (since the Import object knows who owns all
7756  // the GIDs).
7757  Teuchos::Array<int> RemotePids;
7758  Import_Util::lowCommunicationMakeColMapAndReindex(CSR_rowptr(),
7759  CSR_colind_LID(),
7760  CSR_colind_GID(),
7761  BaseDomainMap,
7762  TargetPids, RemotePids,
7763  MyColMap);
7764 
7765  /*******************************************************/
7766  /**** 4) Second communicator restriction phase ****/
7767  /*******************************************************/
7768  if (restrictComm) {
7769  ReducedColMap = (MyRowMap.getRawPtr() == MyColMap.getRawPtr()) ?
7770  ReducedRowMap :
7771  MyColMap->replaceCommWithSubset(ReducedComm);
7772  MyColMap = ReducedColMap; // Reset the "my" maps
7773  }
7774 
7775  // Replace the col map
7776  destGraph->replaceColMap(MyColMap);
7777 
7778  // Short circuit if the processor is no longer in the communicator
7779  //
7780  // NOTE: Epetra replaces modifies all "removed" processes so they
7781  // have a dummy (serial) Map that doesn't touch the original
7782  // communicator. Duplicating that here might be a good idea.
7783  if (ReducedComm.is_null()) {
7784  return;
7785  }
7786 
7787  /***************************************************/
7788  /**** 5) Sort ****/
7789  /***************************************************/
7790  if ((! reverseMode && xferAsImport != nullptr) ||
7791  (reverseMode && xferAsExport != nullptr)) {
7792  Import_Util::sortCrsEntries(CSR_rowptr(),
7793  CSR_colind_LID());
7794  }
7795  else if ((! reverseMode && xferAsExport != nullptr) ||
7796  (reverseMode && xferAsImport != nullptr)) {
7797  Import_Util::sortAndMergeCrsEntries(CSR_rowptr(),
7798  CSR_colind_LID());
7799  if (CSR_rowptr[N] != mynnz) {
7800  CSR_colind_LID.resize(CSR_rowptr[N]);
7801  }
7802  }
7803  else {
7804  TEUCHOS_TEST_FOR_EXCEPTION(
7805  true, std::logic_error,
7806  prefix << "Should never get here! Please report this bug to a Tpetra developer.");
7807  }
7808  /***************************************************/
7809  /**** 6) Reset the colmap and the arrays ****/
7810  /***************************************************/
7811 
7812  // Call constructor for the new graph (restricted as needed)
7813  //
7814  destGraph->setAllIndices(CSR_rowptr, CSR_colind_LID);
7815 
7816  /***************************************************/
7817  /**** 7) Build Importer & Call ESFC ****/
7818  /***************************************************/
7819  // Pre-build the importer using the existing PIDs
7820  Teuchos::ParameterList esfc_params;
7821 #ifdef HAVE_TPETRA_MMM_TIMINGS
7822  MM = Teuchos::null;
7823  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("CreateImporter"))));
7824 #endif
7825  RCP<import_type> MyImport = rcp(new import_type(MyDomainMap, MyColMap, RemotePids));
7826 #ifdef HAVE_TPETRA_MMM_TIMINGS
7827  MM = Teuchos::null;
7828  MM = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix2+string("ESFC"))));
7829 
7830  esfc_params.set("Timer Label",prefix + std::string("TAFC"));
7831 #endif
7832  if(!params.is_null())
7833  esfc_params.set("compute global constants",params->get("compute global constants",true));
7834 
7835  destGraph->expertStaticFillComplete(MyDomainMap, MyRangeMap,
7836  MyImport, Teuchos::null, rcp(&esfc_params,false));
7837 
7838  }
7839 
// Convenience overload taking only a row Import: forwards every argument
// to transferAndFillComplete() and passes Teuchos::null as the domain
// transfer.
7840  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7841  void
7844  const import_type& importer,
7845  const Teuchos::RCP<const map_type>& domainMap,
7846  const Teuchos::RCP<const map_type>& rangeMap,
7847  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7848  {
7849  transferAndFillComplete(destGraph, importer, Teuchos::null, domainMap, rangeMap, params);
7850  }
7851 
// Convenience overload taking a row Import and a domain Import: forwards
// to transferAndFillComplete(), handing over domainImporter as an RCP
// built with Teuchos::rcpFromRef.
7852  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7853  void
7856  const import_type& rowImporter,
7857  const import_type& domainImporter,
7858  const Teuchos::RCP<const map_type>& domainMap,
7859  const Teuchos::RCP<const map_type>& rangeMap,
7860  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7861  {
      // NOTE(review): rcpFromRef is presumably non-owning, so domainImporter
      // would have to outlive this call -- confirm in the Teuchos::RCP docs.
7862  transferAndFillComplete(destGraph, rowImporter, Teuchos::rcpFromRef(domainImporter), domainMap, rangeMap, params);
7863  }
7864 
// Convenience overload taking only a row Export: forwards every argument
// to transferAndFillComplete() and passes Teuchos::null as the domain
// transfer.
7865  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7866  void
7869  const export_type& exporter,
7870  const Teuchos::RCP<const map_type>& domainMap,
7871  const Teuchos::RCP<const map_type>& rangeMap,
7872  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7873  {
7874  transferAndFillComplete(destGraph, exporter, Teuchos::null, domainMap, rangeMap, params);
7875  }
7876 
// Convenience overload taking a row Export and a domain Export: forwards
// to transferAndFillComplete(), handing over domainExporter as an RCP
// built with Teuchos::rcpFromRef.
7877  template <class LocalOrdinal, class GlobalOrdinal, class Node>
7878  void
7881  const export_type& rowExporter,
7882  const export_type& domainExporter,
7883  const Teuchos::RCP<const map_type>& domainMap,
7884  const Teuchos::RCP<const map_type>& rangeMap,
7885  const Teuchos::RCP<Teuchos::ParameterList>& params) const
7886  {
      // NOTE(review): rcpFromRef is presumably non-owning, so domainExporter
      // would have to outlive this call -- confirm in the Teuchos::RCP docs.
7887  transferAndFillComplete(destGraph, rowExporter, Teuchos::rcpFromRef(domainExporter), domainMap, rangeMap, params);
7888  }
7889 
7890 
// Exchanges state between *this and 'graph' by calling std::swap on each
// data member listed below, one at a time. Only the members named here
// are exchanged.
7891  template<class LocalOrdinal, class GlobalOrdinal, class Node>
7892  void
7895  {
      // Row, column, range and domain Maps.
7896  std::swap(graph.rowMap_, this->rowMap_);
7897  std::swap(graph.colMap_, this->colMap_);
7898  std::swap(graph.rangeMap_, this->rangeMap_);
7899  std::swap(graph.domainMap_, this->domainMap_);
7900 
      // Import and Export objects.
7901  std::swap(graph.importer_, this->importer_);
7902  std::swap(graph.exporter_, this->exporter_);
7903 
      // Local graph.
7904  std::swap(graph.lclGraph_, this->lclGraph_);
7905 
      // Per-process counts.
7906  std::swap(graph.nodeNumDiags_, this->nodeNumDiags_);
7907  std::swap(graph.nodeMaxNumRowEntries_, this->nodeMaxNumRowEntries_);
7908 
      // Global counts.
7909  std::swap(graph.globalNumEntries_, this->globalNumEntries_);
7910  std::swap(graph.globalNumDiags_, this->globalNumDiags_);
7911  std::swap(graph.globalMaxNumRowEntries_, this->globalMaxNumRowEntries_);
7912 
7913  std::swap(graph.pftype_, this->pftype_);
7914 
7915  std::swap(graph.numAllocForAllRows_, this->numAllocForAllRows_);
7916 
      // Row pointers and the 1-D / 2-D index storage.
7917  std::swap(graph.k_rowPtrs_, this->k_rowPtrs_);
7918 
7919  std::swap(graph.k_lclInds1D_, this->k_lclInds1D_);
7920  std::swap(graph.k_gblInds1D_, this->k_gblInds1D_);
7921 
7922  std::swap(graph.lclInds2D_, this->lclInds2D_);
7923  std::swap(graph.gblInds2D_, this->gblInds2D_);
7924 
7925  std::swap(graph.storageStatus_, this->storageStatus_);
7926 
      // Status flags.
7927  std::swap(graph.indicesAreAllocated_, this->indicesAreAllocated_);
7928  std::swap(graph.indicesAreLocal_, this->indicesAreLocal_);
7929  std::swap(graph.indicesAreGlobal_, this->indicesAreGlobal_);
7930  std::swap(graph.fillComplete_, this->fillComplete_);
7931  std::swap(graph.lowerTriangular_, this->lowerTriangular_);
7932  std::swap(graph.upperTriangular_, this->upperTriangular_);
7933  std::swap(graph.indicesAreSorted_, this->indicesAreSorted_);
7934  std::swap(graph.noRedundancies_, this->noRedundancies_);
7935  std::swap(graph.haveLocalConstants_, this->haveLocalConstants_);
7936  std::swap(graph.haveGlobalConstants_, this->haveGlobalConstants_);
7937 
7938  std::swap(graph.sortGhostsAssociatedWithEachProcessor_, this->sortGhostsAssociatedWithEachProcessor_);
7939 
7940  std::swap(graph.k_numAllocPerRow_, this->k_numAllocPerRow_); // View
7941  std::swap(graph.k_numRowEntries_, this->k_numRowEntries_); // View
7942  std::swap(graph.nonlocals_, this->nonlocals_); // std::map
7943  }
7944 
7945 
7946  template<class LocalOrdinal, class GlobalOrdinal, class Node>
7947  bool
7950  {
7951  auto compare_nonlocals = [&] (const nonlocals_type & m1, const nonlocals_type & m2) {
7952  bool output = true;
7953  output = m1.size() == m2.size() ? output : false;
7954  for(auto & it_m: m1)
7955  {
7956  size_t key = it_m.first;
7957  output = m2.find(key) != m2.end() ? output : false;
7958  if(output)
7959  {
7960  auto v1 = m1.find(key)->second;
7961  auto v2 = m2.find(key)->second;
7962  std::sort(v1.begin(), v1.end());
7963  std::sort(v2.begin(), v2.end());
7964 
7965  output = v1.size() == v2.size() ? output : false;
7966  for(size_t i=0; output && i<v1.size(); i++)
7967  {
7968  output = v1[i]==v2[i] ? output : false;
7969  }
7970  }
7971  }
7972  return output;
7973  };
7974 
7975  bool output = true;
7976 
7977  output = this->rowMap_->isSameAs( *(graph.rowMap_) ) ? output : false;
7978  output = this->colMap_->isSameAs( *(graph.colMap_) ) ? output : false;
7979  output = this->rangeMap_->isSameAs( *(graph.rangeMap_) ) ? output : false;
7980  output = this->domainMap_->isSameAs( *(graph.domainMap_) ) ? output : false;
7981 
7982  output = this->nodeNumDiags_ == graph.nodeNumDiags_ ? output : false;
7983  output = this->nodeMaxNumRowEntries_ == graph.nodeMaxNumRowEntries_ ? output : false;
7984 
7985  output = this->globalNumEntries_ == graph.globalNumEntries_ ? output : false;
7986  output = this->globalNumDiags_ == graph.globalNumDiags_ ? output : false;
7987  output = this->globalMaxNumRowEntries_ == graph.globalMaxNumRowEntries_ ? output : false;
7988 
7989  output = this->pftype_ == graph.pftype_ ? output : false; // ProfileType is a enum (scalar)
7990 
7991  output = this->numAllocForAllRows_ == graph.numAllocForAllRows_ ? output : false;
7992 
7993  output = this->lclInds2D_ == graph.lclInds2D_ ? output : false; // Teuchos::Array has == overloaded
7994  output = this->gblInds2D_ == graph.gblInds2D_ ? output : false; // Teuchos::Array has == overloaded
7995 
7996  output = this->storageStatus_ == graph.storageStatus_ ? output : false; // EStorageStatus is an enum
7997 
7998  output = this->indicesAreAllocated_ == graph.indicesAreAllocated_ ? output : false;
7999  output = this->indicesAreLocal_ == graph.indicesAreLocal_ ? output : false;
8000  output = this->indicesAreGlobal_ == graph.indicesAreGlobal_ ? output : false;
8001  output = this->fillComplete_ == graph.fillComplete_ ? output : false;
8002  output = this->lowerTriangular_ == graph.lowerTriangular_ ? output : false;
8003  output = this->upperTriangular_ == graph.upperTriangular_ ? output : false;
8004  output = this->indicesAreSorted_ == graph.indicesAreSorted_ ? output : false;
8005  output = this->noRedundancies_ == graph.noRedundancies_ ? output : false;
8006  output = this->haveLocalConstants_ == graph.haveLocalConstants_ ? output : false;
8007  output = this->haveGlobalConstants_ == graph.haveGlobalConstants_ ? output : false;
8008  output = this->sortGhostsAssociatedWithEachProcessor_ == this->sortGhostsAssociatedWithEachProcessor_ ? output : false;
8009 
8010  // Compare nonlocals_ -- std::map<GlobalOrdinal, std::vector<GlobalOrdinal> >
8011  // nonlocals_ isa std::map<GO, std::vector<GO> >
8012  output = compare_nonlocals(this->nonlocals_, graph.nonlocals_) ? output : false;
8013 
8014  // Compare k_numAllocPerRow_ isa Kokkos::View::HostMirror
8015  // - since this is a HostMirror type, it should be in host memory already
8016  output = this->k_numAllocPerRow_.extent(0) == graph.k_numAllocPerRow_.extent(0) ? output : false;
8017  if(output && this->k_numAllocPerRow_.extent(0) > 0)
8018  {
8019  for(size_t i=0; output && i<this->k_numAllocPerRow_.extent(0); i++)
8020  output = this->k_numAllocPerRow_(i) == graph.k_numAllocPerRow_(i) ? output : false;
8021  }
8022 
8023  // Compare k_numRowEntries_ isa Kokkos::View::HostMirror
8024  // - since this is a HostMirror type, it should be in host memory already
8025  output = this->k_numRowEntries_.extent(0) == graph.k_numRowEntries_.extent(0) ? output : false;
8026  if(output && this->k_numRowEntries_.extent(0) > 0)
8027  {
8028  for(size_t i = 0; output && i < this->k_numRowEntries_.extent(0); i++)
8029  output = this->k_numRowEntries_(i) == graph.k_numRowEntries_(i) ? output : false;
8030  }
8031 
8032  // Compare this->k_rowPtrs_ isa Kokkos::View<LocalOrdinal*, ...>
8033  output = this->k_rowPtrs_.extent(0) == graph.k_rowPtrs_.extent(0) ? output : false;
8034  if(output && this->k_rowPtrs_.extent(0) > 0)
8035  {
8036  typename local_graph_type::row_map_type::const_type::HostMirror k_rowPtrs_host_this = Kokkos::create_mirror_view(this->k_rowPtrs_);
8037  typename local_graph_type::row_map_type::const_type::HostMirror k_rowPtrs_host_graph= Kokkos::create_mirror_view(graph.k_rowPtrs_);
8038  Kokkos::deep_copy(k_rowPtrs_host_this, this->k_rowPtrs_);
8039  Kokkos::deep_copy(k_rowPtrs_host_graph, graph.k_rowPtrs_);
8040  for(size_t i=0; output && i<k_rowPtrs_host_this.extent(0); i++)
8041  output = k_rowPtrs_host_this(i) == k_rowPtrs_host_graph(i) ? output : false;
8042  }
8043 
8044  // Compare k_lclInds1D_ isa Kokkos::View<LocalOrdinal*, ...>
8045  output = this->k_lclInds1D_.extent(0) == graph.k_lclInds1D_.extent(0) ? output : false;
8046  if(output && this->k_lclInds1D_.extent(0) > 0)
8047  {
8048  typename local_graph_type::entries_type::non_const_type::HostMirror k_lclInds1D_host_this = Kokkos::create_mirror_view(this->k_lclInds1D_);
8049  typename local_graph_type::entries_type::non_const_type::HostMirror k_lclInds1D_host_graph= Kokkos::create_mirror_view(graph.k_lclInds1D_);
8050  Kokkos::deep_copy(k_lclInds1D_host_this, this->k_lclInds1D_);
8051  Kokkos::deep_copy(k_lclInds1D_host_graph, graph.k_lclInds1D_);
8052  for(size_t i=0; output && i < k_lclInds1D_host_this.extent(0); i++)
8053  output = k_lclInds1D_host_this(i) == k_lclInds1D_host_graph(i) ? output : false;
8054  }
8055 
8056  // Compare k_gblInds1D_ isa Kokkos::View<GlobalOrdinal*, ...>
8057  output = this->k_gblInds1D_.extent(0) == graph.k_gblInds1D_.extent(0) ? output : false;
8058  if(output && this->k_gblInds1D_.extent(0) > 0)
8059  {
8060  typename t_GlobalOrdinal_1D::HostMirror k_gblInds1D_host_this = Kokkos::create_mirror_view(this->k_gblInds1D_);
8061  typename t_GlobalOrdinal_1D::HostMirror k_gblInds1D_host_graph = Kokkos::create_mirror_view(graph.k_gblInds1D_);
8062  Kokkos::deep_copy(k_gblInds1D_host_this, this->k_gblInds1D_);
8063  Kokkos::deep_copy(k_gblInds1D_host_graph, graph.k_gblInds1D_);
8064  for(size_t i=0; output && i<k_gblInds1D_host_this.extent(0); i++)
8065  output = k_gblInds1D_host_this(i) == k_gblInds1D_host_graph(i) ? output : false;
8066  }
8067 
8068  // Check lclGraph_ // isa Kokkos::StaticCrsGraph<LocalOrdinal, Kokkos::LayoutLeft, execution_space>
8069  // Kokkos::StaticCrsGraph has 3 data members in it:
8070  // Kokkos::View<size_type*, ...> row_map (local_graph_type::row_map_type)
8071  // Kokkos::View<data_type*, ...> entries (local_graph_type::entries_type)
8072  // Kokkos::View<size_type*, ...> row_block_offsets (local_graph_type::row_block_type)
8073  // There is currently no Kokkos::StaticCrsGraph comparison function that's built-in, so we will just compare
8074  // the three data items here. This can be replaced if Kokkos ever puts in its own comparison routine.
8075  output = this->lclGraph_.row_map.extent(0) == graph.lclGraph_.row_map.extent(0) ? output : false;
8076  if(output && this->lclGraph_.row_map.extent(0) > 0)
8077  {
8078  typename local_graph_type::row_map_type::HostMirror lclGraph_rowmap_host_this = Kokkos::create_mirror_view(this->lclGraph_.row_map);
8079  typename local_graph_type::row_map_type::HostMirror lclGraph_rowmap_host_graph = Kokkos::create_mirror_view(graph.lclGraph_.row_map);
8080  Kokkos::deep_copy(lclGraph_rowmap_host_this, this->lclGraph_.row_map);
8081  Kokkos::deep_copy(lclGraph_rowmap_host_graph, graph.lclGraph_.row_map);
8082  for(size_t i=0; output && i<lclGraph_rowmap_host_this.extent(0); i++)
8083  output = lclGraph_rowmap_host_this(i) == lclGraph_rowmap_host_graph(i) ? output : false;
8084  }
8085 
8086  output = this->lclGraph_.entries.extent(0) == graph.lclGraph_.entries.extent(0) ? output : false;
8087  if(output && this->lclGraph_.entries.extent(0) > 0)
8088  {
8089  typename local_graph_type::entries_type::HostMirror lclGraph_entries_host_this = Kokkos::create_mirror_view(this->lclGraph_.entries);
8090  typename local_graph_type::entries_type::HostMirror lclGraph_entries_host_graph = Kokkos::create_mirror_view(graph.lclGraph_.entries);
8091  Kokkos::deep_copy(lclGraph_entries_host_this, this->lclGraph_.entries);
8092  Kokkos::deep_copy(lclGraph_entries_host_graph, graph.lclGraph_.entries);
8093  for(size_t i=0; output && i<lclGraph_entries_host_this.extent(0); i++)
8094  output = lclGraph_entries_host_this(i) == lclGraph_entries_host_graph(i) ? output : false;
8095  }
8096 
8097  output = this->lclGraph_.row_block_offsets.extent(0) == graph.lclGraph_.row_block_offsets.extent(0) ? output : false;
8098  if(output && this->lclGraph_.row_block_offsets.extent(0) > 0)
8099  {
8100  typename local_graph_type::row_block_type::HostMirror lclGraph_rbo_host_this = Kokkos::create_mirror_view(this->lclGraph_.row_block_offsets);
8101  typename local_graph_type::row_block_type::HostMirror lclGraph_rbo_host_graph = Kokkos::create_mirror_view(graph.lclGraph_.row_block_offsets);
8102  Kokkos::deep_copy(lclGraph_rbo_host_this, this->lclGraph_.row_block_offsets);
8103  Kokkos::deep_copy(lclGraph_rbo_host_graph, graph.lclGraph_.row_block_offsets);
8104  for(size_t i=0; output && i < lclGraph_rbo_host_this.extent(0); i++)
8105  output = lclGraph_rbo_host_this(i) == lclGraph_rbo_host_graph(i) ? output : false;
8106  }
8107 
8108  // For the Importer and Exporter, we shouldn't need to explicitly check them since
8109  // they will be consistent with the maps.
8110  // Note: importer_ isa Teuchos::RCP<const import_type>
8111  // exporter_ isa Teuchos::RCP<const export_type>
8112 
8113  return output;
8114  }
8115 
8116 
8117 
8118 } // namespace Tpetra
8119 
8120 //
8121 // Explicit instantiation macros
8122 //
8123 // Must be expanded from within the Tpetra namespace!
8124 //
8125 
// Expands to the template<> declaration of importAndFillCompleteCrsGraph, in
// its single-Import form (sourceGraph, importer, domainMap, rangeMap,
// params), for CrsGraph<LO,GO,NODE>.  The expansion names CrsGraph, Import,
// and Map without qualification, so it must be used inside namespace Tpetra.
#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
  template<> \
  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
  importAndFillCompleteCrsGraph( \
    const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
    const Import< \
      CrsGraph<LO,GO,NODE>::local_ordinal_type, \
      CrsGraph<LO,GO,NODE>::global_ordinal_type, \
      CrsGraph<LO,GO,NODE>::node_type>& importer, \
    const Teuchos::RCP<const Map< \
      CrsGraph<LO,GO,NODE>::local_ordinal_type, \
      CrsGraph<LO,GO,NODE>::global_ordinal_type, \
      CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
    const Teuchos::RCP<const Map< \
      CrsGraph<LO,GO,NODE>::local_ordinal_type, \
      CrsGraph<LO,GO,NODE>::global_ordinal_type, \
      CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
    const Teuchos::RCP<Teuchos::ParameterList>& params);
8140 
// Expands to the template<> declaration of importAndFillCompleteCrsGraph, in
// its two-Import form (sourceGraph, rowImporter, domainImporter, domainMap,
// rangeMap, params), for CrsGraph<LO,GO,NODE>.  The expansion names CrsGraph,
// Import, and Map without qualification, so it must be used inside namespace
// Tpetra.
#define TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
  template<> \
  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
  importAndFillCompleteCrsGraph( \
    const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
    const Import< \
      CrsGraph<LO,GO,NODE>::local_ordinal_type, \
      CrsGraph<LO,GO,NODE>::global_ordinal_type, \
      CrsGraph<LO,GO,NODE>::node_type>& rowImporter, \
    const Import< \
      CrsGraph<LO,GO,NODE>::local_ordinal_type, \
      CrsGraph<LO,GO,NODE>::global_ordinal_type, \
      CrsGraph<LO,GO,NODE>::node_type>& domainImporter, \
    const Teuchos::RCP<const Map< \
      CrsGraph<LO,GO,NODE>::local_ordinal_type, \
      CrsGraph<LO,GO,NODE>::global_ordinal_type, \
      CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
    const Teuchos::RCP<const Map< \
      CrsGraph<LO,GO,NODE>::local_ordinal_type, \
      CrsGraph<LO,GO,NODE>::global_ordinal_type, \
      CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
    const Teuchos::RCP<Teuchos::ParameterList>& params);
8158 
8159 
// Expands to the template<> declaration of exportAndFillCompleteCrsGraph, in
// its single-Export form (sourceGraph, exporter, domainMap, rangeMap,
// params), for CrsGraph<LO,GO,NODE>.  The expansion names CrsGraph, Export,
// and Map without qualification, so it must be used inside namespace Tpetra.
#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
  template<> \
  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
  exportAndFillCompleteCrsGraph( \
    const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
    const Export< \
      CrsGraph<LO,GO,NODE>::local_ordinal_type, \
      CrsGraph<LO,GO,NODE>::global_ordinal_type, \
      CrsGraph<LO,GO,NODE>::node_type>& exporter, \
    const Teuchos::RCP<const Map< \
      CrsGraph<LO,GO,NODE>::local_ordinal_type, \
      CrsGraph<LO,GO,NODE>::global_ordinal_type, \
      CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
    const Teuchos::RCP<const Map< \
      CrsGraph<LO,GO,NODE>::local_ordinal_type, \
      CrsGraph<LO,GO,NODE>::global_ordinal_type, \
      CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
    const Teuchos::RCP<Teuchos::ParameterList>& params);
8174 
// Expands to the template<> declaration of exportAndFillCompleteCrsGraph, in
// its two-Export form (sourceGraph, rowExporter, domainExporter, domainMap,
// rangeMap, params), for CrsGraph<LO,GO,NODE>.  The expansion names CrsGraph,
// Export, and Map without qualification, so it must be used inside namespace
// Tpetra.
#define TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
  template<> \
  Teuchos::RCP<CrsGraph<LO,GO,NODE> > \
  exportAndFillCompleteCrsGraph( \
    const Teuchos::RCP<const CrsGraph<LO,GO,NODE> >& sourceGraph, \
    const Export< \
      CrsGraph<LO,GO,NODE>::local_ordinal_type, \
      CrsGraph<LO,GO,NODE>::global_ordinal_type, \
      CrsGraph<LO,GO,NODE>::node_type>& rowExporter, \
    const Export< \
      CrsGraph<LO,GO,NODE>::local_ordinal_type, \
      CrsGraph<LO,GO,NODE>::global_ordinal_type, \
      CrsGraph<LO,GO,NODE>::node_type>& domainExporter, \
    const Teuchos::RCP<const Map< \
      CrsGraph<LO,GO,NODE>::local_ordinal_type, \
      CrsGraph<LO,GO,NODE>::global_ordinal_type, \
      CrsGraph<LO,GO,NODE>::node_type> >& domainMap, \
    const Teuchos::RCP<const Map< \
      CrsGraph<LO,GO,NODE>::local_ordinal_type, \
      CrsGraph<LO,GO,NODE>::global_ordinal_type, \
      CrsGraph<LO,GO,NODE>::node_type> >& rangeMap, \
    const Teuchos::RCP<Teuchos::ParameterList>& params);
8192 
8193 
// Umbrella macro for one (LO, GO, NODE) combination: explicitly instantiates
// the CrsGraph class template, then expands the four import/export
// fill-complete macros for the same types.  Like those macros, it must be
// used inside namespace Tpetra.
#define TPETRA_CRSGRAPH_INSTANT( LO, GO, NODE ) \
  template class CrsGraph<LO, GO, NODE>; \
  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT(LO,GO,NODE) \
  TPETRA_CRSGRAPH_IMPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE) \
  TPETRA_CRSGRAPH_EXPORT_AND_FILL_COMPLETE_INSTANT_TWO(LO,GO,NODE)
8200 
8201 
8202 #endif // TPETRA_CRSGRAPH_DEF_HPP
Tpetra::CrsGraph::t_GlobalOrdinal_1D
Kokkos::View< global_ordinal_type *, execution_space > t_GlobalOrdinal_1D
Type of the k_gblInds1D_ array of global column indices.
Definition: Tpetra_CrsGraph_decl.hpp:2416
Tpetra::CrsGraph::globalNumDiags_
global_size_t globalNumDiags_
Global number of (populated) diagonal entries.
Definition: Tpetra_CrsGraph_decl.hpp:2354
Tpetra_Import_Util.hpp
Internal functions and macros designed for use with Tpetra::Import and Tpetra::Export objects.
Tpetra::createOneToOne
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Creates a one-to-one version of the given Map, in which each global index is owned by only one process.
Tpetra::ProfileType
ProfileType
Definition: Tpetra_ConfigDefs.hpp:130
Tpetra::Details::unpackAndCombineIntoCrsArrays
void unpackAndCombineIntoCrsArrays(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, const size_t constantNumPackets, Distributor &distor, const CombineMode combineMode, const size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs, size_t TargetNumRows, size_t TargetNumNonzeros, const int MyTargetPID, const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< GO > &CRS_colind, const Teuchos::ArrayView< const int > &SourcePids, Teuchos::Array< int > &TargetPids)
unpackAndCombineIntoCrsArrays
Tpetra::CrsGraph::upperTriangular_
bool upperTriangular_
Whether the graph is locally upper triangular.
Definition: Tpetra_CrsGraph_decl.hpp:2541
Tpetra::Details::determineLocalTriangularStructure
LocalTriangularStructureResult< typename LocalMapType::local_ordinal_type > determineLocalTriangularStructure(const LocalGraphType &G, const LocalMapType &rowMap, const LocalMapType &colMap, const bool ignoreMapsForTriangularStructure)
Count the local number of diagonal entries in a local sparse graph, and determine whether the local p...
Definition: Tpetra_Details_determineLocalTriangularStructure.hpp:241
Tpetra::Export
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
Definition: Tpetra_Export_decl.hpp:124
Tpetra::RowInfo
Allocation information for a locally owned row in a CrsGraph or CrsMatrix.
Definition: Tpetra_CrsGraph_decl.hpp:128
Tpetra::CrsGraph::nonlocals_
nonlocals_type nonlocals_
Nonlocal data given to insertGlobalIndices.
Definition: Tpetra_CrsGraph_decl.hpp:2555
Tpetra::Details::DefaultTypes::node_type
::Kokkos::Compat::KokkosDeviceWrapperNode< execution_space > node_type
Default value of Node template parameter.
Definition: Tpetra_Details_DefaultTypes.hpp:121
Tpetra::Map::getGlobalElement
GlobalOrdinal getGlobalElement(LocalOrdinal localIndex) const
The global index corresponding to the given local index.
Definition: Tpetra_Map_def.hpp:1246
Tpetra::Details::packCrsGraph
void packCrsGraph(const CrsGraph< LO, GO, NT > &sourceGraph, Teuchos::Array< typename CrsGraph< LO, GO, NT >::packet_type > &exports, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
Definition: Tpetra_Details_packCrsGraph_def.hpp:798
Tpetra_Details_copyOffsets.hpp
Declare and define Tpetra::Details::copyOffsets, an implementation detail of Tpetra (in particular,...
Tpetra::Map::isNodeGlobalElement
bool isNodeGlobalElement(GlobalOrdinal globalIndex) const
Whether the given global index is owned by this Map on the calling process.
Definition: Tpetra_Map_def.hpp:1278
Tpetra::CrsGraph::haveLocalConstants_
bool haveLocalConstants_
Whether this process has computed local constants.
Definition: Tpetra_CrsGraph_decl.hpp:2548
Tpetra_Details_Behavior.hpp
Declaration of Tpetra::Details::Behavior, a class that describes Tpetra's behavior.
Tpetra::Details::makeColMap
int makeColMap(Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &colMap, Teuchos::Array< int > &remotePIDs, const Teuchos::RCP< const Tpetra::Map< LO, GO, NT > > &domMap, const RowGraph< LO, GO, NT > &graph, const bool sortEachProcsGids=true, std::ostream *errStrm=NULL)
Make the graph's column Map.
Definition: Tpetra_Details_makeColMap_def.hpp:67
Tpetra::CrsGraph::rangeMap_
Teuchos::RCP< const map_type > rangeMap_
The Map describing the range of the (matrix corresponding to the) graph.
Definition: Tpetra_CrsGraph_decl.hpp:2314
Tpetra::Map
A parallel distribution of indices over processes.
Definition: Tpetra_Map_decl.hpp:246
Tpetra::CrsGraph::lowerTriangular_
bool lowerTriangular_
Whether the graph is locally lower triangular.
Definition: Tpetra_CrsGraph_decl.hpp:2539
Tpetra::Import
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
Definition: Tpetra_Import_decl.hpp:116
Tpetra::Details::packCrsGraphNew
void packCrsGraphNew(const CrsGraph< LO, GO, NT > &sourceGraph, const Kokkos::DualView< const LO *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportLIDs, const Kokkos::DualView< const int *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exportPIDs, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports, Kokkos::DualView< size_t *, typename CrsGraph< LO, GO, NT >::buffer_device_type > numPacketsPerLID, size_t &constantNumPackets, const bool pack_pids, Distributor &distor)
Pack specified entries of the given local sparse graph for communication, for "new" DistObject interf...
Definition: Tpetra_Details_packCrsGraph_def.hpp:888
Tpetra_Details_crsUtils.hpp
Functions for manipulating CRS arrays.
Tpetra::Details::ProfilingRegion
Profile the given scope.
Definition: Tpetra_Details_Profiling.hpp:100
Tpetra::CrsGraph::rowMap_
Teuchos::RCP< const map_type > rowMap_
The Map describing the distribution of rows of the graph.
Definition: Tpetra_CrsGraph_decl.hpp:2310
Tpetra::CrsGraph::importer_
Teuchos::RCP< const import_type > importer_
The Import from the domain Map to the column Map.
Definition: Tpetra_CrsGraph_decl.hpp:2324
Tpetra::Details::getArrayViewFromDualView
Teuchos::ArrayView< typename DualViewType::t_dev::value_type > getArrayViewFromDualView(const DualViewType &x)
Get a Teuchos::ArrayView which views the host Kokkos::View of the input 1-D Kokkos::DualView.
Definition: Tpetra_Util.hpp:878
Tpetra::CrsGraph::sortGhostsAssociatedWithEachProcessor_
bool sortGhostsAssociatedWithEachProcessor_
Whether to require makeColMap() (and therefore fillComplete()) to order column Map GIDs associated wi...
Definition: Tpetra_CrsGraph_decl.hpp:2571
Tpetra::CrsGraph::lclGraph_
local_graph_type lclGraph_
Local graph; only initialized after first fillComplete() call.
Definition: Tpetra_CrsGraph_decl.hpp:2334
Tpetra::CrsGraph::k_numAllocPerRow_
Kokkos::View< const size_t *, execution_space >::HostMirror k_numAllocPerRow_
The maximum number of entries to allow in each locally owned row, per row.
Definition: Tpetra_CrsGraph_decl.hpp:2390
Tpetra::Details::padCrsArrays
void padCrsArrays(const RowPtr &rowPtrBeg, const RowPtr &rowPtrEnd, Indices &indices, const Padding &padding)
Determine if the row pointers and indices arrays need to be resized to accommodate new entries....
Definition: Tpetra_Details_crsUtils.hpp:303
Tpetra::CrsGraph::exporter_
Teuchos::RCP< const export_type > exporter_
The Export from the row Map to the range Map.
Definition: Tpetra_CrsGraph_decl.hpp:2331
Tpetra::DistObject< GlobalOrdinal, LocalOrdinal, GlobalOrdinal, Node >
Tpetra::CrsGraph::haveGlobalConstants_
bool haveGlobalConstants_
Whether all processes have computed global constants.
Definition: Tpetra_CrsGraph_decl.hpp:2550
Details
Implementation details of Tpetra.
Tpetra::Details::gathervPrint
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator,...
Definition: Tpetra_Details_gathervPrint.cpp:52
Tpetra::Details::Behavior::debug
static bool debug()
Whether Tpetra is in debug mode.
Definition: Tpetra_Details_Behavior.cpp:192
Tpetra::CrsGraph::k_lclInds1D_
local_graph_type::entries_type::non_const_type k_lclInds1D_
Local column indices for all rows.
Definition: Tpetra_CrsGraph_decl.hpp:2413
Tpetra::CrsGraph::execution_space
typename device_type::execution_space execution_space
This class' Kokkos execution space.
Definition: Tpetra_CrsGraph_decl.hpp:297
Tpetra::CrsGraph::k_rowPtrs_
local_graph_type::row_map_type::const_type k_rowPtrs_
Row offsets for "1-D" storage.
Definition: Tpetra_CrsGraph_decl.hpp:2451
Tpetra::CrsGraph::noRedundancies_
bool noRedundancies_
Whether the graph's indices are non-redundant (merged) in each row, on this process.
Definition: Tpetra_CrsGraph_decl.hpp:2546
Tpetra::CrsGraph::local_ordinal_type
LocalOrdinal local_ordinal_type
The type of the graph's local indices.
Definition: Tpetra_CrsGraph_decl.hpp:291
Tpetra_Import_Util2.hpp
Utility functions for packing and unpacking sparse matrix entries.
Tpetra::Map::getLocalMap
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
Definition: Tpetra_Map_def.hpp:1297
Tpetra::Details::packCrsGraphWithOwningPIDs
void packCrsGraphWithOwningPIDs(const CrsGraph< LO, GO, NT > &sourceGraph, Kokkos::DualView< typename CrsGraph< LO, GO, NT >::packet_type *, typename CrsGraph< LO, GO, NT >::buffer_device_type > &exports_dv, const Teuchos::ArrayView< size_t > &numPacketsPerLID, const Teuchos::ArrayView< const LO > &exportLIDs, const Teuchos::ArrayView< const int > &sourcePIDs, size_t &constantNumPackets, Distributor &distor)
Pack specified entries of the given local sparse graph for communication.
Definition: Tpetra_Details_packCrsGraph_def.hpp:987
Tpetra::CrsGraph::globalNumEntries_
global_size_t globalNumEntries_
Global number of entries in the graph.
Definition: Tpetra_CrsGraph_decl.hpp:2349
Tpetra::Details::UnpackAndCombineCrsGraphImpl::unpackAndCombine
void unpackAndCombine(const RowView &row_ptrs_beg, const RowView &row_ptrs_end, IndicesView &indices, const Kokkos::View< const Packet *, BufferDevice, Kokkos::MemoryUnmanaged > &imports, const Kokkos::View< const size_t *, BufferDevice, Kokkos::MemoryUnmanaged > &num_packets_per_lid, const Kokkos::View< const LocalOrdinal *, BufferDevice, Kokkos::MemoryUnmanaged > &import_lids, const bool unpack_pids)
Perform the unpack operation for the graph.
Definition: Tpetra_Details_unpackCrsGraphAndCombine_def.hpp:334
Tpetra::CrsGraph::pftype_
ProfileType pftype_
Whether the graph was allocated with static or dynamic profile.
Definition: Tpetra_CrsGraph_decl.hpp:2362
Tpetra_Details_Profiling.hpp
Declaration of Tpetra::Details::Profiling, a scope guard for Kokkos Profiling.
Tpetra::CrsGraph::storageStatus_
::Tpetra::Details::EStorageStatus storageStatus_
Status of the graph's storage, when not in a fill-complete state.
Definition: Tpetra_CrsGraph_decl.hpp:2531
Tpetra::Details::convertColumnIndicesFromGlobalToLocal
OffsetType convertColumnIndicesFromGlobalToLocal(const Kokkos::View< LO *, DT > &lclColInds, const Kokkos::View< const GO *, DT > &gblColInds, const Kokkos::View< const OffsetType *, DT > &ptr, const LocalMap< LO, GO, DT > &lclColMap, const Kokkos::View< const NumEntType *, DT > &numRowEnt)
Convert a (StaticProfile) CrsGraph's global column indices into local column indices.
Definition: Tpetra_CrsGraph_def.hpp:161
Tpetra::Details::getGlobalNumDiags
CrsGraphType::global_ordinal_type getGlobalNumDiags(const CrsGraphType &G)
Number of populated diagonal entries in the given sparse graph, over all processes in the graph's (MP...
Definition: Tpetra_Details_getNumDiags.hpp:406
Tpetra::CrsGraph< LO, GO, node_type >::node_type
node_type node_type
This class' Kokkos Node type.
Definition: Tpetra_CrsGraph_decl.hpp:303
Tpetra::Vector
A distributed dense vector.
Definition: Tpetra_Vector_decl.hpp:82
Tpetra::Distributor
Sets up and executes a communication plan for a Tpetra DistObject.
Definition: Tpetra_Distributor.hpp:192
Tpetra::Map::getLocalElement
LocalOrdinal getLocalElement(GlobalOrdinal globalIndex) const
The local index corresponding to the given global index.
Definition: Tpetra_Map_def.hpp:1223
Tpetra::CrsGraph< LO, GO, node_type >::device_type
typename Node::device_type device_type
This class' Kokkos device type.
Definition: Tpetra_CrsGraph_decl.hpp:295
Tpetra::CrsGraph::domainMap_
Teuchos::RCP< const map_type > domainMap_
The Map describing the domain of the (matrix corresponding to the) graph.
Definition: Tpetra_CrsGraph_decl.hpp:2316
Tpetra::Details::DefaultTypes::local_ordinal_type
int local_ordinal_type
Default value of LocalOrdinal template parameter.
Definition: Tpetra_Details_DefaultTypes.hpp:72
Tpetra::CrsGraph::getComm
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const override
Returns the communicator.
Definition: Tpetra_CrsGraph_def.hpp:1395
Tpetra::CrsGraph::globalMaxNumRowEntries_
global_size_t globalMaxNumRowEntries_
Global maximum of the number of entries in each row.
Definition: Tpetra_CrsGraph_decl.hpp:2359
Tpetra::CrsGraph::numAllocForAllRows_
size_t numAllocForAllRows_
The maximum number of entries to allow in each locally owned row.
Definition: Tpetra_CrsGraph_decl.hpp:2401
Tpetra::CrsGraph::k_numRowEntries_
num_row_entries_type k_numRowEntries_
The number of local entries in each locally owned row.
Definition: Tpetra_CrsGraph_decl.hpp:2518
Tpetra_Details_getEntryOnHost.hpp
Declaration and definition of Tpetra::Details::getEntryOnHost.
Tpetra::CrsGraph::colMap_
Teuchos::RCP< const map_type > colMap_
The Map describing the distribution of columns of the graph.
Definition: Tpetra_CrsGraph_decl.hpp:2312
Tpetra::removeEmptyProcessesInPlace
void removeEmptyProcessesInPlace(Teuchos::RCP< DistObjectType > &input, const Teuchos::RCP< const Map< typename DistObjectType::local_ordinal_type, typename DistObjectType::global_ordinal_type, typename DistObjectType::node_type > > &newMap)
Remove processes which contain no elements in this object's Map.
Definition: Tpetra_DistObject_def.hpp:1949
Tpetra::Details::computeOffsetsFromConstantCount
OffsetsViewType::non_const_value_type computeOffsetsFromConstantCount(const OffsetsViewType &ptr, const CountType count)
Compute offsets from a constant count.
Definition: Tpetra_Details_computeOffsets.hpp:359
Tpetra::CrsGraph
A distributed graph accessed by rows (adjacency lists) and stored sparsely.
Definition: Tpetra_CrsGraph_decl.hpp:278
Tpetra::CrsGraph::local_graph_type
Kokkos::StaticCrsGraph< local_ordinal_type, Kokkos::LayoutLeft, execution_space > local_graph_type
The type of the part of the sparse graph on each MPI process.
Definition: Tpetra_CrsGraph_decl.hpp:308
Tpetra::CrsGraph::getIndexBase
global_ordinal_type getIndexBase() const override
Returns the index base for global indices for this graph.
Definition: Tpetra_CrsGraph_def.hpp:1403
Tpetra::Map::isNodeLocalElement
bool isNodeLocalElement(LocalOrdinal localIndex) const
Whether the given local index is valid for this Map on the calling process.
Definition: Tpetra_Map_def.hpp:1266
Tpetra::CrsGraph::gblInds2D_
Teuchos::ArrayRCP< Teuchos::Array< global_ordinal_type > > gblInds2D_
Global column indices for all rows.
Definition: Tpetra_CrsGraph_decl.hpp:2491
Tpetra_Details_computeOffsets.hpp
Declare and define the functions Tpetra::Details::computeOffsetsFromCounts and Tpetra::computeOffsets...
Tpetra::CrsGraph::nodeMaxNumRowEntries_
size_t nodeMaxNumRowEntries_
Local maximum of the number of entries in each row.
Definition: Tpetra_CrsGraph_decl.hpp:2344
Tpetra::CrsGraph::indicesAreSorted_
bool indicesAreSorted_
Whether the graph's indices are sorted in each row, on this process.
Definition: Tpetra_CrsGraph_decl.hpp:2543
Tpetra::CrsGraph::lclInds2D_
Teuchos::ArrayRCP< Teuchos::Array< local_ordinal_type > > lclInds2D_
Local column indices for all rows.
Definition: Tpetra_CrsGraph_decl.hpp:2478
Tpetra::Details::unpackAndCombineWithOwningPIDsCount
size_t unpackAndCombineWithOwningPIDsCount(const CrsGraph< LO, GO, NT > &sourceGraph, const Teuchos::ArrayView< const LO > &importLIDs, const Teuchos::ArrayView< const typename CrsGraph< LO, GO, NT >::packet_type > &imports, const Teuchos::ArrayView< const size_t > &numPacketsPerLID, size_t constantNumPackets, Distributor &distor, CombineMode combineMode, size_t numSameIDs, const Teuchos::ArrayView< const LO > &permuteToLIDs, const Teuchos::ArrayView< const LO > &permuteFromLIDs)
Special version of Tpetra::Details::unpackCrsGraphAndCombine that also unpacks owning process ranks.
Tpetra::global_size_t
size_t global_size_t
Global size_t object.
Definition: Tpetra_ConfigDefs.hpp:109
Tpetra::Details::LocalMap< LO, GO, DT >
Tpetra
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Tpetra::Details::computeOffsetsFromCounts
OffsetsViewType::non_const_value_type computeOffsetsFromCounts(const ExecutionSpace &execSpace, const OffsetsViewType &ptr, const CountsViewType &counts)
Compute offsets from counts.
Definition: Tpetra_Details_computeOffsets.hpp:243
Tpetra::deep_copy
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
Definition: Tpetra_MultiVector_decl.hpp:2557
Tpetra::SrcDistObject
Abstract base class for objects that can be the source of an Import or Export operation.
Definition: Tpetra_SrcDistObject.hpp:89
Tpetra::Details::insertCrsIndices
size_t insertCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, InOutIndices &curIndices, size_t &numAssigned, InIndices const &newIndices, std::function< void(const size_t, const size_t, const size_t)> cb=std::function< void(const size_t, const size_t, const size_t)>())
Insert new indices into the current list of indices.
Definition: Tpetra_Details_crsUtils.hpp:375
Tpetra::CrsGraph::nodeNumDiags_
size_t nodeNumDiags_
Local number of (populated) diagonal entries.
Definition: Tpetra_CrsGraph_decl.hpp:2339
Tpetra::CrsGraph::checkInternalState
void checkInternalState() const
Throw an exception if the internal state is not consistent.
Definition: Tpetra_CrsGraph_def.hpp:2559
Tpetra_Details_determineLocalTriangularStructure.hpp
Declaration and definition of Tpetra::Details::determineLocalTriangularStructure.
Tpetra::Details::copyOffsets
void copyOffsets(const OutputViewType &dst, const InputViewType &src)
Copy row offsets (in a sparse graph or matrix) from src to dst. The offsets may have different types.
Definition: Tpetra_Details_copyOffsets.hpp:539
Tpetra::CrsGraph::resumeFill
void resumeFill(const Teuchos::RCP< Teuchos::ParameterList > &params=Teuchos::null)
Resume fill operations.
Definition: Tpetra_CrsGraph_def.hpp:3847
Tpetra::INSERT
@ INSERT
Insert new values that don't currently exist.
Definition: Tpetra_CombineMode.hpp:96
Tpetra::CrsGraph::k_gblInds1D_
t_GlobalOrdinal_1D k_gblInds1D_
Global column indices for all rows.
Definition: Tpetra_CrsGraph_decl.hpp:2425
Tpetra::Details::findCrsIndices
size_t findCrsIndices(typename Pointers::value_type const row, Pointers const &rowPtrs, const size_t curNumEntries, Indices1 const &curIndices, Indices2 const &newIndices, Callback &&cb)
Finds offsets into the current list of indices.
Definition: Tpetra_Details_crsUtils.hpp:444
Tpetra::RowGraph< LO, GO, node_type >
Tpetra::CombineMode
CombineMode
Rule for combining data in an Import or Export.
Definition: Tpetra_CombineMode.hpp:94
Tpetra::Details::Behavior::verbosePrintCountThreshold
static size_t verbosePrintCountThreshold()
Threshold, below which arrays, lists, etc. will be printed in debug mode.
Definition: Tpetra_Details_Behavior.cpp:247