Tpetra parallel linear algebra  Version of the Day
Tpetra_Import_Util2.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
42 #ifndef TPETRA_IMPORT_UTIL2_HPP
43 #define TPETRA_IMPORT_UTIL2_HPP
44 
49 
50 #include "Tpetra_ConfigDefs.hpp"
51 #include "Tpetra_Import.hpp"
52 #include "Tpetra_HashTable.hpp"
53 #include "Tpetra_Map.hpp"
54 #include "Tpetra_Util.hpp"
55 #include "Tpetra_Distributor.hpp"
57 #include "Tpetra_Vector.hpp"
58 #include "Kokkos_DualView.hpp"
59 #include <Teuchos_Array.hpp>
60 #include <utility>
61 
62 namespace Tpetra {
63 namespace Import_Util {
64 
67 template<typename Scalar, typename Ordinal>
68 void
69 sortCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
70  const Teuchos::ArrayView<Ordinal>& CRS_colind,
71  const Teuchos::ArrayView<Scalar>&CRS_vals);
72 
73 template<typename Ordinal>
74 void
75 sortCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
76  const Teuchos::ArrayView<Ordinal>& CRS_colind);
77 
78 template<typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
79 void
80 sortCrsEntries (const rowptr_array_type& CRS_rowptr,
81  const colind_array_type& CRS_colind,
82  const vals_array_type& CRS_vals);
83 
84 template<typename rowptr_array_type, typename colind_array_type>
85 void
86 sortCrsEntries (const rowptr_array_type& CRS_rowptr,
87  const colind_array_type& CRS_colind);
88 
93 template<typename Scalar, typename Ordinal>
94 void
95 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
96  const Teuchos::ArrayView<Ordinal>& CRS_colind,
97  const Teuchos::ArrayView<Scalar>& CRS_vals);
98 
99 template<typename Ordinal>
100 void
101 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t>& CRS_rowptr,
102  const Teuchos::ArrayView<Ordinal>& CRS_colind);
103 
119 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
120 void
121 lowCommunicationMakeColMapAndReindex (const Teuchos::ArrayView<const size_t> &rowPointers,
122  const Teuchos::ArrayView<LocalOrdinal> &columnIndices_LID,
123  const Teuchos::ArrayView<GlobalOrdinal> &columnIndices_GID,
124  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & domainMap,
125  const Teuchos::ArrayView<const int> &owningPids,
126  Teuchos::Array<int> &remotePids,
127  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap);
128 
129 
130 
131 
145  template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
146  void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
147  bool useReverseModeForOwnership,
148  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferForMigratingData,
149  bool useReverseModeForMigration,
151 
152 } // namespace Import_Util
153 } // namespace Tpetra
154 
155 
156 //
157 // Implementations
158 //
159 
160 namespace Tpetra {
161 namespace Import_Util {
162 
163 // Note: This should get merged with the other Tpetra sort routines eventually.
164 template<typename Scalar, typename Ordinal>
165 void
166 sortCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
167  const Teuchos::ArrayView<Ordinal> & CRS_colind,
168  const Teuchos::ArrayView<Scalar> &CRS_vals)
169 {
170  // For each row, sort column entries from smallest to largest.
171  // Use shell sort. Stable sort so it is fast if indices are already sorted.
172  // Code copied from Epetra_CrsMatrix::SortEntries()
173  size_t NumRows = CRS_rowptr.size()-1;
174  size_t nnz = CRS_colind.size();
175 
176  const bool permute_values_array = CRS_vals.size() > 0;
177 
178  for(size_t i = 0; i < NumRows; i++){
179  size_t start=CRS_rowptr[i];
180  if(start >= nnz) continue;
181 
182  size_t NumEntries = CRS_rowptr[i+1] - start;
183  Teuchos::ArrayRCP<Scalar> locValues;
184  if (permute_values_array)
185  locValues = Teuchos::arcp<Scalar>(&CRS_vals[start], 0, NumEntries, false);
186  Teuchos::ArrayRCP<Ordinal> locIndices(&CRS_colind[start], 0, NumEntries, false);
187 
188  Ordinal n = NumEntries;
189  Ordinal m = 1;
190  while (m<n) m = m*3+1;
191  m /= 3;
192 
193  while(m > 0) {
194  Ordinal max = n - m;
195  for(Ordinal j = 0; j < max; j++) {
196  for(Ordinal k = j; k >= 0; k-=m) {
197  if(locIndices[k+m] >= locIndices[k])
198  break;
199  if (permute_values_array) {
200  Scalar dtemp = locValues[k+m];
201  locValues[k+m] = locValues[k];
202  locValues[k] = dtemp;
203  }
204  Ordinal itemp = locIndices[k+m];
205  locIndices[k+m] = locIndices[k];
206  locIndices[k] = itemp;
207  }
208  }
209  m = m/3;
210  }
211  }
212 }
213 
214 template<typename Ordinal>
215 void
216 sortCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
217  const Teuchos::ArrayView<Ordinal> & CRS_colind)
218 {
219  // Generate dummy values array
220  Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
221  sortCrsEntries (CRS_rowptr, CRS_colind, CRS_vals);
222 }
223 
224 namespace Impl {
225 
226 template<class RowOffsetsType, class ColumnIndicesType, class ValuesType>
227 class SortCrsEntries {
228 private:
229  typedef typename ColumnIndicesType::non_const_value_type ordinal_type;
230  typedef typename ValuesType::non_const_value_type scalar_type;
231 
232 public:
233  SortCrsEntries (const RowOffsetsType& ptr,
234  const ColumnIndicesType& ind,
235  const ValuesType& val) :
236  ptr_ (ptr),
237  ind_ (ind),
238  val_ (val)
239  {
240  static_assert (std::is_signed<ordinal_type>::value, "The type of each "
241  "column index -- that is, the type of each entry of ind "
242  "-- must be signed in order for this functor to work.");
243  }
244 
245  KOKKOS_FUNCTION void operator() (const size_t i) const
246  {
247  const size_t nnz = ind_.extent (0);
248  const size_t start = ptr_(i);
249  const bool permute_values_array = val_.extent(0) > 0;
250 
251  if (start < nnz) {
252  const size_t NumEntries = ptr_(i+1) - start;
253 
254  const ordinal_type n = static_cast<ordinal_type> (NumEntries);
255  ordinal_type m = 1;
256  while (m<n) m = m*3+1;
257  m /= 3;
258 
259  while (m > 0) {
260  ordinal_type max = n - m;
261  for (ordinal_type j = 0; j < max; j++) {
262  for (ordinal_type k = j; k >= 0; k -= m) {
263  const size_t sk = start+k;
264  if (ind_(sk+m) >= ind_(sk)) {
265  break;
266  }
267  if (permute_values_array) {
268  const scalar_type dtemp = val_(sk+m);
269  val_(sk+m) = val_(sk);
270  val_(sk) = dtemp;
271  }
272  const ordinal_type itemp = ind_(sk+m);
273  ind_(sk+m) = ind_(sk);
274  ind_(sk) = itemp;
275  }
276  }
277  m = m/3;
278  }
279  }
280  }
281 
282  static void
283  sortCrsEntries (const RowOffsetsType& ptr,
284  const ColumnIndicesType& ind,
285  const ValuesType& val)
286  {
287  // For each row, sort column entries from smallest to largest.
288  // Use shell sort. Stable sort so it is fast if indices are already sorted.
289  // Code copied from Epetra_CrsMatrix::SortEntries()
290  // NOTE: This should not be taken as a particularly efficient way to sort
291  // rows of matrices in parallel. But it is correct, so that's something.
292  if (ptr.extent (0) == 0) {
293  return; // no rows, so nothing to sort
294  }
295  const size_t NumRows = ptr.extent (0) - 1;
296 
297  typedef size_t index_type; // what this function was using; not my choice
298  typedef typename ValuesType::execution_space execution_space;
299  // Specify RangePolicy explicitly, in order to use correct execution
300  // space. See #1345.
301  typedef Kokkos::RangePolicy<execution_space, index_type> range_type;
302  Kokkos::parallel_for ("sortCrsEntries", range_type (0, NumRows),
303  SortCrsEntries (ptr, ind, val));
304  }
305 
306 private:
307  RowOffsetsType ptr_;
308  ColumnIndicesType ind_;
309  ValuesType val_;
310 };
311 
312 } // namespace Impl
313 
314 template<typename rowptr_array_type, typename colind_array_type, typename vals_array_type>
315 void
316 sortCrsEntries (const rowptr_array_type& CRS_rowptr,
317  const colind_array_type& CRS_colind,
318  const vals_array_type& CRS_vals)
319 {
320  Impl::SortCrsEntries<rowptr_array_type, colind_array_type,
321  vals_array_type>::sortCrsEntries (CRS_rowptr, CRS_colind, CRS_vals);
322 }
323 
324 template<typename rowptr_array_type, typename colind_array_type>
325 void
326 sortCrsEntries (const rowptr_array_type& CRS_rowptr,
327  const colind_array_type& CRS_colind)
328 {
329  // Generate dummy values array
330  typedef typename colind_array_type::execution_space execution_space;
331  typedef Tpetra::Details::DefaultTypes::scalar_type scalar_type;
332  typedef typename Kokkos::View<scalar_type*, execution_space> scalar_view_type;
333  scalar_view_type CRS_vals;
334  sortCrsEntries<rowptr_array_type, colind_array_type,
335  scalar_view_type>(CRS_rowptr, CRS_colind, CRS_vals);
336 }
337 
338 // Note: This should get merged with the other Tpetra sort routines eventually.
339 template<typename Scalar, typename Ordinal>
340 void
341 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
342  const Teuchos::ArrayView<Ordinal> & CRS_colind,
343  const Teuchos::ArrayView<Scalar> &CRS_vals)
344 {
345  // For each row, sort column entries from smallest to largest,
346  // merging column ids that are identify by adding values. Use shell
347  // sort. Stable sort so it is fast if indices are already sorted.
348  // Code copied from Epetra_CrsMatrix::SortEntries()
349 
350  if (CRS_rowptr.size () == 0) {
351  return; // no rows, so nothing to sort
352  }
353  const size_t NumRows = CRS_rowptr.size () - 1;
354  const size_t nnz = CRS_colind.size ();
355  size_t new_curr = CRS_rowptr[0];
356  size_t old_curr = CRS_rowptr[0];
357 
358  const bool permute_values_array = CRS_vals.size() > 0;
359 
360  for(size_t i = 0; i < NumRows; i++){
361  const size_t old_rowptr_i=CRS_rowptr[i];
362  CRS_rowptr[i] = old_curr;
363  if(old_rowptr_i >= nnz) continue;
364 
365  size_t NumEntries = CRS_rowptr[i+1] - old_rowptr_i;
366  Teuchos::ArrayRCP<Scalar> locValues;
367  if (permute_values_array)
368  locValues = Teuchos::arcp<Scalar>(&CRS_vals[old_rowptr_i], 0, NumEntries, false);
369  Teuchos::ArrayRCP<Ordinal> locIndices(&CRS_colind[old_rowptr_i], 0, NumEntries, false);
370 
371  // Sort phase
372  Ordinal n = NumEntries;
373  Ordinal m = n/2;
374 
375  while(m > 0) {
376  Ordinal max = n - m;
377  for(Ordinal j = 0; j < max; j++) {
378  for(Ordinal k = j; k >= 0; k-=m) {
379  if(locIndices[k+m] >= locIndices[k])
380  break;
381  if (permute_values_array) {
382  Scalar dtemp = locValues[k+m];
383  locValues[k+m] = locValues[k];
384  locValues[k] = dtemp;
385  }
386  Ordinal itemp = locIndices[k+m];
387  locIndices[k+m] = locIndices[k];
388  locIndices[k] = itemp;
389  }
390  }
391  m = m/2;
392  }
393 
394  // Merge & shrink
395  for(size_t j=old_rowptr_i; j < CRS_rowptr[i+1]; j++) {
396  if(j > old_rowptr_i && CRS_colind[j]==CRS_colind[new_curr-1]) {
397  if (permute_values_array) CRS_vals[new_curr-1] += CRS_vals[j];
398  }
399  else if(new_curr==j) {
400  new_curr++;
401  }
402  else {
403  CRS_colind[new_curr] = CRS_colind[j];
404  if (permute_values_array) CRS_vals[new_curr] = CRS_vals[j];
405  new_curr++;
406  }
407  }
408  old_curr=new_curr;
409  }
410 
411  CRS_rowptr[NumRows] = new_curr;
412 }
413 
414 template<typename Ordinal>
415 void
416 sortAndMergeCrsEntries (const Teuchos::ArrayView<size_t> &CRS_rowptr,
417  const Teuchos::ArrayView<Ordinal> & CRS_colind)
418 {
419  Teuchos::ArrayView<Tpetra::Details::DefaultTypes::scalar_type> CRS_vals;
420  return sortAndMergeCrsEntries(CRS_rowptr, CRS_colind, CRS_vals);
421 }
422 
423 
424 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
425 void
426 lowCommunicationMakeColMapAndReindex (const Teuchos::ArrayView<const size_t> &rowptr,
427  const Teuchos::ArrayView<LocalOrdinal> &colind_LID,
428  const Teuchos::ArrayView<GlobalOrdinal> &colind_GID,
429  const Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >& domainMapRCP,
430  const Teuchos::ArrayView<const int> &owningPIDs,
431  Teuchos::Array<int> &remotePIDs,
432  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > & colMap)
433 {
434  using Teuchos::rcp;
435  typedef LocalOrdinal LO;
436  typedef GlobalOrdinal GO;
437  typedef Tpetra::global_size_t GST;
438  typedef Tpetra::Map<LO, GO, Node> map_type;
439  const char prefix[] = "lowCommunicationMakeColMapAndReindex: ";
440 
441  // The domainMap is an RCP because there is a shortcut for a
442  // (common) special case to return the columnMap = domainMap.
443  const map_type& domainMap = *domainMapRCP;
444 
445  // Scan all column indices and sort into two groups:
446  // Local: those whose GID matches a GID of the domain map on this processor and
447  // Remote: All others.
448  const size_t numDomainElements = domainMap.getNodeNumElements ();
449  Teuchos::Array<bool> LocalGIDs;
450  if (numDomainElements > 0) {
451  LocalGIDs.resize (numDomainElements, false); // Assume domain GIDs are not local
452  }
453 
454  // In principle it is good to have RemoteGIDs and RemotGIDList be as
455  // long as the number of remote GIDs on this processor, but this
456  // would require two passes through the column IDs, so we make it
457  // the max of 100 and the number of block rows.
458  //
459  // FIXME (mfh 11 Feb 2015) Tpetra::Details::HashTable can hold at
460  // most INT_MAX entries, but it's possible to have more rows than
461  // that (if size_t is 64 bits and int is 32 bits).
462  const size_t numMyRows = rowptr.size () - 1;
463  const int hashsize = std::max (static_cast<int> (numMyRows), 100);
464 
465  Tpetra::Details::HashTable<GO, LO> RemoteGIDs (hashsize);
466  Teuchos::Array<GO> RemoteGIDList;
467  RemoteGIDList.reserve (hashsize);
468  Teuchos::Array<int> PIDList;
469  PIDList.reserve (hashsize);
470 
471  // Here we start using the *LocalOrdinal* colind_LID array. This is
472  // safe even if both columnIndices arrays are actually the same
473  // (because LocalOrdinal==GO). For *local* GID's set
474  // colind_LID with with their LID in the domainMap. For *remote*
475  // GIDs, we set colind_LID with (numDomainElements+NumRemoteColGIDs)
476  // before the increment of the remote count. These numberings will
477  // be separate because no local LID is greater than
478  // numDomainElements.
479 
480  size_t NumLocalColGIDs = 0;
481  LO NumRemoteColGIDs = 0;
482  for (size_t i = 0; i < numMyRows; ++i) {
483  for(size_t j = rowptr[i]; j < rowptr[i+1]; ++j) {
484  const GO GID = colind_GID[j];
485  // Check if GID matches a row GID
486  const LO LID = domainMap.getLocalElement (GID);
487  if(LID != -1) {
488  const bool alreadyFound = LocalGIDs[LID];
489  if (! alreadyFound) {
490  LocalGIDs[LID] = true; // There is a column in the graph associated with this domain map GID
491  NumLocalColGIDs++;
492  }
493  colind_LID[j] = LID;
494  }
495  else {
496  const LO hash_value = RemoteGIDs.get (GID);
497  if (hash_value == -1) { // This means its a new remote GID
498  const int PID = owningPIDs[j];
499  TEUCHOS_TEST_FOR_EXCEPTION(
500  PID == -1, std::invalid_argument, prefix << "Cannot figure out if "
501  "PID is owned.");
502  colind_LID[j] = static_cast<LO> (numDomainElements + NumRemoteColGIDs);
503  RemoteGIDs.add (GID, NumRemoteColGIDs);
504  RemoteGIDList.push_back (GID);
505  PIDList.push_back (PID);
506  NumRemoteColGIDs++;
507  }
508  else {
509  colind_LID[j] = static_cast<LO> (numDomainElements + hash_value);
510  }
511  }
512  }
513  }
514 
515  // Possible short-circuit: If all domain map GIDs are present as
516  // column indices, then set ColMap=domainMap and quit.
517  if (domainMap.getComm ()->getSize () == 1) {
518  // Sanity check: When there is only one process, there can be no
519  // remoteGIDs.
520  TEUCHOS_TEST_FOR_EXCEPTION(
521  NumRemoteColGIDs != 0, std::runtime_error, prefix << "There is only one "
522  "process in the domain Map's communicator, which means that there are no "
523  "\"remote\" indices. Nevertheless, some column indices are not in the "
524  "domain Map.");
525  if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
526  // In this case, we just use the domainMap's indices, which is,
527  // not coincidently, what we clobbered colind with up above
528  // anyway. No further reindexing is needed.
529  colMap = domainMapRCP;
530  return;
531  }
532  }
533 
534  // Now build the array containing column GIDs
535  // Build back end, containing remote GIDs, first
536  const LO numMyCols = NumLocalColGIDs + NumRemoteColGIDs;
537  Teuchos::Array<GO> ColIndices;
538  GO* RemoteColIndices = NULL;
539  if (numMyCols > 0) {
540  ColIndices.resize (numMyCols);
541  if (NumLocalColGIDs != static_cast<size_t> (numMyCols)) {
542  RemoteColIndices = &ColIndices[NumLocalColGIDs]; // Points to back half of ColIndices
543  }
544  }
545 
546  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
547  RemoteColIndices[i] = RemoteGIDList[i];
548  }
549 
550  // Build permute array for *remote* reindexing.
551  Teuchos::Array<LO> RemotePermuteIDs (NumRemoteColGIDs);
552  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
553  RemotePermuteIDs[i]=i;
554  }
555 
556  // Sort External column indices so that all columns coming from a
557  // given remote processor are contiguous. This is a sort with two
558  // auxilary arrays: RemoteColIndices and RemotePermuteIDs.
559  Tpetra::sort3 (PIDList.begin (), PIDList.end (),
560  ColIndices.begin () + NumLocalColGIDs,
561  RemotePermuteIDs.begin ());
562 
563  // Stash the RemotePIDs.
564  //
565  // Note: If Teuchos::Array had a shrink_to_fit like std::vector,
566  // we'd call it here.
567  remotePIDs = PIDList;
568 
569  // Sort external column indices so that columns from a given remote
570  // processor are not only contiguous but also in ascending
571  // order. NOTE: I don't know if the number of externals associated
572  // with a given remote processor is known at this point ... so I
573  // count them here.
574 
575  // NTS: Only sort the RemoteColIndices this time...
576  LO StartCurrent = 0, StartNext = 1;
577  while (StartNext < NumRemoteColGIDs) {
578  if (PIDList[StartNext]==PIDList[StartNext-1]) {
579  StartNext++;
580  }
581  else {
582  Tpetra::sort2 (ColIndices.begin () + NumLocalColGIDs + StartCurrent,
583  ColIndices.begin () + NumLocalColGIDs + StartNext,
584  RemotePermuteIDs.begin () + StartCurrent);
585  StartCurrent = StartNext;
586  StartNext++;
587  }
588  }
589  Tpetra::sort2 (ColIndices.begin () + NumLocalColGIDs + StartCurrent,
590  ColIndices.begin () + NumLocalColGIDs + StartNext,
591  RemotePermuteIDs.begin () + StartCurrent);
592 
593  // Reverse the permutation to get the information we actually care about
594  Teuchos::Array<LO> ReverseRemotePermuteIDs (NumRemoteColGIDs);
595  for (LO i = 0; i < NumRemoteColGIDs; ++i) {
596  ReverseRemotePermuteIDs[RemotePermuteIDs[i]] = i;
597  }
598 
599  // Build permute array for *local* reindexing.
600  bool use_local_permute = false;
601  Teuchos::Array<LO> LocalPermuteIDs (numDomainElements);
602 
603  // Now fill front end. Two cases:
604  //
605  // (1) If the number of Local column GIDs is the same as the number
606  // of Local domain GIDs, we can simply read the domain GIDs into
607  // the front part of ColIndices, otherwise
608  //
609  // (2) We step through the GIDs of the domainMap, checking to see if
610  // each domain GID is a column GID. we want to do this to
611  // maintain a consistent ordering of GIDs between the columns
612  // and the domain.
613  Teuchos::ArrayView<const GO> domainGlobalElements = domainMap.getNodeElementList();
614  if (static_cast<size_t> (NumLocalColGIDs) == numDomainElements) {
615  if (NumLocalColGIDs > 0) {
616  // Load Global Indices into first numMyCols elements column GID list
617  std::copy (domainGlobalElements.begin (), domainGlobalElements.end (),
618  ColIndices.begin ());
619  }
620  }
621  else {
622  LO NumLocalAgain = 0;
623  use_local_permute = true;
624  for (size_t i = 0; i < numDomainElements; ++i) {
625  if (LocalGIDs[i]) {
626  LocalPermuteIDs[i] = NumLocalAgain;
627  ColIndices[NumLocalAgain++] = domainGlobalElements[i];
628  }
629  }
630  TEUCHOS_TEST_FOR_EXCEPTION(
631  static_cast<size_t> (NumLocalAgain) != NumLocalColGIDs,
632  std::runtime_error, prefix << "Local ID count test failed.");
633  }
634 
635  // Make column Map
636  const GST minus_one = Teuchos::OrdinalTraits<GST>::invalid ();
637  colMap = rcp (new map_type (minus_one, ColIndices, domainMap.getIndexBase (),
638  domainMap.getComm (), domainMap.getNode ()));
639 
640  // Low-cost reindex of the matrix
641  for (size_t i = 0; i < numMyRows; ++i) {
642  for (size_t j = rowptr[i]; j < rowptr[i+1]; ++j) {
643  const LO ID = colind_LID[j];
644  if (static_cast<size_t> (ID) < numDomainElements) {
645  if (use_local_permute) {
646  colind_LID[j] = LocalPermuteIDs[colind_LID[j]];
647  }
648  // In the case where use_local_permute==false, we just copy
649  // the DomainMap's ordering, which it so happens is what we
650  // put in colind_LID to begin with.
651  }
652  else {
653  colind_LID[j] = NumLocalColGIDs + ReverseRemotePermuteIDs[colind_LID[j]-numDomainElements];
654  }
655  }
656  }
657 }
658 
659 
660 
661 
662 // Generates an list of owning PIDs based on two transfer (aka import/export objects)
663 // Let:
664 // OwningMap = useReverseModeForOwnership ? transferThatDefinesOwnership.getTargetMap() : transferThatDefinesOwnership.getSourceMap();
665 // MapAo = useReverseModeForOwnership ? transferThatDefinesOwnership.getSourceMap() : transferThatDefinesOwnership.getTargetMap();
666 // MapAm = useReverseModeForMigration ? transferThatDefinesMigration.getTargetMap() : transferThatDefinesMigration.getSourceMap();
667 // VectorMap = useReverseModeForMigration ? transferThatDefinesMigration.getSourceMap() : transferThatDefinesMigration.getTargetMap();
668 // Precondition:
669 // 1) MapAo.isSameAs(*MapAm) - map compatibility between transfers
670 // 2) VectorMap->isSameAs(*owningPIDs->getMap()) - map compabibility between transfer & vector
671 // 3) OwningMap->isOneToOne() - owning map is 1-to-1
672 // --- Precondition 3 is only checked in DEBUG mode ---
673 // Postcondition:
674 // owningPIDs[VectorMap->getLocalElement(GID i)] = j iff (OwningMap->isLocalElement(GID i) on rank j)
675 template <typename LocalOrdinal, typename GlobalOrdinal, typename Node>
676 void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesOwnership,
677  bool useReverseModeForOwnership,
678  const ::Tpetra::Details::Transfer<LocalOrdinal, GlobalOrdinal, Node>& transferThatDefinesMigration,
679  bool useReverseModeForMigration,
683 
684  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > OwningMap = useReverseModeForOwnership ? transferThatDefinesOwnership.getTargetMap() : transferThatDefinesOwnership.getSourceMap();
685  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > MapAo = useReverseModeForOwnership ? transferThatDefinesOwnership.getSourceMap() : transferThatDefinesOwnership.getTargetMap();
686  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > MapAm = useReverseModeForMigration ? transferThatDefinesMigration.getTargetMap() : transferThatDefinesMigration.getSourceMap();
687  Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > VectorMap = useReverseModeForMigration ? transferThatDefinesMigration.getSourceMap() : transferThatDefinesMigration.getTargetMap();
688 
689  TEUCHOS_TEST_FOR_EXCEPTION(!MapAo->isSameAs(*MapAm),std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch between transfers");
690  TEUCHOS_TEST_FOR_EXCEPTION(!VectorMap->isSameAs(*owningPIDs.getMap()),std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector map mismatch transfer and vector");
691 #ifdef HAVE_TPETRA_DEBUG
692  TEUCHOS_TEST_FOR_EXCEPTION(!OwningMap->isOneToOne(),std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");
693 #endif
694 
695  int rank = OwningMap->getComm()->getRank();
696  // Generate "A" vector and fill it with owning information. We can read this from transferThatDefinesOwnership w/o communication
697  // Note: Due to the 1-to-1 requirement, several of these options throw
699  const import_type* ownAsImport = dynamic_cast<const import_type*> (&transferThatDefinesOwnership);
700  const export_type* ownAsExport = dynamic_cast<const export_type*> (&transferThatDefinesOwnership);
701 
702  Teuchos::ArrayRCP<int> pids = temp.getDataNonConst();
703  Teuchos::ArrayView<int> v_pids = pids();
704  if(ownAsImport && useReverseModeForOwnership) {TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");}
705  else if(ownAsImport && !useReverseModeForOwnership) getPids(*ownAsImport,v_pids,false);
706  else if(ownAsExport && useReverseModeForMigration) {TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector this option not yet implemented");}
707  else {TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector owner must be 1-to-1");}
708 
709  const import_type* xferAsImport = dynamic_cast<const import_type*> (&transferThatDefinesMigration);
710  const export_type* xferAsExport = dynamic_cast<const export_type*> (&transferThatDefinesMigration);
711  TEUCHOS_TEST_FOR_EXCEPTION(!xferAsImport && !xferAsExport,std::runtime_error,"Tpetra::Import_Util::getTwoTransferOwnershipVector transfer undefined");
712 
713  // Migrate from "A" vector to output vector
714  owningPIDs.putScalar(rank);
715  if(xferAsImport && useReverseModeForMigration) owningPIDs.doExport(temp,*xferAsImport,Tpetra::REPLACE);
716  else if(xferAsImport && !useReverseModeForMigration) owningPIDs.doImport(temp,*xferAsImport,Tpetra::REPLACE);
717  else if(xferAsExport && useReverseModeForMigration) owningPIDs.doImport(temp,*xferAsExport,Tpetra::REPLACE);
718  else owningPIDs.doExport(temp,*xferAsExport,Tpetra::REPLACE);
719 
720 }
721 
722 
723 
724 } // namespace Import_Util
725 } // namespace Tpetra
726 
727 #endif // TPETRA_IMPORT_UTIL_HPP
void getTwoTransferOwnershipVector(const ::Tpetra::Details::Transfer< LocalOrdinal, GlobalOrdinal, Node > &transferThatDefinesOwnership, bool useReverseModeForOwnership, const ::Tpetra::Details::Transfer< LocalOrdinal, GlobalOrdinal, Node > &transferForMigratingData, bool useReverseModeForMigration, Tpetra::Vector< int, LocalOrdinal, GlobalOrdinal, Node > &owningPIDs)
Generates an list of owning PIDs based on two transfer (aka import/export objects) Let: OwningMap = u...
void sortAndMergeCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< Ordinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals)
Sort and merge the entries of the (raw CSR) matrix by column index within each row.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Communication plan for data redistribution from a (possibly) multiply-owned to a uniquely-owned distr...
void doExport(const SrcDistObject &source, const Export< LocalOrdinal, GlobalOrdinal, Node > &exporter, CombineMode CM)
Export data into this object using an Export object ("forward mode").
A distributed dense vector.
void sort3(const IT1 &first1, const IT1 &last1, const IT2 &first2, const IT3 &first3)
Sort the first array, and apply the same permutation to the second and third arrays.
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
virtual Teuchos::RCP< const map_type > getMap() const
The Map describing the parallel distribution of this object.
void doImport(const SrcDistObject &source, const Import< LocalOrdinal, GlobalOrdinal, Node > &importer, CombineMode CM)
Import data into this object using an Import object ("forward mode").
void sortCrsEntries(const Teuchos::ArrayView< size_t > &CRS_rowptr, const Teuchos::ArrayView< Ordinal > &CRS_colind, const Teuchos::ArrayView< Scalar > &CRS_vals)
Sort the entries of the (raw CSR) matrix by column index within each row.
void lowCommunicationMakeColMapAndReindex(const Teuchos::ArrayView< const size_t > &rowPointers, const Teuchos::ArrayView< LocalOrdinal > &columnIndices_LID, const Teuchos::ArrayView< GlobalOrdinal > &columnIndices_GID, const Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &domainMap, const Teuchos::ArrayView< const int > &owningPids, Teuchos::Array< int > &remotePids, Teuchos::RCP< const Tpetra::Map< LocalOrdinal, GlobalOrdinal, Node > > &colMap)
lowCommunicationMakeColMapAndReindex
size_t global_size_t
Global size_t object.
Teuchos::RCP< const map_type > getTargetMap() const
The target Map used to construct this Export.
void putScalar(const Scalar &value)
Set all values in the multivector with the given value.
Declaration and definition of Tpetra::Details::reallocDualViewIfNeeded, an implementation detail of T...
Replace existing values with new values.
Teuchos::ArrayRCP< Scalar > getDataNonConst()
View of the local values of this vector.
void sort2(const IT1 &first1, const IT1 &last1, const IT2 &first2)
Sort the first array, and apply the resulting permutation to the second array.
ValueType get(const KeyType key)
Get the value corresponding to the given key.
Communication plan for data redistribution from a uniquely-owned to a (possibly) multiply-owned distr...
void add(const KeyType key, const ValueType value)
Add a key and its value to the hash table.
Stand-alone utility functions and macros.
A parallel distribution of indices over processes.
void getPids(const Tpetra::Import< LocalOrdinal, GlobalOrdinal, Node > &Importer, Teuchos::Array< int > &pids, bool use_minus_one_for_local)
Like getPidGidPairs, but just gets the PIDs, ordered by the column Map.