Tpetra parallel linear algebra  Version of the Day
Tpetra_Map_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Tpetra: Templated Linear Algebra Services Package
5 // Copyright (2008) Sandia Corporation
6 //
7 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8 // the U.S. Government retains certain rights in this software.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ************************************************************************
40 // @HEADER
41 
46 
47 #ifndef TPETRA_MAP_DEF_HPP
48 #define TPETRA_MAP_DEF_HPP
49 
50 #include "Tpetra_Directory.hpp" // must include for implicit instantiation to work
51 #include "Tpetra_Details_FixedHashTable.hpp"
52 #include "Tpetra_Details_gathervPrint.hpp"
54 #include "Tpetra_Core.hpp"
55 #include "Tpetra_Util.hpp"
56 #include "Teuchos_as.hpp"
57 #include "Teuchos_TypeNameTraits.hpp"
58 #include "Teuchos_CommHelpers.hpp"
59 #include "Tpetra_Details_mpiIsInitialized.hpp"
60 #include "Tpetra_Details_extractMpiCommFromTeuchos.hpp" // teuchosCommIsAnMpiComm
62 #include <stdexcept>
63 #include <typeinfo>
64 
65 namespace Tpetra {
66 namespace Classes {
67 
68  template <class LocalOrdinal, class GlobalOrdinal, class Node>
70  Map () :
71  comm_ (new Teuchos::SerialComm<int> ()),
72  indexBase_ (0),
73  numGlobalElements_ (0),
74  numLocalElements_ (0),
75  minMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
76  maxMyGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
77  minAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
78  maxAllGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
79  firstContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
80  lastContiguousGID_ (Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ()),
81  uniform_ (false), // trivially
82  contiguous_ (false),
83  distributed_ (false), // no communicator yet
84  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
85  {
87  }
88 
89  template <class LocalOrdinal, class GlobalOrdinal, class Node>
91  Map (global_size_t numGlobalElements,
92  GlobalOrdinal indexBase,
93  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
94  LocalGlobal lOrG,
95  const Teuchos::RCP<Node> &node) :
96  comm_ (comm),
97  uniform_ (true),
98  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
99  {
100  using Teuchos::as;
101  using Teuchos::broadcast;
102  using Teuchos::outArg;
103  using Teuchos::reduceAll;
104  using Teuchos::REDUCE_MIN;
105  using Teuchos::REDUCE_MAX;
106  using Teuchos::typeName;
107  typedef GlobalOrdinal GO;
108  typedef global_size_t GST;
109  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
110 
112 
113 #ifdef HAVE_TPETRA_DEBUG
114  // In debug mode only, check whether numGlobalElements and
115  // indexBase are the same over all processes in the communicator.
116  {
117  GST proc0NumGlobalElements = numGlobalElements;
118  broadcast<int, GST> (*comm_, 0, outArg (proc0NumGlobalElements));
119  GST minNumGlobalElements = numGlobalElements;
120  GST maxNumGlobalElements = numGlobalElements;
121  reduceAll<int, GST> (*comm, REDUCE_MIN, numGlobalElements, outArg (minNumGlobalElements));
122  reduceAll<int, GST> (*comm, REDUCE_MAX, numGlobalElements, outArg (maxNumGlobalElements));
123  TEUCHOS_TEST_FOR_EXCEPTION(
124  minNumGlobalElements != maxNumGlobalElements || numGlobalElements != minNumGlobalElements,
125  std::invalid_argument,
126  "Tpetra::Map constructor: All processes must provide the same number "
127  "of global elements. Process 0 set numGlobalElements = "
128  << proc0NumGlobalElements << ". The calling process "
129  << comm->getRank () << " set numGlobalElements = " << numGlobalElements
130  << ". The min and max values over all processes are "
131  << minNumGlobalElements << " resp. " << maxNumGlobalElements << ".");
132 
133  GO proc0IndexBase = indexBase;
134  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
135  GO minIndexBase = indexBase;
136  GO maxIndexBase = indexBase;
137  reduceAll<int, GO> (*comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
138  reduceAll<int, GO> (*comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
139  TEUCHOS_TEST_FOR_EXCEPTION(
140  minIndexBase != maxIndexBase || indexBase != minIndexBase,
141  std::invalid_argument,
142  "Tpetra::Map constructor: "
143  "All processes must provide the same indexBase argument. "
144  "Process 0 set indexBase = " << proc0IndexBase << ". The calling "
145  "process " << comm->getRank () << " set indexBase = " << indexBase
146  << ". The min and max values over all processes are "
147  << minIndexBase << " resp. " << maxIndexBase << ".");
148  }
149 #endif // HAVE_TPETRA_DEBUG
150 
151  // Distribute the elements across the processes in the given
152  // communicator so that global IDs (GIDs) are
153  //
154  // - Nonoverlapping (only one process owns each GID)
155  // - Contiguous (the sequence of GIDs is nondecreasing, and no two
156  // adjacent GIDs differ by more than one)
157  // - As evenly distributed as possible (the numbers of GIDs on two
158  // different processes do not differ by more than one)
159 
160  // All processes have the same numGlobalElements, but we still
161  // need to check that it is valid. numGlobalElements must be
162  // positive and not the "invalid" value (GSTI).
163  //
164  // This comparison looks funny, but it avoids compiler warnings
165  // for comparing unsigned integers (numGlobalElements_in is a
166  // GST, which is unsigned) while still working if we
167  // later decide to make GST signed.
168  TEUCHOS_TEST_FOR_EXCEPTION(
169  (numGlobalElements < 1 && numGlobalElements != 0),
170  std::invalid_argument,
171  "Tpetra::Map constructor: numGlobalElements (= "
172  << numGlobalElements << ") must be nonnegative.");
173 
174  TEUCHOS_TEST_FOR_EXCEPTION(
175  numGlobalElements == GSTI, std::invalid_argument,
176  "Tpetra::Map constructor: You provided numGlobalElements = Teuchos::"
177  "OrdinalTraits<Tpetra::global_size_t>::invalid(). This version of the "
178  "constructor requires a valid value of numGlobalElements. You "
179  "probably mistook this constructor for the \"contiguous nonuniform\" "
180  "constructor, which can compute the global number of elements for you "
181  "if you set numGlobalElements to that value.");
182 
183  size_t numLocalElements = 0; // will set below
184  if (lOrG == GloballyDistributed) {
185  // Compute numLocalElements:
186  //
187  // If numGlobalElements == numProcs * B + remainder,
188  // then Proc r gets B+1 elements if r < remainder,
189  // and B elements if r >= remainder.
190  //
191  // This strategy is valid for any value of numGlobalElements and
192  // numProcs, including the following border cases:
193  // - numProcs == 1
194  // - numLocalElements < numProcs
195  //
196  // In the former case, remainder == 0 && numGlobalElements ==
197  // numLocalElements. In the latter case, remainder ==
198  // numGlobalElements && numLocalElements is either 0 or 1.
199  const GST numProcs = static_cast<GST> (comm_->getSize ());
200  const GST myRank = static_cast<GST> (comm_->getRank ());
201  const GST quotient = numGlobalElements / numProcs;
202  const GST remainder = numGlobalElements - quotient * numProcs;
203 
204  GO startIndex;
205  if (myRank < remainder) {
206  numLocalElements = static_cast<size_t> (1) + static_cast<size_t> (quotient);
207  // myRank was originally an int, so it should never overflow
208  // reasonable GO types.
209  startIndex = as<GO> (myRank) * as<GO> (numLocalElements);
210  } else {
211  numLocalElements = as<size_t> (quotient);
212  startIndex = as<GO> (myRank) * as<GO> (numLocalElements) +
213  as<GO> (remainder);
214  }
215 
216  minMyGID_ = indexBase + startIndex;
217  maxMyGID_ = indexBase + startIndex + numLocalElements - 1;
218  minAllGID_ = indexBase;
219  maxAllGID_ = indexBase + numGlobalElements - 1;
220  distributed_ = (numProcs > 1);
221  }
222  else { // lOrG == LocallyReplicated
223  numLocalElements = as<size_t> (numGlobalElements);
224  minMyGID_ = indexBase;
225  maxMyGID_ = indexBase + numGlobalElements - 1;
226  distributed_ = false;
227  }
228 
229  minAllGID_ = indexBase;
230  maxAllGID_ = indexBase + numGlobalElements - 1;
231  indexBase_ = indexBase;
232  numGlobalElements_ = numGlobalElements;
233  numLocalElements_ = numLocalElements;
234  firstContiguousGID_ = minMyGID_;
235  lastContiguousGID_ = maxMyGID_;
236  contiguous_ = true;
237 
238  // Create the Directory on demand in getRemoteIndexList().
239  //setupDirectory ();
240  }
241 
242  template <class LocalOrdinal, class GlobalOrdinal, class Node>
244  Map (global_size_t numGlobalElements,
245  size_t numLocalElements,
246  GlobalOrdinal indexBase,
247  const Teuchos::RCP<const Teuchos::Comm<int> > &comm,
248  const Teuchos::RCP<Node> &node) :
249  comm_ (comm),
250  uniform_ (false),
251  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
252  {
253  using Teuchos::as;
254  using Teuchos::broadcast;
255  using Teuchos::outArg;
256  using Teuchos::reduceAll;
257  using Teuchos::REDUCE_MIN;
258  using Teuchos::REDUCE_MAX;
259  using Teuchos::REDUCE_SUM;
260  using Teuchos::scan;
261  typedef GlobalOrdinal GO;
262  typedef global_size_t GST;
263  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
264 
266 
267 #ifdef HAVE_TPETRA_DEBUG
268  // Global sum of numLocalElements over all processes.
269  // Keep this for later debug checks.
270  const GST debugGlobalSum =
271  initialNonuniformDebugCheck (numGlobalElements, numLocalElements,
272  indexBase, comm);
273 #endif // HAVE_TPETRA_DEBUG
274 
275  // Distribute the elements across the nodes so that they are
276  // - non-overlapping
277  // - contiguous
278 
279  // This differs from the first Map constructor (that only takes a
280  // global number of elements) in that the user has specified the
281  // number of local elements, so that the elements are not
282  // (necessarily) evenly distributed over the processes.
283 
284  // Compute my local offset. This is an inclusive scan, so to get
285  // the final offset, we subtract off the input.
286  GO scanResult = 0;
287  scan<int, GO> (*comm, REDUCE_SUM, numLocalElements, outArg (scanResult));
288  const GO myOffset = scanResult - numLocalElements;
289 
290  if (numGlobalElements != GSTI) {
291  numGlobalElements_ = numGlobalElements; // Use the user's value.
292  } else {
293  // Inclusive scan means that the last process has the final sum.
294  // Rather than doing a reduceAll to get the sum of
295  // numLocalElements, we can just have the last process broadcast
296  // its result. That saves us a round of log(numProcs) messages.
297  const int numProcs = comm->getSize ();
298  GST globalSum = scanResult;
299  if (numProcs > 1) {
300  broadcast (*comm, numProcs - 1, outArg (globalSum));
301  }
302  numGlobalElements_ = globalSum;
303 
304 #ifdef HAVE_TPETRA_DEBUG
305  // No need for an all-reduce here; both come from collectives.
306  TEUCHOS_TEST_FOR_EXCEPTION(
307  globalSum != debugGlobalSum, std::logic_error,
308  "Tpetra::Map constructor (contiguous nonuniform): "
309  "globalSum = " << globalSum << " != debugGlobalSum = " << debugGlobalSum
310  << ". Please report this bug to the Tpetra developers.");
311 #endif // HAVE_TPETRA_DEBUG
312  }
313  numLocalElements_ = numLocalElements;
314  indexBase_ = indexBase;
315  minAllGID_ = (numGlobalElements_ == 0) ?
316  std::numeric_limits<GO>::max () :
317  indexBase;
318  maxAllGID_ = (numGlobalElements_ == 0) ?
319  std::numeric_limits<GO>::lowest () :
320  indexBase + static_cast<GO> (numGlobalElements_) - static_cast<GO> (1);
321  minMyGID_ = (numLocalElements_ == 0) ?
322  std::numeric_limits<GO>::max () :
323  indexBase + static_cast<GO> (myOffset);
324  maxMyGID_ = (numLocalElements_ == 0) ?
325  std::numeric_limits<GO>::lowest () :
326  indexBase + myOffset + static_cast<GO> (numLocalElements) - static_cast<GO> (1);
327  firstContiguousGID_ = minMyGID_;
328  lastContiguousGID_ = maxMyGID_;
329  contiguous_ = true;
330  distributed_ = checkIsDist ();
331 
332  // Create the Directory on demand in getRemoteIndexList().
333  //setupDirectory ();
334  }
335 
336  template <class LocalOrdinal, class GlobalOrdinal, class Node>
339  initialNonuniformDebugCheck (const global_size_t numGlobalElements,
340  const size_t numLocalElements,
341  const GlobalOrdinal indexBase,
342  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) const
343  {
344 #ifdef HAVE_TPETRA_DEBUG
345  using Teuchos::broadcast;
346  using Teuchos::outArg;
347  using Teuchos::ptr;
348  using Teuchos::REDUCE_MAX;
349  using Teuchos::REDUCE_MIN;
350  using Teuchos::REDUCE_SUM;
351  using Teuchos::reduceAll;
352  typedef GlobalOrdinal GO;
353  typedef global_size_t GST;
354  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
355 
356  // The user has specified the distribution of indices over the
357  // processes. The distribution is not necessarily contiguous or
358  // equally shared over the processes.
359  //
360  // We assume that the number of local elements can be stored in a
361  // size_t. The instance member numLocalElements_ is a size_t, so
362  // this variable and that should have the same type.
363 
364  GST debugGlobalSum = 0; // Will be global sum of numLocalElements
365  reduceAll<int, GST> (*comm, REDUCE_SUM, static_cast<GST> (numLocalElements),
366  outArg (debugGlobalSum));
367  // In debug mode only, check whether numGlobalElements and
368  // indexBase are the same over all processes in the communicator.
369  {
370  GST proc0NumGlobalElements = numGlobalElements;
371  broadcast<int, GST> (*comm_, 0, outArg (proc0NumGlobalElements));
372  GST minNumGlobalElements = numGlobalElements;
373  GST maxNumGlobalElements = numGlobalElements;
374  reduceAll<int, GST> (*comm, REDUCE_MIN, numGlobalElements,
375  outArg (minNumGlobalElements));
376  reduceAll<int, GST> (*comm, REDUCE_MAX, numGlobalElements,
377  outArg (maxNumGlobalElements));
378  TEUCHOS_TEST_FOR_EXCEPTION(
379  minNumGlobalElements != maxNumGlobalElements ||
380  numGlobalElements != minNumGlobalElements,
381  std::invalid_argument,
382  "Tpetra::Map constructor: All processes must provide the same number "
383  "of global elements. This is true even if that argument is Teuchos::"
384  "OrdinalTraits<global_size_t>::invalid() to signal that the Map should "
385  "compute the global number of elements. Process 0 set numGlobalElements"
386  " = " << proc0NumGlobalElements << ". The calling process "
387  << comm->getRank () << " set numGlobalElements = " << numGlobalElements
388  << ". The min and max values over all processes are "
389  << minNumGlobalElements << " resp. " << maxNumGlobalElements << ".");
390 
391  GO proc0IndexBase = indexBase;
392  broadcast<int, GO> (*comm_, 0, outArg (proc0IndexBase));
393  GO minIndexBase = indexBase;
394  GO maxIndexBase = indexBase;
395  reduceAll<int, GO> (*comm, REDUCE_MIN, indexBase, outArg (minIndexBase));
396  reduceAll<int, GO> (*comm, REDUCE_MAX, indexBase, outArg (maxIndexBase));
397  TEUCHOS_TEST_FOR_EXCEPTION(
398  minIndexBase != maxIndexBase || indexBase != minIndexBase,
399  std::invalid_argument,
400  "Tpetra::Map constructor: "
401  "All processes must provide the same indexBase argument. "
402  "Process 0 set indexBase = " << proc0IndexBase << ". The calling "
403  "process " << comm->getRank () << " set indexBase = " << indexBase
404  << ". The min and max values over all processes are "
405  << minIndexBase << " resp. " << maxIndexBase << ".");
406 
407  // Make sure that the sum of numLocalElements over all processes
408  // equals numGlobalElements.
409  TEUCHOS_TEST_FOR_EXCEPTION
410  (numGlobalElements != GSTI && debugGlobalSum != numGlobalElements,
411  std::invalid_argument, "Tpetra::Map constructor: The sum of each "
412  "process' number of indices over all processes, " << debugGlobalSum
413  << " != numGlobalElements = " << numGlobalElements << ". If you "
414  "would like this constructor to compute numGlobalElements for you, "
415  "you may set numGlobalElements = "
416  "Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid() on input. "
417  "Please note that this is NOT necessarily -1.");
418  }
419 
420  return debugGlobalSum;
421 #else
422  return static_cast<global_size_t> (0);
423 #endif // HAVE_TPETRA_DEBUG
424  }
425 
426  template <class LocalOrdinal, class GlobalOrdinal, class Node>
427  void
429  initWithNonownedHostIndexList (const global_size_t numGlobalElements,
430  const Kokkos::View<const GlobalOrdinal*,
431  Kokkos::LayoutLeft,
432  Kokkos::HostSpace,
433  Kokkos::MemoryUnmanaged>& entryList_host,
434  const GlobalOrdinal indexBase,
435  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
436  {
437  using Kokkos::LayoutLeft;
438  using Kokkos::subview;
439  using Kokkos::View;
440  using Teuchos::as;
441  using Teuchos::broadcast;
442  using Teuchos::outArg;
443  using Teuchos::ptr;
444  using Teuchos::REDUCE_MAX;
445  using Teuchos::REDUCE_MIN;
446  using Teuchos::REDUCE_SUM;
447  using Teuchos::reduceAll;
448  typedef LocalOrdinal LO;
449  typedef GlobalOrdinal GO;
450  typedef global_size_t GST;
451  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
452 
453  // Make sure that Kokkos has been initialized (Github Issue #513).
454  TEUCHOS_TEST_FOR_EXCEPTION
455  (! Kokkos::is_initialized (), std::runtime_error,
456  "Tpetra::Map constructor: The Kokkos execution space "
457  << Teuchos::TypeNameTraits<execution_space>::name ()
458  << " has not been initialized. "
459  "Please initialize it before creating a Map.")
460 
461  // The user has specified the distribution of indices over the
462  // processes, via the input array of global indices on each
463  // process. The distribution is not necessarily contiguous or
464  // equally shared over the processes.
465 
466  // The length of the input array on this process is the number of
467  // local indices to associate with this process, even though the
468  // input array contains global indices. We assume that the number
469  // of local indices on a process can be stored in a size_t;
470  // numLocalElements_ is a size_t, so this variable and that should
471  // have the same type.
472  const size_t numLocalElements = static_cast<size_t> (entryList_host.size ());
473 
474  initialNonuniformDebugCheck (numGlobalElements, numLocalElements,
475  indexBase, comm);
476 
477  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
478  // reduction is redundant, since the directory Map will have to do
479  // the same thing. Thus, we could do the scan and broadcast for
480  // the directory Map here, and give the computed offsets to the
481  // directory Map's constructor. However, a reduction costs less
482  // than a scan and broadcast, so this still saves time if users of
483  // this Map don't ever need the Directory (i.e., if they never
484  // call getRemoteIndexList on this Map).
485  if (numGlobalElements != GSTI) {
486  numGlobalElements_ = numGlobalElements; // Use the user's value.
487  }
488  else { // The user wants us to compute the sum.
489  reduceAll<int, GST> (*comm, REDUCE_SUM,
490  static_cast<GST> (numLocalElements),
491  outArg (numGlobalElements_));
492  }
493 
494  // mfh 20 Feb 2013: We've never quite done the right thing for
495  // duplicate GIDs here. Duplicate GIDs have always been counted
496  // distinctly in numLocalElements_, and thus should get a
497  // different LID. However, we've always used std::map or a hash
498  // table for the GID -> LID lookup table, so distinct GIDs always
499  // map to the same LID. Furthermore, the order of the input GID
500  // list matters, so it's not desirable to sort for determining
501  // uniqueness.
502  //
503  // I've chosen for now to write this code as if the input GID list
504  // contains no duplicates. If this is not desired, we could use
505  // the lookup table itself to determine uniqueness: If we haven't
506  // seen the GID before, it gets a new LID and it's added to the
507  // LID -> GID and GID -> LID tables. If we have seen the GID
508  // before, it doesn't get added to either table. I would
509  // implement this, but it would cost more to do the double lookups
510  // in the table (one to check, and one to insert).
511  //
512  // More importantly, since we build the GID -> LID table in (a
513  // thread-) parallel (way), the order in which duplicate GIDs may
514  // get inserted is not defined. This would make the assignment of
515  // LID to GID nondeterministic.
516 
517  numLocalElements_ = numLocalElements;
518  indexBase_ = indexBase;
519 
520  minMyGID_ = indexBase_;
521  maxMyGID_ = indexBase_;
522 
523  // NOTE (mfh 27 May 2015): While finding the initial contiguous
524  // GID range requires looking at all the GIDs in the range,
525  // dismissing an interval of GIDs only requires looking at the
526  // first and last GIDs. Thus, we could do binary search backwards
527  // from the end in order to catch the common case of a contiguous
528  // interval followed by noncontiguous entries. On the other hand,
529  // we could just expose this case explicitly as yet another Map
530  // constructor, and avoid the trouble of detecting it.
531  if (numLocalElements_ > 0) {
532  // Find contiguous GID range, with the restriction that the
533  // beginning of the range starts with the first entry. While
534  // doing so, fill in the LID -> GID table.
535  View<GO*, LayoutLeft, device_type> lgMap ("lgMap", numLocalElements_);
536  auto lgMap_host = Kokkos::create_mirror_view (lgMap);
537 
538  // The input array entryList_host is already on host, so we
539  // don't need to take a host view of it.
540  // auto entryList_host = Kokkos::create_mirror_view (entryList);
541  // Kokkos::deep_copy (entryList_host, entryList);
542 
543  firstContiguousGID_ = entryList_host[0];
544  lastContiguousGID_ = firstContiguousGID_+1;
545 
546  // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
547  // anyway, so we have to look at them all. The logical way to
548  // find the first noncontiguous entry would thus be to "reduce,"
549  // where the local reduction result is whether entryList[i] + 1
550  // == entryList[i+1].
551 
552  lgMap_host[0] = firstContiguousGID_;
553  size_t i = 1;
554  for ( ; i < numLocalElements_; ++i) {
555  const GO curGid = entryList_host[i];
556  const LO curLid = as<LO> (i);
557 
558  if (lastContiguousGID_ != curGid) break;
559 
560  // Add the entry to the LID->GID table only after we know that
561  // the current GID is in the initial contiguous sequence, so
562  // that we don't repeat adding it in the first iteration of
563  // the loop below over the remaining noncontiguous GIDs.
564  lgMap_host[curLid] = curGid;
565  ++lastContiguousGID_;
566  }
567  --lastContiguousGID_;
568 
569  // [firstContiguousGID_, lastContigousGID_] is the initial
570  // sequence of contiguous GIDs. We can start the min and max
571  // GID using this range.
572  minMyGID_ = firstContiguousGID_;
573  maxMyGID_ = lastContiguousGID_;
574 
575  // Compute the GID -> LID lookup table, _not_ including the
576  // initial sequence of contiguous GIDs.
577  {
578  const std::pair<size_t, size_t> ncRange (i, entryList_host.extent (0));
579  auto nonContigGids_host = subview (entryList_host, ncRange);
580  TEUCHOS_TEST_FOR_EXCEPTION
581  (static_cast<size_t> (nonContigGids_host.extent (0)) !=
582  static_cast<size_t> (entryList_host.extent (0) - i),
583  std::logic_error, "Tpetra::Map noncontiguous constructor: "
584  "nonContigGids_host.extent(0) = "
585  << nonContigGids_host.extent (0)
586  << " != entryList_host.extent(0) - i = "
587  << (entryList_host.extent (0) - i) << " = "
588  << entryList_host.extent (0) << " - " << i
589  << ". Please report this bug to the Tpetra developers.");
590 
591  // FixedHashTable's constructor expects an owned device View,
592  // so we must deep-copy the subview of the input indices.
593  View<GO*, LayoutLeft, device_type>
594  nonContigGids ("nonContigGids", nonContigGids_host.size ());
595  Kokkos::deep_copy (nonContigGids, nonContigGids_host);
596 
597  glMap_ = global_to_local_table_type (nonContigGids,
598  firstContiguousGID_,
599  lastContiguousGID_,
600  static_cast<LO> (i));
601  }
602 
603  // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
604  // table above, we have to look at all the (noncontiguous) input
605  // indices anyway. Thus, why not have the constructor compute
606  // and return the min and max?
607 
608  for ( ; i < numLocalElements_; ++i) {
609  const GO curGid = entryList_host[i];
610  const LO curLid = as<LO> (i);
611  lgMap_host[curLid] = curGid; // LID -> GID table
612 
613  // While iterating through entryList, we compute its
614  // (process-local) min and max elements.
615  if (curGid < minMyGID_) {
616  minMyGID_ = curGid;
617  }
618  if (curGid > maxMyGID_) {
619  maxMyGID_ = curGid;
620  }
621  }
622 
623  // We filled lgMap on host above; now sync back to device.
624  Kokkos::deep_copy (lgMap, lgMap_host);
625 
626  // "Commit" the local-to-global lookup table we filled in above.
627  lgMap_ = lgMap;
628  // We've already created this, so use it.
629  lgMapHost_ = lgMap_host;
630  }
631  else {
632  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
633  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
634  // This insures tests for GIDs in the range
635  // [firstContiguousGID_, lastContiguousGID_] fail for processes
636  // with no local elements.
637  firstContiguousGID_ = indexBase_+1;
638  lastContiguousGID_ = indexBase_;
639  // glMap_ was default constructed, so it's already empty.
640  }
641 
642  // Compute the min and max of all processes' GIDs. If
643  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
644  // are both indexBase_. This is wrong, but fixing it would
645  // require either a fancy sparse all-reduce, or a custom reduction
646  // operator that ignores invalid values ("invalid" means
647  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
648  //
649  // Also, while we're at it, use the same all-reduce to figure out
650  // if the Map is distributed. "Distributed" means that there is
651  // at least one process with a number of local elements less than
652  // the number of global elements.
653  //
654  // We're computing the min and max of all processes' GIDs using a
655  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
656  // and y are signed). (This lets us combine the min and max into
657  // a single all-reduce.) If each process sets localDist=1 if its
658  // number of local elements is strictly less than the number of
659  // global elements, and localDist=0 otherwise, then a MAX
660  // all-reduce on localDist tells us if the Map is distributed (1
661  // if yes, 0 if no). Thus, we can append localDist onto the end
662  // of the data and get the global result from the all-reduce.
663  if (std::numeric_limits<GO>::is_signed) {
664  // Does my process NOT own all the elements?
665  const GO localDist =
666  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
667 
668  GO minMaxInput[3];
669  minMaxInput[0] = -minMyGID_;
670  minMaxInput[1] = maxMyGID_;
671  minMaxInput[2] = localDist;
672 
673  GO minMaxOutput[3];
674  minMaxOutput[0] = 0;
675  minMaxOutput[1] = 0;
676  minMaxOutput[2] = 0;
677  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
678  minAllGID_ = -minMaxOutput[0];
679  maxAllGID_ = minMaxOutput[1];
680  const GO globalDist = minMaxOutput[2];
681  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
682  }
683  else { // unsigned; use two reductions
684  // This is always correct, no matter the signedness of GO.
685  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
686  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
687  distributed_ = checkIsDist ();
688  }
689 
690  contiguous_ = false; // "Contiguous" is conservative.
691 
692  TEUCHOS_TEST_FOR_EXCEPTION(
693  minAllGID_ < indexBase_,
694  std::invalid_argument,
695  "Tpetra::Map constructor (noncontiguous): "
696  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
697  "less than the given indexBase = " << indexBase_ << ".");
698 
699  // Create the Directory on demand in getRemoteIndexList().
700  //setupDirectory ();
701  }
702 
703  template <class LocalOrdinal, class GlobalOrdinal, class Node>
705  Map (const global_size_t numGlobalElements,
706  const GlobalOrdinal indexList[],
707  const LocalOrdinal indexListSize,
708  const GlobalOrdinal indexBase,
709  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
710  comm_ (comm),
711  uniform_ (false),
712  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
713  {
715 
716  // Not quite sure if I trust all code to behave correctly if the
717  // pointer is nonnull but the array length is nonzero, so I'll
718  // make sure the raw pointer is null if the length is zero.
719  const GlobalOrdinal* const indsRaw = indexListSize == 0 ? NULL : indexList;
720  Kokkos::View<const GlobalOrdinal*,
721  Kokkos::LayoutLeft,
722  Kokkos::HostSpace,
723  Kokkos::MemoryUnmanaged> inds (indsRaw, indexListSize);
724  initWithNonownedHostIndexList (numGlobalElements, inds, indexBase, comm);
725  }
726 
727  template <class LocalOrdinal, class GlobalOrdinal, class Node>
729  Map (const global_size_t numGlobalElements,
730  const Teuchos::ArrayView<const GlobalOrdinal>& entryList,
731  const GlobalOrdinal indexBase,
732  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
733  const Teuchos::RCP<Node>& node) :
734  comm_ (comm),
735  uniform_ (false),
736  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
737  {
739 
740  const size_t numLclInds = static_cast<size_t> (entryList.size ());
741 
742  // Not quite sure if I trust both ArrayView and View to behave
743  // correctly if the pointer is nonnull but the array length is
744  // nonzero, so I'll make sure it's null if the length is zero.
745  const GlobalOrdinal* const indsRaw =
746  numLclInds == 0 ? NULL : entryList.getRawPtr ();
747  Kokkos::View<const GlobalOrdinal*,
748  Kokkos::LayoutLeft,
749  Kokkos::HostSpace,
750  Kokkos::MemoryUnmanaged> inds (indsRaw, numLclInds);
751  initWithNonownedHostIndexList (numGlobalElements, inds, indexBase, comm);
752  }
753 
754  template <class LocalOrdinal, class GlobalOrdinal, class Node>
756  Map (const global_size_t numGlobalElements,
757  const Kokkos::View<const GlobalOrdinal*, device_type>& entryList,
758  const GlobalOrdinal indexBase,
759  const Teuchos::RCP<const Teuchos::Comm<int> >& comm) :
760  comm_ (comm),
761  uniform_ (false),
762  directory_ (new Directory<LocalOrdinal, GlobalOrdinal, Node> ())
763  {
764  using Kokkos::LayoutLeft;
765  using Kokkos::subview;
766  using Kokkos::View;
767  using Teuchos::arcp;
768  using Teuchos::ArrayView;
769  using Teuchos::as;
770  using Teuchos::broadcast;
771  using Teuchos::outArg;
772  using Teuchos::ptr;
773  using Teuchos::REDUCE_MAX;
774  using Teuchos::REDUCE_MIN;
775  using Teuchos::REDUCE_SUM;
776  using Teuchos::reduceAll;
777  using Teuchos::typeName;
778  typedef LocalOrdinal LO;
779  typedef GlobalOrdinal GO;
780  typedef global_size_t GST;
781  const GST GSTI = Tpetra::Details::OrdinalTraits<GST>::invalid ();
782 
784 
785  // The user has specified the distribution of indices over the
786  // processes, via the input array of global indices on each
787  // process. The distribution is not necessarily contiguous or
788  // equally shared over the processes.
789 
790  // The length of the input array on this process is the number of
791  // local indices to associate with this process, even though the
792  // input array contains global indices. We assume that the number
793  // of local indices on a process can be stored in a size_t;
794  // numLocalElements_ is a size_t, so this variable and that should
795  // have the same type.
796  const size_t numLocalElements = static_cast<size_t> (entryList.size ());
797 
798  initialNonuniformDebugCheck (numGlobalElements, numLocalElements,
799  indexBase, comm);
800 
801  // NOTE (mfh 20 Feb 2013, 10 Oct 2016) In some sense, this global
802  // reduction is redundant, since the directory Map will have to do
803  // the same thing. Thus, we could do the scan and broadcast for
804  // the directory Map here, and give the computed offsets to the
805  // directory Map's constructor. However, a reduction costs less
806  // than a scan and broadcast, so this still saves time if users of
807  // this Map don't ever need the Directory (i.e., if they never
808  // call getRemoteIndexList on this Map).
809  if (numGlobalElements != GSTI) {
810  numGlobalElements_ = numGlobalElements; // Use the user's value.
811  }
812  else { // The user wants us to compute the sum.
813  reduceAll<int, GST> (*comm, REDUCE_SUM,
814  static_cast<GST> (numLocalElements),
815  outArg (numGlobalElements_));
816  }
817 
818  // mfh 20 Feb 2013: We've never quite done the right thing for
819  // duplicate GIDs here. Duplicate GIDs have always been counted
820  // distinctly in numLocalElements_, and thus should get a
821  // different LID. However, we've always used std::map or a hash
822  // table for the GID -> LID lookup table, so distinct GIDs always
823  // map to the same LID. Furthermore, the order of the input GID
824  // list matters, so it's not desirable to sort for determining
825  // uniqueness.
826  //
827  // I've chosen for now to write this code as if the input GID list
828  // contains no duplicates. If this is not desired, we could use
829  // the lookup table itself to determine uniqueness: If we haven't
830  // seen the GID before, it gets a new LID and it's added to the
831  // LID -> GID and GID -> LID tables. If we have seen the GID
832  // before, it doesn't get added to either table. I would
833  // implement this, but it would cost more to do the double lookups
834  // in the table (one to check, and one to insert).
835  //
836  // More importantly, since we build the GID -> LID table in (a
837  // thread-) parallel (way), the order in which duplicate GIDs may
838  // get inserted is not defined. This would make the assignment of
839  // LID to GID nondeterministic.
840 
841  numLocalElements_ = numLocalElements;
842  indexBase_ = indexBase;
843 
844  minMyGID_ = indexBase_;
845  maxMyGID_ = indexBase_;
846 
847  // NOTE (mfh 27 May 2015): While finding the initial contiguous
848  // GID range requires looking at all the GIDs in the range,
849  // dismissing an interval of GIDs only requires looking at the
850  // first and last GIDs. Thus, we could do binary search backwards
851  // from the end in order to catch the common case of a contiguous
852  // interval followed by noncontiguous entries. On the other hand,
853  // we could just expose this case explicitly as yet another Map
854  // constructor, and avoid the trouble of detecting it.
855  if (numLocalElements_ > 0) {
856  // Find contiguous GID range, with the restriction that the
857  // beginning of the range starts with the first entry. While
858  // doing so, fill in the LID -> GID table.
859  View<GO*, LayoutLeft, device_type> lgMap ("lgMap", numLocalElements_);
860  auto lgMap_host = Kokkos::create_mirror_view (lgMap);
861 
862  // Creating the mirror view is trivial, and the deep_copy is a
863  // no-op, if entryList is on host already.
864  auto entryList_host = Kokkos::create_mirror_view (entryList);
865  Kokkos::deep_copy (entryList_host, entryList);
866 
867  firstContiguousGID_ = entryList_host[0];
868  lastContiguousGID_ = firstContiguousGID_+1;
869 
870  // FIXME (mfh 23 Sep 2015) We need to copy the input GIDs
871  // anyway, so we have to look at them all. The logical way to
872  // find the first noncontiguous entry would thus be to "reduce,"
873  // where the local reduction result is whether entryList[i] + 1
874  // == entryList[i+1].
875 
876  lgMap_host[0] = firstContiguousGID_;
877  size_t i = 1;
878  for ( ; i < numLocalElements_; ++i) {
879  const GO curGid = entryList_host[i];
880  const LO curLid = as<LO> (i);
881 
882  if (lastContiguousGID_ != curGid) break;
883 
884  // Add the entry to the LID->GID table only after we know that
885  // the current GID is in the initial contiguous sequence, so
886  // that we don't repeat adding it in the first iteration of
887  // the loop below over the remaining noncontiguous GIDs.
888  lgMap_host[curLid] = curGid;
889  ++lastContiguousGID_;
890  }
891  --lastContiguousGID_;
892 
893  // [firstContiguousGID_, lastContigousGID_] is the initial
894  // sequence of contiguous GIDs. We can start the min and max
895  // GID using this range.
896  minMyGID_ = firstContiguousGID_;
897  maxMyGID_ = lastContiguousGID_;
898 
899  // Compute the GID -> LID lookup table, _not_ including the
900  // initial sequence of contiguous GIDs.
901  {
902  const std::pair<size_t, size_t> ncRange (i, entryList.extent (0));
903  auto nonContigGids = subview (entryList, ncRange);
904  TEUCHOS_TEST_FOR_EXCEPTION
905  (static_cast<size_t> (nonContigGids.extent (0)) !=
906  static_cast<size_t> (entryList.extent (0) - i),
907  std::logic_error, "Tpetra::Map noncontiguous constructor: "
908  "nonContigGids.extent(0) = "
909  << nonContigGids.extent (0)
910  << " != entryList.extent(0) - i = "
911  << (entryList.extent (0) - i) << " = "
912  << entryList.extent (0) << " - " << i
913  << ". Please report this bug to the Tpetra developers.");
914 
915  glMap_ = global_to_local_table_type (nonContigGids,
916  firstContiguousGID_,
917  lastContiguousGID_,
918  static_cast<LO> (i));
919  }
920 
921  // FIXME (mfh 10 Oct 2016) When we construct the global-to-local
922  // table above, we have to look at all the (noncontiguous) input
923  // indices anyway. Thus, why not have the constructor compute
924  // and return the min and max?
925 
926  for ( ; i < numLocalElements_; ++i) {
927  const GO curGid = entryList_host[i];
928  const LO curLid = static_cast<LO> (i);
929  lgMap_host[curLid] = curGid; // LID -> GID table
930 
931  // While iterating through entryList, we compute its
932  // (process-local) min and max elements.
933  if (curGid < minMyGID_) {
934  minMyGID_ = curGid;
935  }
936  if (curGid > maxMyGID_) {
937  maxMyGID_ = curGid;
938  }
939  }
940 
941  // We filled lgMap on host above; now sync back to device.
942  Kokkos::deep_copy (lgMap, lgMap_host);
943 
944  // "Commit" the local-to-global lookup table we filled in above.
945  lgMap_ = lgMap;
946  // We've already created this, so use it.
947  lgMapHost_ = lgMap_host;
948  }
949  else {
950  minMyGID_ = std::numeric_limits<GlobalOrdinal>::max();
951  maxMyGID_ = std::numeric_limits<GlobalOrdinal>::lowest();
952  // This insures tests for GIDs in the range
953  // [firstContiguousGID_, lastContiguousGID_] fail for processes
954  // with no local elements.
955  firstContiguousGID_ = indexBase_+1;
956  lastContiguousGID_ = indexBase_;
957  // glMap_ was default constructed, so it's already empty.
958  }
959 
960  // Compute the min and max of all processes' GIDs. If
961  // numLocalElements_ == 0 on this process, minMyGID_ and maxMyGID_
962  // are both indexBase_. This is wrong, but fixing it would
963  // require either a fancy sparse all-reduce, or a custom reduction
964  // operator that ignores invalid values ("invalid" means
965  // Tpetra::Details::OrdinalTraits<GO>::invalid()).
966  //
967  // Also, while we're at it, use the same all-reduce to figure out
968  // if the Map is distributed. "Distributed" means that there is
969  // at least one process with a number of local elements less than
970  // the number of global elements.
971  //
972  // We're computing the min and max of all processes' GIDs using a
973  // single MAX all-reduce, because min(x,y) = -max(-x,-y) (when x
974  // and y are signed). (This lets us combine the min and max into
975  // a single all-reduce.) If each process sets localDist=1 if its
976  // number of local elements is strictly less than the number of
977  // global elements, and localDist=0 otherwise, then a MAX
978  // all-reduce on localDist tells us if the Map is distributed (1
979  // if yes, 0 if no). Thus, we can append localDist onto the end
980  // of the data and get the global result from the all-reduce.
981  if (std::numeric_limits<GO>::is_signed) {
982  // Does my process NOT own all the elements?
983  const GO localDist =
984  (as<GST> (numLocalElements_) < numGlobalElements_) ? 1 : 0;
985 
986  GO minMaxInput[3];
987  minMaxInput[0] = -minMyGID_;
988  minMaxInput[1] = maxMyGID_;
989  minMaxInput[2] = localDist;
990 
991  GO minMaxOutput[3];
992  minMaxOutput[0] = 0;
993  minMaxOutput[1] = 0;
994  minMaxOutput[2] = 0;
995  reduceAll<int, GO> (*comm, REDUCE_MAX, 3, minMaxInput, minMaxOutput);
996  minAllGID_ = -minMaxOutput[0];
997  maxAllGID_ = minMaxOutput[1];
998  const GO globalDist = minMaxOutput[2];
999  distributed_ = (comm_->getSize () > 1 && globalDist == 1);
1000  }
1001  else { // unsigned; use two reductions
1002  // This is always correct, no matter the signedness of GO.
1003  reduceAll<int, GO> (*comm_, REDUCE_MIN, minMyGID_, outArg (minAllGID_));
1004  reduceAll<int, GO> (*comm_, REDUCE_MAX, maxMyGID_, outArg (maxAllGID_));
1005  distributed_ = checkIsDist ();
1006  }
1007 
1008  contiguous_ = false; // "Contiguous" is conservative.
1009 
1010  TEUCHOS_TEST_FOR_EXCEPTION(
1011  minAllGID_ < indexBase_,
1012  std::invalid_argument,
1013  "Tpetra::Map constructor (noncontiguous): "
1014  "Minimum global ID = " << minAllGID_ << " over all process(es) is "
1015  "less than the given indexBase = " << indexBase_ << ".");
1016 
1017  // Create the Directory on demand in getRemoteIndexList().
1018  //setupDirectory ();
1019  }
1020 
1021 
1022  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1024  {
1025  if (! Kokkos::is_initialized ()) {
1026  std::ostringstream os;
1027  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1028  "Kokkos::finalize() has been called. This is user error! There are "
1029  "two likely causes: " << std::endl <<
1030  " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1031  << std::endl <<
1032  " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1033  "of a Tpetra::Map) at the same scope in main() as Kokkos::finalize() "
1034  "or Tpetra::finalize()." << std::endl << std::endl <<
1035  "Don't do either of these! Please refer to GitHib Issue #2372."
1036  << std::endl;
1037  ::Tpetra::Details::printOnce (std::cerr, os.str (),
1038  this->getComm ().getRawPtr ());
1039  }
1040  else {
1041  using ::Tpetra::Details::mpiIsInitialized;
1042  using ::Tpetra::Details::mpiIsFinalized;
1043  using ::Tpetra::Details::teuchosCommIsAnMpiComm;
1044 
1045  Teuchos::RCP<const Teuchos::Comm<int> > comm = this->getComm ();
1046  if (! comm.is_null () && teuchosCommIsAnMpiComm (*comm) &&
1047  mpiIsInitialized () && mpiIsFinalized ()) {
1048  // Tpetra itself does not require MPI, even if building with
1049  // MPI. It is legal to create Tpetra objects that do not use
1050  // MPI, even in an MPI program. However, calling Tpetra stuff
1051  // after MPI_Finalize() has been called is a bad idea, since
1052  // some Tpetra defaults may use MPI if available.
1053  std::ostringstream os;
1054  os << "WARNING: Tpetra::Map destructor (~Map()) is being called after "
1055  "MPI_Finalize() has been called. This is user error! There are "
1056  "two likely causes: " << std::endl <<
1057  " 1. You have a static Tpetra::Map (or RCP or shared_ptr of a Map)"
1058  << std::endl <<
1059  " 2. You declare and construct a Tpetra::Map (or RCP or shared_ptr "
1060  "of a Tpetra::Map) at the same scope in main() as MPI_finalize() or "
1061  "Tpetra::finalize()." << std::endl << std::endl <<
1062  "Don't do either of these! Please refer to GitHib Issue #2372."
1063  << std::endl;
1064  ::Tpetra::Details::printOnce (std::cerr, os.str (), comm.getRawPtr ());
1065  }
1066  }
1067  // mfh 20 Mar 2018: We can't check Tpetra::isInitialized() yet,
1068  // because Tpetra does not yet require Tpetra::initialize /
1069  // Tpetra::finalize.
1070  }
1071 
1072 
1073  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1074  bool
1076  {
1077  TEUCHOS_TEST_FOR_EXCEPTION(
1078  getComm ().is_null (), std::logic_error, "Tpetra::Map::isOneToOne: "
1079  "getComm() returns null. Please report this bug to the Tpetra "
1080  "developers.");
1081 
1082  // This is a collective operation, if it hasn't been called before.
1083  setupDirectory ();
1084  return directory_->isOneToOne (*this);
1085  }
1086 
1087 
1088  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1089  LocalOrdinal
1091  getLocalElement (GlobalOrdinal globalIndex) const
1092  {
1093  if (isContiguous ()) {
1094  if (globalIndex < getMinGlobalIndex () ||
1095  globalIndex > getMaxGlobalIndex ()) {
1096  return Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1097  }
1098  return static_cast<LocalOrdinal> (globalIndex - getMinGlobalIndex ());
1099  }
1100  else if (globalIndex >= firstContiguousGID_ &&
1101  globalIndex <= lastContiguousGID_) {
1102  return static_cast<LocalOrdinal> (globalIndex - firstContiguousGID_);
1103  }
1104  else {
1105  // If the given global index is not in the table, this returns
1106  // the same value as OrdinalTraits<LocalOrdinal>::invalid().
1107  return glMap_.get (globalIndex);
1108  }
1109  }
1110 
1111  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1112  GlobalOrdinal
1114  getGlobalElement (LocalOrdinal localIndex) const
1115  {
1116  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1117  return Tpetra::Details::OrdinalTraits<GlobalOrdinal>::invalid ();
1118  }
1119  if (isContiguous ()) {
1120  return getMinGlobalIndex () + localIndex;
1121  }
1122  else {
1123  // This is a host Kokkos::View access, with no RCP or ArrayRCP
1124  // involvement. As a result, it is thread safe.
1125  //
1126  // lgMapHost_ is a host pointer; this does NOT assume UVM.
1127  return lgMapHost_[localIndex];
1128  }
1129  }
1130 
1131  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1132  bool
1134  isNodeLocalElement (LocalOrdinal localIndex) const
1135  {
1136  if (localIndex < getMinLocalIndex () || localIndex > getMaxLocalIndex ()) {
1137  return false;
1138  } else {
1139  return true;
1140  }
1141  }
1142 
1143  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1144  bool
1146  isNodeGlobalElement (GlobalOrdinal globalIndex) const {
1147  return this->getLocalElement (globalIndex) !=
1148  Tpetra::Details::OrdinalTraits<LocalOrdinal>::invalid ();
1149  }
1150 
1151  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1153  return uniform_;
1154  }
1155 
1156  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1158  return contiguous_;
1159  }
1160 
1161 
1162  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1166  {
1167  return local_map_type (glMap_, lgMap_, getIndexBase (),
1168  getMinGlobalIndex (), getMaxGlobalIndex (),
1169  firstContiguousGID_, lastContiguousGID_,
1170  getNodeNumElements (), isContiguous ());
1171  }
1172 
1173  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1174  bool
1177  {
1178  using Teuchos::outArg;
1179  using Teuchos::REDUCE_MIN;
1180  using Teuchos::reduceAll;
1181  //
1182  // Tests that avoid the Boolean all-reduce below by using
1183  // globally consistent quantities.
1184  //
1185  if (this == &map) {
1186  // Pointer equality on one process always implies pointer
1187  // equality on all processes, since Map is immutable.
1188  return true;
1189  }
1190  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1191  // The two communicators have different numbers of processes.
1192  // It's not correct to call isCompatible() in that case. This
1193  // may result in the all-reduce hanging below.
1194  return false;
1195  }
1196  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1197  // Two Maps are definitely NOT compatible if they have different
1198  // global numbers of indices.
1199  return false;
1200  }
1201  else if (isContiguous () && isUniform () &&
1202  map.isContiguous () && map.isUniform ()) {
1203  // Contiguous uniform Maps with the same number of processes in
1204  // their communicators, and with the same global numbers of
1205  // indices, are always compatible.
1206  return true;
1207  }
1208  else if (! isContiguous () && ! map.isContiguous () &&
1209  lgMap_.extent (0) != 0 && map.lgMap_.extent (0) != 0 &&
1210  lgMap_.data () == map.lgMap_.data ()) {
1211  // Noncontiguous Maps whose global index lists are nonempty and
1212  // have the same pointer must be the same (and therefore
1213  // contiguous).
1214  //
1215  // Nonempty is important. For example, consider a communicator
1216  // with two processes, and two Maps that share this
1217  // communicator, with zero global indices on the first process,
1218  // and different nonzero numbers of global indices on the second
1219  // process. In that case, on the first process, the pointers
1220  // would both be NULL.
1221  return true;
1222  }
1223 
1224  TEUCHOS_TEST_FOR_EXCEPTION(
1225  getGlobalNumElements () != map.getGlobalNumElements (), std::logic_error,
1226  "Tpetra::Map::isCompatible: There's a bug in this method. We've already "
1227  "checked that this condition is true above, but it's false here. "
1228  "Please report this bug to the Tpetra developers.");
1229 
1230  // Do both Maps have the same number of indices on each process?
1231  const int locallyCompat =
1232  (getNodeNumElements () == map.getNodeNumElements ()) ? 1 : 0;
1233 
1234  int globallyCompat = 0;
1235  reduceAll<int, int> (*comm_, REDUCE_MIN, locallyCompat, outArg (globallyCompat));
1236  return (globallyCompat == 1);
1237  }
1238 
1239  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1240  bool
1243  {
1244  using Teuchos::ArrayView;
1245  typedef GlobalOrdinal GO;
1246  typedef typename ArrayView<const GO>::size_type size_type;
1247 
1248  // If both Maps are contiguous, we can compare their GID ranges
1249  // easily by looking at the min and max GID on this process.
1250  // Otherwise, we'll compare their GID lists. If only one Map is
1251  // contiguous, then we only have to call getNodeElementList() on
1252  // the noncontiguous Map. (It's best to avoid calling it on a
1253  // contiguous Map, since it results in unnecessary storage that
1254  // persists for the lifetime of the Map.)
1255 
1256  if (this == &map) {
1257  // Pointer equality on one process always implies pointer
1258  // equality on all processes, since Map is immutable.
1259  return true;
1260  }
1261  else if (getNodeNumElements () != map.getNodeNumElements ()) {
1262  return false;
1263  }
1264  else if (getMinGlobalIndex () != map.getMinGlobalIndex () ||
1265  getMaxGlobalIndex () != map.getMaxGlobalIndex ()) {
1266  return false;
1267  }
1268  else {
1269  if (isContiguous ()) {
1270  if (map.isContiguous ()) {
1271  return true; // min and max match, so the ranges match.
1272  }
1273  else { // *this is contiguous, but map is not contiguous
1274  TEUCHOS_TEST_FOR_EXCEPTION(
1275  ! this->isContiguous () || map.isContiguous (), std::logic_error,
1276  "Tpetra::Map::locallySameAs: BUG");
1277  ArrayView<const GO> rhsElts = map.getNodeElementList ();
1278  const GO minLhsGid = this->getMinGlobalIndex ();
1279  const size_type numRhsElts = rhsElts.size ();
1280  for (size_type k = 0; k < numRhsElts; ++k) {
1281  const GO curLhsGid = minLhsGid + static_cast<GO> (k);
1282  if (curLhsGid != rhsElts[k]) {
1283  return false; // stop on first mismatch
1284  }
1285  }
1286  return true;
1287  }
1288  }
1289  else if (map.isContiguous ()) { // *this is not contiguous, but map is
1290  TEUCHOS_TEST_FOR_EXCEPTION(
1291  this->isContiguous () || ! map.isContiguous (), std::logic_error,
1292  "Tpetra::Map::locallySameAs: BUG");
1293  ArrayView<const GO> lhsElts = this->getNodeElementList ();
1294  const GO minRhsGid = map.getMinGlobalIndex ();
1295  const size_type numLhsElts = lhsElts.size ();
1296  for (size_type k = 0; k < numLhsElts; ++k) {
1297  const GO curRhsGid = minRhsGid + static_cast<GO> (k);
1298  if (curRhsGid != lhsElts[k]) {
1299  return false; // stop on first mismatch
1300  }
1301  }
1302  return true;
1303  }
1304  else if (this->lgMap_.data () == map.lgMap_.data ()) {
1305  // Pointers to LID->GID "map" (actually just an array) are the
1306  // same, and the number of GIDs are the same.
1307  return this->getNodeNumElements () == map.getNodeNumElements ();
1308  }
1309  else { // we actually have to compare the GIDs
1310  if (this->getNodeNumElements () != map.getNodeNumElements ()) {
1311  return false; // We already checked above, but check just in case
1312  }
1313  else {
1314  ArrayView<const GO> lhsElts = getNodeElementList ();
1315  ArrayView<const GO> rhsElts = map.getNodeElementList ();
1316 
1317  // std::equal requires that the latter range is as large as
1318  // the former. We know the ranges have equal length, because
1319  // they have the same number of local entries.
1320  return std::equal (lhsElts.begin (), lhsElts.end (), rhsElts.begin ());
1321  }
1322  }
1323  }
1324  }
1325 
1326  template <class LocalOrdinal,class GlobalOrdinal, class Node>
1327  bool
1330  {
1331  if (this == &map)
1332  return true;
1333 
1334  // We are going to check if lmap1 is fitted into lmap2
1335  auto lmap1 = map.getLocalMap();
1336  auto lmap2 = this->getLocalMap();
1337 
1338  auto numLocalElements1 = lmap1.getNodeNumElements();
1339  auto numLocalElements2 = lmap2.getNodeNumElements();
1340 
1341  if (numLocalElements1 > numLocalElements2) {
1342  // There are more indices in the first map on this process than in second map.
1343  return false;
1344  }
1345 
1346  if (lmap1.isContiguous () && lmap2.isContiguous ()) {
1347  // When both Maps are contiguous, just check the interval inclusion.
1348  return ((lmap1.getMinGlobalIndex () == lmap2.getMinGlobalIndex ()) &&
1349  (lmap1.getMaxGlobalIndex () <= lmap2.getMaxGlobalIndex ()));
1350  }
1351 
1352  if (lmap1.getMinGlobalIndex () < lmap2.getMinGlobalIndex () ||
1353  lmap1.getMaxGlobalIndex () > lmap2.getMaxGlobalIndex ()) {
1354  // The second map does not include the first map bounds, and thus some of
1355  // the first map global indices are not in the second map.
1356  return false;
1357  }
1358 
1359  typedef Kokkos::RangePolicy<LocalOrdinal, typename Node::execution_space> range_type;
1360 
1361  // Check all elements.
1362  LocalOrdinal numDiff = 0;
1363  Kokkos::parallel_reduce("isLocallyFitted", range_type(0, numLocalElements1),
1364  KOKKOS_LAMBDA(const LocalOrdinal i, LocalOrdinal& diff) {
1365  diff += (lmap1.getGlobalElement(i) != lmap2.getGlobalElement(i));
1366  }, numDiff);
1367 
1368  return (numDiff == 0);
1369  }
1370 
1371  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1372  bool
1375  {
1376  using Teuchos::outArg;
1377  using Teuchos::REDUCE_MIN;
1378  using Teuchos::reduceAll;
1379  //
1380  // Tests that avoid the Boolean all-reduce below by using
1381  // globally consistent quantities.
1382  //
1383  if (this == &map) {
1384  // Pointer equality on one process always implies pointer
1385  // equality on all processes, since Map is immutable.
1386  return true;
1387  }
1388  else if (getComm ()->getSize () != map.getComm ()->getSize ()) {
1389  // The two communicators have different numbers of processes.
1390  // It's not correct to call isSameAs() in that case. This
1391  // may result in the all-reduce hanging below.
1392  return false;
1393  }
1394  else if (getGlobalNumElements () != map.getGlobalNumElements ()) {
1395  // Two Maps are definitely NOT the same if they have different
1396  // global numbers of indices.
1397  return false;
1398  }
1399  else if (getMinAllGlobalIndex () != map.getMinAllGlobalIndex () ||
1400  getMaxAllGlobalIndex () != map.getMaxAllGlobalIndex () ||
1401  getIndexBase () != map.getIndexBase ()) {
1402  // If the global min or max global index doesn't match, or if
1403  // the index base doesn't match, then the Maps aren't the same.
1404  return false;
1405  }
1406  else if (isDistributed () != map.isDistributed ()) {
1407  // One Map is distributed and the other is not, which means that
1408  // the Maps aren't the same.
1409  return false;
1410  }
1411  else if (isContiguous () && isUniform () &&
1412  map.isContiguous () && map.isUniform ()) {
1413  // Contiguous uniform Maps with the same number of processes in
1414  // their communicators, with the same global numbers of indices,
1415  // and with matching index bases and ranges, must be the same.
1416  return true;
1417  }
1418 
1419  // The two communicators must have the same number of processes,
1420  // with process ranks occurring in the same order. This uses
1421  // MPI_COMM_COMPARE. The MPI 3.1 standard (Section 6.4) says:
1422  // "Operations that access communicators are local and their
1423  // execution does not require interprocess communication."
1424  // However, just to be sure, I'll put this call after the above
1425  // tests that don't communicate.
1426  if (! ::Tpetra::Details::congruent (*comm_, * (map.getComm ()))) {
1427  return false;
1428  }
1429 
1430  // If we get this far, we need to check local properties and then
1431  // communicate local sameness across all processes.
1432  const int isSame_lcl = locallySameAs (map) ? 1 : 0;
1433 
1434  // Return true if and only if all processes report local sameness.
1435  int isSame_gbl = 0;
1436  reduceAll<int, int> (*comm_, REDUCE_MIN, isSame_lcl, outArg (isSame_gbl));
1437  return isSame_gbl == 1;
1438  }
1439 
1440  namespace { // (anonymous)
1441  template <class LO, class GO, class DT>
1442  class FillLgMap {
1443  public:
1444  FillLgMap (const Kokkos::View<GO*, DT>& lgMap,
1445  const GO startGid) :
1446  lgMap_ (lgMap), startGid_ (startGid)
1447  {
1448  Kokkos::RangePolicy<LO, typename DT::execution_space>
1449  range (static_cast<LO> (0), static_cast<LO> (lgMap.size ()));
1450  Kokkos::parallel_for (range, *this);
1451  }
1452 
1453  KOKKOS_INLINE_FUNCTION void operator () (const LO& lid) const {
1454  lgMap_(lid) = startGid_ + static_cast<GO> (lid);
1455  }
1456 
1457  private:
1458  const Kokkos::View<GO*, DT> lgMap_;
1459  const GO startGid_;
1460  };
1461 
1462  } // namespace (anonymous)
1463 
1464 
1465  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1466  typename Map<LocalOrdinal,GlobalOrdinal,Node>::global_indices_array_type
1468  {
1469  typedef LocalOrdinal LO;
1470  typedef GlobalOrdinal GO;
1471  typedef device_type DT;
1472 
1473  typedef decltype (lgMap_) const_lg_view_type;
1474  typedef typename const_lg_view_type::non_const_type lg_view_type;
1475 
1476  // If the local-to-global mapping doesn't exist yet, and if we
1477  // have local entries, then create and fill the local-to-global
1478  // mapping.
1479  const bool needToCreateLocalToGlobalMapping =
1480  lgMap_.extent (0) == 0 && numLocalElements_ > 0;
1481 
1482  if (needToCreateLocalToGlobalMapping) {
1483 #ifdef HAVE_TEUCHOS_DEBUG
1484  // The local-to-global mapping should have been set up already
1485  // for a noncontiguous map.
1486  TEUCHOS_TEST_FOR_EXCEPTION( ! isContiguous(), std::logic_error,
1487  "Tpetra::Map::getNodeElementList: The local-to-global mapping (lgMap_) "
1488  "should have been set up already for a noncontiguous Map. Please report"
1489  " this bug to the Tpetra team.");
1490 #endif // HAVE_TEUCHOS_DEBUG
1491 
1492  const LO numElts = static_cast<LO> (getNodeNumElements ());
1493 
1494  lg_view_type lgMap ("lgMap", numElts);
1495  FillLgMap<LO, GO, DT> fillIt (lgMap, minMyGID_);
1496 
1497  auto lgMapHost = Kokkos::create_mirror_view (lgMap);
1498  Kokkos::deep_copy (lgMapHost, lgMap);
1499 
1500  // "Commit" the local-to-global lookup table we filled in above.
1501  lgMap_ = lgMap;
1502  lgMapHost_ = lgMapHost;
1503 
1504  // lgMapHost_ may be a UVM View, so insert a fence to ensure
1505  // coherent host access. We only need to do this once, because
1506  // lgMapHost_ is immutable after initialization.
1507  execution_space::fence ();
1508  }
1509 
1510  return lgMap_;
1511  }
1512 
1513  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1514  Teuchos::ArrayView<const GlobalOrdinal>
1516  {
1517  typedef GlobalOrdinal GO; // convenient abbreviation
1518 
1519  // If the local-to-global mapping doesn't exist yet, and if we
1520  // have local entries, then create and fill the local-to-global
1521  // mapping.
1522  (void) this->getMyGlobalIndices ();
1523 
1524  // This does NOT assume UVM; lgMapHost_ is a host pointer.
1525  const GO* lgMapHostRawPtr = lgMapHost_.data ();
1526  // The third argument forces ArrayView not to try to track memory
1527  // in a debug build. We have to use it because the memory does
1528  // not belong to a Teuchos memory management class.
1529  return Teuchos::ArrayView<const GO> (lgMapHostRawPtr,
1530  lgMapHost_.extent (0),
1531  Teuchos::RCP_DISABLE_NODE_LOOKUP);
1532  }
1533 
1534  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1536  return distributed_;
1537  }
1538 
1539  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1541  using Teuchos::TypeNameTraits;
1542  std::ostringstream os;
1543 
1544  os << "Tpetra::Map: {"
1545  << "LocalOrdinalType: " << TypeNameTraits<LocalOrdinal>::name ()
1546  << ", GlobalOrdinalType: " << TypeNameTraits<GlobalOrdinal>::name ()
1547  << ", NodeType: " << TypeNameTraits<Node>::name ();
1548  if (this->getObjectLabel () != "") {
1549  os << ", Label: \"" << this->getObjectLabel () << "\"";
1550  }
1551  os << ", Global number of entries: " << getGlobalNumElements ()
1552  << ", Number of processes: " << getComm ()->getSize ()
1553  << ", Uniform: " << (isUniform () ? "true" : "false")
1554  << ", Contiguous: " << (isContiguous () ? "true" : "false")
1555  << ", Distributed: " << (isDistributed () ? "true" : "false")
1556  << "}";
1557  return os.str ();
1558  }
1559 
1564  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1565  std::string
1567  localDescribeToString (const Teuchos::EVerbosityLevel vl) const
1568  {
1569  typedef LocalOrdinal LO;
1570  using std::endl;
1571 
1572  // This preserves current behavior of Map.
1573  if (vl < Teuchos::VERB_HIGH) {
1574  return std::string ();
1575  }
1576  auto outStringP = Teuchos::rcp (new std::ostringstream ());
1577  Teuchos::RCP<Teuchos::FancyOStream> outp =
1578  Teuchos::getFancyOStream (outStringP);
1579  Teuchos::FancyOStream& out = *outp;
1580 
1581  auto comm = this->getComm ();
1582  const int myRank = comm->getRank ();
1583  const int numProcs = comm->getSize ();
1584  out << "Process " << myRank << " of " << numProcs << ":" << endl;
1585  Teuchos::OSTab tab1 (out);
1586 
1587  const LO numEnt = static_cast<LO> (this->getNodeNumElements ());
1588  out << "My number of entries: " << numEnt << endl
1589  << "My minimum global index: " << this->getMinGlobalIndex () << endl
1590  << "My maximum global index: " << this->getMaxGlobalIndex () << endl;
1591 
1592  if (vl == Teuchos::VERB_EXTREME) {
1593  out << "My global indices: [";
1594  const LO minLclInd = this->getMinLocalIndex ();
1595  for (LO k = 0; k < numEnt; ++k) {
1596  out << minLclInd + this->getGlobalElement (k);
1597  if (k + 1 < numEnt) {
1598  out << ", ";
1599  }
1600  }
1601  out << "]" << endl;
1602  }
1603 
1604  out.flush (); // make sure the ostringstream got everything
1605  return outStringP->str ();
1606  }
1607 
1608  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1609  void
1611  describe (Teuchos::FancyOStream &out,
1612  const Teuchos::EVerbosityLevel verbLevel) const
1613  {
1614  using Teuchos::TypeNameTraits;
1615  using Teuchos::VERB_DEFAULT;
1616  using Teuchos::VERB_NONE;
1617  using Teuchos::VERB_LOW;
1618  using Teuchos::VERB_HIGH;
1619  using std::endl;
1620  typedef LocalOrdinal LO;
1621  typedef GlobalOrdinal GO;
1622  const Teuchos::EVerbosityLevel vl =
1623  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
1624 
1625  if (vl == VERB_NONE) {
1626  return; // don't print anything
1627  }
1628  // If this Map's Comm is null, then the Map does not participate
1629  // in collective operations with the other processes. In that
1630  // case, it is not even legal to call this method. The reasonable
1631  // thing to do in that case is nothing.
1632  auto comm = this->getComm ();
1633  if (comm.is_null ()) {
1634  return;
1635  }
1636  const int myRank = comm->getRank ();
1637  const int numProcs = comm->getSize ();
1638 
1639  // Only Process 0 should touch the output stream, but this method
1640  // in general may need to do communication. Thus, we may need to
1641  // preserve the current tab level across multiple "if (myRank ==
1642  // 0) { ... }" inner scopes. This is why we sometimes create
1643  // OSTab instances by pointer, instead of by value. We only need
1644  // to create them by pointer if the tab level must persist through
1645  // multiple inner scopes.
1646  Teuchos::RCP<Teuchos::OSTab> tab0, tab1;
1647 
1648  if (myRank == 0) {
1649  // At every verbosity level but VERB_NONE, Process 0 prints.
1650  // By convention, describe() always begins with a tab before
1651  // printing.
1652  tab0 = Teuchos::rcp (new Teuchos::OSTab (out));
1653  out << "\"Tpetra::Map\":" << endl;
1654  tab1 = Teuchos::rcp (new Teuchos::OSTab (out));
1655  {
1656  out << "Template parameters:" << endl;
1657  Teuchos::OSTab tab2 (out);
1658  out << "LocalOrdinal: " << TypeNameTraits<LO>::name () << endl
1659  << "GlobalOrdinal: " << TypeNameTraits<GO>::name () << endl
1660  << "Node: " << TypeNameTraits<Node>::name () << endl;
1661  }
1662  const std::string label = this->getObjectLabel ();
1663  if (label != "") {
1664  out << "Label: \"" << label << "\"" << endl;
1665  }
1666  out << "Global number of entries: " << getGlobalNumElements () << endl
1667  << "Minimum global index: " << getMinAllGlobalIndex () << endl
1668  << "Maximum global index: " << getMaxAllGlobalIndex () << endl
1669  << "Index base: " << getIndexBase () << endl
1670  << "Number of processes: " << numProcs << endl
1671  << "Uniform: " << (isUniform () ? "true" : "false") << endl
1672  << "Contiguous: " << (isContiguous () ? "true" : "false") << endl
1673  << "Distributed: " << (isDistributed () ? "true" : "false") << endl;
1674  }
1675 
1676  // This is collective over the Map's communicator.
1677  if (vl >= VERB_HIGH) { // VERB_HIGH or VERB_EXTREME
1678  const std::string lclStr = this->localDescribeToString (vl);
1679  Tpetra::Details::gathervPrint (out, lclStr, *comm);
1680  }
1681  }
1682 
1683  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1684  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
1686  replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const
1687  {
1688  using Teuchos::RCP;
1689  using Teuchos::rcp;
1690  typedef global_size_t GST;
1691  typedef LocalOrdinal LO;
1692  typedef GlobalOrdinal GO;
1693  typedef Map<LO, GO, Node> map_type;
1694 
1695  // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
1696  // the Map by calling its ordinary public constructor, using the
1697  // original Map's data. This only involves O(1) all-reduces over
1698  // the new communicator, which in the common case only includes a
1699  // small number of processes.
1700 
1701  // Create the Map to return.
1702  if (newComm.is_null () || newComm->getSize () < 1) {
1703  return Teuchos::null; // my process does not participate in the new Map
1704  }
1705  else if (newComm->getSize () == 1) {
1706  // The case where the new communicator has only one process is
1707  // easy. We don't have to communicate to get all the
1708  // information we need. Use the default comm to create the new
1709  // Map, then fill in all the fields directly.
1710  RCP<map_type> newMap (new map_type ());
1711 
1712  newMap->comm_ = newComm;
1713  // mfh 07 Oct 2016: Preserve original behavior, even though the
1714  // original index base may no longer be the globally min global
1715  // index. See #616 for why this doesn't matter so much anymore.
1716  newMap->indexBase_ = this->indexBase_;
1717  newMap->numGlobalElements_ = this->numLocalElements_;
1718  newMap->numLocalElements_ = this->numLocalElements_;
1719  newMap->minMyGID_ = this->minMyGID_;
1720  newMap->maxMyGID_ = this->maxMyGID_;
1721  newMap->minAllGID_ = this->minMyGID_;
1722  newMap->maxAllGID_ = this->maxMyGID_;
1723  newMap->firstContiguousGID_ = this->firstContiguousGID_;
1724  newMap->lastContiguousGID_ = this->lastContiguousGID_;
1725  // Since the new communicator has only one process, neither
1726  // uniformity nor contiguity have changed.
1727  newMap->uniform_ = this->uniform_;
1728  newMap->contiguous_ = this->contiguous_;
1729  // The new communicator only has one process, so the new Map is
1730  // not distributed.
1731  newMap->distributed_ = false;
1732  newMap->lgMap_ = this->lgMap_;
1733  newMap->lgMapHost_ = this->lgMapHost_;
1734  newMap->glMap_ = this->glMap_;
1735  // It's OK not to initialize the new Map's Directory.
1736  // This is initialized lazily, on first call to getRemoteIndexList.
1737 
1738  return newMap;
1739  }
1740  else { // newComm->getSize() != 1
1741  // Even if the original Map is contiguous, the new Map might not
1742  // be, especially if the excluded processes have ranks != 0 or
1743  // newComm->getSize()-1. The common case for this method is to
1744  // exclude many (possibly even all but one) processes, so it
1745  // likely doesn't pay to do the global communication (over the
1746  // original communicator) to figure out whether we can optimize
1747  // the result Map. Thus, we just set up the result Map as
1748  // noncontiguous.
1749  //
1750  // TODO (mfh 07 Oct 2016) We don't actually need to reconstruct
1751  // the global-to-local table, etc. Optimize this code path to
1752  // avoid unnecessary local work.
1753 
1754  // Make Map (re)compute the global number of elements.
1755  const GST RECOMPUTE = Tpetra::Details::OrdinalTraits<GST>::invalid ();
1756  // TODO (mfh 07 Oct 2016) If we use any Map constructor, we have
1757  // to use the noncontiguous Map constructor, since the new Map
1758  // might not be contiguous. Even if the old Map was contiguous,
1759  // some process in the "middle" might have been excluded. If we
1760  // want to avoid local work, we either have to do the setup by
1761  // hand, or write a new Map constructor.
1762 #if 1
1763  // The disabled code here throws the following exception in
1764  // Map's replaceCommWithSubset test:
1765  //
1766  // Throw test that evaluated to true: static_cast<unsigned long long> (numKeys) > static_cast<unsigned long long> (::Kokkos::Details::ArithTraits<ValueType>::max ())
1767  // 10:
1768  // 10: Tpetra::Details::FixedHashTable: The number of keys -3 is greater than the maximum representable ValueType value 2147483647. This means that it is not possible to use this constructor.
1769  // 10: Process 3: origComm->replaceCommWithSubset(subsetComm) threw an exception: /scratch/prj/Trilinos/Trilinos/packages/tpetra/core/src/Tpetra_Details_FixedHashTable_def.hpp:1044:
1770 
1771  auto lgMap = this->getMyGlobalIndices ();
1772  typedef typename std::decay<decltype (lgMap.extent (0)) >::type size_type;
1773  const size_type lclNumInds =
1774  static_cast<size_type> (this->getNodeNumElements ());
1775  using Teuchos::TypeNameTraits;
1776  TEUCHOS_TEST_FOR_EXCEPTION
1777  (lgMap.extent (0) != lclNumInds, std::logic_error,
1778  "Tpetra::Map::replaceCommWithSubset: Result of getMyGlobalIndices() "
1779  "has length " << lgMap.extent (0) << " (of type " <<
1780  TypeNameTraits<size_type>::name () << ") != this->getNodeNumElements()"
1781  " = " << this->getNodeNumElements () << ". The latter, upon being "
1782  "cast to size_type = " << TypeNameTraits<size_type>::name () << ", "
1783  "becomes " << lclNumInds << ". Please report this bug to the Tpetra "
1784  "developers.");
1785 #else
1786  Teuchos::ArrayView<const GO> lgMap = this->getNodeElementList ();
1787 #endif // 1
1788 
1789  const GO indexBase = this->getIndexBase ();
1790  return rcp (new map_type (RECOMPUTE, lgMap, indexBase, newComm));
1791  }
1792  }
1793 
1794  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1795  Teuchos::RCP<const Map<LocalOrdinal, GlobalOrdinal, Node> >
1798  {
1799  using Teuchos::Comm;
1800  using Teuchos::null;
1801  using Teuchos::outArg;
1802  using Teuchos::RCP;
1803  using Teuchos::rcp;
1804  using Teuchos::REDUCE_MIN;
1805  using Teuchos::reduceAll;
1806 
1807  // Create the new communicator. split() returns a valid
1808  // communicator on all processes. On processes where color == 0,
1809  // ignore the result. Passing key == 0 tells MPI to order the
1810  // processes in the new communicator by their rank in the old
1811  // communicator.
1812  const int color = (numLocalElements_ == 0) ? 0 : 1;
1813  // MPI_Comm_split must be called collectively over the original
1814  // communicator. We can't just call it on processes with color
1815  // one, even though we will ignore its result on processes with
1816  // color zero.
1817  RCP<const Comm<int> > newComm = comm_->split (color, 0);
1818  if (color == 0) {
1819  newComm = null;
1820  }
1821 
1822  // Create the Map to return.
1823  if (newComm.is_null ()) {
1824  return null; // my process does not participate in the new Map
1825  } else {
1826  // The default constructor that's useful for clone() above is
1827  // also useful here.
1828  RCP<Map> map = rcp (new Map ());
1829 
1830  map->comm_ = newComm;
1831  map->indexBase_ = indexBase_;
1832  map->numGlobalElements_ = numGlobalElements_;
1833  map->numLocalElements_ = numLocalElements_;
1834  map->minMyGID_ = minMyGID_;
1835  map->maxMyGID_ = maxMyGID_;
1836  map->minAllGID_ = minAllGID_;
1837  map->maxAllGID_ = maxAllGID_;
1838  map->firstContiguousGID_= firstContiguousGID_;
1839  map->lastContiguousGID_ = lastContiguousGID_;
1840 
1841  // Uniformity and contiguity have not changed. The directory
1842  // has changed, but we've taken care of that above.
1843  map->uniform_ = uniform_;
1844  map->contiguous_ = contiguous_;
1845 
1846  // If the original Map was NOT distributed, then the new Map
1847  // cannot be distributed.
1848  //
1849  // If the number of processes in the new communicator is 1, then
1850  // the new Map is not distributed.
1851  //
1852  // Otherwise, we have to check the new Map using an all-reduce
1853  // (over the new communicator). For example, the original Map
1854  // may have had some processes with zero elements, and all other
1855  // processes with the same number of elements as in the whole
1856  // Map. That Map is technically distributed, because of the
1857  // processes with zero elements. Removing those processes would
1858  // make the new Map locally replicated.
1859  if (! distributed_ || newComm->getSize () == 1) {
1860  map->distributed_ = false;
1861  } else {
1862  const int iOwnAllGids = (numLocalElements_ == numGlobalElements_) ? 1 : 0;
1863  int allProcsOwnAllGids = 0;
1864  reduceAll<int, int> (*newComm, REDUCE_MIN, iOwnAllGids, outArg (allProcsOwnAllGids));
1865  map->distributed_ = (allProcsOwnAllGids == 1) ? false : true;
1866  }
1867 
1868  map->lgMap_ = lgMap_;
1869  map->lgMapHost_ = lgMapHost_;
1870  map->glMap_ = glMap_;
1871 
1872  // Map's default constructor creates an uninitialized Directory.
1873  // The Directory will be initialized on demand in
1874  // getRemoteIndexList().
1875  //
1876  // FIXME (mfh 26 Mar 2013) It should be possible to "filter" the
1877  // directory more efficiently than just recreating it. If
1878  // directory recreation proves a bottleneck, we can always
1879  // revisit this. On the other hand, Directory creation is only
1880  // collective over the new, presumably much smaller
1881  // communicator, so it may not be worth the effort to optimize.
1882 
1883  return map;
1884  }
1885  }
1886 
1887  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1888  void
1890  {
1891  TEUCHOS_TEST_FOR_EXCEPTION(
1892  directory_.is_null (), std::logic_error, "Tpetra::Map::setupDirectory: "
1893  "The Directory is null. "
1894  "Please report this bug to the Tpetra developers.");
1895 
1896  // Only create the Directory if it hasn't been created yet.
1897  // This is a collective operation.
1898  if (! directory_->initialized ()) {
1899  directory_->initialize (*this);
1900  }
1901  }
1902 
1903  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1904  LookupStatus
1906  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal>& GIDs,
1907  const Teuchos::ArrayView<int>& PIDs,
1908  const Teuchos::ArrayView<LocalOrdinal>& LIDs) const
1909  {
1910  using Tpetra::Details::OrdinalTraits;
1911  typedef Teuchos::ArrayView<int>::size_type size_type;
1912 
1913  // Empty Maps (i.e., containing no indices on any processes in the
1914  // Map's communicator) are perfectly valid. In that case, if the
1915  // input GID list is nonempty, we fill the output arrays with
1916  // invalid values, and return IDNotPresent to notify the caller.
1917  // It's perfectly valid to give getRemoteIndexList GIDs that the
1918  // Map doesn't own. SubmapImport test 2 needs this functionality.
1919  if (getGlobalNumElements () == 0) {
1920  if (GIDs.size () == 0) {
1921  return AllIDsPresent; // trivially
1922  } else {
1923  for (size_type k = 0; k < PIDs.size (); ++k) {
1924  PIDs[k] = OrdinalTraits<int>::invalid ();
1925  }
1926  for (size_type k = 0; k < LIDs.size (); ++k) {
1927  LIDs[k] = OrdinalTraits<LocalOrdinal>::invalid ();
1928  }
1929  return IDNotPresent;
1930  }
1931  }
1932 
1933  // getRemoteIndexList must be called collectively, and Directory
1934  // initialization is collective too, so it's OK to initialize the
1935  // Directory on demand.
1936  setupDirectory ();
1937  return directory_->getDirectoryEntries (*this, GIDs, PIDs, LIDs);
1938  }
1939 
1940  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1941  LookupStatus
1943  getRemoteIndexList (const Teuchos::ArrayView<const GlobalOrdinal> & GIDs,
1944  const Teuchos::ArrayView<int> & PIDs) const
1945  {
1946  if (getGlobalNumElements () == 0) {
1947  if (GIDs.size () == 0) {
1948  return AllIDsPresent; // trivially
1949  } else {
1950  // The Map contains no indices, so all output PIDs are invalid.
1951  for (Teuchos::ArrayView<int>::size_type k = 0; k < PIDs.size (); ++k) {
1952  PIDs[k] = Tpetra::Details::OrdinalTraits<int>::invalid ();
1953  }
1954  return IDNotPresent;
1955  }
1956  }
1957 
1958  // getRemoteIndexList must be called collectively, and Directory
1959  // initialization is collective too, so it's OK to initialize the
1960  // Directory on demand.
1961  setupDirectory ();
1962  return directory_->getDirectoryEntries (*this, GIDs, PIDs);
1963  }
1964 
1965  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1966  Teuchos::RCP<const Teuchos::Comm<int> >
1968  return comm_;
1969  }
1970 
1971  template <class LocalOrdinal, class GlobalOrdinal, class Node>
1972  Teuchos::RCP<Node>
1974  // Node instances don't do anything any more, but sometimes it
1975  // helps for them to be nonnull.
1976  return Teuchos::rcp (new Node);
1977  }
1978 
1979  template <class LocalOrdinal,class GlobalOrdinal, class Node>
1981  using Teuchos::as;
1982  using Teuchos::outArg;
1983  using Teuchos::REDUCE_MIN;
1984  using Teuchos::reduceAll;
1985 
1986  bool global = false;
1987  if (comm_->getSize () > 1) {
1988  // The communicator has more than one process, but that doesn't
1989  // necessarily mean the Map is distributed.
1990  int localRep = 0;
1991  if (numGlobalElements_ == as<global_size_t> (numLocalElements_)) {
1992  // The number of local elements on this process equals the
1993  // number of global elements.
1994  //
1995  // NOTE (mfh 22 Nov 2011) Does this still work if there were
1996  // duplicates in the global ID list on input (the third Map
1997  // constructor), so that the number of local elements (which
1998  // are not duplicated) on this process could be less than the
1999  // number of global elements, even if this process owns all
2000  // the elements?
2001  localRep = 1;
2002  }
2003  int allLocalRep;
2004  reduceAll<int, int> (*comm_, REDUCE_MIN, localRep, outArg (allLocalRep));
2005  if (allLocalRep != 1) {
2006  // At least one process does not own all the elements.
2007  // This makes the Map a distributed Map.
2008  global = true;
2009  }
2010  }
2011  // If the communicator has only one process, then the Map is not
2012  // distributed.
2013  return global;
2014  }
2015 
2016 } // namespace Classes
2017 } // namespace Tpetra
2018 
2019 template <class LocalOrdinal, class GlobalOrdinal>
2020 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal> >
2021 Tpetra::createLocalMap (const size_t numElements,
2022  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2023 {
2024  typedef LocalOrdinal LO;
2025  typedef GlobalOrdinal GO;
2026  typedef typename ::Tpetra::Map<LO, GO>::node_type NT;
2027  return createLocalMapWithNode<LO, GO, NT> (numElements, comm);
2028 }
2029 
2030 template <class LocalOrdinal, class GlobalOrdinal>
2031 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal> >
2033  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2034 {
2035  typedef LocalOrdinal LO;
2036  typedef GlobalOrdinal GO;
2037  typedef typename ::Tpetra::Map<LO, GO>::node_type NT;
2038  return createUniformContigMapWithNode<LO, GO, NT> (numElements, comm);
2039 }
2040 
2041 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2042 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2044  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
2045  const Teuchos::RCP<Node>& node)
2046 {
2047  using Teuchos::rcp;
2049  const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2050 
2051  if (node.is_null ()) {
2052  return rcp (new map_type (numElements, indexBase, comm, GloballyDistributed));
2053  }
2054  else {
2055  return rcp (new map_type (numElements, indexBase, comm, GloballyDistributed, node));
2056  }
2057 }
2058 
2059 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2060 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2061 Tpetra::createLocalMapWithNode (const size_t numElements,
2062  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
2063  const Teuchos::RCP<Node>& node)
2064 {
2065  using Tpetra::global_size_t;
2066  using Teuchos::rcp;
2068  const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2069  const global_size_t globalNumElts = static_cast<global_size_t> (numElements);
2070 
2071  if (node.is_null ()) {
2072  return rcp (new map_type (globalNumElts, indexBase, comm, LocallyReplicated));
2073  }
2074  else {
2075  return rcp (new map_type (globalNumElts, indexBase, comm, LocallyReplicated, node));
2076  }
2077 }
2078 
2079 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2080 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2082  const size_t localNumElements,
2083  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
2084  const Teuchos::RCP<Node>& /* node */)
2085 {
2086  using Teuchos::rcp;
2088  const GlobalOrdinal indexBase = static_cast<GlobalOrdinal> (0);
2089 
2090  return rcp (new map_type (numElements, localNumElements, indexBase, comm));
2091 }
2092 
2093 template <class LocalOrdinal, class GlobalOrdinal>
2094 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal> >
2096  const size_t localNumElements,
2097  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2098 {
2099  typedef LocalOrdinal LO;
2100  typedef GlobalOrdinal GO;
2102 
2103  return Tpetra::createContigMapWithNode<LO, GO, NT> (numElements, localNumElements, comm);
2104 }
2105 
2106 
2107 template <class LocalOrdinal, class GlobalOrdinal>
2108 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal> >
2109 Tpetra::createNonContigMap(const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2110  const Teuchos::RCP<const Teuchos::Comm<int> >& comm)
2111 {
2112  typedef LocalOrdinal LO;
2113  typedef GlobalOrdinal GO;
2115 
2116  return Tpetra::createNonContigMapWithNode<LO, GO, NT> (elementList, comm);
2117 }
2118 
2119 
2120 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2121 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2122 Tpetra::createNonContigMapWithNode (const Teuchos::ArrayView<const GlobalOrdinal>& elementList,
2123  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
2124  const Teuchos::RCP<Node>& /* node */)
2125 {
2126  using Teuchos::rcp;
2128  using GST = Tpetra::global_size_t;
2129  const GST INV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2130  // FIXME (mfh 22 Jul 2016) This is what I found here, but maybe this
2131  // shouldn't be zero, given that the index base is supposed to equal
2132  // the globally min global index?
2133  const GlobalOrdinal indexBase = 0;
2134 
2135  return rcp (new map_type (INV, elementList, indexBase, comm));
2136 }
2137 
2138 template <class LocalOrdinal, class GlobalOrdinal, class Node>
2139 Teuchos::RCP< const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2140 Tpetra::createWeightedContigMapWithNode (const int myWeight,
2141  const Tpetra::global_size_t numElements,
2142  const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
2143  const Teuchos::RCP<Node>& /* node */)
2144 {
2145  Teuchos::RCP< Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> > map;
2146  int sumOfWeights, elemsLeft, localNumElements;
2147  const int numImages = comm->getSize();
2148  const int myImageID = comm->getRank();
2149  Teuchos::reduceAll<int>(*comm,Teuchos::REDUCE_SUM,myWeight,Teuchos::outArg(sumOfWeights));
2150  const double myShare = ((double)myWeight) / ((double)sumOfWeights);
2151  localNumElements = (int)std::floor( myShare * ((double)numElements) );
2152  // std::cout << "numElements: " << numElements << " myWeight: " << myWeight << " sumOfWeights: " << sumOfWeights << " myShare: " << myShare << std::endl;
2153  Teuchos::reduceAll<int>(*comm,Teuchos::REDUCE_SUM,localNumElements,Teuchos::outArg(elemsLeft));
2154  elemsLeft = numElements - elemsLeft;
2155  // std::cout << "(before) localNumElements: " << localNumElements << " elemsLeft: " << elemsLeft << std::endl;
2156  // i think this is true. just test it for now.
2157  TEUCHOS_TEST_FOR_EXCEPT(elemsLeft < -numImages || numImages < elemsLeft);
2158  if (elemsLeft < 0) {
2159  // last elemsLeft nodes lose an element
2160  if (myImageID >= numImages-elemsLeft) --localNumElements;
2161  }
2162  else if (elemsLeft > 0) {
2163  // first elemsLeft nodes gain an element
2164  if (myImageID < elemsLeft) ++localNumElements;
2165  }
2166  // std::cout << "(after) localNumElements: " << localNumElements << std::endl;
2167  return createContigMapWithNode<LocalOrdinal,GlobalOrdinal,Node>(numElements,localNumElements,comm);
2168 }
2169 
2170 
2171 template<class LO, class GO, class NT>
2172 Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >
2173 Tpetra::createOneToOne (const Teuchos::RCP<const Tpetra::Map<LO, GO, NT> >& M)
2174 {
2175  using Teuchos::Array;
2176  using Teuchos::ArrayView;
2177  using Teuchos::as;
2178  using Teuchos::rcp;
2179  typedef Tpetra::Map<LO, GO, NT> map_type;
2180  typedef global_size_t GST;
2181  const GST GINV = Tpetra::Details::OrdinalTraits<GST>::invalid ();
2182  const int myRank = M->getComm ()->getRank ();
2183 
2184  // Bypasses for special cases where either M is known to be
2185  // one-to-one, or the one-to-one version of M is easy to compute.
2186  // This is why we take M as an RCP, not as a const reference -- so
2187  // that we can return M itself if it is 1-to-1.
2188  if (! M->isDistributed ()) {
2189  // For a locally replicated Map, we assume that users want to push
2190  // all the GIDs to Process 0.
2191 
2192  // mfh 05 Nov 2013: getGlobalNumElements() does indeed return what
2193  // you think it should return, in this special case of a locally
2194  // replicated contiguous Map.
2195  const GST numGlobalEntries = M->getGlobalNumElements ();
2196  if (M->isContiguous ()) {
2197  const size_t numLocalEntries =
2198  (myRank == 0) ? as<size_t> (numGlobalEntries) : static_cast<size_t> (0);
2199  return rcp (new map_type (numGlobalEntries, numLocalEntries,
2200  M->getIndexBase (), M->getComm ()));
2201  }
2202  else {
2203  ArrayView<const GO> myGids =
2204  (myRank == 0) ? M->getNodeElementList () : Teuchos::null;
2205  return rcp (new map_type (GINV, myGids (), M->getIndexBase (),
2206  M->getComm ()));
2207  }
2208  }
2209  else if (M->isContiguous ()) {
2210  // Contiguous, distributed Maps are one-to-one by construction.
2211  // (Locally replicated Maps can be contiguous.)
2212  return M;
2213  }
2214  else {
2216  const size_t numMyElems = M->getNodeNumElements ();
2217  ArrayView<const GO> myElems = M->getNodeElementList ();
2218  Array<int> owner_procs_vec (numMyElems);
2219 
2220  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2221 
2222  Array<GO> myOwned_vec (numMyElems);
2223  size_t numMyOwnedElems = 0;
2224  for (size_t i = 0; i < numMyElems; ++i) {
2225  const GO GID = myElems[i];
2226  const int owner = owner_procs_vec[i];
2227 
2228  if (myRank == owner) {
2229  myOwned_vec[numMyOwnedElems++] = GID;
2230  }
2231  }
2232  myOwned_vec.resize (numMyOwnedElems);
2233 
2234  return rcp (new map_type (GINV, myOwned_vec (), M->getIndexBase (),
2235  M->getComm ()));
2236  }
2237 }
2238 
2239 template<class LocalOrdinal, class GlobalOrdinal, class Node>
2240 Teuchos::RCP<const Tpetra::Map<LocalOrdinal,GlobalOrdinal,Node> >
2243 {
2244  using Teuchos::Array;
2245  using Teuchos::ArrayView;
2246  using Teuchos::rcp;
2247  typedef LocalOrdinal LO;
2248  typedef GlobalOrdinal GO;
2249  typedef Tpetra::Map<LO,GO,Node> map_type;
2250  int myID = M->getComm()->getRank();
2251 
2252  // FIXME (mfh 20 Feb 2013) We should have a bypass for contiguous
2253  // Maps (which are 1-to-1 by construction).
2254 
2255  //Based off Epetra's one to one.
2256 
2258  directory.initialize (*M, tie_break);
2259  size_t numMyElems = M->getNodeNumElements ();
2260  ArrayView<const GO> myElems = M->getNodeElementList ();
2261  Array<int> owner_procs_vec (numMyElems);
2262 
2263  directory.getDirectoryEntries (*M, myElems, owner_procs_vec ());
2264 
2265  Array<GO> myOwned_vec (numMyElems);
2266  size_t numMyOwnedElems = 0;
2267  for (size_t i = 0; i < numMyElems; ++i) {
2268  GO GID = myElems[i];
2269  int owner = owner_procs_vec[i];
2270 
2271  if (myID == owner) {
2272  myOwned_vec[numMyOwnedElems++] = GID;
2273  }
2274  }
2275  myOwned_vec.resize (numMyOwnedElems);
2276 
2277  // FIXME (mfh 08 May 2014) The above Directory should be perfectly
2278  // valid for the new Map. Why can't we reuse it?
2279  const global_size_t GINV =
2280  Tpetra::Details::OrdinalTraits<global_size_t>::invalid ();
2281  return rcp (new map_type (GINV, myOwned_vec (), M->getIndexBase (),
2282  M->getComm ()));
2283 }
2284 
2285 //
2286 // Explicit instantiation macro
2287 //
2288 // Must be expanded from within the Tpetra namespace!
2289 //
2290 
2292 #define TPETRA_MAP_INSTANT(LO,GO,NODE) \
2293  \
2294  namespace Classes { template class Map< LO , GO , NODE >; } \
2295  \
2296  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2297  createLocalMapWithNode<LO,GO,NODE> (const size_t numElements, \
2298  const Teuchos::RCP< const Teuchos::Comm< int > >& comm, \
2299  const Teuchos::RCP< NODE >& node); \
2300  \
2301  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2302  createContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2303  const size_t localNumElements, \
2304  const Teuchos::RCP< const Teuchos::Comm< int > >& comm, \
2305  const Teuchos::RCP< NODE > &node); \
2306  \
2307  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2308  createNonContigMapWithNode(const Teuchos::ArrayView<const GO> &elementList, \
2309  const Teuchos::RCP<const Teuchos::Comm<int> > &comm, \
2310  const Teuchos::RCP<NODE> &node); \
2311  \
2312  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2313  createUniformContigMapWithNode<LO,GO,NODE> (const global_size_t numElements, \
2314  const Teuchos::RCP< const Teuchos::Comm< int > >& comm, \
2315  const Teuchos::RCP< NODE > &node); \
2316  \
2317  template Teuchos::RCP< const Map<LO,GO,NODE> > \
2318  createWeightedContigMapWithNode<LO,GO,NODE> (const int thisNodeWeight, \
2319  const global_size_t numElements, \
2320  const Teuchos::RCP< const Teuchos::Comm< int > >& comm, \
2321  const Teuchos::RCP< NODE >& node); \
2322  \
2323  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2324  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M); \
2325  \
2326  template Teuchos::RCP<const Map<LO,GO,NODE> > \
2327  createOneToOne (const Teuchos::RCP<const Map<LO,GO,NODE> >& M, \
2328  const Tpetra::Details::TieBreak<LO,GO>& tie_break); \
2329 
2330 
2332 #define TPETRA_MAP_INSTANT_DEFAULTNODE(LO,GO) \
2333  template Teuchos::RCP< const Map<LO,GO> > \
2334  createLocalMap<LO,GO>( const size_t, const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2335  \
2336  template Teuchos::RCP< const Map<LO,GO> > \
2337  createContigMap<LO,GO>( global_size_t, size_t, \
2338  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2339  \
2340  template Teuchos::RCP< const Map<LO,GO> > \
2341  createNonContigMap(const Teuchos::ArrayView<const GO> &, \
2342  const Teuchos::RCP<const Teuchos::Comm<int> > &); \
2343  \
2344  template Teuchos::RCP< const Map<LO,GO> > \
2345  createUniformContigMap<LO,GO>( const global_size_t, \
2346  const Teuchos::RCP< const Teuchos::Comm< int > > &); \
2347 
2348 #endif // TPETRA_MAP_DEF_HPP
bool congruent(const Teuchos::Comm< int > &comm1, const Teuchos::Comm< int > &comm2)
Whether the two communicators are congruent.
Definition: Tpetra_Util.cpp:65
global_size_t getGlobalNumElements() const
The number of elements in this Map.
Namespace Tpetra contains the class and methods constituting the Tpetra library.
Declaration of Tpetra::Details::extractMpiCommFromTeuchos.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createUniformContigMapWithNode(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=Teuchos::null)
Non-member constructor for a uniformly distributed, contiguous Map with a user-specified Kokkos Node...
GlobalOrdinal getMaxAllGlobalIndex() const
The maximum global index over all processes in the communicator.
Declaration of Tpetra::Details::printOnce.
LookupStatus
Return status of Map remote index lookup (getRemoteIndexList()).
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createOneToOne(const Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > &M)
Creates a one-to-one version of the given Map where each GID lives on only one process.
local_map_type getLocalMap() const
Get the local Map for Kokkos kernels.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createContigMap(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a (potentially) non-uniformly distributed, contiguous Map using the defaul...
size_t getNodeNumElements() const
The number of elements belonging to the calling process.
GlobalOrdinal getMinAllGlobalIndex() const
The minimum global index over all processes in the communicator.
Node node_type
The type of the Kokkos Node.
GlobalOrdinal getIndexBase() const
The index base for this Map.
Classes::Map< LocalOrdinal, GlobalOrdinal, Node > Map
Alias for Tpetra::Classes::Map.
void initialize(const map_type &map)
Initialize the Directory with its Map.
Implementation details of Tpetra.
Declaration of Tpetra::Details::initializeKokkos.
void gathervPrint(std::ostream &out, const std::string &s, const Teuchos::Comm< int > &comm)
On Process 0 in the given communicator, print strings from each process in that communicator, in rank order.
void initializeKokkos()
Initialize Kokkos, using command-line arguments (if any) given to Teuchos::GlobalMPISession.
size_t global_size_t
Global size_t object.
void deep_copy(MultiVector< DS, DL, DG, DN > &dst, const MultiVector< SS, SL, SG, SN > &src)
Copy the contents of the MultiVector src into dst.
bool isContiguous() const
True if this Map is distributed contiguously, else false.
GlobalOrdinal getMinGlobalIndex() const
The minimum global index owned by the calling process.
LookupStatus getDirectoryEntries(const map_type &map, const Teuchos::ArrayView< const GlobalOrdinal > &globalIDs, const Teuchos::ArrayView< int > &nodeIDs) const
Given a global ID list, return the list of their owning process IDs.
Implement mapping from global ID to process ID and local ID.
Functions for initializing and finalizing Tpetra.
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Accessors for the Teuchos::Comm and Kokkos Node objects.
Teuchos::ArrayView< const GlobalOrdinal > getNodeElementList() const
Return a NONOWNING view of the global indices owned by this process.
NT ::device_type device_type
The Kokkos device type over which to allocate Views and perform work.
GlobalOrdinal getMaxGlobalIndex() const
The maximum global index owned by the calling process.
"Local" part of Map suitable for Kokkos kernels.
bool isDistributed() const
Whether this Map is globally distributed or locally replicated.
Map()
Default constructor (that does nothing).
Stand-alone utility functions and macros.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createLocalMap(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a locally replicated Map with the default Kokkos Node.
A parallel distribution of indices over processes.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createLocalMapWithNode(const size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=Teuchos::null)
Nonmember constructor for a locally replicated Map with a specified Kokkos Node.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createContigMapWithNode(const global_size_t numElements, const size_t localNumElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=Teuchos::null)
Nonmember constructor for a (potentially) nonuniformly distributed, contiguous Map for a user-specifi...
LocalGlobal
Enum for local versus global allocation of Map entries.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createWeightedContigMapWithNode(const int thisNodeWeight, const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=Teuchos::null)
Nonmember constructor for a contiguous Map with user-defined weights and a user-specified, possibly nondefault Kokkos Node type.
Interface for breaking ties in ownership.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createNonContigMap(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Nonmember constructor for a non-contiguous Map using the default Kokkos::Device type.
bool isUniform() const
Whether the range of global indices is uniform.
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal > > createUniformContigMap(const global_size_t numElements, const Teuchos::RCP< const Teuchos::Comm< int > > &comm)
Non-member constructor for a uniformly distributed, contiguous Map with the default Kokkos Node...
Teuchos::RCP< const Map< LocalOrdinal, GlobalOrdinal, Node > > createNonContigMapWithNode(const Teuchos::ArrayView< const GlobalOrdinal > &elementList, const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const Teuchos::RCP< Node > &node=Teuchos::null)
Nonmember constructor for a noncontiguous Map with a user-specified, possibly nondefault Kokkos Node ...