1 #ifndef _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_ 2 #define _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_ 12 #include "Teuchos_ArrayViewDecl.hpp" 15 #include "Teuchos_ReductionOp.hpp" 20 #include <zoltan_dd.h> 23 #include "Teuchos_Comm.hpp" 24 #ifdef HAVE_ZOLTAN2_MPI 25 #include "Teuchos_DefaultMpiComm.hpp" 26 #endif // HAVE_ZOLTAN2_MPI 27 #include <Teuchos_DefaultSerialComm.hpp> 36 template <
typename Ordinal,
typename T>
49 void reduce(
const Ordinal count,
const T inBuffer[], T inoutBuffer[])
const 52 for (Ordinal i=0; i < count; i++){
53 if (inBuffer[0] - inoutBuffer[0] < -_EPSILON){
54 inoutBuffer[0] = inBuffer[0];
55 inoutBuffer[1] = inBuffer[1];
56 }
else if(inBuffer[0] - inoutBuffer[0] < _EPSILON &&
57 inBuffer[1] - inoutBuffer[1] < _EPSILON){
58 inoutBuffer[0] = inBuffer[0];
59 inoutBuffer[1] = inBuffer[1];
69 template <
typename it>
71 return (x == 1 ? x : x * z2Fact<it>(x - 1));
74 template <
typename gno_t,
typename part_t>
82 template <
typename IT>
92 fact[k] = fact[k - 1] * k;
95 for (k = 0; k < n; ++k)
97 perm[k] = i / fact[n - 1 - k];
98 i = i % fact[n - 1 - k];
103 for (k = n - 1; k > 0; --k)
104 for (j = k - 1; j >= 0; --j)
105 if (perm[j] <= perm[k])
111 template <
typename part_t>
113 int dim = grid_dims.size();
114 int neighborCount = 2 * dim;
115 task_comm_xadj = allocMemory<part_t>(taskCount+1);
116 task_comm_adj = allocMemory<part_t>(taskCount * neighborCount);
119 task_comm_xadj[0] = 0;
120 for (
part_t i = 0; i < taskCount; ++i){
122 for (
int j = 0; j < dim; ++j){
123 part_t lNeighbor = i - prevDimMul;
124 part_t rNeighbor = i + prevDimMul;
125 prevDimMul *= grid_dims[j];
126 if (lNeighbor >= 0 && lNeighbor/ prevDimMul == i / prevDimMul && lNeighbor < taskCount){
127 task_comm_adj[neighBorIndex++] = lNeighbor;
129 if (rNeighbor >= 0 && rNeighbor/ prevDimMul == i / prevDimMul && rNeighbor < taskCount){
130 task_comm_adj[neighBorIndex++] = rNeighbor;
133 task_comm_xadj[i+1] = neighBorIndex;
138 template <
typename Adapter,
typename scalar_t,
typename part_t>
141 const Teuchos::Comm<int> *comm,
147 scalar_t **partCenters){
149 typedef typename Adapter::lno_t lno_t;
150 typedef typename Adapter::gno_t gno_t;
153 ArrayView<const gno_t> gnos;
154 ArrayView<input_t> xyz;
155 ArrayView<input_t> wgts;
165 gno_t *point_counts = allocMemory<gno_t>(ntasks);
166 memset(point_counts, 0,
sizeof(gno_t) * ntasks);
169 gno_t *global_point_counts = allocMemory<gno_t>(ntasks);
172 scalar_t **multiJagged_coordinates = allocMemory<scalar_t *>(coordDim);
174 for (
int dim=0; dim < coordDim; dim++){
175 ArrayRCP<const scalar_t> ar;
176 xyz[dim].getInputArray(ar);
178 multiJagged_coordinates[dim] = (scalar_t *)ar.getRawPtr();
179 memset(partCenters[dim], 0,
sizeof(scalar_t) * ntasks);
194 for (lno_t i=0; i < numLocalCoords; i++){
197 for(
int j = 0; j < coordDim; ++j){
198 scalar_t c = multiJagged_coordinates[j][i];
199 partCenters[j][p] += c;
205 reduceAll<int, gno_t>(*comm, Teuchos::REDUCE_SUM,
206 ntasks, point_counts, global_point_counts
209 for(
int j = 0; j < coordDim; ++j){
210 for (
part_t i=0; i < ntasks; ++i){
211 partCenters[j][i] /= global_point_counts[i];
215 scalar_t *tmpCoords = allocMemory<scalar_t>(ntasks);
216 for(
int j = 0; j < coordDim; ++j){
217 reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_SUM,
218 ntasks, partCenters[j], tmpCoords
221 scalar_t *tmp = partCenters[j];
222 partCenters[j] = tmpCoords;
227 freeArray<gno_t>(point_counts);
228 freeArray<gno_t>(global_point_counts);
230 freeArray<scalar_t>(tmpCoords);
231 freeArray<scalar_t *>(multiJagged_coordinates);
235 template <
typename Adapter,
typename scalar_t,
typename part_t>
238 const Teuchos::Comm<int> *comm,
243 ArrayRCP<part_t> &g_part_xadj,
244 ArrayRCP<part_t> &g_part_adj,
245 ArrayRCP<scalar_t> &g_part_ew
248 typedef typename Adapter::lno_t t_lno_t;
249 typedef typename Adapter::gno_t t_gno_t;
250 typedef typename Adapter::scalar_t t_scalar_t;
251 typedef typename Adapter::offset_t t_offset_t;
272 ArrayView<const t_gno_t> Ids;
273 ArrayView<t_input_t> v_wghts;
277 ArrayView<const t_gno_t> edgeIds;
278 ArrayView<const t_offset_t> offsets;
279 ArrayView<t_input_t> e_wgts;
283 std::vector<t_scalar_t> edge_weights;
286 if (numWeightPerEdge > 0){
287 edge_weights = std::vector<t_scalar_t>(localNumEdges);
288 for (t_lno_t i = 0; i < localNumEdges; ++i){
289 edge_weights[i] = e_wgts[0][i];
295 std::vector<part_t> e_parts(localNumEdges);
296 #ifdef HAVE_ZOLTAN2_MPI 297 if (comm->getSize() > 1)
299 Zoltan_DD_Struct *dd = NULL;
301 MPI_Comm mpicomm = Teuchos::getRawMpiComm(*comm);
305 Zoltan_DD_Create(&dd, mpicomm,
307 sizeof(
part_t), localNumVertices, debug_level);
309 ZOLTAN_ID_PTR ddnotneeded = NULL;
312 (ZOLTAN_ID_PTR) Ids.getRawPtr(),
316 int(localNumVertices));
320 (ZOLTAN_ID_PTR) edgeIds.getRawPtr(),
322 (
char *)&(e_parts[0]),
327 Zoltan_DD_Destroy(&dd);
339 for (t_lno_t i = 0; i < localNumEdges; ++i){
340 t_gno_t ei = edgeIds[i];
346 std::vector<t_lno_t> part_begins(np, -1);
347 std::vector<t_lno_t> part_nexts(localNumVertices, -1);
351 for (t_lno_t i = 0; i < localNumVertices; ++i){
353 part_nexts[i] = part_begins[ap];
358 g_part_xadj = ArrayRCP<part_t>(np + 1);
359 g_part_adj = ArrayRCP<part_t>(localNumEdges);
360 g_part_ew = ArrayRCP<t_scalar_t>(localNumEdges);
363 std::vector<part_t> part_neighbors(np);
364 std::vector<t_scalar_t> part_neighbor_weights(np, 0);
365 std::vector<t_scalar_t> part_neighbor_weights_ordered(np);
368 for (t_lno_t i = 0; i < np; ++i){
369 part_t num_neighbor_parts = 0;
370 t_lno_t v = part_begins[i];
374 for (t_offset_t j = offsets[v]; j < offsets[v+1]; ++j){
379 if (numWeightPerEdge > 0){
380 ew = edge_weights[j];
384 if (part_neighbor_weights[ep] < 0.00001){
385 part_neighbors[num_neighbor_parts++] = ep;
387 part_neighbor_weights[ep] += ew;
394 for (t_lno_t j = 0; j < num_neighbor_parts; ++j){
395 part_t neighbor_part = part_neighbors[j];
396 g_part_adj[nindex] = neighbor_part;
397 g_part_ew[nindex++] = part_neighbor_weights[neighbor_part];
398 part_neighbor_weights[neighbor_part] = 0;
400 g_part_xadj[i + 1] = nindex;
405 RCP<const Teuchos::Comm<int> > tcomm = rcpFromRef(*comm);
411 typedef Tpetra::Map<>::global_ordinal_type use_this_gno_t;
412 typedef Tpetra::Map<part_t, use_this_gno_t> t_map_t;
413 Teuchos::RCP<const t_map_t> map = Teuchos::rcp(
new t_map_t(np, 0, tcomm));
414 typedef Tpetra::CrsMatrix<t_scalar_t, part_t, use_this_gno_t>
tcrsMatrix_t;
415 Teuchos::RCP<tcrsMatrix_t> tMatrix(
new tcrsMatrix_t(map, 0));
422 std::vector<t_lno_t> part_begins(np, -1);
423 std::vector<t_lno_t> part_nexts(localNumVertices, -1);
427 for (t_lno_t i = 0; i < localNumVertices; ++i){
429 part_nexts[i] = part_begins[ap];
433 std::vector<use_this_gno_t> part_neighbors(np);
434 std::vector<t_scalar_t> part_neighbor_weights(np, 0);
435 std::vector<t_scalar_t> part_neighbor_weights_ordered(np);
438 for (t_lno_t i = 0; i < np; ++i){
439 part_t num_neighbor_parts = 0;
440 t_lno_t v = part_begins[i];
444 for (t_offset_t j = offsets[v]; j < offsets[v+1]; ++j){
449 if (numWeightPerEdge > 0){
450 ew = edge_weights[j];
453 if (part_neighbor_weights[ep] < 0.00001){
454 part_neighbors[num_neighbor_parts++] = ep;
456 part_neighbor_weights[ep] += ew;
462 for (t_lno_t j = 0; j < num_neighbor_parts; ++j){
463 use_this_gno_t neighbor_part = part_neighbors[j];
464 part_neighbor_weights_ordered[j] = part_neighbor_weights[neighbor_part];
465 part_neighbor_weights[neighbor_part] = 0;
469 if (num_neighbor_parts > 0){
470 Teuchos::ArrayView<const use_this_gno_t> destinations(
471 &(part_neighbors[0]), num_neighbor_parts);
472 Teuchos::ArrayView<const t_scalar_t>
vals(
473 &(part_neighbor_weights_ordered[0]), num_neighbor_parts);
474 tMatrix->insertGlobalValues(i,destinations, vals);
480 tMatrix->fillComplete();
484 std::vector<use_this_gno_t> part_indices(np);
486 for (use_this_gno_t i = 0; i < np; ++i) part_indices[i] = i;
488 Teuchos::ArrayView<const use_this_gno_t> global_ids(&(part_indices[0]), np);
492 Teuchos::RCP<const t_map_t> gatherRowMap(
new t_map_t(
493 Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid(),
494 global_ids, 0, tcomm));
498 Teuchos::RCP<tcrsMatrix_t> A_gather =
500 typedef Tpetra::Import<
typename t_map_t::local_ordinal_type,
501 typename t_map_t::global_ordinal_type,
502 typename t_map_t::node_type> import_type;
503 import_type
import(map, gatherRowMap);
504 A_gather->doImport(*tMatrix,
import, Tpetra::INSERT);
505 A_gather->fillComplete();
510 g_part_xadj = ArrayRCP<part_t>(np + 1);
511 g_part_adj = ArrayRCP<part_t>(A_gather->getNodeNumEntries());
512 g_part_ew = ArrayRCP<t_scalar_t>(A_gather->getNodeNumEntries());
514 part_t *taskidx = g_part_xadj.getRawPtr();
515 part_t *taskadj = g_part_adj.getRawPtr();
516 t_scalar_t *taskadjwgt = g_part_ew.getRawPtr();
521 for (
part_t i = 0; i < np; i++) {
522 part_t length = A_gather->getNumEntriesInLocalRow(i);
524 taskidx[i+1] = taskidx[i] + length;
526 Teuchos::ArrayView<part_t> Indices(taskadj + taskidx[i], length);
527 Teuchos::ArrayView<t_scalar_t> Values(taskadjwgt + taskidx[i], length);
528 A_gather->getLocalRowCopy(i, Indices, Values, nentries);
536 template <
class IT,
class WT>
546 this->heapSize = heapsize_;
547 this->indices = allocMemory<IT>(heapsize_ );
548 this->values = allocMemory<WT>(heapsize_ );
553 freeArray<IT>(this->indices);
554 freeArray<WT>(this->values);
559 WT maxVal = this->values[0];
562 if (distance >= maxVal)
return;
564 this->values[0] = distance;
565 this->indices[0] = index;
572 IT child_index1 = 2 * index_on_heap + 1;
573 IT child_index2 = 2 * index_on_heap + 2;
576 if(child_index1 < this->heapSize && child_index2 < this->heapSize){
578 if (this->values[child_index1] < this->values[child_index2]){
579 biggerIndex = child_index2;
582 biggerIndex = child_index1;
585 else if(child_index1 < this->heapSize){
586 biggerIndex = child_index1;
589 else if(child_index2 < this->heapSize){
590 biggerIndex = child_index2;
592 if (biggerIndex >= 0 && this->values[biggerIndex] > this->values[index_on_heap]){
593 WT tmpVal = this->values[biggerIndex];
594 this->values[biggerIndex] = this->values[index_on_heap];
595 this->values[index_on_heap] = tmpVal;
597 IT tmpIndex = this->indices[biggerIndex];
598 this->indices[biggerIndex] = this->indices[index_on_heap];
599 this->indices[index_on_heap] = tmpIndex;
600 this->push_down(biggerIndex);
605 WT MAXVAL = std::numeric_limits<WT>::max();
606 for(IT i = 0; i < this->heapSize; ++i){
607 this->values[i] = MAXVAL;
608 this->indices[i] = -1;
616 for(IT j = 0; j < this->heapSize; ++j){
617 nc += this->values[j];
627 for(
int i = 0; i < dimension; ++i){
629 for(IT j = 0; j < this->heapSize; ++j){
630 IT k = this->indices[j];
634 nc /= this->heapSize;
635 moved = (
ZOLTAN2_ABS(center[i] - nc) > this->_EPSILON || moved );
643 for(IT i = 0; i < this->heapSize; ++i){
644 permutation[i] = this->indices[i];
651 template <
class IT,
class WT>
660 freeArray<WT>(center);
664 this->dimension = dimension_;
665 this->center = allocMemory<WT>(dimension_);
674 return this->closestPoints.
getNewCenters(center, coords, dimension);
681 for (
int i = 0; i < this->dimension; ++i){
682 WT d = (center[i] - elementCoords[i][index]);
685 distance = pow(distance, WT(1.0 / this->dimension));
686 closestPoints.
addPoint(index, distance);
702 template <
class IT,
class WT>
709 IT required_elements;
715 freeArray<KMeansCluster<IT,WT> >(clusters);
716 freeArray<WT>(maxCoordinates);
717 freeArray<WT>(minCoordinates);
726 IT required_elements_):
728 numElements(numElements_),
729 elementCoords(elementCoords_),
730 numClusters((1 << dim_) + 1),
731 required_elements(required_elements_)
733 this->clusters = allocMemory<KMeansCluster<IT,WT> >(this->numClusters);
735 for (
int i = 0; i < numClusters; ++i){
736 this->clusters[i].
setParams(this->dim, this->required_elements);
739 this->maxCoordinates = allocMemory <WT>(this->dim);
740 this->minCoordinates = allocMemory <WT>(this->dim);
743 for (
int j = 0; j < dim; ++j){
744 this->minCoordinates[j] = this->maxCoordinates[j] = this->elementCoords[j][0];
745 for(IT i = 1; i < numElements; ++i){
746 WT t = this->elementCoords[j][i];
747 if(t > this->maxCoordinates[j]){
748 this->maxCoordinates[j] = t;
750 if (t < minCoordinates[j]){
751 this->minCoordinates[j] = t;
758 for (
int j = 0; j < dim; ++j){
759 int mod = (1 << (j+1));
760 for (
int i = 0; i < numClusters - 1; ++i){
762 if ( (i % mod) < mod / 2){
763 c = this->maxCoordinates[j];
767 c = this->minCoordinates[j];
769 this->clusters[i].
center[j] = c;
774 for (
int j = 0; j < dim; ++j){
775 this->clusters[numClusters - 1].
center[j] = (this->maxCoordinates[j] + this->minCoordinates[j]) / 2;
791 for(
int it = 0; it < 10; ++it){
793 for (IT j = 0; j < this->numClusters; ++j){
796 for (IT i = 0; i < this->numElements; ++i){
798 for (IT j = 0; j < this->numClusters; ++j){
800 this->clusters[j].
getDistance(i,this->elementCoords);
804 for (IT j = 0; j < this->numClusters; ++j){
805 moved =(this->clusters[j].
getNewCenters(this->elementCoords) || moved );
821 for (IT j = 1; j < this->numClusters; ++j){
824 if(minTmpDistance < minDistance){
825 minDistance = minTmpDistance;
837 #define MINOF(a,b) (((a)<(b))?(a):(b)) 845 template <
typename T>
849 #ifdef HAVE_ZOLTAN2_OMP 850 #pragma omp parallel for 852 for(
size_t i = 0; i < arrSize; ++i){
859 #ifdef HAVE_ZOLTAN2_OMP 860 #pragma omp parallel for 862 for(
size_t i = 0; i < arrSize; ++i){
871 template <
typename part_t,
typename pcoord_t>
883 no_tasks(no_tasks_){}
886 return this->no_procs;
889 return this->no_tasks;
895 part_t *task_communication_xadj,
896 part_t *task_communication_adj,
897 pcoord_t *task_communication_edge_weight){
899 double totalCost = 0;
902 for (
part_t task = 0; task < this->no_tasks; ++task){
903 int assigned_proc = task_to_proc[task];
905 part_t task_adj_begin = task_communication_xadj[task];
906 part_t task_adj_end = task_communication_xadj[task+1];
908 commCount += task_adj_end - task_adj_begin;
910 for (
part_t task2 = task_adj_begin; task2 < task_adj_end; ++task2){
913 part_t neighborTask = task_communication_adj[task2];
915 int neighborProc = task_to_proc[neighborTask];
917 double distance = getProcDistance(assigned_proc, neighborProc);
919 if (task_communication_edge_weight == NULL){
920 totalCost += distance ;
923 totalCost += distance * task_communication_edge_weight[task2];
936 this->commCost = totalCost;
940 return this->commCost;
943 virtual double getProcDistance(
int procId1,
int procId2)
const = 0;
951 virtual void getMapping(
953 const RCP<const Environment> &env,
954 ArrayRCP <part_t> &proc_to_task_xadj,
955 ArrayRCP <part_t> &proc_to_task_adj,
956 ArrayRCP <part_t> &task_to_proc
957 ,
const Teuchos::RCP <
const Teuchos::Comm<int> > comm_
963 template <
typename pcoord_t,
typename tcoord_t,
typename part_t>
990 machine_extent(NULL),
991 machine_extent_wrap_around(NULL),
993 num_ranks_per_node(1),
994 divide_to_prime_first(false){}
1008 pcoord_t **pcoords_,
1010 tcoord_t **tcoords_,
1013 int *machine_extent_,
1014 bool *machine_extent_wrap_around_,
1018 proc_coord_dim(pcoord_dim_), proc_coords(pcoords_),
1019 task_coord_dim(tcoord_dim_), task_coords(tcoords_),
1022 machine_extent(machine_extent_),
1023 machine_extent_wrap_around(machine_extent_wrap_around_),
1025 num_ranks_per_node(1),
1026 divide_to_prime_first(false){
1031 this->partArraySize = psize;
1034 this->partNoArray = pNo;
1046 part_t minCoordDim =
MINOF(this->task_coord_dim, this->proc_coord_dim);
1048 minCoordDim, nprocs,
1049 this->proc_coords, ntasks);
1054 for(
int i = ntasks; i < nprocs; ++i){
1055 proc_permutation[i] = -1;
1079 lo = _u_umpa_seed % q;
1080 hi = _u_umpa_seed / q;
1081 test = (a*lo)-(r*hi);
1083 _u_umpa_seed = test;
1085 _u_umpa_seed = test + m;
1086 d = (double) ((
double) _u_umpa_seed / (double) m);
1087 return (
part_t) (d*(double)l);
1091 pcoord_t distance = 0;
1092 if (machine == NULL){
1093 for (
int i = 0 ; i < this->proc_coord_dim; ++i){
1094 double d =
ZOLTAN2_ABS(proc_coords[i][procId1] - proc_coords[i][procId2]);
1095 if (machine_extent_wrap_around && machine_extent_wrap_around[i]){
1096 if (machine_extent[i] - d < d){
1097 d = machine_extent[i] - d;
1104 this->machine->
getHopCount(procId1, procId2, distance);
1125 for (i=0; i<n; i+=16)
1127 u = umpa_uRandom(n-4, _u_umpa_seed);
1128 v = umpa_uRandom(n-4, _u_umpa_seed);
1142 for (i=1; i<end; i++)
1144 part_t j=umpa_uRandom(n-i, _u_umpa_seed);
1163 const RCP<const Environment> &env,
1164 ArrayRCP <part_t> &rcp_proc_to_task_xadj,
1165 ArrayRCP <part_t> &rcp_proc_to_task_adj,
1166 ArrayRCP <part_t> &rcp_task_to_proc
1167 ,
const Teuchos::RCP <
const Teuchos::Comm<int> > comm_
1170 rcp_proc_to_task_xadj = ArrayRCP <part_t>(this->no_procs+1);
1171 rcp_proc_to_task_adj = ArrayRCP <part_t>(this->no_tasks);
1172 rcp_task_to_proc = ArrayRCP <part_t>(this->no_tasks);
1174 part_t *proc_to_task_xadj = rcp_proc_to_task_xadj.getRawPtr();
1175 part_t *proc_to_task_adj = rcp_proc_to_task_adj.getRawPtr();
1176 part_t *task_to_proc = rcp_task_to_proc.getRawPtr();
1180 fillContinousArray<part_t>(proc_to_task_xadj, this->no_procs+1, &invalid);
1183 part_t num_parts =
MINOF(this->no_procs, this->no_tasks);
1189 int recursion_depth = partArraySize;
1191 if (partArraySize == -1){
1193 if (divide_to_prime_first){
1199 recursion_depth = log(
float(this->no_procs)) / log(2.0) * 2 + 1;
1202 recursion_depth = log(
float(this->no_procs)) / log(2.0) + 1;
1207 int taskPerm = z2Fact<int>(this->task_coord_dim);
1208 int procPerm = z2Fact<int>(this->proc_coord_dim);
1210 int permutations = taskPerm * procPerm;
1213 permutations += taskPerm;
1215 permutations += procPerm;
1221 part_t *proc_xadj = allocMemory<part_t>(num_parts+1);
1224 part_t *proc_adjList = allocMemory<part_t>(this->no_procs);
1227 part_t used_num_procs = this->no_procs;
1228 if(this->no_procs > this->no_tasks){
1230 this->getClosestSubset(proc_adjList, this->no_procs, this->no_tasks);
1231 used_num_procs = this->no_tasks;
1234 fillContinousArray<part_t>(proc_adjList,this->no_procs, NULL);
1237 int myPermutation = myRank % permutations;
1238 bool task_partition_along_longest_dim =
false;
1239 bool proc_partition_along_longest_dim =
false;
1245 if (myPermutation == 0){
1246 task_partition_along_longest_dim =
true;
1247 proc_partition_along_longest_dim =
true;
1251 if (myPermutation < taskPerm){
1252 proc_partition_along_longest_dim =
true;
1253 myTaskPerm = myPermutation;
1256 myPermutation -= taskPerm;
1257 if (myPermutation < procPerm){
1258 task_partition_along_longest_dim =
true;
1259 myProcPerm = myPermutation;
1262 myPermutation -= procPerm;
1263 myProcPerm = myPermutation % procPerm;
1264 myTaskPerm = myPermutation / procPerm;
1289 int *permutation = allocMemory<int>((this->proc_coord_dim > this->task_coord_dim)
1290 ? this->proc_coord_dim : this->task_coord_dim);
1293 ithPermutation<int>(this->proc_coord_dim, myProcPerm, permutation);
1303 int procdim = this->proc_coord_dim;
1304 pcoord_t **pcoords = this->proc_coords;
1346 env->timerStart(
MACRO_TIMERS,
"Mapping - Proc Partitioning");
1361 proc_partition_along_longest_dim
1363 ,divide_to_prime_first
1365 env->timerStop(
MACRO_TIMERS,
"Mapping - Proc Partitioning");
1373 part_t *task_xadj = allocMemory<part_t>(num_parts+1);
1374 part_t *task_adjList = allocMemory<part_t>(this->no_tasks);
1376 fillContinousArray<part_t>(task_adjList,this->no_tasks, NULL);
1379 ithPermutation<int>(this->task_coord_dim, myTaskPerm, permutation);
1382 tcoord_t **tcoords = allocMemory<tcoord_t *>(this->task_coord_dim);
1383 for(
int i = 0; i < this->task_coord_dim; ++i){
1384 tcoords[i] = this->task_coords[permutation[i]];
1388 env->timerStart(
MACRO_TIMERS,
"Mapping - Task Partitioning");
1395 this->task_coord_dim,
1402 task_partition_along_longest_dim
1404 ,divide_to_prime_first
1408 env->timerStop(
MACRO_TIMERS,
"Mapping - Task Partitioning");
1414 freeArray<pcoord_t *>(tcoords);
1415 freeArray<int>(permutation);
1419 for(
part_t i = 0; i < num_parts; ++i){
1421 part_t proc_index_begin = proc_xadj[i];
1422 part_t task_begin_index = task_xadj[i];
1423 part_t proc_index_end = proc_xadj[i+1];
1424 part_t task_end_index = task_xadj[i+1];
1427 if(proc_index_end - proc_index_begin != 1){
1428 std::cerr <<
"Error at partitioning of processors" << std::endl;
1429 std::cerr <<
"PART:" << i <<
" is assigned to " << proc_index_end - proc_index_begin <<
" processors." << std::endl;
1432 part_t assigned_proc = proc_adjList[proc_index_begin];
1433 proc_to_task_xadj[assigned_proc] = task_end_index - task_begin_index;
1439 part_t *proc_to_task_xadj_work = allocMemory<part_t>(this->no_procs);
1441 for(
part_t i = 0; i < this->no_procs; ++i){
1442 part_t tmp = proc_to_task_xadj[i];
1443 proc_to_task_xadj[i] = sum;
1445 proc_to_task_xadj_work[i] = sum;
1447 proc_to_task_xadj[this->no_procs] = sum;
1449 for(
part_t i = 0; i < num_parts; ++i){
1451 part_t proc_index_begin = proc_xadj[i];
1452 part_t task_begin_index = task_xadj[i];
1453 part_t task_end_index = task_xadj[i+1];
1455 part_t assigned_proc = proc_adjList[proc_index_begin];
1457 for (
part_t j = task_begin_index; j < task_end_index; ++j){
1458 part_t taskId = task_adjList[j];
1460 task_to_proc[taskId] = assigned_proc;
1462 proc_to_task_adj [ --proc_to_task_xadj_work[assigned_proc] ] = taskId;
1515 freeArray<part_t>(proc_to_task_xadj_work);
1516 freeArray<part_t>(task_xadj);
1517 freeArray<part_t>(task_adjList);
1518 freeArray<part_t>(proc_xadj);
1519 freeArray<part_t>(proc_adjList);
1524 template <
typename Adapter,
typename part_t>
1528 #ifndef DOXYGEN_SHOULD_SKIP_THIS 1530 typedef typename Adapter::scalar_t pcoord_t;
1531 typedef typename Adapter::scalar_t tcoord_t;
1532 typedef typename Adapter::scalar_t scalar_t;
1533 typedef typename Adapter::lno_t
lno_t;
1554 void doMapping(
int myRank,
const Teuchos::RCP <
const Teuchos::Comm<int> > comm_){
1556 if(this->proc_task_comm){
1560 this->proc_to_task_xadj,
1561 this->proc_to_task_adj,
1567 std::cerr <<
"communicationModel is not specified in the Mapper" << std::endl;
1579 int taskPerm = z2Fact<int>(procDim);
1580 int procPerm = z2Fact<int>(taskDim);
1581 int idealGroupSize = taskPerm * procPerm;
1583 idealGroupSize += taskPerm + procPerm + 1;
1585 int myRank = this->comm->getRank();
1586 int commSize = this->comm->getSize();
1588 int myGroupIndex = myRank / idealGroupSize;
1590 int prevGroupBegin = (myGroupIndex - 1)* idealGroupSize;
1591 if (prevGroupBegin < 0) prevGroupBegin = 0;
1592 int myGroupBegin = myGroupIndex * idealGroupSize;
1593 int myGroupEnd = (myGroupIndex + 1) * idealGroupSize;
1594 int nextGroupEnd = (myGroupIndex + 2)* idealGroupSize;
1596 if (myGroupEnd > commSize){
1597 myGroupBegin = prevGroupBegin;
1598 myGroupEnd = commSize;
1600 if (nextGroupEnd > commSize){
1601 myGroupEnd = commSize;
1603 int myGroupSize = myGroupEnd - myGroupBegin;
1605 part_t *myGroup = allocMemory<part_t>(myGroupSize);
1606 for (
int i = 0; i < myGroupSize; ++i){
1607 myGroup[i] = myGroupBegin + i;
1611 ArrayView<const part_t> myGroupView(myGroup, myGroupSize);
1613 RCP<Comm<int> > subComm = this->comm->createSubcommunicator(myGroupView);
1614 freeArray<part_t>(myGroup);
1623 RCP<Comm<int> > subComm = this->create_subCommunicator();
1627 double localCost[2], globalCost[2];
1629 localCost[0] = myCost;
1630 localCost[1] = double(subComm->getRank());
1632 globalCost[1] = globalCost[0] = std::numeric_limits<double>::max();
1634 reduceAll<int, double>(*subComm, reduceBest,
1635 2, localCost, globalCost);
1637 int sender = int(globalCost[1]);
1651 broadcast(*subComm, sender, this->ntasks, this->task_to_proc.getRawPtr());
1652 broadcast(*subComm, sender, this->nprocs, this->proc_to_task_xadj.getRawPtr());
1653 broadcast(*subComm, sender, this->ntasks, this->proc_to_task_adj.getRawPtr());
1660 std::ofstream gnuPlotCode(
"gnuPlot.plot", std::ofstream::out);
1663 std::string ss =
"";
1664 for(
part_t i = 0; i < this->nprocs; ++i){
1666 std::string procFile = Teuchos::toString<int>(i) +
"_mapping.txt";
1668 gnuPlotCode <<
"plot \"" << procFile <<
"\"\n";
1671 gnuPlotCode <<
"replot \"" << procFile <<
"\"\n";
1674 std::ofstream inpFile(procFile.c_str(), std::ofstream::out);
1676 std::string gnuPlotArrow =
"set arrow from ";
1677 for(
int j = 0; j < mindim; ++j){
1678 if (j == mindim - 1){
1680 gnuPlotArrow += Teuchos::toString<float>(proc_task_comm->
proc_coords[j][i]);
1684 inpFile << proc_task_comm->
proc_coords[j][i] <<
" ";
1685 gnuPlotArrow += Teuchos::toString<float>(proc_task_comm->
proc_coords[j][i]) +
",";
1688 gnuPlotArrow +=
" to ";
1691 inpFile << std::endl;
1692 ArrayView<part_t> a = this->getAssignedTasksForProc(i);
1693 for(
int k = 0; k < a.size(); ++k){
1696 std::string gnuPlotArrow2 = gnuPlotArrow;
1697 for(
int z = 0; z < mindim; ++z){
1698 if(z == mindim - 1){
1702 gnuPlotArrow2 += Teuchos::toString<float>(proc_task_comm->
task_coords[z][j]);
1705 inpFile << proc_task_comm->
task_coords[z][j] <<
" ";
1706 gnuPlotArrow2 += Teuchos::toString<float>(proc_task_comm->
task_coords[z][j]) +
",";
1709 ss += gnuPlotArrow2 +
"\n";
1710 inpFile << std::endl;
1715 gnuPlotCode <<
"\nreplot\n pause -1 \n";
1716 gnuPlotCode.close();
1723 std::string rankStr = Teuchos::toString<int>(myRank);
1724 std::string gnuPlots =
"gnuPlot", extentionS =
".plot";
1725 std::string outF = gnuPlots + rankStr+ extentionS;
1726 std::ofstream gnuPlotCode(outF.c_str(), std::ofstream::out);
1733 std::cerr <<
"Mapping Write is only good for 3 dim" << std::endl;
1736 std::string ss =
"";
1737 std::string procs =
"";
1739 std::set < std::tuple<int,int,int,int,int,int> > my_arrows;
1740 for(
part_t origin_rank = 0; origin_rank < this->nprocs; ++origin_rank){
1741 ArrayView<part_t> a = this->getAssignedTasksForProc(origin_rank);
1746 std::string gnuPlotArrow =
"set arrow from ";
1747 for(
int j = 0; j < mindim; ++j){
1748 if (j == mindim - 1){
1749 gnuPlotArrow += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][origin_rank]);
1750 procs += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][origin_rank]);
1754 gnuPlotArrow += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][origin_rank]) +
",";
1755 procs += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][origin_rank])+
" ";
1760 gnuPlotArrow +=
" to ";
1763 for(
int k = 0; k < a.size(); ++k){
1764 int origin_task = a[k];
1766 for (
int nind = task_communication_xadj[origin_task]; nind < task_communication_xadj[origin_task + 1]; ++nind){
1767 int neighbor_task = task_communication_adj[nind];
1769 bool differentnode =
false;
1771 int neighbor_rank = this->getAssignedProcForTask(neighbor_task);
1773 for(
int j = 0; j < mindim; ++j){
1774 if (
int(tmpproc_task_comm->proc_coords[j][origin_rank]) !=
int(tmpproc_task_comm->proc_coords[j][neighbor_rank])){
1775 differentnode =
true;
break;
1778 std::tuple<int,int,int, int, int, int> foo(
1779 int(tmpproc_task_comm->proc_coords[0][origin_rank]),
1780 int(tmpproc_task_comm->proc_coords[1][origin_rank]),
1781 int(tmpproc_task_comm->proc_coords[2][origin_rank]),
1782 int(tmpproc_task_comm->proc_coords[0][neighbor_rank]),
1783 int(tmpproc_task_comm->proc_coords[1][neighbor_rank]),
1784 int(tmpproc_task_comm->proc_coords[2][neighbor_rank]));
1787 if (differentnode && my_arrows.find(foo) == my_arrows.end()){
1788 my_arrows.insert(foo);
1790 std::string gnuPlotArrow2 =
"";
1791 for(
int j = 0; j < mindim; ++j){
1792 if(j == mindim - 1){
1793 gnuPlotArrow2 += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][neighbor_rank]);
1796 gnuPlotArrow2 += Teuchos::toString<float>(tmpproc_task_comm->proc_coords[j][neighbor_rank]) +
",";
1799 ss += gnuPlotArrow + gnuPlotArrow2 +
" nohead\n";
1807 std::ofstream procFile(
"procPlot.plot", std::ofstream::out);
1808 procFile << procs <<
"\n";
1813 gnuPlotCode <<
"plot \"procPlot.plot\" with points pointsize 3\n";
1815 gnuPlotCode <<
"splot \"procPlot.plot\" with points pointsize 3\n";
1818 gnuPlotCode << ss <<
"\nreplot\n pause -1 \n";
1819 gnuPlotCode.close();
1826 const Teuchos::Comm<int> *comm_,
1829 tcoord_t **partCenters
1831 std::string file =
"gggnuPlot";
1832 std::string exten =
".plot";
1833 std::ofstream mm(
"2d.txt");
1834 file += Teuchos::toString<int>(comm_->getRank()) + exten;
1835 std::ofstream ff(file.c_str());
1839 for (
part_t i = 0; i < this->ntasks;++i){
1840 outPartBoxes[i].writeGnuPlot(ff, mm);
1843 ff <<
"plot \"2d.txt\"" << std::endl;
1847 ff <<
"splot \"2d.txt\"" << std::endl;
1852 ff <<
"set style arrow 5 nohead size screen 0.03,15,135 ls 1" << std::endl;
1853 for (
part_t i = 0; i < this->ntasks;++i){
1854 part_t pb = task_communication_xadj[i];
1855 part_t pe = task_communication_xadj[i+1];
1856 for (
part_t p = pb; p < pe; ++p){
1857 part_t n = task_communication_adj[p];
1860 std::string arrowline =
"set arrow from ";
1861 for (
int j = 0; j < coordDim - 1; ++j){
1862 arrowline += Teuchos::toString<tcoord_t>(partCenters[j][n]) +
",";
1864 arrowline += Teuchos::toString<tcoord_t>(partCenters[coordDim -1][n]) +
" to ";
1867 for (
int j = 0; j < coordDim - 1; ++j){
1868 arrowline += Teuchos::toString<tcoord_t>(partCenters[j][i]) +
",";
1870 arrowline += Teuchos::toString<tcoord_t>(partCenters[coordDim -1][i]) +
" as 5\n";
1877 ff <<
"replot\n pause -1" << std::endl;
1885 proc_to_task_xadj_ = this->proc_to_task_xadj.getRawPtr();
1886 proc_to_task_adj_ = this->proc_to_task_adj.getRawPtr();
1893 mappingsoln->setMap_RankForLocalElements(local_task_to_rank);
1907 if(this->isOwnerofModel){
1908 delete this->proc_task_comm;
1913 const lno_t num_local_coords,
1914 const part_t *local_coord_parts,
1915 const ArrayRCP<part_t> task_to_proc_){
1916 local_task_to_rank = ArrayRCP <part_t>(num_local_coords);
1918 for (lno_t i = 0; i < num_local_coords; ++i){
1919 part_t local_coord_part = local_coord_parts[i];
1920 part_t rank_index = task_to_proc_[local_coord_part];
1921 local_task_to_rank[i] = rank_index;
1938 const Teuchos::RCP <
const Teuchos::Comm<int> > comm_,
1940 const Teuchos::RCP <const Adapter> input_adapter_,
1942 const Teuchos::RCP <const Environment> envConst,
1943 bool is_input_adapter_distributed =
true,
1944 int num_ranks_per_node = 1,
1945 bool divide_to_prime_first =
false,
bool reduce_best_mapping =
true):
1946 PartitionMapping<Adapter>(comm_, machine_, input_adapter_, soln_, envConst),
1947 proc_to_task_xadj(0),
1948 proc_to_task_adj(0),
1950 isOwnerofModel(true),
1952 task_communication_xadj(0),
1953 task_communication_adj(0),
1954 task_communication_edge_weight(0){
1958 RCP<Zoltan2::GraphModel<ctm_base_adapter_t> > graph_model_;
1959 RCP<Zoltan2::CoordinateModel<ctm_base_adapter_t> > coordinateModel_ ;
1961 RCP<const Teuchos::Comm<int> > rcp_comm = comm_;
1962 RCP<const Teuchos::Comm<int> > ia_comm = rcp_comm;
1963 if (!is_input_adapter_distributed){
1964 ia_comm = Teuchos::createSerialComm<int>();
1967 RCP<const Environment> envConst_ = envConst;
1969 RCP<const ctm_base_adapter_t> baseInputAdapter_(
1970 rcp(dynamic_cast<const ctm_base_adapter_t *>(input_adapter_.getRawPtr()),
false));
1978 baseInputAdapter_, envConst_, ia_comm, coordFlags_));
1988 baseInputAdapter_, envConst_, ia_comm,
1992 if (!machine_->hasMachineCoordinates()) {
1993 throw std::runtime_error(
"Existing machine does not provide coordinates " 1994 "for coordinate task mapping");
1998 int procDim = machine_->getMachineDim();
1999 this->nprocs = machine_->getNumRanks();
2002 pcoord_t **procCoordinates = NULL;
2003 if (!machine_->getAllMachineCoordinatesView(procCoordinates)) {
2004 throw std::runtime_error(
"Existing machine does not implement " 2005 "getAllMachineCoordinatesView");
2012 std::vector<int> machine_extent_vec(procDim);
2014 int *machine_extent = &(machine_extent_vec[0]);
2015 bool *machine_extent_wrap_around =
new bool[procDim];
2016 for (
int i = 0; i < procDim; ++i)machine_extent_wrap_around[i] =
false;
2017 machine_->getMachineExtentWrapArounds(machine_extent_wrap_around);
2025 if (machine_->getMachineExtent(machine_extent)) {
2027 this->shiftMachineCoordinates(
2030 machine_extent_wrap_around,
2038 int coordDim = coordinateModel_->getCoordinateDim();
2048 tcoord_t **partCenters = NULL;
2049 partCenters = allocMemory<tcoord_t *>(coordDim);
2050 for (
int i = 0; i < coordDim; ++i){
2051 partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
2054 typedef typename Adapter::scalar_t t_scalar_t;
2057 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Solution Center");
2060 getSolutionCenterCoordinates<Adapter, t_scalar_t,part_t>(
2061 envConst.getRawPtr(),
2062 ia_comm.getRawPtr(),
2063 coordinateModel_.getRawPtr(),
2064 this->solution_parts,
2071 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Solution Center");
2075 if (graph_model_.getRawPtr() != NULL){
2076 getCoarsenedPartGraph<Adapter, t_scalar_t, part_t>(
2077 envConst.getRawPtr(),
2078 ia_comm.getRawPtr(),
2079 graph_model_.getRawPtr(),
2081 this->solution_parts,
2084 task_communication_xadj,
2085 task_communication_adj,
2086 task_communication_edge_weight
2091 this->proc_task_comm =
2100 machine_extent_wrap_around,
2101 machine_.getRawPtr());
2103 int myRank = comm_->getRank();
2105 this->proc_task_comm->divide_to_prime_first = divide_to_prime_first;
2108 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Processor Task map");
2109 this->doMapping(myRank, comm_);
2110 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Processor Task map");
2113 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Graph");
2119 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Graph");
2121 if (comm_->getRank() == 0){
2123 part_t taskCommCount = task_communication_xadj.size();
2124 std::cout <<
" TotalComm:" << task_communication_xadj[taskCommCount] << std::endl;
2125 part_t maxN = task_communication_xadj[0];
2126 for (
part_t i = 1; i <= taskCommCount; ++i){
2127 part_t nc = task_communication_xadj[i] - task_communication_xadj[i-1];
2128 if (maxN < nc) maxN = nc;
2130 std::cout <<
" maxNeighbor:" << maxN << std::endl;
2133 this->writeGnuPlot(comm_, soln_, coordDim, partCenters);
2136 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Cost");
2138 if (reduce_best_mapping && task_communication_xadj.getRawPtr() && task_communication_adj.getRawPtr()){
2139 this->proc_task_comm->calculateCommunicationCost(
2140 task_to_proc.getRawPtr(),
2141 task_communication_xadj.getRawPtr(),
2142 task_communication_adj.getRawPtr(),
2143 task_communication_edge_weight.getRawPtr()
2149 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Cost");
2154 this->getBestMapping();
2155 this->create_local_task_to_rank(
2156 coordinateModel_->getLocalNumCoordinates(),
2157 this->solution_parts,
2158 this->task_to_proc);
2174 this->writeMapping2(comm_->getRank());
2177 delete []machine_extent_wrap_around;
2178 if (machine_->getMachineExtent(machine_extent)){
2179 for (
int i = 0; i < procDim; ++i){
2180 delete [] procCoordinates[i];
2182 delete [] procCoordinates;
2185 for (
int i = 0; i < coordDim; ++i){
2186 freeArray<tcoord_t>(partCenters[i]);
2188 freeArray<tcoord_t *>(partCenters);
2204 const Teuchos::RCP <
const Teuchos::Comm<int> > comm_,
2206 const Teuchos::RCP <const Adapter> input_adapter_,
2208 const part_t *result_parts,
2209 const Teuchos::RCP <const Environment> envConst,
2210 bool is_input_adapter_distributed =
true,
2211 int num_ranks_per_node = 1,
2212 bool divide_to_prime_first =
false,
bool reduce_best_mapping =
true):
2213 PartitionMapping<Adapter>(comm_, machine_, input_adapter_, num_parts_, result_parts, envConst),
2214 proc_to_task_xadj(0),
2215 proc_to_task_adj(0),
2217 isOwnerofModel(true),
2219 task_communication_xadj(0),
2220 task_communication_adj(0),
2221 task_communication_edge_weight(0){
2225 RCP<Zoltan2::GraphModel<ctm_base_adapter_t> > graph_model_;
2226 RCP<Zoltan2::CoordinateModel<ctm_base_adapter_t> > coordinateModel_ ;
2228 RCP<const Teuchos::Comm<int> > rcp_comm = comm_;
2229 RCP<const Teuchos::Comm<int> > ia_comm = rcp_comm;
2230 if (!is_input_adapter_distributed){
2231 ia_comm = Teuchos::createSerialComm<int>();
2233 RCP<const Environment> envConst_ = envConst;
2235 RCP<const ctm_base_adapter_t> baseInputAdapter_(
2236 rcp(dynamic_cast<const ctm_base_adapter_t *>(input_adapter_.getRawPtr()),
false));
2244 baseInputAdapter_, envConst_, ia_comm, coordFlags_));
2254 baseInputAdapter_, envConst_, ia_comm,
2258 if (!machine_->hasMachineCoordinates()) {
2259 throw std::runtime_error(
"Existing machine does not provide coordinates " 2260 "for coordinate task mapping");
2264 int procDim = machine_->getMachineDim();
2265 this->nprocs = machine_->getNumRanks();
2268 pcoord_t **procCoordinates = NULL;
2269 if (!machine_->getAllMachineCoordinatesView(procCoordinates)) {
2270 throw std::runtime_error(
"Existing machine does not implement " 2271 "getAllMachineCoordinatesView");
2278 std::vector<int> machine_extent_vec(procDim);
2280 int *machine_extent = &(machine_extent_vec[0]);
2281 bool *machine_extent_wrap_around =
new bool[procDim];
2282 machine_->getMachineExtentWrapArounds(machine_extent_wrap_around);
2290 if (machine_->getMachineExtent(machine_extent)) {
2292 this->shiftMachineCoordinates(
2295 machine_extent_wrap_around,
2303 int coordDim = coordinateModel_->getCoordinateDim();
2306 this->ntasks = num_parts_;
2307 this->solution_parts = result_parts;
2310 tcoord_t **partCenters = NULL;
2311 partCenters = allocMemory<tcoord_t *>(coordDim);
2312 for (
int i = 0; i < coordDim; ++i){
2313 partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
2316 typedef typename Adapter::scalar_t t_scalar_t;
2319 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Solution Center");
2322 getSolutionCenterCoordinates<Adapter, t_scalar_t,part_t>(
2323 envConst.getRawPtr(),
2324 ia_comm.getRawPtr(),
2325 coordinateModel_.getRawPtr(),
2326 this->solution_parts,
2333 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Solution Center");
2337 if (graph_model_.getRawPtr() != NULL){
2338 getCoarsenedPartGraph<Adapter, t_scalar_t, part_t>(
2339 envConst.getRawPtr(),
2340 ia_comm.getRawPtr(),
2341 graph_model_.getRawPtr(),
2343 this->solution_parts,
2346 task_communication_xadj,
2347 task_communication_adj,
2348 task_communication_edge_weight
2354 envConst->timerStart(
MACRO_TIMERS,
"CoordinateCommunicationModel Create");
2356 this->proc_task_comm =
2365 machine_extent_wrap_around,
2366 machine_.getRawPtr());
2368 envConst->timerStop(
MACRO_TIMERS,
"CoordinateCommunicationModel Create");
2374 int myRank = comm_->getRank();
2377 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Processor Task map");
2378 this->doMapping(myRank, comm_);
2379 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Processor Task map");
2382 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Graph");
2388 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Graph");
2390 if (comm_->getRank() == 0){
2392 part_t taskCommCount = task_communication_xadj.size();
2393 std::cout <<
" TotalComm:" << task_communication_xadj[taskCommCount] << std::endl;
2394 part_t maxN = task_communication_xadj[0];
2395 for (
part_t i = 1; i <= taskCommCount; ++i){
2396 part_t nc = task_communication_xadj[i] - task_communication_xadj[i-1];
2397 if (maxN < nc) maxN = nc;
2399 std::cout <<
" maxNeighbor:" << maxN << std::endl;
2402 this->writeGnuPlot(comm_, soln_, coordDim, partCenters);
2405 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Cost");
2407 if (reduce_best_mapping && task_communication_xadj.getRawPtr() && task_communication_adj.getRawPtr()){
2409 task_to_proc.getRawPtr(),
2410 task_communication_xadj.getRawPtr(),
2411 task_communication_adj.getRawPtr(),
2412 task_communication_edge_weight.getRawPtr()
2418 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Cost");
2423 this->getBestMapping();
2425 this->create_local_task_to_rank(
2426 coordinateModel_->getLocalNumCoordinates(),
2427 this->solution_parts,
2428 this->task_to_proc);
2446 this->writeMapping2(comm_->getRank());
2449 delete []machine_extent_wrap_around;
2450 if (machine_->getMachineExtent(machine_extent)){
2451 for (
int i = 0; i < procDim; ++i){
2452 delete [] procCoordinates[i];
2454 delete [] procCoordinates;
2457 for (
int i = 0; i < coordDim; ++i){
2458 freeArray<tcoord_t>(partCenters[i]);
2460 freeArray<tcoord_t *>(partCenters);
2512 const Teuchos::Comm<int> *problemComm,
2515 pcoord_t **machine_coords,
2519 tcoord_t **task_coords,
2520 ArrayRCP<part_t>task_comm_xadj,
2521 ArrayRCP<part_t>task_comm_adj,
2522 pcoord_t *task_communication_edge_weight_,
2523 int recursion_depth,
2525 const part_t *machine_dimensions,
2526 int num_ranks_per_node = 1,
2527 bool divide_to_prime_first =
false,
bool reduce_best_mapping =
true 2531 proc_to_task_xadj(0),
2532 proc_to_task_adj(0),
2534 isOwnerofModel(true),
2536 task_communication_xadj(task_comm_xadj),
2537 task_communication_adj(task_comm_adj){
2540 pcoord_t ** virtual_machine_coordinates = machine_coords;
2541 bool *wrap_arounds =
new bool [proc_dim];
2542 for (
int i = 0; i < proc_dim; ++i) wrap_arounds[i] =
true;
2544 if (machine_dimensions){
2545 virtual_machine_coordinates =
2546 this->shiftMachineCoordinates(
2554 this->nprocs = num_processors;
2556 int coordDim = task_dim;
2557 this->ntasks = num_tasks;
2560 tcoord_t **partCenters = task_coords;
2563 this->proc_task_comm =
2566 virtual_machine_coordinates,
2570 this->ntasks, NULL, NULL
2579 int myRank = problemComm->getRank();
2581 this->doMapping(myRank, this->comm);
2583 this->writeMapping2(myRank);
2586 if (reduce_best_mapping && task_communication_xadj.getRawPtr() && task_communication_adj.getRawPtr()){
2588 task_to_proc.getRawPtr(),
2589 task_communication_xadj.getRawPtr(),
2590 task_communication_adj.getRawPtr(),
2591 task_communication_edge_weight_
2595 this->getBestMapping();
2610 delete [] wrap_arounds;
2612 if (machine_dimensions){
2613 for (
int i = 0; i < proc_dim; ++i){
2614 delete [] virtual_machine_coordinates[i];
2616 delete [] virtual_machine_coordinates;
2619 if(problemComm->getRank() == 0)
2620 this->writeMapping2(-1);
2647 const part_t *machine_dimensions,
2648 bool *machine_extent_wrap_around,
2650 pcoord_t **mCoords){
2651 pcoord_t **result_machine_coords = NULL;
2652 result_machine_coords =
new pcoord_t*[machine_dim];
2653 for (
int i = 0; i < machine_dim; ++i){
2654 result_machine_coords[i] =
new pcoord_t [numProcs];
2657 for (
int i = 0; i < machine_dim; ++i){
2658 part_t numMachinesAlongDim = machine_dimensions[i];
2659 part_t *machineCounts=
new part_t[numMachinesAlongDim];
2660 memset(machineCounts, 0,
sizeof(
part_t) *numMachinesAlongDim);
2662 int *filledCoordinates=
new int[numMachinesAlongDim];
2664 pcoord_t *coords = mCoords[i];
2665 for(
part_t j = 0; j < numProcs; ++j){
2667 ++machineCounts[mc];
2670 part_t filledCoordinateCount = 0;
2671 for(
part_t j = 0; j < numMachinesAlongDim; ++j){
2672 if (machineCounts[j] > 0){
2673 filledCoordinates[filledCoordinateCount++] = j;
2677 part_t firstProcCoord = filledCoordinates[0];
2678 part_t firstProcCount = machineCounts[firstProcCoord];
2680 part_t lastProcCoord = filledCoordinates[filledCoordinateCount - 1];
2681 part_t lastProcCount = machineCounts[lastProcCoord];
2683 part_t firstLastGap = numMachinesAlongDim - lastProcCoord + firstProcCoord;
2684 part_t firstLastGapProc = lastProcCount + firstProcCount;
2686 part_t leftSideProcCoord = firstProcCoord;
2687 part_t leftSideProcCount = firstProcCount;
2689 part_t biggestGapProc = numProcs;
2691 part_t shiftBorderCoordinate = -1;
2692 for(
part_t j = 1; j < filledCoordinateCount; ++j){
2693 part_t rightSideProcCoord= filledCoordinates[j];
2694 part_t rightSideProcCount = machineCounts[rightSideProcCoord];
2696 part_t gap = rightSideProcCoord - leftSideProcCoord;
2697 part_t gapProc = rightSideProcCount + leftSideProcCount;
2702 if (gap > biggestGap || (gap == biggestGap && biggestGapProc > gapProc)){
2703 shiftBorderCoordinate = rightSideProcCoord;
2704 biggestGapProc = gapProc;
2707 leftSideProcCoord = rightSideProcCoord;
2708 leftSideProcCount = rightSideProcCount;
2712 if (!(biggestGap > firstLastGap || (biggestGap == firstLastGap && biggestGapProc < firstLastGapProc))){
2713 shiftBorderCoordinate = -1;
2716 for(
part_t j = 0; j < numProcs; ++j){
2718 if (machine_extent_wrap_around[i] && coords[j] < shiftBorderCoordinate){
2719 result_machine_coords[i][j] = coords[j] + numMachinesAlongDim;
2723 result_machine_coords[i][j] = coords[j];
2727 delete [] machineCounts;
2728 delete [] filledCoordinates;
2731 return result_machine_coords;
2743 procs = this->task_to_proc.getRawPtr() + taskId;
2749 return this->task_to_proc[taskId];
2759 part_t task_begin = this->proc_to_task_xadj[procId];
2760 part_t taskend = this->proc_to_task_xadj[procId+1];
2761 parts = this->proc_to_task_adj.getRawPtr() + task_begin;
2762 numParts = taskend - task_begin;
2766 part_t task_begin = this->proc_to_task_xadj[procId];
2767 part_t taskend = this->proc_to_task_xadj[procId+1];
2775 if (taskend - task_begin > 0){
2776 ArrayView <part_t> assignedParts(this->proc_to_task_adj.getRawPtr() + task_begin, taskend - task_begin);
2777 return assignedParts;
2780 ArrayView <part_t> assignedParts;
2781 return assignedParts;
2857 template <
typename part_t,
typename pcoord_t,
typename tcoord_t>
2859 RCP<
const Teuchos::Comm<int> > problemComm,
2862 pcoord_t **machine_coords,
2865 tcoord_t **task_coords,
2868 pcoord_t *task_communication_edge_weight_,
2869 part_t *proc_to_task_xadj,
2870 part_t *proc_to_task_adj,
2871 int recursion_depth,
2873 const part_t *machine_dimensions,
2874 int num_ranks_per_node = 1,
2875 bool divide_to_prime_first =
false 2884 typedef Tpetra::MultiVector<tcoord_t, part_t, part_t>
tMVector_t;
2886 Teuchos::ArrayRCP<part_t> task_communication_xadj(task_comm_xadj, 0, num_tasks+1,
false);
2888 Teuchos::ArrayRCP<part_t> task_communication_adj;
2889 if (task_comm_xadj){
2890 Teuchos::ArrayRCP<part_t> tmp_task_communication_adj(task_comm_adj, 0, task_comm_xadj[num_tasks],
false);
2891 task_communication_adj = tmp_task_communication_adj;
2898 problemComm.getRawPtr(),
2907 task_communication_xadj,
2908 task_communication_adj,
2909 task_communication_edge_weight_,
2914 divide_to_prime_first
2918 part_t* proc_to_task_xadj_;
2919 part_t* proc_to_task_adj_;
2921 ctm->
getProcTask(proc_to_task_xadj_, proc_to_task_adj_);
2923 for (
part_t i = 0; i <= num_processors; ++i){
2924 proc_to_task_xadj[i] = proc_to_task_xadj_[i];
2926 for (
part_t i = 0; i < num_tasks; ++i){
2927 proc_to_task_adj[i] = proc_to_task_adj_[i];
2934 template <
typename proc_coord_t,
typename v_lno_t>
2936 const int machine_coord_dim,
const int num_ranks, proc_coord_t **machine_coords,
2937 const v_lno_t num_tasks,
const v_lno_t *task_communication_xadj,
const v_lno_t *task_communication_adj,
const int *task_to_rank){
2939 std::string rankStr = Teuchos::toString<int>(myRank);
2940 std::string gnuPlots =
"gnuPlot", extentionS =
".plot";
2941 std::string outF = gnuPlots + rankStr+ extentionS;
2942 std::ofstream gnuPlotCode( outF.c_str(), std::ofstream::out);
2944 if (machine_coord_dim != 3) {
2945 std::cerr <<
"Mapping Write is only good for 3 dim" << std::endl;
2948 std::string ss =
"";
2949 std::string procs =
"";
2951 std::set < std::tuple<int,int,int,int,int,int> > my_arrows;
2952 for(v_lno_t origin_task = 0; origin_task < num_tasks; ++origin_task){
2953 int origin_rank = task_to_rank[origin_task];
2954 std::string gnuPlotArrow =
"set arrow from ";
2956 for(
int j = 0; j < machine_coord_dim; ++j){
2957 if (j == machine_coord_dim - 1){
2958 gnuPlotArrow += Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank]);
2959 procs += Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank]);
2963 gnuPlotArrow += Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank]) +
",";
2964 procs += Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank])+
" ";
2969 gnuPlotArrow +=
" to ";
2972 for (
int nind = task_communication_xadj[origin_task]; nind < task_communication_xadj[origin_task + 1]; ++nind){
2973 int neighbor_task = task_communication_adj[nind];
2975 bool differentnode =
false;
2976 int neighbor_rank = task_to_rank[neighbor_task];
2978 for(
int j = 0; j < machine_coord_dim; ++j){
2979 if (
int(machine_coords[j][origin_rank]) != int(machine_coords[j][neighbor_rank])){
2980 differentnode =
true;
break;
2983 std::tuple<int,int,int, int, int, int> foo(
2984 (
int)(machine_coords[0][origin_rank]),
2985 (
int)(machine_coords[1][origin_rank]),
2986 (
int)(machine_coords[2][origin_rank]),
2987 (
int)(machine_coords[0][neighbor_rank]),
2988 (
int)(machine_coords[1][neighbor_rank]),
2989 (
int)(machine_coords[2][neighbor_rank]));
2992 if (differentnode && my_arrows.find(foo) == my_arrows.end()){
2993 my_arrows.insert(foo);
2995 std::string gnuPlotArrow2 =
"";
2996 for(
int j = 0; j < machine_coord_dim; ++j){
2997 if(j == machine_coord_dim - 1){
2998 gnuPlotArrow2 += Teuchos::toString<float>(machine_coords[j][neighbor_rank]);
3001 gnuPlotArrow2 += Teuchos::toString<float>(machine_coords[j][neighbor_rank]) +
",";
3004 ss += gnuPlotArrow + gnuPlotArrow2 +
" nohead\n";
3009 std::ofstream procFile(
"procPlot.plot", std::ofstream::out);
3010 procFile << procs <<
"\n";
3014 if(machine_coord_dim == 2){
3015 gnuPlotCode <<
"plot \"procPlot.plot\" with points pointsize 3\n";
3017 gnuPlotCode <<
"splot \"procPlot.plot\" with points pointsize 3\n";
3020 gnuPlotCode << ss <<
"\nreplot\n pause -1\npause -1";
3021 gnuPlotCode.close();
void setParams(int dimension_, int heapsize)
CommunicationModel Base Class that performs mapping between the coordinate partitioning result...
Zoltan2::BaseAdapter< userTypes_t > base_adapter_t
void getBestMapping()
finds the lowest cost mapping and broadcasts solution to everyone.
Tpetra::CrsMatrix< zscalar_t, zlno_t, zgno_t, znode_t > tcrsMatrix_t
void ithPermutation(const IT n, IT i, IT *perm)
CommunicationModel(part_t no_procs_, part_t no_tasks_)
CoordinateTaskMapper(const Teuchos::RCP< const Teuchos::Comm< int > > comm_, const Teuchos::RCP< const MachineRepresentation< pcoord_t, part_t > > machine_, const Teuchos::RCP< const Adapter > input_adapter_, const part_t num_parts_, const part_t *result_parts, const Teuchos::RCP< const Environment > envConst, bool is_input_adapter_distributed=true, int num_ranks_per_node=1, bool divide_to_prime_first=false, bool reduce_best_mapping=true)
Constructor. Instead of Solution we have two parameters, numparts When this constructor is called...
virtual ~CommunicationModel()
size_t getLocalNumCoordinates() const
Returns the number of coordinates on this process.
Time an algorithm (or other entity) as a whole.
bool * machine_extent_wrap_around
void setPartArraySize(int psize)
WT getDistance(IT index, WT **elementCoords)
virtual size_t getLocalNumberOfParts() const
Returns the number of parts to be assigned to this process.
bool getHopCount(int rank1, int rank2, pcoord_t &hops) const
CoordinateCommunicationModel(int pcoord_dim_, pcoord_t **pcoords_, int tcoord_dim_, tcoord_t **tcoords_, part_t no_procs_, part_t no_tasks_, int *machine_extent_, bool *machine_extent_wrap_around_, const MachineRepresentation< pcoord_t, part_t > *machine_=NULL)
Class Constructor:
void getCoarsenedPartGraph(const Environment *envConst, const Teuchos::Comm< int > *comm, const Zoltan2::GraphModel< typename Adapter::base_adapter_t > *graph, part_t np, const part_t *parts, ArrayRCP< part_t > &g_part_xadj, ArrayRCP< part_t > &g_part_adj, ArrayRCP< scalar_t > &g_part_ew)
void getClosestSubset(part_t *proc_permutation, part_t nprocs, part_t ntasks) const
Function is called whenever nprocs > no_task. Function returns only the subset of processors that are...
void visualize_mapping(int myRank, const int machine_coord_dim, const int num_ranks, proc_coord_t **machine_coords, const v_lno_t num_tasks, const v_lno_t *task_communication_xadj, const v_lno_t *task_communication_adj, const int *task_to_rank)
PartitionMapping maps a solution or an input distribution to ranks.
ArrayRCP< part_t > local_task_to_rank
void getSolutionCenterCoordinates(const Environment *envConst, const Teuchos::Comm< int > *comm, const Zoltan2::CoordinateModel< typename Adapter::base_adapter_t > *coords, const part_t *parts, int coordDim, part_t ntasks, scalar_t **partCenters)
size_t getLocalNumVertices() const
Returns the number vertices on this process.
size_t getEdgeList(ArrayView< const gno_t > &edgeIds, ArrayView< const offset_t > &offsets, ArrayView< input_t > &wgts) const
Sets pointers to this process' edge (neighbor) global Ids, including off-process edges.
bool getNewCenters(WT **coords)
void timerStop(TimerType tt, const char *timerName) const
Stop a named timer.
std::bitset< NUM_MODEL_FLAGS > modelFlag_t
size_t getLocalNumEdges() const
Returns the number of edges on this process. In global or subset graphs, includes off-process edges...
virtual ~CoordinateTaskMapper()
void sequential_task_partitioning(const RCP< const Environment > &env, mj_lno_t num_total_coords, mj_lno_t num_selected_coords, size_t num_target_part, int coord_dim, mj_scalar_t **mj_coordinates, mj_lno_t *initial_selected_coords_output_permutation, mj_lno_t *output_xadj, int recursion_depth, const mj_part_t *part_no_array, bool partition_along_longest_dim, int num_ranks_per_node, bool divide_to_prime_first_)
Special function for partitioning for task mapping. Runs sequential, and performs deterministic parti...
void calculateCommunicationCost(part_t *task_to_proc, part_t *task_communication_xadj, part_t *task_communication_adj, pcoord_t *task_communication_edge_weight)
ArrayView< part_t > getAssignedTasksForProc(part_t procId)
const MachineRepresentation< pcoord_t, part_t > * machine
part_t getAssignedProcForTask(part_t taskId)
getAssignedProcForTask function, returns the assigned processor id for the given task ...
void getMinDistanceCluster(IT *procPermutation)
void doMapping(int myRank, const Teuchos::RCP< const Teuchos::Comm< int > > comm_)
doMapping function, calls getMapping function of communicationModel object.
virtual void getPartsForProc(int procId, part_t &numParts, part_t *&parts) const
getAssignedProcForTask function, returns the assigned tasks with the number of tasks.
Defines the XpetraMultiVectorAdapter.
SparseMatrixAdapter_t::part_t part_t
Multi Jagged coordinate partitioning algorithm.
virtual void getProcsForPart(part_t taskId, part_t &numProcs, part_t *&procs) const
getAssignedProcForTask function, returns the assigned tasks with the number of tasks.
void create_local_task_to_rank(const lno_t num_local_coords, const part_t *local_coord_parts, const ArrayRCP< part_t > task_to_proc_)
This class provides geometric coordinates with optional weights to the Zoltan2 algorithm.
virtual void map(const RCP< MappingSolution< Adapter > > &mappingsoln)
Mapping method.
int getNumWeightsPerEdge() const
Returns the number (0 or greater) of weights per edge.
Contains the Multi-jagged algorthm.
PartitionMapping maps a solution or an input distribution to ranks.
void getProcTask(part_t *&proc_to_task_xadj_, part_t *&proc_to_task_adj_)
A PartitioningSolution is a solution to a partitioning problem.
size_t getCoordinates(ArrayView< const gno_t > &Ids, ArrayView< input_t > &xyz, ArrayView< input_t > &wgts) const
Returns the coordinate ids, values and optional weights.
void copyCoordinates(IT *permutation)
void getGridCommunicationGraph(part_t taskCount, part_t *&task_comm_xadj, part_t *&task_comm_adj, std::vector< int > grid_dims)
Zoltan2_ReduceBestMapping Class, reduces the minimum cost mapping, ties breaks with minimum proc id...
ArrayRCP< part_t > task_communication_adj
BaseAdapterType
An enum to identify general types of adapters.
virtual void getMapping(int myRank, const RCP< const Environment > &env, ArrayRCP< part_t > &rcp_proc_to_task_xadj, ArrayRCP< part_t > &rcp_proc_to_task_adj, ArrayRCP< part_t > &rcp_task_to_proc, const Teuchos::RCP< const Teuchos::Comm< int > > comm_) const
Function is called whenever nprocs > no_task. Function returns only the subset of processors that are...
The StridedData class manages lists of weights or coordinates.
CoordinateTaskMapper(const Environment *env_const_, const Teuchos::Comm< int > *problemComm, int proc_dim, int num_processors, pcoord_t **machine_coords, int task_dim, part_t num_tasks, tcoord_t **task_coords, ArrayRCP< part_t >task_comm_xadj, ArrayRCP< part_t >task_comm_adj, pcoord_t *task_communication_edge_weight_, int recursion_depth, part_t *part_no_array, const part_t *machine_dimensions, int num_ranks_per_node=1, bool divide_to_prime_first=false, bool reduce_best_mapping=true)
Constructor The mapping constructor which will also perform the mapping operation. The result mapping can be obtained by –getAssignedProcForTask function: which returns the assigned processor id for the given task –getPartsForProc: which returns the assigned tasks with the number of tasks.
void setPartArray(part_t *pNo)
ArrayRCP< part_t > proc_to_task_adj
The user parameters, debug, timing and memory profiling output objects, and error checking methods...
static part_t umpa_uRandom(part_t l, int &_u_umpa_seed)
void reduce(const Ordinal count, const T inBuffer[], T inoutBuffer[]) const
Implement Teuchos::ValueTypeReductionOp interface.
void addPoint(IT index, WT distance)
bool getNewCenters(WT *center, WT **coords, int dimension)
MachineRepresentation Class Base class for representing machine coordinates, networks, etc.
size_t getVertexList(ArrayView< const gno_t > &Ids, ArrayView< input_t > &wgts) const
Sets pointers to this process' vertex Ids and their weights.
CoordinateTaskMapper(const Teuchos::RCP< const Teuchos::Comm< int > > comm_, const Teuchos::RCP< const MachineRepresentation< pcoord_t, part_t > > machine_, const Teuchos::RCP< const Adapter > input_adapter_, const Teuchos::RCP< const Zoltan2::PartitioningSolution< Adapter > > soln_, const Teuchos::RCP< const Environment > envConst, bool is_input_adapter_distributed=true, int num_ranks_per_node=1, bool divide_to_prime_first=false, bool reduce_best_mapping=true)
Constructor. When this constructor is called, in order to calculate the communication metric...
CoordinateModelInput Class that performs mapping between the coordinate partitioning result and mpi r...
Traits class to handle conversions between gno_t/lno_t and TPL data types (e.g., ParMETIS's idx_t...
double getCommunicationCostMetric()
size_t getActualGlobalNumberOfParts() const
Returns the actual global number of parts provided in setParts().
pcoord_t ** shiftMachineCoordinates(int machine_dim, const part_t *machine_dimensions, bool *machine_extent_wrap_around, part_t numProcs, pcoord_t **mCoords)
Using the machine dimensions provided, create virtual machine coordinates by assigning the largest ga...
Defines the MappingSolution class.
ArrayRCP< part_t > task_communication_xadj
const part_t * getPartListView() const
Returns the part list corresponding to the global ID list.
GraphModel defines the interface required for graph models.
Gathering definitions used in software development.
void update_visit_order(part_t *visitOrder, part_t n, int &_u_umpa_seed, part_t rndm)
Zoltan2_ReduceBestMapping()
Default Constructor.
bool divide_to_prime_first
size_t getTargetGlobalNumberOfParts() const
Returns the global number of parts desired in the solution.
void timerStart(TimerType tt, const char *timerName) const
Start a named timer.
KmeansHeap Class, max heap, but holds the minimum values.
Defines the GraphModel interface.
void fillContinousArray(T *arr, size_t arrSize, T *val)
fillContinousArray function
#define ZOLTAN2_ALGMULTIJAGGED_SWAP(a, b, temp)
void push_down(IT index_on_heap)
ArrayRCP< scalar_t > task_communication_edge_weight
virtual ~CoordinateCommunicationModel()
RCP< Comm< int > > create_subCommunicator()
creates and returns the subcommunicator for the processor group.
KMeansAlgorithm Class that performs clustering of the coordinates, and returns the closest set of coo...
ArrayRCP< part_t > task_to_proc
KMeansAlgorithm(int dim_, IT numElements_, WT **elementCoords_, IT required_elements_)
KMeansAlgorithm Constructor.
virtual double getProcDistance(int procId1, int procId2) const
void writeMapping2(int myRank)
void copyCoordinates(IT *permutation)
void coordinateTaskMapperInterface(RCP< const Teuchos::Comm< int > > problemComm, int proc_dim, int num_processors, pcoord_t **machine_coords, int task_dim, part_t num_tasks, tcoord_t **task_coords, part_t *task_comm_xadj, part_t *task_comm_adj, pcoord_t *task_communication_edge_weight_, part_t *proc_to_task_xadj, part_t *proc_to_task_adj, int recursion_depth, part_t *part_no_array, const part_t *machine_dimensions, int num_ranks_per_node=1, bool divide_to_prime_first=false)
Constructor The interface function that calls CoordinateTaskMapper which will also perform the mappin...
CoordinateCommunicationModel()
ArrayRCP< part_t > proc_to_task_xadj
void setHeapsize(IT heapsize_)
CoordinateCommunicationModel< pcoord_t, tcoord_t, part_t > * proc_task_comm