1 #ifndef _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_
2 #define _ZOLTAN2_COORD_PARTITIONMAPPING_HPP_
12 #include "Teuchos_ArrayViewDecl.hpp"
15 #include "Teuchos_ReductionOp.hpp"
23 #include "Teuchos_Comm.hpp"
24 #ifdef HAVE_ZOLTAN2_MPI
25 #include "Teuchos_DefaultMpiComm.hpp"
26 #endif // HAVE_ZOLTAN2_MPI
27 #include <Teuchos_DefaultSerialComm.hpp>
38 template <
typename Ordinal,
typename T>
54 T inoutBuffer[])
const {
56 for (Ordinal i = 0; i < count; i++) {
57 if (inBuffer[0] - inoutBuffer[0] < -_EPSILON) {
58 inoutBuffer[0] = inBuffer[0];
59 inoutBuffer[1] = inBuffer[1];
61 else if (inBuffer[0] - inoutBuffer[0] < _EPSILON &&
62 inBuffer[1] - inoutBuffer[1] < _EPSILON) {
63 inoutBuffer[0] = inBuffer[0];
64 inoutBuffer[1] = inBuffer[1];
75 template <
typename it>
77 return (x == 1 ? x : x * z2Fact<it>(x - 1));
80 template <
typename gno_t,
typename part_t>
90 template <
typename IT>
99 fact[k] = fact[k - 1] * k;
102 for (k = 0; k < n; ++k)
104 perm[k] = i / fact[n - 1 - k];
105 i = i % fact[n - 1 - k];
110 for (k = n - 1; k > 0; --k)
111 for (j = k - 1; j >= 0; --j)
112 if (perm[j] <= perm[k])
118 template <
typename part_t>
122 std::vector<int> grid_dims) {
124 int dim = grid_dims.size();
125 int neighborCount = 2 * dim;
126 task_comm_xadj = allocMemory<part_t>(taskCount + 1);
127 task_comm_adj = allocMemory<part_t>(taskCount * neighborCount);
130 task_comm_xadj[0] = 0;
132 for (
part_t i = 0; i < taskCount; ++i) {
135 for (
int j = 0; j < dim; ++j) {
136 part_t lNeighbor = i - prevDimMul;
137 part_t rNeighbor = i + prevDimMul;
138 prevDimMul *= grid_dims[j];
140 if (lNeighbor >= 0 &&
141 lNeighbor / prevDimMul == i / prevDimMul &&
142 lNeighbor < taskCount) {
143 task_comm_adj[neighBorIndex++] = lNeighbor;
146 if (rNeighbor >= 0 &&
147 rNeighbor / prevDimMul == i / prevDimMul &&
148 rNeighbor < taskCount) {
149 task_comm_adj[neighBorIndex++] = rNeighbor;
153 task_comm_xadj[i + 1] = neighBorIndex;
158 template <
typename Adapter,
typename scalar_t,
typename part_t>
161 const Teuchos::Comm<int> *comm,
167 scalar_t **partCenters) {
169 typedef typename Adapter::lno_t lno_t;
170 typedef typename Adapter::gno_t gno_t;
173 ArrayView<const gno_t> gnos;
174 ArrayView<input_t> xyz;
175 ArrayView<input_t> wgts;
183 gno_t *point_counts = allocMemory<gno_t>(ntasks);
184 memset(point_counts, 0,
sizeof(gno_t) * ntasks);
187 gno_t *global_point_counts = allocMemory<gno_t>(ntasks);
189 scalar_t **multiJagged_coordinates = allocMemory<scalar_t *>(coordDim);
191 for (
int dim = 0; dim < coordDim; dim++) {
192 ArrayRCP<const scalar_t> ar;
193 xyz[dim].getInputArray(ar);
196 multiJagged_coordinates[dim] = (scalar_t *)ar.getRawPtr();
197 memset(partCenters[dim], 0,
sizeof(scalar_t) * ntasks);
205 for (lno_t i = 0; i < numLocalCoords; i++) {
209 for (
int j = 0; j < coordDim; ++j) {
210 scalar_t c = multiJagged_coordinates[j][i];
211 partCenters[j][p] += c;
217 reduceAll<int, gno_t>(*comm, Teuchos::REDUCE_SUM,
218 ntasks, point_counts, global_point_counts);
220 for (
int j = 0; j < coordDim; ++j) {
221 for (
part_t i = 0; i < ntasks; ++i) {
222 if (global_point_counts[i] > 0)
223 partCenters[j][i] /= global_point_counts[i];
227 scalar_t *tmpCoords = allocMemory<scalar_t>(ntasks);
228 for (
int j = 0; j < coordDim; ++j) {
229 reduceAll<int, scalar_t>(*comm, Teuchos::REDUCE_SUM,
230 ntasks, partCenters[j], tmpCoords);
232 scalar_t *tmp = partCenters[j];
233 partCenters[j] = tmpCoords;
239 freeArray<gno_t>(point_counts);
240 freeArray<gno_t>(global_point_counts);
242 freeArray<scalar_t>(tmpCoords);
243 freeArray<scalar_t *>(multiJagged_coordinates);
247 template <
typename Adapter,
typename scalar_t,
typename part_t>
250 const Teuchos::Comm<int> *comm,
255 ArrayRCP<part_t> &g_part_xadj,
256 ArrayRCP<part_t> &g_part_adj,
257 ArrayRCP<scalar_t> &g_part_ew) {
259 typedef typename Adapter::lno_t t_lno_t;
260 typedef typename Adapter::gno_t t_gno_t;
261 typedef typename Adapter::scalar_t t_scalar_t;
262 typedef typename Adapter::offset_t t_offset_t;
285 ArrayView<const t_gno_t> Ids;
286 ArrayView<t_input_t> v_wghts;
290 ArrayView<const t_gno_t> edgeIds;
291 ArrayView<const t_offset_t> offsets;
292 ArrayView<t_input_t> e_wgts;
295 std::vector<t_scalar_t> edge_weights;
298 if (numWeightPerEdge > 0) {
299 edge_weights = std::vector<t_scalar_t>(localNumEdges);
301 for (t_lno_t i = 0; i < localNumEdges; ++i) {
302 edge_weights[i] = e_wgts[0][i];
308 std::vector<part_t> e_parts(localNumEdges);
310 #ifdef HAVE_ZOLTAN2_MPI
311 if (comm->getSize() > 1)
315 const bool bUseLocalIDs =
false;
317 const RCP<const Comm<int> > rcp_comm(comm,
false);
319 directory_t directory(rcp_comm, bUseLocalIDs, debug_level);
321 directory.update(localNumVertices, &Ids[0], NULL, &parts[0],
322 NULL, directory_t::Update_Mode::Replace);
324 directory.find(localNumEdges, &edgeIds[0], NULL, &e_parts[0],
337 for (t_lno_t i = 0; i < localNumEdges; ++i) {
338 t_gno_t ei = edgeIds[i];
344 std::vector<t_lno_t> part_begins(np, -1);
345 std::vector<t_lno_t> part_nexts(localNumVertices, -1);
349 for (t_lno_t i = 0; i < localNumVertices; ++i) {
351 part_nexts[i] = part_begins[ap];
355 g_part_xadj = ArrayRCP<part_t>(np + 1);
356 g_part_adj = ArrayRCP<part_t>(localNumEdges);
357 g_part_ew = ArrayRCP<t_scalar_t>(localNumEdges);
362 std::vector<part_t> part_neighbors(np);
363 std::vector<t_scalar_t> part_neighbor_weights(np, 0);
364 std::vector<t_scalar_t> part_neighbor_weights_ordered(np);
367 for (t_lno_t i = 0; i < np; ++i) {
368 part_t num_neighbor_parts = 0;
369 t_lno_t v = part_begins[i];
374 for (t_offset_t j = offsets[v]; j < offsets[v + 1]; ++j) {
379 if (numWeightPerEdge > 0) {
380 ew = edge_weights[j];
388 if (part_neighbor_weights[ep] < 0.00001) {
389 part_neighbors[num_neighbor_parts++] = ep;
392 part_neighbor_weights[ep] += ew;
400 for (t_lno_t j = 0; j < num_neighbor_parts; ++j) {
401 part_t neighbor_part = part_neighbors[j];
402 g_part_adj[nindex] = neighbor_part;
403 g_part_ew[nindex++] = part_neighbor_weights[neighbor_part];
404 part_neighbor_weights[neighbor_part] = 0;
406 g_part_xadj[i + 1] = nindex;
415 part_info() : weight(0) {
418 const part_info & operator+=(
const part_info & src) {
419 this->weight += src.weight;
423 bool operator>(
const part_info & src) {
424 return (destination_part > src.destination_part);
427 bool operator==(
const part_info & src) {
428 return (destination_part == src.destination_part);
438 bool bUseLocalIDs =
false;
439 const int debug_level = 0;
440 const RCP<const Comm<int> > rcp_comm(comm,
false);
442 directory_t directory(rcp_comm, bUseLocalIDs, debug_level);
444 std::vector<part_t> part_data;
445 std::vector<std::vector<part_info>> user_data;
450 std::vector<t_lno_t> part_begins(np, -1);
451 std::vector<t_lno_t> part_nexts(localNumVertices, -1);
455 for (t_lno_t i = 0; i < localNumVertices; ++i) {
457 part_nexts[i] = part_begins[ap];
461 std::vector<part_t> part_neighbors(np);
462 std::vector<t_scalar_t> part_neighbor_weights(np, 0);
463 std::vector<t_scalar_t> part_neighbor_weights_ordered(np);
466 for (t_lno_t i = 0; i < np; ++i) {
467 part_t num_neighbor_parts = 0;
468 t_lno_t v = part_begins[i];
473 for (t_offset_t j = offsets[v]; j < offsets[v + 1]; ++j) {
478 if (numWeightPerEdge > 0) {
479 ew = edge_weights[j];
483 if (part_neighbor_weights[ep] < 0.00001) {
484 part_neighbors[num_neighbor_parts++] = ep;
487 part_neighbor_weights[ep] += ew;
494 for (t_lno_t j = 0; j < num_neighbor_parts; ++j) {
495 part_t neighbor_part = part_neighbors[j];
496 part_neighbor_weights_ordered[j] =
497 part_neighbor_weights[neighbor_part];
498 part_neighbor_weights[neighbor_part] = 0;
502 if (num_neighbor_parts > 0) {
504 part_data.push_back(i);
505 std::vector<part_info> new_user_data(num_neighbor_parts);
507 for (
int q = 0; q < num_neighbor_parts; ++q) {
508 part_info & info = new_user_data[q];
509 info.weight = part_neighbor_weights_ordered[q];
510 info.destination_part = part_neighbors[q];
513 user_data.push_back(new_user_data);
519 std::vector<part_t> part_indices(np);
521 for (
part_t i = 0; i < np; ++i) part_indices[i] = i;
524 directory.update(part_data.size(), &part_data[0], NULL, &user_data[0],
525 NULL, directory_t::Update_Mode::AggregateAdd);
529 std::vector<std::vector<part_info>> find_user_data(part_indices.size());
530 directory.find(find_user_data.size(), &part_indices[0], NULL,
531 &find_user_data[0], NULL, NULL,
false);
541 int get_total_length = 0;
542 for (
size_t n = 0; n < find_user_data.size(); ++n) {
543 get_total_length += find_user_data[n].size();
547 g_part_xadj = ArrayRCP<part_t>(np + 1);
548 g_part_adj = ArrayRCP<part_t>(get_total_length);
549 g_part_ew = ArrayRCP<t_scalar_t>(get_total_length);
552 int track_insert_index = 0;
553 for (
size_t n = 0; n < find_user_data.size(); ++n) {
554 g_part_xadj[n] = track_insert_index;
555 const std::vector<part_info> & user_data_vector = find_user_data[n];
557 for (
size_t q = 0; q < user_data_vector.size(); ++q) {
558 const part_info & info = user_data_vector[q];
559 g_part_adj[track_insert_index] = info.destination_part;
560 g_part_ew[track_insert_index] = info.weight;
561 ++track_insert_index;
564 g_part_xadj[np] = get_total_length;
603 template <
class IT,
class WT>
614 freeArray<IT>(this->indices);
615 freeArray<WT>(this->values);
619 this->heapSize = heapsize_;
620 this->indices = allocMemory<IT>(heapsize_ );
621 this->values = allocMemory<WT>(heapsize_ );
627 WT maxVal = this->values[0];
633 if (distance >= maxVal)
636 this->values[0] = distance;
637 this->indices[0] = index;
644 IT child_index1 = 2 * index_on_heap + 1;
645 IT child_index2 = 2 * index_on_heap + 2;
648 if (child_index1 < this->heapSize && child_index2 < this->heapSize) {
650 if (this->values[child_index1] < this->values[child_index2]) {
651 biggerIndex = child_index2;
654 biggerIndex = child_index1;
657 else if (child_index1 < this->heapSize) {
658 biggerIndex = child_index1;
660 else if (child_index2 < this->heapSize) {
661 biggerIndex = child_index2;
664 if (biggerIndex >= 0 &&
665 this->values[biggerIndex] > this->values[index_on_heap]) {
666 WT tmpVal = this->values[biggerIndex];
667 this->values[biggerIndex] = this->values[index_on_heap];
668 this->values[index_on_heap] = tmpVal;
670 IT tmpIndex = this->indices[biggerIndex];
671 this->indices[biggerIndex] = this->indices[index_on_heap];
672 this->indices[index_on_heap] = tmpIndex;
678 WT MAXVAL = std::numeric_limits<WT>::max();
680 for (IT i = 0; i < this->heapSize; ++i) {
681 this->values[i] = MAXVAL;
682 this->indices[i] = -1;
690 for (IT j = 0; j < this->heapSize; ++j) {
691 nc += this->values[j];
704 for (
int i = 0; i < dimension; ++i) {
707 for (IT j = 0; j < this->heapSize; ++j) {
708 IT k = this->indices[j];
716 nc /= this->heapSize;
717 moved = (
ZOLTAN2_ABS(center[i] - nc) > this->_EPSILON || moved );
725 for (IT i = 0; i < this->heapSize; ++i) {
726 permutation[i] = this->indices[i];
733 template <
class IT,
class WT>
748 this->dimension = dimension_;
749 this->center = allocMemory<WT>(dimension_);
766 for (
int i = 0; i < this->dimension; ++i) {
767 WT d = (
center[i] - elementCoords[i][index]);
771 distance = pow(distance, WT(1.0 / this->dimension));
772 closestPoints.
addPoint(index, distance);
792 template <
class IT,
class WT>
799 IT required_elements;
807 freeArray<KMeansCluster<IT,WT> >(clusters);
808 freeArray<WT>(maxCoordinates);
809 freeArray<WT>(minCoordinates);
818 IT required_elements_):
820 numElements(numElements_),
821 elementCoords(elementCoords_),
822 numClusters((1 << dim_) + 1),
823 required_elements(required_elements_) {
825 this->clusters = allocMemory<KMeansCluster<IT,WT> >(this->numClusters);
828 for (
int i = 0; i < numClusters; ++i) {
829 this->clusters[i].
setParams(this->dim, this->required_elements);
832 this->maxCoordinates = allocMemory <WT>(this->dim);
833 this->minCoordinates = allocMemory <WT>(this->dim);
836 for (
int j = 0; j < dim; ++j) {
837 this->minCoordinates[j] = this->elementCoords[j][0];
838 this->maxCoordinates[j] = this->elementCoords[j][0];
840 for (IT i = 1; i < numElements; ++i) {
841 WT t = this->elementCoords[j][i];
843 if (t > this->maxCoordinates[j]) {
844 this->maxCoordinates[j] = t;
847 if (t < minCoordinates[j]) {
848 this->minCoordinates[j] = t;
855 for (
int j = 0; j < dim; ++j) {
856 int mod = (1 << (j + 1));
858 for (
int i = 0; i < numClusters - 1; ++i) {
861 if ( (i % mod) < mod / 2) {
862 c = this->maxCoordinates[j];
867 c = this->minCoordinates[j];
870 this->clusters[i].
center[j] = c;
875 for (
int j = 0; j < dim; ++j) {
876 this->clusters[numClusters - 1].
center[j] =
877 (this->maxCoordinates[j] + this->minCoordinates[j]) / 2;
894 for (
int it = 0; it < 10; ++it) {
897 for (IT j = 0; j < this->numClusters; ++j) {
901 for (IT i = 0; i < this->numElements; ++i) {
904 for (IT j = 0; j < this->numClusters; ++j) {
908 this->clusters[j].
getDistance(i,this->elementCoords);
914 for (IT j = 0; j < this->numClusters; ++j) {
916 (this->clusters[j].
getNewCenters(this->elementCoords) || moved );
935 for (IT j = 1; j < this->numClusters; ++j) {
942 if (minTmpDistance < minDistance) {
943 minDistance = minTmpDistance;
954 #define MINOF(a,b) (((a)<(b))?(a):(b))
963 template <
typename T>
967 #ifdef HAVE_ZOLTAN2_OMP
968 #pragma omp parallel for
970 for (
size_t i = 0; i < arrSize; ++i) {
977 #ifdef HAVE_ZOLTAN2_OMP
978 #pragma omp parallel for
980 for (
size_t i = 0; i < arrSize; ++i) {
990 template <
typename part_t,
typename pcoord_t>
1021 part_t *task_communication_xadj,
1022 part_t *task_communication_adj,
1023 pcoord_t *task_communication_edge_weight) {
1025 double totalCost = 0;
1029 int assigned_proc = task_to_proc[task];
1031 part_t task_adj_begin = task_communication_xadj[task];
1032 part_t task_adj_end = task_communication_xadj[task + 1];
1034 commCount += task_adj_end - task_adj_begin;
1036 for (
part_t task2 = task_adj_begin; task2 < task_adj_end; ++task2) {
1038 part_t neighborTask = task_communication_adj[task2];
1039 int neighborProc = task_to_proc[neighborTask];
1042 if (task_communication_edge_weight == NULL) {
1043 totalCost += distance ;
1046 totalCost += distance * task_communication_edge_weight[task2];
1052 this->commCost = totalCost;
1072 const RCP<const Environment> &env,
1073 ArrayRCP <part_t> &proc_to_task_xadj,
1074 ArrayRCP <part_t> &proc_to_task_adj,
1075 ArrayRCP <part_t> &task_to_proc,
1076 const Teuchos::RCP <
const Teuchos::Comm<int> > comm_
1085 template <
typename pcoord_t,
typename tcoord_t,
typename part_t>
1139 pcoord_t **pcoords_,
1141 tcoord_t **tcoords_,
1144 int *machine_extent_,
1145 bool *machine_extent_wrap_around_,
1162 this->partArraySize = psize;
1166 this->partNoArray = pNo;
1182 part_t minCoordDim =
MINOF(this->task_coord_dim, this->proc_coord_dim);
1184 minCoordDim, nprocs,
1185 this->proc_coords, ntasks);
1190 for (
int i = ntasks; i < nprocs; ++i) {
1191 proc_permutation[i] = -1;
1219 lo = _u_umpa_seed % q;
1220 hi = _u_umpa_seed / q;
1221 test = (a * lo) - (r * hi);
1224 _u_umpa_seed = test;
1226 _u_umpa_seed = test + m;
1228 d = (double) ((
double) _u_umpa_seed / (double) m);
1230 return (
part_t) (d*(double)l);
1234 pcoord_t distance = 0;
1248 this->machine->
getHopCount(procId1, procId2, distance);
1257 int &_u_umpa_seed,
part_t rndm) {
1269 for (i = 0; i < n; i += 16) {
1285 for (i = 1; i < end; i++) {
1306 const RCP<const Environment> &env,
1307 ArrayRCP <part_t> &rcp_proc_to_task_xadj,
1308 ArrayRCP <part_t> &rcp_proc_to_task_adj,
1309 ArrayRCP <part_t> &rcp_task_to_proc,
1310 const Teuchos::RCP <
const Teuchos::Comm<int> > comm_
1313 rcp_proc_to_task_xadj = ArrayRCP <part_t>(this->
no_procs + 1);
1314 rcp_proc_to_task_adj = ArrayRCP <part_t>(this->
no_tasks);
1315 rcp_task_to_proc = ArrayRCP <part_t>(this->
no_tasks);
1318 part_t *proc_to_task_xadj = rcp_proc_to_task_xadj.getRawPtr();
1321 part_t *proc_to_task_adj = rcp_proc_to_task_adj.getRawPtr();
1324 part_t *task_to_proc = rcp_task_to_proc.getRawPtr();
1327 fillContinousArray<part_t>(proc_to_task_xadj, this->
no_procs + 1,
1353 recursion_depth = log(
float(this->
no_procs)) / log(2.0) * 2 + 1;
1356 recursion_depth = log(
float(this->
no_procs)) / log(2.0) + 1;
1361 int taskPerm = z2Fact<int>(this->task_coord_dim);
1363 int procPerm = z2Fact<int>(this->proc_coord_dim);
1366 int permutations = taskPerm * procPerm;
1370 permutations += taskPerm;
1374 permutations += procPerm;
1381 part_t *proc_xadj = allocMemory<part_t>(num_parts + 1);
1399 fillContinousArray<part_t>(proc_adjList,this->
no_procs, NULL);
1403 int myPermutation = myRank % permutations;
1404 bool task_partition_along_longest_dim =
false;
1405 bool proc_partition_along_longest_dim =
false;
1411 if (myPermutation == 0) {
1412 task_partition_along_longest_dim =
true;
1413 proc_partition_along_longest_dim =
true;
1417 if (myPermutation < taskPerm) {
1418 proc_partition_along_longest_dim =
true;
1420 myTaskPerm = myPermutation;
1423 myPermutation -= taskPerm;
1425 if (myPermutation < procPerm) {
1426 task_partition_along_longest_dim =
true;
1428 myProcPerm = myPermutation;
1431 myPermutation -= procPerm;
1433 myProcPerm = myPermutation % procPerm;
1435 myTaskPerm = myPermutation / procPerm;
1461 allocMemory<int>((this->proc_coord_dim > this->task_coord_dim) ?
1462 this->proc_coord_dim : this->task_coord_dim);
1465 ithPermutation<int>(this->proc_coord_dim, myProcPerm, permutation);
1533 part_t num_group_count = 1;
1534 part_t *group_count = NULL;
1537 num_group_count =
machine->getNumUniqueGroups();
1539 if (num_group_count > 1) {
1540 group_count =
new part_t[num_group_count];
1541 memset(group_count, 0,
sizeof(
part_t) * num_group_count);
1543 machine->getGroupCount(group_count);
1547 env->timerStart(
MACRO_TIMERS,
"Mapping - Proc Partitioning");
1563 proc_partition_along_longest_dim,
1569 env->timerStop(
MACRO_TIMERS,
"Mapping - Proc Partitioning");
1575 part_t *task_xadj = allocMemory<part_t>(num_parts + 1);
1579 fillContinousArray<part_t>(task_adjList,this->
no_tasks, NULL);
1582 ithPermutation<int>(this->task_coord_dim, myTaskPerm, permutation);
1585 tcoord_t **tcoords = allocMemory<tcoord_t *>(this->task_coord_dim);
1587 tcoords[i] = this->task_coords[permutation[i]];
1590 env->timerStart(
MACRO_TIMERS,
"Mapping - Task Partitioning");
1597 this->task_coord_dim,
1604 task_partition_along_longest_dim,
1612 env->timerStop(
MACRO_TIMERS,
"Mapping - Task Partitioning");
1619 freeArray<pcoord_t *>(tcoords);
1620 freeArray<int>(permutation);
1623 for (
part_t i = 0; i < num_parts; ++i) {
1625 part_t proc_index_begin = proc_xadj[i];
1626 part_t task_begin_index = task_xadj[i];
1627 part_t proc_index_end = proc_xadj[i + 1];
1628 part_t task_end_index = task_xadj[i + 1];
1630 if (proc_index_end - proc_index_begin != 1) {
1631 std::cerr <<
"Error at partitioning of processors" << std::endl;
1632 std::cerr <<
"PART:" << i <<
" is assigned to "
1633 << proc_index_end - proc_index_begin <<
" processors."
1637 part_t assigned_proc = proc_adjList[proc_index_begin];
1638 proc_to_task_xadj[assigned_proc] = task_end_index - task_begin_index;
1643 part_t *proc_to_task_xadj_work = allocMemory<part_t>(this->
no_procs);
1646 part_t tmp = proc_to_task_xadj[i];
1647 proc_to_task_xadj[i] = sum;
1649 proc_to_task_xadj_work[i] = sum;
1651 proc_to_task_xadj[this->
no_procs] = sum;
1653 for (
part_t i = 0; i < num_parts; ++i) {
1655 part_t proc_index_begin = proc_xadj[i];
1656 part_t task_begin_index = task_xadj[i];
1657 part_t task_end_index = task_xadj[i + 1];
1659 part_t assigned_proc = proc_adjList[proc_index_begin];
1661 for (
part_t j = task_begin_index; j < task_end_index; ++j) {
1662 part_t taskId = task_adjList[j];
1664 task_to_proc[taskId] = assigned_proc;
1666 proc_to_task_adj[--proc_to_task_xadj_work[assigned_proc]] = taskId;
1723 freeArray<part_t>(proc_to_task_xadj_work);
1724 freeArray<part_t>(task_xadj);
1725 freeArray<part_t>(task_adjList);
1726 freeArray<part_t>(proc_xadj);
1727 freeArray<part_t>(proc_adjList);
1732 template <
typename Adapter,
typename part_t>
1736 #ifndef DOXYGEN_SHOULD_SKIP_THIS
1738 typedef typename Adapter::scalar_t pcoord_t;
1739 typedef typename Adapter::scalar_t tcoord_t;
1740 typedef typename Adapter::scalar_t
scalar_t;
1741 typedef typename Adapter::lno_t
lno_t;
1776 const Teuchos::RCP<
const Teuchos::Comm<int> > comm_) {
1778 if (this->proc_task_comm) {
1783 this->proc_to_task_xadj,
1785 this->proc_to_task_adj,
1792 std::cerr <<
"communicationModel is not specified in the Mapper"
1807 int taskPerm = z2Fact<int>(procDim);
1809 int procPerm = z2Fact<int>(taskDim);
1811 int idealGroupSize = taskPerm * procPerm;
1814 idealGroupSize += taskPerm + procPerm + 1;
1816 int myRank = this->
comm->getRank();
1817 int commSize = this->
comm->getSize();
1819 int myGroupIndex = myRank / idealGroupSize;
1821 int prevGroupBegin = (myGroupIndex - 1)* idealGroupSize;
1822 if (prevGroupBegin < 0) prevGroupBegin = 0;
1823 int myGroupBegin = myGroupIndex * idealGroupSize;
1824 int myGroupEnd = (myGroupIndex + 1) * idealGroupSize;
1825 int nextGroupEnd = (myGroupIndex + 2)* idealGroupSize;
1827 if (myGroupEnd > commSize) {
1828 myGroupBegin = prevGroupBegin;
1829 myGroupEnd = commSize;
1831 if (nextGroupEnd > commSize) {
1832 myGroupEnd = commSize;
1834 int myGroupSize = myGroupEnd - myGroupBegin;
1836 part_t *myGroup = allocMemory<part_t>(myGroupSize);
1837 for (
int i = 0; i < myGroupSize; ++i) {
1838 myGroup[i] = myGroupBegin + i;
1843 ArrayView<const part_t> myGroupView(myGroup, myGroupSize);
1845 RCP<Comm<int> > subComm =
1846 this->
comm->createSubcommunicator(myGroupView);
1847 freeArray<part_t>(myGroup);
1862 double localCost[2], globalCost[2];
1864 localCost[0] = myCost;
1865 localCost[1] = double(subComm->getRank());
1867 globalCost[1] = globalCost[0] = std::numeric_limits<double>::max();
1869 reduceAll<int, double>(*subComm, reduceBest,
1870 2, localCost, globalCost);
1872 int sender = int(globalCost[1]);
1889 broadcast(*subComm, sender, this->ntasks,
1890 this->task_to_proc.getRawPtr());
1891 broadcast(*subComm, sender, this->nprocs,
1892 this->proc_to_task_xadj.getRawPtr());
1893 broadcast(*subComm, sender, this->ntasks,
1894 this->proc_to_task_adj.getRawPtr());
1899 std::ofstream gnuPlotCode(
"gnuPlot.plot", std::ofstream::out);
1903 std::string ss =
"";
1906 std::string procFile = Teuchos::toString<int>(i) +
"_mapping.txt";
1908 gnuPlotCode <<
"plot \"" << procFile <<
"\"\n";
1911 gnuPlotCode <<
"replot \"" << procFile <<
"\"\n";
1914 std::ofstream inpFile(procFile.c_str(), std::ofstream::out);
1916 std::string gnuPlotArrow =
"set arrow from ";
1917 for (
int j = 0; j < mindim; ++j) {
1918 if (j == mindim - 1) {
1928 proc_coords[j][i]) +
",";
1931 gnuPlotArrow +=
" to ";
1933 inpFile << std::endl;
1936 for (
int k = 0; k < a.size(); ++k) {
1939 std::string gnuPlotArrow2 = gnuPlotArrow;
1940 for (
int z = 0; z < mindim; ++z) {
1941 if (z == mindim - 1) {
1953 task_coords[z][j]) +
",";
1956 ss += gnuPlotArrow2 +
"\n";
1957 inpFile << std::endl;
1962 gnuPlotCode <<
"\nreplot\n pause -1 \n";
1963 gnuPlotCode.close();
1968 std::string rankStr = Teuchos::toString<int>(myRank);
1969 std::string gnuPlots =
"gnuPlot", extentionS =
".plot";
1970 std::string outF = gnuPlots + rankStr+ extentionS;
1971 std::ofstream gnuPlotCode(outF.c_str(), std::ofstream::out);
1974 *tmpproc_task_comm =
1983 std::cerr <<
"Mapping Write is only good for 3 dim" << std::endl;
1986 std::string ss =
"";
1987 std::string procs =
"";
1989 std::set < std::tuple<int,int,int,int,int,int> > my_arrows;
1990 for (
part_t origin_rank = 0; origin_rank < this->
nprocs; ++origin_rank) {
1992 if (a.size() == 0) {
1996 std::string gnuPlotArrow =
"set arrow from ";
1997 for (
int j = 0; j < mindim; ++j) {
1998 if (j == mindim - 1) {
2000 Teuchos::toString<float>(tmpproc_task_comm->
2001 proc_coords[j][origin_rank]);
2003 Teuchos::toString<float>(tmpproc_task_comm->
2004 proc_coords[j][origin_rank]);
2009 Teuchos::toString<float>(tmpproc_task_comm->
2010 proc_coords[j][origin_rank]) +
",";
2012 Teuchos::toString<float>(tmpproc_task_comm->
2013 proc_coords[j][origin_rank])+
" ";
2018 gnuPlotArrow +=
" to ";
2021 for (
int k = 0; k < a.size(); ++k) {
2022 int origin_task = a[k];
2028 bool differentnode =
false;
2032 for (
int j = 0; j < mindim; ++j) {
2033 if (
int(tmpproc_task_comm->
proc_coords[j][origin_rank]) !=
2034 int(tmpproc_task_comm->
proc_coords[j][neighbor_rank])) {
2035 differentnode =
true;
break;
2038 std::tuple<int,int,int, int, int, int> foo(
2039 int(tmpproc_task_comm->
proc_coords[0][origin_rank]),
2040 int(tmpproc_task_comm->
proc_coords[1][origin_rank]),
2041 int(tmpproc_task_comm->
proc_coords[2][origin_rank]),
2042 int(tmpproc_task_comm->
proc_coords[0][neighbor_rank]),
2043 int(tmpproc_task_comm->
proc_coords[1][neighbor_rank]),
2044 int(tmpproc_task_comm->
proc_coords[2][neighbor_rank]));
2047 if (differentnode && my_arrows.find(foo) == my_arrows.end()) {
2048 my_arrows.insert(foo);
2050 std::string gnuPlotArrow2 =
"";
2051 for (
int j = 0; j < mindim; ++j) {
2052 if (j == mindim - 1) {
2054 Teuchos::toString<float>(tmpproc_task_comm->
2055 proc_coords[j][neighbor_rank]);
2059 Teuchos::toString<float>(tmpproc_task_comm->
2060 proc_coords[j][neighbor_rank]) +
",";
2063 ss += gnuPlotArrow + gnuPlotArrow2 +
" nohead\n";
2069 std::ofstream procFile(
"procPlot.plot", std::ofstream::out);
2070 procFile << procs <<
"\n";
2075 gnuPlotCode <<
"plot \"procPlot.plot\" with points pointsize 3\n";
2077 gnuPlotCode <<
"splot \"procPlot.plot\" with points pointsize 3\n";
2080 gnuPlotCode << ss <<
"\nreplot\n pause -1 \n";
2081 gnuPlotCode.close();
2088 const Teuchos::Comm<int> *comm_,
2091 tcoord_t **partCenters) {
2093 std::string file =
"gggnuPlot";
2094 std::string exten =
".plot";
2095 std::ofstream mm(
"2d.txt");
2096 file += Teuchos::toString<int>(comm_->getRank()) + exten;
2097 std::ofstream ff(file.c_str());
2099 std::vector<Zoltan2::coordinateModelPartBox <tcoord_t, part_t> >
2105 outPartBoxes[i].writeGnuPlot(ff, mm);
2107 if (coordDim == 2) {
2108 ff <<
"plot \"2d.txt\"" << std::endl;
2112 ff <<
"splot \"2d.txt\"" << std::endl;
2117 ff <<
"set style arrow 5 nohead size screen 0.03,15,135 ls 1"
2124 for (
part_t p = pb; p < pe; ++p) {
2128 std::string arrowline =
"set arrow from ";
2129 for (
int j = 0; j < coordDim - 1; ++j) {
2131 Teuchos::toString<tcoord_t>(partCenters[j][n]) +
",";
2134 Teuchos::toString<tcoord_t>(partCenters[coordDim - 1][n]) +
2137 for (
int j = 0; j < coordDim - 1; ++j) {
2139 Teuchos::toString<tcoord_t>(partCenters[j][i]) +
",";
2142 Teuchos::toString<tcoord_t>(partCenters[coordDim - 1][i]) +
2150 ff <<
"replot\n pause -1" << std::endl;
2158 part_t* &proc_to_task_adj_) {
2159 proc_to_task_xadj_ = this->proc_to_task_xadj.getRawPtr();
2160 proc_to_task_adj_ = this->proc_to_task_adj.getRawPtr();
2181 if (this->isOwnerofModel) {
2187 const lno_t num_local_coords,
2188 const part_t *local_coord_parts,
2189 const ArrayRCP<part_t> task_to_proc_) {
2192 for (
lno_t i = 0; i < num_local_coords; ++i) {
2193 part_t local_coord_part = local_coord_parts[i];
2194 part_t rank_index = task_to_proc_[local_coord_part];
2213 const Teuchos::RCP <
const Teuchos::Comm<int> > comm_,
2216 const Teuchos::RCP <const Adapter> input_adapter_,
2219 const Teuchos::RCP <const Environment> envConst,
2220 bool is_input_adapter_distributed =
true,
2221 int num_ranks_per_node = 1,
2222 bool divide_to_prime_first =
false,
2223 bool reduce_best_mapping =
true):
2238 RCP<Zoltan2::GraphModel<ctm_base_adapter_t> > graph_model_;
2239 RCP<Zoltan2::CoordinateModel<ctm_base_adapter_t> > coordinateModel_ ;
2241 RCP<const Teuchos::Comm<int> > rcp_comm = comm_;
2242 RCP<const Teuchos::Comm<int> > ia_comm = rcp_comm;
2243 if (!is_input_adapter_distributed) {
2244 ia_comm = Teuchos::createSerialComm<int>();
2247 RCP<const Environment> envConst_ = envConst;
2249 RCP<const ctm_base_adapter_t> baseInputAdapter_(
2250 rcp(
dynamic_cast<const ctm_base_adapter_t *
>(
2251 input_adapter_.getRawPtr()),
false));
2259 baseInputAdapter_, envConst_, ia_comm, coordFlags_));
2269 baseInputAdapter_, envConst_, ia_comm,
2273 if (!machine_->hasMachineCoordinates()) {
2274 throw std::runtime_error(
"Existing machine does not provide "
2275 "coordinates for coordinate task mapping");
2279 int procDim = machine_->getMachineDim();
2280 this->nprocs = machine_->getNumRanks();
2283 pcoord_t **procCoordinates = NULL;
2284 if (!machine_->getAllMachineCoordinatesView(procCoordinates)) {
2285 throw std::runtime_error(
"Existing machine does not implement "
2286 "getAllMachineCoordinatesView");
2293 std::vector<int> machine_extent_vec(procDim);
2295 int *machine_extent = &(machine_extent_vec[0]);
2296 bool *machine_extent_wrap_around =
new bool[procDim];
2297 for (
int i = 0; i < procDim; ++i)
2298 machine_extent_wrap_around[i] =
false;
2300 bool haveWrapArounds = machine_->getMachineExtentWrapArounds(machine_extent_wrap_around);
2306 if (machine_->getMachineExtent(machine_extent) &&
2313 machine_extent_wrap_around,
2320 int coordDim = coordinateModel_->getCoordinateDim();
2331 tcoord_t **partCenters = NULL;
2332 partCenters = allocMemory<tcoord_t *>(coordDim);
2333 for (
int i = 0; i < coordDim; ++i) {
2334 partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
2337 typedef typename Adapter::scalar_t t_scalar_t;
2340 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Solution Center");
2343 getSolutionCenterCoordinates<Adapter, t_scalar_t,part_t>(
2344 envConst.getRawPtr(),
2345 ia_comm.getRawPtr(),
2346 coordinateModel_.getRawPtr(),
2347 this->solution_parts,
2354 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Solution Center");
2357 if (graph_model_.getRawPtr() != NULL) {
2358 getCoarsenedPartGraph<Adapter, t_scalar_t, part_t>(
2359 envConst.getRawPtr(),
2360 ia_comm.getRawPtr(),
2361 graph_model_.getRawPtr(),
2363 this->solution_parts,
2373 this->proc_task_comm =
2383 machine_extent_wrap_around,
2384 machine_.getRawPtr());
2386 int myRank = comm_->getRank();
2390 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Processor Task map");
2392 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Processor Task map");
2394 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Graph");
2401 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Graph");
2403 if (comm_->getRank() == 0) {
2406 std::cout <<
" TotalComm:"
2409 for (
part_t i = 1; i <= taskCommCount; ++i) {
2415 std::cout <<
" maxNeighbor:" << maxN << std::endl;
2418 this->writeGnuPlot(comm_, soln_, coordDim, partCenters);
2421 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Cost");
2436 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Cost");
2443 coordinateModel_->getLocalNumCoordinates(),
2444 this->solution_parts,
2445 this->task_to_proc);
2466 delete [] machine_extent_wrap_around;
2468 if (machine_->getMachineExtent(machine_extent) &&
2470 for (
int i = 0; i < procDim; ++i) {
2471 delete [] procCoordinates[i];
2473 delete [] procCoordinates;
2476 for (
int i = 0; i < coordDim; ++i) {
2477 freeArray<tcoord_t>(partCenters[i]);
2479 freeArray<tcoord_t *>(partCenters);
2501 const Teuchos::RCP <
const Teuchos::Comm<int> > comm_,
2504 const Teuchos::RCP <const Adapter> input_adapter_,
2506 const part_t *result_parts,
2507 const Teuchos::RCP <const Environment> envConst,
2508 bool is_input_adapter_distributed =
true,
2509 int num_ranks_per_node = 1,
2510 bool divide_to_prime_first =
false,
2511 bool reduce_best_mapping =
true):
2513 num_parts_, result_parts, envConst),
2526 RCP<Zoltan2::GraphModel<ctm_base_adapter_t> > graph_model_;
2527 RCP<Zoltan2::CoordinateModel<ctm_base_adapter_t> > coordinateModel_ ;
2529 RCP<const Teuchos::Comm<int> > rcp_comm = comm_;
2530 RCP<const Teuchos::Comm<int> > ia_comm = rcp_comm;
2531 if (!is_input_adapter_distributed) {
2532 ia_comm = Teuchos::createSerialComm<int>();
2534 RCP<const Environment> envConst_ = envConst;
2536 RCP<const ctm_base_adapter_t> baseInputAdapter_(
2537 rcp(
dynamic_cast<const ctm_base_adapter_t *
>(
2538 input_adapter_.getRawPtr()),
false));
2546 baseInputAdapter_, envConst_, ia_comm, coordFlags_));
2556 baseInputAdapter_, envConst_, ia_comm,
2560 if (!machine_->hasMachineCoordinates()) {
2561 throw std::runtime_error(
"Existing machine does not provide "
2562 "coordinates for coordinate task mapping.");
2566 int procDim = machine_->getMachineDim();
2567 this->nprocs = machine_->getNumRanks();
2570 pcoord_t **procCoordinates = NULL;
2571 if (!machine_->getAllMachineCoordinatesView(procCoordinates)) {
2572 throw std::runtime_error(
"Existing machine does not implement "
2573 "getAllMachineCoordinatesView");
2581 std::vector<int> machine_extent_vec(procDim);
2583 int *machine_extent = &(machine_extent_vec[0]);
2584 bool *machine_extent_wrap_around =
new bool[procDim];
2585 bool haveWrapArounds = machine_->getMachineExtentWrapArounds(machine_extent_wrap_around);
2591 if (machine_->getMachineExtent(machine_extent) &&
2597 machine_extent_wrap_around,
2604 int coordDim = coordinateModel_->getCoordinateDim();
2609 this->ntasks = num_parts_;
2613 tcoord_t **partCenters = NULL;
2614 partCenters = allocMemory<tcoord_t *>(coordDim);
2615 for (
int i = 0; i < coordDim; ++i) {
2616 partCenters[i] = allocMemory<tcoord_t>(this->ntasks);
2619 typedef typename Adapter::scalar_t t_scalar_t;
2622 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Solution Center");
2625 getSolutionCenterCoordinates<Adapter, t_scalar_t,part_t>(
2626 envConst.getRawPtr(),
2627 ia_comm.getRawPtr(),
2628 coordinateModel_.getRawPtr(),
2629 this->solution_parts,
2636 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Solution Center");
2640 if (graph_model_.getRawPtr() != NULL) {
2641 getCoarsenedPartGraph<Adapter, t_scalar_t, part_t>(
2642 envConst.getRawPtr(),
2643 ia_comm.getRawPtr(),
2644 graph_model_.getRawPtr(),
2646 this->solution_parts,
2657 "CoordinateCommunicationModel Create");
2659 this->proc_task_comm =
2669 machine_extent_wrap_around,
2670 machine_.getRawPtr());
2673 "CoordinateCommunicationModel Create");
2679 int myRank = comm_->getRank();
2682 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Processor Task map");
2684 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Processor Task map");
2687 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Graph");
2694 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Graph");
2696 if (comm_->getRank() == 0) {
2699 std::cout <<
" TotalComm:"
2702 for (
part_t i = 1; i <= taskCommCount; ++i) {
2708 std::cout <<
" maxNeighbor:" << maxN << std::endl;
2711 this->writeGnuPlot(comm_, soln_, coordDim, partCenters);
2714 envConst->timerStart(
MACRO_TIMERS,
"Mapping - Communication Cost");
2729 envConst->timerStop(
MACRO_TIMERS,
"Mapping - Communication Cost");
2737 coordinateModel_->getLocalNumCoordinates(),
2738 this->solution_parts,
2739 this->task_to_proc);
2761 delete [] machine_extent_wrap_around;
2763 if (machine_->getMachineExtent(machine_extent) &&
2765 for (
int i = 0; i < procDim; ++i) {
2766 delete [] procCoordinates[i];
2768 delete [] procCoordinates;
2771 for (
int i = 0; i < coordDim; ++i) {
2772 freeArray<tcoord_t>(partCenters[i]);
2775 freeArray<tcoord_t *>(partCenters);
2845 const Teuchos::Comm<int> *problemComm,
2848 pcoord_t **machine_coords,
2851 tcoord_t **task_coords,
2852 ArrayRCP<part_t>task_comm_xadj,
2853 ArrayRCP<part_t>task_comm_adj,
2854 pcoord_t *task_communication_edge_weight_,
2855 int recursion_depth,
2857 const part_t *machine_dimensions,
2858 int num_ranks_per_node = 1,
2859 bool divide_to_prime_first =
false,
2860 bool reduce_best_mapping =
true):
2873 pcoord_t ** virtual_machine_coordinates = machine_coords;
2874 bool *wrap_arounds =
new bool [proc_dim];
2875 for (
int i = 0; i < proc_dim; ++i) wrap_arounds[i] =
true;
2877 if (machine_dimensions) {
2878 virtual_machine_coordinates =
2887 this->nprocs = num_processors;
2889 int coordDim = task_dim;
2890 this->ntasks = num_tasks;
2893 tcoord_t **partCenters = task_coords;
2896 this->proc_task_comm =
2899 virtual_machine_coordinates,
2903 this->ntasks, NULL, NULL
2912 int myRank = problemComm->getRank();
2925 task_communication_edge_weight_
2945 delete [] wrap_arounds;
2947 if (machine_dimensions) {
2948 for (
int i = 0; i < proc_dim; ++i) {
2949 delete [] virtual_machine_coordinates[i];
2951 delete [] virtual_machine_coordinates;
2954 if (problemComm->getRank() == 0)
2984 const part_t *machine_dimensions,
2985 bool *machine_extent_wrap_around,
2987 pcoord_t **mCoords) {
2989 pcoord_t **result_machine_coords = NULL;
2990 result_machine_coords =
new pcoord_t*[machine_dim];
2992 for (
int i = 0; i < machine_dim; ++i) {
2993 result_machine_coords[i] =
new pcoord_t [numProcs];
2996 for (
int i = 0; i < machine_dim; ++i) {
2997 part_t numMachinesAlongDim = machine_dimensions[i];
2999 part_t *machineCounts =
new part_t[numMachinesAlongDim];
3000 memset(machineCounts, 0,
sizeof(
part_t) * numMachinesAlongDim);
3002 int *filledCoordinates =
new int[numMachinesAlongDim];
3004 pcoord_t *coords = mCoords[i];
3006 for (
part_t j = 0; j < numProcs; ++j) {
3008 ++machineCounts[mc];
3011 part_t filledCoordinateCount = 0;
3012 for (
part_t j = 0; j < numMachinesAlongDim; ++j) {
3013 if (machineCounts[j] > 0) {
3014 filledCoordinates[filledCoordinateCount++] = j;
3018 part_t firstProcCoord = filledCoordinates[0];
3019 part_t firstProcCount = machineCounts[firstProcCoord];
3021 part_t lastProcCoord = filledCoordinates[filledCoordinateCount - 1];
3022 part_t lastProcCount = machineCounts[lastProcCoord];
3025 numMachinesAlongDim - lastProcCoord + firstProcCoord;
3026 part_t firstLastGapProc = lastProcCount + firstProcCount;
3028 part_t leftSideProcCoord = firstProcCoord;
3029 part_t leftSideProcCount = firstProcCount;
3031 part_t biggestGapProc = numProcs;
3033 part_t shiftBorderCoordinate = -1;
3034 for (
part_t j = 1; j < filledCoordinateCount; ++j) {
3035 part_t rightSideProcCoord= filledCoordinates[j];
3036 part_t rightSideProcCount = machineCounts[rightSideProcCoord];
3038 part_t gap = rightSideProcCoord - leftSideProcCoord;
3039 part_t gapProc = rightSideProcCount + leftSideProcCount;
3045 if (gap > biggestGap ||
3046 (gap == biggestGap && biggestGapProc > gapProc)) {
3047 shiftBorderCoordinate = rightSideProcCoord;
3048 biggestGapProc = gapProc;
3051 leftSideProcCoord = rightSideProcCoord;
3052 leftSideProcCount = rightSideProcCount;
3056 if (!(biggestGap > firstLastGap ||
3057 (biggestGap == firstLastGap &&
3058 biggestGapProc < firstLastGapProc))) {
3059 shiftBorderCoordinate = -1;
3062 for (
part_t j = 0; j < numProcs; ++j) {
3064 if (machine_extent_wrap_around[i] &&
3065 coords[j] < shiftBorderCoordinate) {
3066 result_machine_coords[i][j] = coords[j] + numMachinesAlongDim;
3070 result_machine_coords[i][j] = coords[j];
3073 delete [] machineCounts;
3074 delete [] filledCoordinates;
3077 return result_machine_coords;
3091 procs = this->task_to_proc.getRawPtr() + taskId;
3099 return this->task_to_proc[taskId];
3112 part_t task_begin = this->proc_to_task_xadj[procId];
3113 part_t taskend = this->proc_to_task_xadj[procId + 1];
3115 parts = this->proc_to_task_adj.getRawPtr() + task_begin;
3116 numParts = taskend - task_begin;
3120 part_t task_begin = this->proc_to_task_xadj[procId];
3121 part_t taskend = this->proc_to_task_xadj[procId + 1];
3132 if (taskend - task_begin > 0) {
3133 ArrayView <part_t> assignedParts(
3134 this->proc_to_task_adj.getRawPtr() + task_begin,
3135 taskend - task_begin);
3137 return assignedParts;
3140 ArrayView <part_t> assignedParts;
3142 return assignedParts;
3226 template <
typename part_t,
typename pcoord_t,
typename tcoord_t>
3228 RCP<
const Teuchos::Comm<int> > problemComm,
3231 pcoord_t **machine_coords,
3234 tcoord_t **task_coords,
3239 pcoord_t *task_communication_edge_weight_,
3242 int recursion_depth,
3244 const part_t *machine_dimensions,
3245 int num_ranks_per_node = 1,
3246 bool divide_to_prime_first =
false) {
3253 typedef Tpetra::MultiVector<tcoord_t, part_t, part_t>
tMVector_t;
3256 task_comm_xadj, 0, num_tasks + 1,
false);
3259 if (task_comm_xadj) {
3260 Teuchos::ArrayRCP<part_t> tmp_task_communication_adj(
3261 task_comm_adj, 0, task_comm_xadj[num_tasks],
false);
3270 problemComm.getRawPtr(),
3282 task_communication_edge_weight_,
3287 divide_to_prime_first);
3290 part_t* proc_to_task_xadj_;
3291 part_t* proc_to_task_adj_;
3293 ctm->
getProcTask(proc_to_task_xadj_, proc_to_task_adj_);
3295 for (
part_t i = 0; i <= num_processors; ++i) {
3299 for (
part_t i = 0; i < num_tasks; ++i) {
3307 template <
typename proc_coord_t,
typename v_lno_t>
3309 const int machine_coord_dim,
3310 const int num_ranks,
3311 proc_coord_t **machine_coords,
3312 const v_lno_t num_tasks,
3315 const int *task_to_rank) {
3317 std::string rankStr = Teuchos::toString<int>(myRank);
3318 std::string gnuPlots =
"gnuPlot", extentionS =
".plot";
3319 std::string outF = gnuPlots + rankStr+ extentionS;
3320 std::ofstream gnuPlotCode( outF.c_str(), std::ofstream::out);
3322 if (machine_coord_dim != 3) {
3323 std::cerr <<
"Mapping Write is only good for 3 dim" << std::endl;
3326 std::string ss =
"";
3327 std::string procs =
"";
3329 std::set<std::tuple<int, int, int, int, int, int> > my_arrows;
3331 for (v_lno_t origin_task = 0; origin_task < num_tasks; ++origin_task) {
3332 int origin_rank = task_to_rank[origin_task];
3333 std::string gnuPlotArrow =
"set arrow from ";
3335 for (
int j = 0; j < machine_coord_dim; ++j) {
3336 if (j == machine_coord_dim - 1) {
3338 Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank]);
3340 Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank]);
3345 Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank])
3348 Teuchos::toString<proc_coord_t>(machine_coords[j][origin_rank])
3354 gnuPlotArrow +=
" to ";
3362 bool differentnode =
false;
3363 int neighbor_rank = task_to_rank[neighbor_task];
3365 for (
int j = 0; j < machine_coord_dim; ++j) {
3366 if (
int(machine_coords[j][origin_rank]) !=
3367 int(machine_coords[j][neighbor_rank])) {
3368 differentnode =
true;
break;
3372 std::tuple<int,int,int, int, int, int> foo(
3373 (
int)(machine_coords[0][origin_rank]),
3374 (
int)(machine_coords[1][origin_rank]),
3375 (
int)(machine_coords[2][origin_rank]),
3376 (
int)(machine_coords[0][neighbor_rank]),
3377 (
int)(machine_coords[1][neighbor_rank]),
3378 (
int)(machine_coords[2][neighbor_rank]));
3380 if (differentnode && my_arrows.find(foo) == my_arrows.end()) {
3381 my_arrows.insert(foo);
3383 std::string gnuPlotArrow2 =
"";
3384 for (
int j = 0; j < machine_coord_dim; ++j) {
3385 if (j == machine_coord_dim - 1) {
3387 Teuchos::toString<float>(machine_coords[j][neighbor_rank]);
3391 Teuchos::toString<float>(machine_coords[j][neighbor_rank])
3395 ss += gnuPlotArrow + gnuPlotArrow2 +
" nohead\n";
3400 std::ofstream procFile(
"procPlot.plot", std::ofstream::out);
3401 procFile << procs <<
"\n";
3405 if (machine_coord_dim == 2) {
3406 gnuPlotCode <<
"plot \"procPlot.plot\" with points pointsize 3\n";
3409 gnuPlotCode <<
"splot \"procPlot.plot\" with points pointsize 3\n";
3412 gnuPlotCode << ss <<
"\nreplot\n pause -1\npause -1";
3413 gnuPlotCode.close();