diff --git a/.gitignore b/.gitignore index d197ecf..bd7ac3b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,10 @@ .emacs + doc/doxygen/ extern lib *.o *.d +a.out +*~ +config.mk diff --git a/atrip.org b/atrip.org index 0f68b9d..a61a33d 100644 --- a/atrip.org +++ b/atrip.org @@ -1400,6 +1400,16 @@ as well as their distribution to nodes and cores. #include #include +// TODO: remove some +#include +#include +#include +#include +#include +#include +#include +#include + #include #include @@ -1491,6 +1501,498 @@ getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) { #+end_src *** Group and sort list + +**** Prolog :noexport: +#+begin_src c++ :tangle (atrip-tuples-h) +namespace group_and_sort { +#+end_src + +**** Node information + +- nodeList :: + List of hostnames of size \( N_n \) +- nodeInfos :: + List of (hostname, local rank Id) + of size \( N_p \), i.e., size of ranks + where local rank id goes from 0 to 48. + + + +=getNodeNames= gets the names of the nodes used, +i.e., the size of the resulting vector gives the +number of nodes. +#+begin_src c++ :tangle (atrip-tuples-h) +std::vector getNodeNames(MPI_Comm comm){ + int rank, np; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &np); + + std::vector nodeList(np); + char nodeName[MPI_MAX_PROCESSOR_NAME] + , nodeNames[np*MPI_MAX_PROCESSOR_NAME] + ; + std::vector nameLengths(np) + , off(np) + ; + int nameLength; + MPI_Get_processor_name(nodeName, &nameLength); + MPI_Allgather(&nameLength, + 1, + MPI_INT, + nameLengths.data(), + 1, + MPI_INT, + comm); + for (int i(1); i < np; i++) + off[i] = off[i-1] + nameLengths[i-1]; + MPI_Allgatherv(nodeName, + nameLengths[rank], + MPI_BYTE, + nodeNames, + nameLengths.data(), + off.data(), + MPI_BYTE, + comm); + for (int i(0); i < np; i++) { + std::string const s(&nodeNames[off[i]], nameLengths[i]); + nodeList[i] = s; + } + return nodeList; +} +#+end_src + +=getNodeInfos= +#+begin_src c++ :tangle (atrip-tuples-h) +struct RankInfo { + const std::string name; + const size_t nodeId; + const size_t globalRank; + const size_t localRank; + const size_t ranksPerNode; +}; + +std::vector +getNodeInfos(std::vector const& nodeNames) { + std::vector result; + auto uniqueNames = nodeNames; + { + std::sort(uniqueNames.begin(), uniqueNames.end()); + auto const& last = std::unique(uniqueNames.begin(), uniqueNames.end()); + uniqueNames.erase(last, uniqueNames.end()); + } + const auto index = [&uniqueNames](std::string const& s) { + auto const& it = std::find(uniqueNames.begin(), uniqueNames.end(), s); + return std::distance(uniqueNames.begin(), it); + }; + std::vector localRanks(uniqueNames.size(), 0); + size_t rank = 0; + for (auto const& name: nodeNames) { + const size_t nodeId = index(name); + result.push_back({name, + nodeId, + rank++, + localRanks[nodeId]++, + std::count(nodeNames.begin(), + nodeNames.end(), + name) + }); + } + return result; +} +#+end_src + +**** Utils + +#+begin_src c++ :tangle (atrip-tuples-h) +// Provides the node on which the slice-element is found +// Right now we distribute the slices in a round robin fashion +// over the different nodes (NOTE: not mpi ranks but nodes) +size_t isOnNode(size_t tuple, size_t nodes) { return tuple % nodes; } + + +struct Info { + size_t nNodes; + size_t Nv; + size_t np; + size_t nodeId; +}; + + +// return the node (or all nodes) where the elements of this +// tuple are located +std::vector getTupleNodes(ABCTuple t, size_t nNodes) { + std::vector result; + ABCTuple nTuple = { isOnNode(t[0], nNodes) + , isOnNode(t[1], nNodes) + , isOnNode(t[2], 
nNodes)
+                    };
+  std::sort(nTuple.begin(), nTuple.end());
+  ABCTuple::iterator it = std::unique(nTuple.begin(), nTuple.end());
+  result.resize(it - nTuple.begin());
+  std::copy(nTuple.begin(), it, result.begin());
+  return result;
+}
+#+end_src
+
+**** Distribution
+Wording: a 'home element' is an element which is located on the given node.
+1. we distribute the tuples such that each tuple has at least one 'home element'
+2. we sort each tuple in a way that the 'home elements' are the fastest indices
+3. we sort the list of tuples on every node
+4. we resort the tuples so that for every tuple abc the following holds: a < b < c
+
+#+begin_src c++ :tangle (atrip-tuples-h)
+std::vector<ABCTuple>
+specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
+
+  std::vector<ABCTuple> nodeTuples;
+  size_t nNodes(info.nNodes);
+  size_t np(info.np);
+  size_t N(allTuples.size());
+  size_t tuplePerNode( ceil( ((double)N) / nNodes) );
+
+  // nodeid tuple list
+  std::map<size_t, std::vector<ABCTuple> > container1d;
+  std::map<size_t, std::vector<ABCTuple> > container2d;
+  std::map<size_t, std::vector<ABCTuple> > container3d;
+
+  // build container-n-d's
+  for (auto t: allTuples) {
+    // on which node(s) are the tuple elements located...
+    // put them into the right container
+    auto nt = getTupleNodes(t, nNodes);
+    if ( nt.size() == 1) container1d[nt[0]].push_back(t);
+    if ( nt.size() == 2) container2d[nt[0] + nNodes*nt[1]].push_back(t);
+    if ( nt.size() == 3)
+      container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t);
+  }
+
+  // DISTRIBUTE 1-d containers
+  // every tuple which is only located at one node belongs to this node
+  {
+    auto const& tuplesVec = container1d[info.nodeId];
+    nodeTuples.resize(tuplesVec.size());
+    std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin());
+  }
+
+  // DISTRIBUTE 2-d containers
+  // the tuples which are located at two nodes are given half/half to these nodes
+  for (auto &m: container2d) {
+    size_t idx = m.first%nNodes;
+    size_t idy = m.first/nNodes;
+    size_t myNode = idx;
+
+    // either idx or idy is my node
+    if (idx != info.nodeId && idy != info.nodeId) continue;
+    if (idy == info.nodeId) myNode = idy;
+
+    auto tuplesVec = m.second;
+    auto n = tuplesVec.size() / 2;
+    auto size = nodeTuples.size();
+    if (myNode == idx) {
+      nodeTuples.resize(size + n);
+      std::copy(tuplesVec.begin(),
+                tuplesVec.begin() + n,
+                nodeTuples.begin() + size);
+    } else {
+      auto ny = tuplesVec.size() - n;
+      nodeTuples.resize(size + ny);
+      std::copy(tuplesVec.begin() + n,
+                tuplesVec.end(),
+                nodeTuples.begin() + size);
+    }
+
+  }
+
+  // DISTRIBUTE 3-d containers
+  // similar game for the tuples which belong to three different nodes
+  for (auto m: container3d){
+    auto tuplesVec = m.second;
+    auto idx = m.first%nNodes;
+    auto idy = (m.first/nNodes)%nNodes;
+    auto idz = m.first/nNodes/nNodes;
+    if (idx != info.nodeId && idy != info.nodeId && idz != info.nodeId) continue;
+
+    size_t nx = tuplesVec.size() / 3;
+    size_t n, nbegin, nend;
+    if (info.nodeId == idx) {
+      n = nx;
+      nbegin = 0;
+      nend = n;
+    } else if (info.nodeId == idy) {
+      n = nx;
+      nbegin = n;
+      nend = n + n;
+    } else {
+      n = tuplesVec.size() - 2 * nx;
+      nbegin = 2 * nx;
+      nend = 2 * nx + n;
+    }
+
+    auto size = nodeTuples.size();
+    nodeTuples.resize(size + n);
+    std::copy(tuplesVec.begin() + nbegin,
+              tuplesVec.begin() + nend,
+              nodeTuples.begin() + size);
+
+  }
+
+
+  // sort part of group-and-sort algorithm
+  // every tuple on a given node is sorted in a way that
+  // the 'home elements' are the fastest index.
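+  // In the case labels below, y/n records whether the first/second/third
+  // element of the tuple is a home element (i.e., lives on node n);
+  // the four cases marked with (x) are the ones that need a swap so that
+  // all home elements end up in the trailing positions of the tuple.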
+ // 1:yyy 2:yyn(x) 3:yny(x) 4:ynn(x) 5:nyy 6:nyn(x) 7:nny 8:nnn + size_t n = info.nodeId; + for (auto &nt: nodeTuples){ + if ( isOnNode(nt[0], nNodes) == n ){ // 1234 + if ( isOnNode(nt[2], nNodes) != n ){ // 24 + size_t x(nt[0]); nt[0] = nt[2]; nt[2] = x; // switch first and last + } + else if ( isOnNode(nt[1], nNodes) != n){ // 3 + size_t x(nt[0]); nt[0] = nt[1]; nt[1] = x; // switch first two + } + } else { + if ( isOnNode(nt[1], nNodes) == n // 56 + && isOnNode(nt[2], nNodes) != n){ // 6 + size_t x(nt[1]); nt[1] = nt[2]; nt[2] = x; // switch last two + } + } + } + //now we sort the list of tuples + std::sort(nodeTuples.begin(), nodeTuples.end()); + // we bring the tuples abc back in the order a fetchElement(ABCTuple cur, ABCTuple suc){ + std::vector result; + ABCTuple inter; + std::sort(cur.begin(), cur.end()); + std::sort(suc.begin(), suc.end()); + std::array::iterator rit, cit, sit; + cit = std::unique(cur.begin(), cur.end()); + sit = std::unique(suc.begin(), suc.end()); + rit = std::set_difference(suc.begin(), sit, cur.begin(), cit, inter.begin()); + result.resize(rit - inter.begin()); + std::copy(inter.begin(), rit, result.begin()); + return result; +} +#+end_src + +**** Main + +The main routine should return the list of tuples to be handled by the current rank. + +Let \( N_p \) be the number of ranks or processes. +Let \( N_n \) be the number of nodes or sockets. + +Then we have the following + +#+begin_example +Global rank | 0 1 2 3 4 5 6 7 8 +nodeId | 0 1 0 1 1 0 2 2 2 +Local rank | 0 0 1 1 2 2 0 1 2 +intra color | 0 1 0 1 1 0 2 2 2 +key | global rank +#+end_example + + + + + +#+begin_src c++ :tangle (atrip-tuples-h) +std::vector main(MPI_Comm universe, size_t Nv) { + + int rank, np; + MPI_Comm_rank(universe, &rank); + MPI_Comm_size(universe, &np); + + std::vector result; + + const auto nodeNames(getNodeNames(universe)); + auto nodeNamesUnique(nodeNames); + { + const auto& last = std::unique(nodeNamesUnique.begin(), + nodeNamesUnique.end()); + nodeNamesUnique.erase(last, nodeNamesUnique.end()); + } + // we pick one rank from every node + auto const nodeInfos = getNodeInfos(nodeNames); + size_t const nNodes = nodeNamesUnique.size(); + + // We want to construct a communicator which only contains of one + // element per node + bool makeDistribution + = nodeInfos[rank].localRank == 0 + ? true + : false + ; + + std::vector + nodeTuples = makeDistribution + ? specialDistribution(Info{ nNodes + , Nv + , np + , nodeInfos[rank].nodeId + }, + getTuplesList(Nv)) + : std::vector() + ; + + + // now we have to send the data from **one** rank on each node + // to all others ranks of this node + const + int color = nodeInfos[rank].nodeId + , key = nodeInfos[rank].localRank + ; + + + MPI_Comm INTRA_COMM; + MPI_Comm_split(universe, color, key, &INTRA_COMM); +#+end_src + +Every node has to distribute **at least** +nodeTuples.size() / nodeInfos[rank].ranksPerNode +tuples among the ranks. + +We have to communicate this quantity among all nodes. + +#+begin_src c++ :tangle (atrip-tuples-h) + + const size_t + tuplesPerRankLocal + = nodeTuples.size() / nodeInfos[rank].ranksPerNode + + size_t(nodeTuples.size() % nodeInfos[rank].ranksPerNode != 0) + ; + + size_t tuplesPerRankGlobal; + + MPI_Reduce(&tuplesPerRankLocal, + &tuplesPerRankGlobal, + 1, + MPI_UINT64_T, + MPI_MAX, + 0, + universe); + + MPI_Bcast(&tuplesPerRankGlobal, + 1, + MPI_UINT64_T, + 0, + universe); +#+end_src + +Now we have the tuples that every rank has to have, i.e., +=tuplesPerRankGlobal=. 
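+
+The =MPI_Reduce= followed by =MPI_Bcast= above computes a global maximum;
+a single =MPI_Allreduce= with =MPI_MAX= would do the same in one call.
+The following stand-alone sketch, which is not tangled into the library
+and whose tuple and rank counts are made up purely for illustration,
+shows the ceiling division and the global maximum in isolation.
+
+#+begin_src c++
+// Stand-alone illustration only; the numbers below are invented.
+#include <mpi.h>
+#include <cstdint>
+#include <cstdio>
+
+int main(int argc, char** argv) {
+  MPI_Init(&argc, &argv);
+  int rank;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+  // pretend this node holds 10 tuples and runs 4 ranks
+  std::uint64_t const nTuplesOnNode = 10, ranksPerNode = 4;
+
+  // ceiling division: every local rank must take 3 tuples
+  std::uint64_t const tuplesPerRankLocal
+    = nTuplesOnNode / ranksPerNode
+    + std::uint64_t(nTuplesOnNode % ranksPerNode != 0);
+
+  // one Allreduce replaces the MPI_Reduce + MPI_Bcast pair
+  std::uint64_t tuplesPerRankGlobal;
+  MPI_Allreduce(&tuplesPerRankLocal, &tuplesPerRankGlobal, 1,
+                MPI_UINT64_T, MPI_MAX, MPI_COMM_WORLD);
+
+  if (rank == 0)
+    std::printf("tuples per rank: %llu\n",
+                (unsigned long long)tuplesPerRankGlobal);
+
+  MPI_Finalize();
+  return 0;
+}
+#+end_src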
+
+Before =tuplesPerRankGlobal= can be used, however, the tuples in
+=nodeTuples= have to be sent from the local root rank of every node to
+all the other ranks on that node.  We also have to make sure that every
+rank inside a given node gets the same number of tuples, namely
+=tuplesPerRankLocal=, so that the node as a whole holds
+=tuplesPerRankLocal * nodeInfos[rank].ranksPerNode= tuples; due to
+divisibility this need not be the case yet.
+
+Up to now we have exactly =nodeTuples.size()= tuples, so we resize the
+vector and append fake tuples at the end as padding until the condition
+above is met.
+
+#+begin_src c++ :tangle (atrip-tuples-h)
+size_t const totalTuplesLocal
+  = tuplesPerRankLocal
+  ,* nodeInfos[rank].ranksPerNode;
+
+if (makeDistribution)
+  nodeTuples.insert(nodeTuples.end(),
+                    totalTuplesLocal - nodeTuples.size(),
+                    FAKE_TUPLE);
+#+end_src
+
+The next step is to send the tuples from the local root rank to the
+other ranks on the node, which we do with the MPI function
+=MPI_Scatterv=.
+Every rank gets =tuplesPerRankLocal= tuples, and the size of the
+=nodeTuples= vector is now an exact multiple of the number of ranks on
+our node.
+Therefore, the =displacements= are simply the vector
+\begin{equation*}
+  \left\{
+    k \cdot \mathrm{tuplesPerRankLocal}
+  \mid
+  k \in
+  \left\{ 0
+        , \ldots
+        , \#\text{ranks in node} - 1
+  \right\}
+  \right\}
+\end{equation*}
+
+and the =sendCounts= vector is simply the constant vector with value
+=tuplesPerRankLocal= and size =ranksPerNode=.
+
+#+begin_src c++ :tangle (atrip-tuples-h)
+{
+  std::vector<int> const
+    sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal);
+
+  std::vector<int>
+    displacements(nodeInfos[rank].ranksPerNode);
+
+  for (size_t i = 0; i < displacements.size(); i++)
+    displacements[i] = i * tuplesPerRankLocal;
+
+  // important!
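+  // result is the receive buffer of the MPI_Scatterv below, so it must
+  // already have room for tuplesPerRankLocal tuples on every rank;
+  // note that sendCounts and displacements are counted in units of the
+  // MPI_ABCTUPLE datatype constructed next, not in bytes.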
+ result.resize(tuplesPerRankLocal); + + // construct mpi type for abctuple + MPI_Datatype MPI_ABCTUPLE; + MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE); + MPI_Type_commit(&MPI_ABCTUPLE); + + MPI_Scatterv(nodeTuples.data(), + sendCounts.data(), + displacements.data(), + MPI_ABCTUPLE, + result.data(), + tuplesPerRankLocal, + MPI_ABCTUPLE, + 0, + INTRA_COMM); + + // free type + MPI_Type_free(&MPI_ABCTUPLE); + +} +#+end_src + +and now we have to make sure that the size of the result +is the same with every rank in the universe communicator, +inserting fake tuples where needed + +#+begin_src c++ :tangle (atrip-tuples-h) + + result.insert(result.end(), + tuplesPerRankGlobal - result.size(), + FAKE_TUPLE); + + return result; + +} +#+end_src + + +**** Epilog :noexport: +#+begin_src c++ :tangle (atrip-tuples-h) +} +#+end_src + + *** Epilog :noexport: #+begin_src c++ :tangle (atrip-tuples-h) } diff --git a/include/atrip/Tuples.hpp b/include/atrip/Tuples.hpp index dc1297d..980ecc4 100644 --- a/include/atrip/Tuples.hpp +++ b/include/atrip/Tuples.hpp @@ -5,6 +5,16 @@ #include #include +// TODO: remove some +#include +#include +#include +#include +#include +#include +#include +#include + #include #include @@ -68,6 +78,399 @@ getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) { } // Naive list:2 ends here +// [[file:../../atrip.org::*Prolog][Prolog:1]] +namespace group_and_sort { +// Prolog:1 ends here + +// [[file:../../atrip.org::*Node information][Node information:1]] +std::vector getNodeNames(MPI_Comm comm){ + int rank, np; + MPI_Comm_rank(comm, &rank); + MPI_Comm_size(comm, &np); + + std::vector nodeList(np); + char nodeName[MPI_MAX_PROCESSOR_NAME] + , nodeNames[np*MPI_MAX_PROCESSOR_NAME] + ; + std::vector nameLengths(np) + , off(np) + ; + int nameLength; + MPI_Get_processor_name(nodeName, &nameLength); + MPI_Allgather(&nameLength, + 1, + MPI_INT, + nameLengths.data(), + 1, + MPI_INT, + comm); + for (int i(1); i < np; i++) + off[i] = off[i-1] + nameLengths[i-1]; + MPI_Allgatherv(nodeName, + nameLengths[rank], + MPI_BYTE, + nodeNames, + nameLengths.data(), + off.data(), + MPI_BYTE, + comm); + for (int i(0); i < np; i++) { + std::string const s(&nodeNames[off[i]], nameLengths[i]); + nodeList[i] = s; + } + return nodeList; +} +// Node information:1 ends here + +// [[file:../../atrip.org::*Node information][Node information:2]] +struct RankInfo { + const std::string name; + const size_t nodeId; + const size_t globalRank; + const size_t localRank; + const size_t ranksPerNode; +}; + +std::vector +getNodeInfos(std::vector const& nodeNames) { + std::vector result; + auto uniqueNames = nodeNames; + { + std::sort(uniqueNames.begin(), uniqueNames.end()); + auto const& last = std::unique(uniqueNames.begin(), uniqueNames.end()); + uniqueNames.erase(last, uniqueNames.end()); + } + const auto index = [&uniqueNames](std::string const& s) { + auto const& it = std::find(uniqueNames.begin(), uniqueNames.end(), s); + return std::distance(uniqueNames.begin(), it); + }; + std::vector localRanks(uniqueNames.size(), 0); + size_t rank = 0; + for (auto const& name: nodeNames) { + const size_t nodeId = index(name); + result.push_back({name, + nodeId, + rank++, + localRanks[nodeId]++, + std::count(nodeNames.begin(), + nodeNames.end(), + name) + }); + } + return result; +} +// Node information:2 ends here + +// [[file:../../atrip.org::*Utils][Utils:1]] +// Provides the node on which the slice-element is found +// Right now we distribute the slices in a round robin fashion +// over the 
different nodes (NOTE: not mpi ranks but nodes) +size_t isOnNode(size_t tuple, size_t nodes) { return tuple % nodes; } + + +struct Info { + size_t nNodes; + size_t Nv; + size_t np; + size_t nodeId; +}; + + +// return the node (or all nodes) where the elements of this +// tuple are located +std::vector getTupleNodes(ABCTuple t, size_t nNodes) { + std::vector result; + ABCTuple nTuple = { isOnNode(t[0], nNodes) + , isOnNode(t[1], nNodes) + , isOnNode(t[2], nNodes) + }; + std::sort(nTuple.begin(), nTuple.end()); + ABCTuple::iterator it = std::unique(nTuple.begin(), nTuple.end()); + result.resize(it - nTuple.begin()); + std::copy(nTuple.begin(), it, result.begin()); + return result; +} +// Utils:1 ends here + +// [[file:../../atrip.org::*Distribution][Distribution:1]] +std::vector +specialDistribution(Info info, std::vector const& allTuples) { + + std::vector nodeTuples; + size_t nNodes(info.nNodes); + size_t np(info.np); + size_t N(allTuples.size()); + size_t tuplePerNode( ceil( ((double)N) / nNodes) ); + + // nodeid tuple list + std::map > container1d; + std::map > container2d; + std::map > container3d; + + // build container-n-d's + for (auto t: allTuples) { + // one which node(s) are the tuple elements located... + // put them into the right container + auto nt = getTupleNodes(t, nNodes); + if ( nt.size() == 1) container1d[nt[0]].push_back(t); + if ( nt.size() == 2) container2d[nt[0] + nNodes*nt[1]].push_back(t); + if ( nt.size() == 3) + container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t); + } + + // DISTRIBUTE 1-d containers + // every tuple which is only located at one node belongs to this node + { + auto const& tuplesVec = container1d[info.nodeId]; + nodeTuples.resize(tuplesVec.size()); + std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin()); + } + + // DISTRIBUTE 2-d containers + //the tuples which are located at two nodes are half/half given to these nodes + for (auto &m: container2d) { + size_t idx = m.first%nNodes; + size_t idy = m.first/nNodes; + size_t myNode = idx; + + // either idx or idy is my node + if (idx != info.nodeId && idy != info.nodeId) continue; + if (idy == info.nodeId) myNode = idy; + + auto tuplesVec = m.second; + auto n = tuplesVec.size() / 2; + auto size = nodeTuples.size(); + if (myNode == idx) { + nodeTuples.resize(size + n); + std::copy(tuplesVec.begin(), + tuplesVec.begin() + n, + nodeTuples.begin() + size); + } else { + auto ny = tuplesVec.size() - n; + nodeTuples.resize(size + ny); + std::copy(tuplesVec.begin() + n, + tuplesVec.end(), + nodeTuples.begin() + size); + } + + } + + // DISTRIBUTE 3-d containers + // similar game for the tuples which belong to three different nodes + for (auto m: container3d){ + auto tuplesVec = m.second; + auto idx = m.first%nNodes; + auto idy = (m.first/nNodes)%nNodes; + auto idz = m.first/nNodes/nNodes; + if (idx != info.nodeId && idy != info.nodeId && idz != info.nodeId) continue; + + size_t nx = tuplesVec.size() / 3; + size_t n, nbegin, nend; + if (info.nodeId == idx) { + n = nx; + nbegin = 0; + nend = n; + } else if (info.nodeId == idy) { + n = nx; + nbegin = n; + nend = n + n; + } else { + n = tuplesVec.size() - 2 * nx; + nbegin = 2 * nx; + nend = 2 * nx + n; + } + + auto size = nodeTuples.size(); + nodeTuples.resize(size + n); + std::copy(tuplesVec.begin() + nbegin, + tuplesVec.begin() + nend, + nodeTuples.begin() + size); + + } + + + // sort part of group-and-sort algorithm + // every tuple on a given node is sorted in a way that + // the 'home elements' are the fastest index. 
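+  // In the case labels below, y/n records whether the first/second/third
+  // element of the tuple is a home element (i.e., lives on node n);
+  // the four cases marked with (x) are the ones that need a swap so that
+  // all home elements end up in the trailing positions of the tuple.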
+ // 1:yyy 2:yyn(x) 3:yny(x) 4:ynn(x) 5:nyy 6:nyn(x) 7:nny 8:nnn + size_t n = info.nodeId; + for (auto &nt: nodeTuples){ + if ( isOnNode(nt[0], nNodes) == n ){ // 1234 + if ( isOnNode(nt[2], nNodes) != n ){ // 24 + size_t x(nt[0]); nt[0] = nt[2]; nt[2] = x; // switch first and last + } + else if ( isOnNode(nt[1], nNodes) != n){ // 3 + size_t x(nt[0]); nt[0] = nt[1]; nt[1] = x; // switch first two + } + } else { + if ( isOnNode(nt[1], nNodes) == n // 56 + && isOnNode(nt[2], nNodes) != n){ // 6 + size_t x(nt[1]); nt[1] = nt[2]; nt[2] = x; // switch last two + } + } + } + //now we sort the list of tuples + std::sort(nodeTuples.begin(), nodeTuples.end()); + // we bring the tuples abc back in the order a fetchElement(ABCTuple cur, ABCTuple suc){ + std::vector result; + ABCTuple inter; + std::sort(cur.begin(), cur.end()); + std::sort(suc.begin(), suc.end()); + std::array::iterator rit, cit, sit; + cit = std::unique(cur.begin(), cur.end()); + sit = std::unique(suc.begin(), suc.end()); + rit = std::set_difference(suc.begin(), sit, cur.begin(), cit, inter.begin()); + result.resize(rit - inter.begin()); + std::copy(inter.begin(), rit, result.begin()); + return result; +} +// Distribution:1 ends here + +// [[file:../../atrip.org::*Main][Main:1]] +std::vector main(MPI_Comm universe, size_t Nv) { + + int rank, np; + MPI_Comm_rank(universe, &rank); + MPI_Comm_size(universe, &np); + + std::vector result; + + const auto nodeNames(getNodeNames(universe)); + auto nodeNamesUnique(nodeNames); + { + const auto& last = std::unique(nodeNamesUnique.begin(), + nodeNamesUnique.end()); + nodeNamesUnique.erase(last, nodeNamesUnique.end()); + } + // we pick one rank from every node + auto const nodeInfos = getNodeInfos(nodeNames); + size_t const nNodes = nodeNamesUnique.size(); + + // We want to construct a communicator which only contains of one + // element per node + bool makeDistribution + = nodeInfos[rank].localRank == 0 + ? true + : false + ; + + std::vector + nodeTuples = makeDistribution + ? specialDistribution(Info{ nNodes + , Nv + , np + , nodeInfos[rank].nodeId + }, + getTuplesList(Nv)) + : std::vector() + ; + + + // now we have to send the data from **one** rank on each node + // to all others ranks of this node + const + int color = nodeInfos[rank].nodeId + , key = nodeInfos[rank].localRank + ; + + + MPI_Comm INTRA_COMM; + MPI_Comm_split(universe, color, key, &INTRA_COMM); +// Main:1 ends here + +// [[file:../../atrip.org::*Main][Main:2]] +const size_t + tuplesPerRankLocal + = nodeTuples.size() / nodeInfos[rank].ranksPerNode + + size_t(nodeTuples.size() % nodeInfos[rank].ranksPerNode != 0) + ; + +size_t tuplesPerRankGlobal; + +MPI_Reduce(&tuplesPerRankLocal, + &tuplesPerRankGlobal, + 1, + MPI_UINT64_T, + MPI_MAX, + 0, + universe); + +MPI_Bcast(&tuplesPerRankGlobal, + 1, + MPI_UINT64_T, + 0, + universe); +// Main:2 ends here + +// [[file:../../atrip.org::*Main][Main:3]] +size_t const totalTuplesLocal + = tuplesPerRankLocal + * nodeInfos[rank].ranksPerNode; + +if (makeDistribution) + nodeTuples.insert(nodeTuples.end(), + totalTuplesLocal - nodeTuples.size(), + FAKE_TUPLE); +// Main:3 ends here + +// [[file:../../atrip.org::*Main][Main:4]] +{ + std::vector const + sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal); + + std::vector + displacements(nodeInfos[rank].ranksPerNode); + + std::iota(displacements.begin(), + displacements.end(), + tuplesPerRankLocal); + + // important! 
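+  // result is the receive buffer of the MPI_Scatterv below, so it must
+  // already have room for tuplesPerRankLocal tuples on every rank;
+  // note that sendCounts and displacements are counted in units of the
+  // MPI_ABCTUPLE datatype constructed next, not in bytes.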
+ result.resize(tuplesPerRankLocal); + + // construct mpi type for abctuple + MPI_Datatype MPI_ABCTUPLE; + MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE); + MPI_Type_commit(&MPI_ABCTUPLE); + + MPI_Scatterv(nodeTuples.data(), + sendCounts.data(), + displacements.data(), + MPI_ABCTUPLE, + result.data(), + tuplesPerRankLocal, + MPI_ABCTUPLE, + 0, + INTRA_COMM); + + // free type + MPI_Type_free(&MPI_ABCTUPLE); + +} +// Main:4 ends here + +// [[file:../../atrip.org::*Main][Main:5]] +result.insert(result.end(), + tuplesPerRankGlobal - result.size(), + FAKE_TUPLE); + + return result; + +} +// Main:5 ends here + +// [[file:../../atrip.org::*Epilog][Epilog:1]] +} +// Epilog:1 ends here + // [[file:../../atrip.org::*Epilog][Epilog:1]] } // Epilog:1 ends here