Add group and sort algorithm, it compiles at least

Alejandro Gallo 2021-10-15 19:18:40 +02:00
parent 944e93dc33
commit 65f804e637
3 changed files with 909 additions and 0 deletions

4
.gitignore vendored

@@ -1,6 +1,10 @@
.emacs
doc/doxygen/
extern
lib
*.o
*.d
a.out
*~
config.mk

502
atrip.org

@@ -1400,6 +1400,16 @@ as well as their distribution to nodes and cores.
#include <array>
#include <numeric>
// TODO: remove some
#include <stdio.h>
#include <math.h>
#include <algorithm>
#include <map>
#include <cassert>
#include <chrono>
#include <climits>
#include <mpi.h>
#include <atrip/Utils.hpp>
#include <atrip/Debug.hpp>
@@ -1491,6 +1501,498 @@ getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) {
#+end_src
*** Group and sort list
**** Prolog :noexport:
#+begin_src c++ :tangle (atrip-tuples-h)
namespace group_and_sort {
#+end_src
**** Node information
- nodeList ::
  List of hostnames, one per node, of size \( N_n \).
- nodeInfos ::
  List of (hostname, local rank id) pairs, one per rank,
  of size \( N_p \), where the local rank id runs from 0 to the
  number of ranks on that node minus one.
=getNodeNames= gathers the processor name of every rank, i.e.,
the resulting vector has one entry per rank, and the number of
unique entries gives the number of nodes.
#+begin_src c++ :tangle (atrip-tuples-h)
std::vector<std::string> getNodeNames(MPI_Comm comm){
int rank, np;
MPI_Comm_rank(comm, &rank);
MPI_Comm_size(comm, &np);
std::vector<std::string> nodeList(np);
char nodeName[MPI_MAX_PROCESSOR_NAME];
// use a std::vector instead of a variable-length array,
// which is not standard C++
std::vector<char> nodeNames(np * MPI_MAX_PROCESSOR_NAME);
std::vector<int> nameLengths(np)
, off(np)
;
int nameLength;
MPI_Get_processor_name(nodeName, &nameLength);
MPI_Allgather(&nameLength,
1,
MPI_INT,
nameLengths.data(),
1,
MPI_INT,
comm);
for (int i(1); i < np; i++)
off[i] = off[i-1] + nameLengths[i-1];
MPI_Allgatherv(nodeName,
nameLengths[rank],
MPI_BYTE,
nodeNames.data(),
nameLengths.data(),
off.data(),
MPI_BYTE,
comm);
for (int i(0); i < np; i++) {
std::string const s(&nodeNames[off[i]], nameLengths[i]);
nodeList[i] = s;
}
return nodeList;
}
#+end_src
=getNodeInfos= builds one =RankInfo= entry per rank, containing its
node id, its local rank on the node, and the number of ranks on that node.
#+begin_src c++ :tangle (atrip-tuples-h)
struct RankInfo {
const std::string name;
const size_t nodeId;
const size_t globalRank;
const size_t localRank;
const size_t ranksPerNode;
};
std::vector<RankInfo>
getNodeInfos(std::vector<std::string> const& nodeNames) {
std::vector<RankInfo> result;
auto uniqueNames = nodeNames;
{
std::sort(uniqueNames.begin(), uniqueNames.end());
auto const& last = std::unique(uniqueNames.begin(), uniqueNames.end());
uniqueNames.erase(last, uniqueNames.end());
}
const auto index = [&uniqueNames](std::string const& s) {
auto const& it = std::find(uniqueNames.begin(), uniqueNames.end(), s);
return std::distance(uniqueNames.begin(), it);
};
std::vector<size_t> localRanks(uniqueNames.size(), 0);
size_t rank = 0;
for (auto const& name: nodeNames) {
const size_t nodeId = index(name);
result.push_back({name,
nodeId,
rank++,
localRanks[nodeId]++,
std::count(nodeNames.begin(),
nodeNames.end(),
name)
});
}
return result;
}
#+end_src
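To make the bookkeeping concrete, here is a small, hand-written check
of =getNodeInfos= (illustration only, not tangled; it assumes the
definitions and headers from the blocks above, and the host names are
made up):
#+begin_src c++
void testGetNodeInfos() {
  // a hypothetical run: four ranks spread over two nodes
  std::vector<std::string> const names = {"nodeA", "nodeB", "nodeA", "nodeB"};
  auto const infos = getNodeInfos(names);
  assert(infos[2].name == "nodeA");   // global rank 2 runs on nodeA
  assert(infos[2].nodeId == 0);       // nodeA gets node id 0 (sorted order)
  assert(infos[2].localRank == 1);    // rank 2 is the second rank on nodeA
  assert(infos[2].ranksPerNode == 2); // two ranks share nodeA
}
#+end_src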
**** Utils
#+begin_src c++ :tangle (atrip-tuples-h)
// Provides the node on which the slice-element is found
// Right now we distribute the slices in a round robin fashion
// over the different nodes (NOTE: not mpi ranks but nodes)
size_t isOnNode(size_t tuple, size_t nodes) { return tuple % nodes; }
struct Info {
size_t nNodes;
size_t Nv;
size_t np;
size_t nodeId;
};
// return the node (or all nodes) where the elements of this
// tuple are located
std::vector<size_t> getTupleNodes(ABCTuple t, size_t nNodes) {
std::vector<size_t> result;
ABCTuple nTuple = { isOnNode(t[0], nNodes)
, isOnNode(t[1], nNodes)
, isOnNode(t[2], nNodes)
};
std::sort(nTuple.begin(), nTuple.end());
ABCTuple::iterator it = std::unique(nTuple.begin(), nTuple.end());
result.resize(it - nTuple.begin());
std::copy(nTuple.begin(), it, result.begin());
return result;
}
#+end_src
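A short check of =getTupleNodes= (illustration only, not tangled;
assumes =ABCTuple= is =std::array<size_t, 3>= as in the rest of this
file):
#+begin_src c++
void testGetTupleNodes() {
  // with three nodes, element p lives on node p % 3
  assert((getTupleNodes({3, 6, 9}, 3) == std::vector<size_t>{0}));       // one node
  assert((getTupleNodes({1, 4, 5}, 3) == std::vector<size_t>{1, 2}));    // two nodes
  assert((getTupleNodes({3, 4, 8}, 3) == std::vector<size_t>{0, 1, 2})); // three nodes
}
#+end_src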
**** Distribution
Wording: a /home element/ of a tuple is an element whose slice is
located on the given node.
1. We assign every tuple to a node that holds at least one of its
   elements, i.e., every tuple has at least one home element on its node.
2. We reorder each tuple so that its home elements are the fastest
   (i.e., the last) indices.
3. We sort the list of tuples on every node.
4. We restore the order a < b < c inside every tuple abc.
For instance, with two nodes, node 0 turns the tuple (2, 3, 4) into
(3, 2, 4) in step 2, since 2 and 4 are its home elements, and step 4
brings it back to (2, 3, 4).
#+begin_src c++ :tangle (atrip-tuples-h)
std::vector<ABCTuple>
specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
std::vector<ABCTuple> nodeTuples;
size_t nNodes(info.nNodes);
size_t np(info.np);
size_t N(allTuples.size());
size_t tuplePerNode( ceil( ((double)N) / nNodes) );
// nodeid tuple list
std::map<size_t, std::vector<ABCTuple> > container1d;
std::map<size_t, std::vector<ABCTuple> > container2d;
std::map<size_t, std::vector<ABCTuple> > container3d;
// build container-n-d's
for (auto t: allTuples) {
// on which node(s) are the tuple elements located?
// put them into the right container
auto nt = getTupleNodes(t, nNodes);
if ( nt.size() == 1) container1d[nt[0]].push_back(t);
if ( nt.size() == 2) container2d[nt[0] + nNodes*nt[1]].push_back(t);
if ( nt.size() == 3)
container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t);
}
// DISTRIBUTE 1-d containers
// every tuple which is only located at one node belongs to this node
{
auto const& tuplesVec = container1d[info.nodeId];
nodeTuples.resize(tuplesVec.size());
std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin());
}
// DISTRIBUTE 2-d containers
//the tuples which are located at two nodes are half/half given to these nodes
for (auto &m: container2d) {
size_t idx = m.first%nNodes;
size_t idy = m.first/nNodes;
size_t myNode = idx;
// either idx or idy is my node
if (idx != info.nodeId && idy != info.nodeId) continue;
if (idy == info.nodeId) myNode = idy;
auto tuplesVec = m.second;
auto n = tuplesVec.size() / 2;
auto size = nodeTuples.size();
if (myNode == idx) {
nodeTuples.resize(size + n);
std::copy(tuplesVec.begin(),
tuplesVec.begin() + n,
nodeTuples.begin() + size);
} else {
auto ny = tuplesVec.size() - n;
nodeTuples.resize(size + ny);
std::copy(tuplesVec.begin() + n,
tuplesVec.end(),
nodeTuples.begin() + size);
}
}
// DISTRIBUTE 3-d containers
// similar game for the tuples which belong to three different nodes
for (auto m: container3d){
auto tuplesVec = m.second;
auto idx = m.first%nNodes;
auto idy = (m.first/nNodes)%nNodes;
auto idz = m.first/nNodes/nNodes;
if (idx != info.nodeId && idy != info.nodeId && idz != info.nodeId) continue;
size_t nx = tuplesVec.size() / 3;
size_t n, nbegin, nend;
if (info.nodeId == idx) {
n = nx;
nbegin = 0;
nend = n;
} else if (info.nodeId == idy) {
n = nx;
nbegin = n;
nend = n + n;
} else {
n = tuplesVec.size() - 2 * nx;
nbegin = 2 * nx;
nend = 2 * nx + n;
}
auto size = nodeTuples.size();
nodeTuples.resize(size + n);
std::copy(tuplesVec.begin() + nbegin,
tuplesVec.begin() + nend,
nodeTuples.begin() + size);
}
// sort part of group-and-sort algorithm
// every tuple on a given node is sorted in a way that
// the 'home elements' are the fastest indices.
// 1:yyy 2:yyn(x) 3:yny(x) 4:ynn(x) 5:nyy 6:nyn(x) 7:nny 8:nnn
size_t n = info.nodeId;
for (auto &nt: nodeTuples){
if ( isOnNode(nt[0], nNodes) == n ){ // 1234
if ( isOnNode(nt[2], nNodes) != n ){ // 24
size_t x(nt[0]); nt[0] = nt[2]; nt[2] = x; // switch first and last
}
else if ( isOnNode(nt[1], nNodes) != n){ // 3
size_t x(nt[0]); nt[0] = nt[1]; nt[1] = x; // switch first two
}
} else {
if ( isOnNode(nt[1], nNodes) == n // 56
&& isOnNode(nt[2], nNodes) != n){ // 6
size_t x(nt[1]); nt[1] = nt[2]; nt[2] = x; // switch last two
}
}
}
//now we sort the list of tuples
std::sort(nodeTuples.begin(), nodeTuples.end());
// we bring the tuples abc back in the order a<b<c
for (auto &t: nodeTuples) std::sort(t.begin(), t.end());
return nodeTuples;
}
//determine which element has to be fetched from sources for the next iteration
std::vector<size_t> fetchElement(ABCTuple cur, ABCTuple suc){
std::vector<size_t> result;
ABCTuple inter;
std::sort(cur.begin(), cur.end());
std::sort(suc.begin(), suc.end());
std::array<size_t,3>::iterator rit, cit, sit;
cit = std::unique(cur.begin(), cur.end());
sit = std::unique(suc.begin(), suc.end());
rit = std::set_difference(suc.begin(), sit, cur.begin(), cit, inter.begin());
result.resize(rit - inter.begin());
std::copy(inter.begin(), rit, result.begin());
return result;
}
#+end_src
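A small usage sketch of =fetchElement= (illustration only, not
tangled; assumes the definitions above):
#+begin_src c++
void testFetchElement() {
  ABCTuple const cur = {1, 2, 3}, suc = {1, 3, 4};
  // only slice 4 of the successor tuple is not already present
  assert((fetchElement(cur, suc) == std::vector<size_t>{4}));
}
#+end_src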
**** Main
The main routine should return the list of tuples to be handled by the current rank.
Let \( N_p \) be the number of ranks or processes.
Let \( N_n \) be the number of nodes or sockets.
Then we have, for example, the following assignment
for \( N_p = 9 \) ranks on \( N_n = 3 \) nodes:
#+begin_example
Global rank  | 0 1 2 3 4 5 6 7 8
nodeId       | 0 1 0 1 1 0 2 2 2
Local rank   | 0 0 1 1 2 2 0 1 2
intra color  | 0 1 0 1 1 0 2 2 2
key          | local rank (the rank's position inside its node)
#+end_example
#+begin_src c++ :tangle (atrip-tuples-h)
std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
int rank, np;
MPI_Comm_rank(universe, &rank);
MPI_Comm_size(universe, &np);
std::vector<ABCTuple> result;
const auto nodeNames(getNodeNames(universe));
auto nodeNamesUnique(nodeNames);
{
// std::unique only removes consecutive duplicates, so sort first
std::sort(nodeNamesUnique.begin(), nodeNamesUnique.end());
const auto& last = std::unique(nodeNamesUnique.begin(),
nodeNamesUnique.end());
nodeNamesUnique.erase(last, nodeNamesUnique.end());
}
// we pick one rank from every node
auto const nodeInfos = getNodeInfos(nodeNames);
size_t const nNodes = nodeNamesUnique.size();
// Only the local root rank of every node (localRank == 0) computes
// the distribution for its node
bool const makeDistribution = nodeInfos[rank].localRank == 0;
std::vector<ABCTuple>
nodeTuples = makeDistribution
? specialDistribution(Info{ nNodes
, Nv
, np
, nodeInfos[rank].nodeId
},
getTuplesList(Nv))
: std::vector<ABCTuple>()
;
// now we have to send the data from **one** rank on each node
// to all other ranks of this node
const
int color = nodeInfos[rank].nodeId
, key = nodeInfos[rank].localRank
;
MPI_Comm INTRA_COMM;
MPI_Comm_split(universe, color, key, &INTRA_COMM);
#+end_src
Every node has to hand out at least
=nodeTuples.size() / nodeInfos[rank].ranksPerNode=
tuples to each of its ranks, rounded up when the division leaves a
remainder.
Since every rank in the universe must end up with the same number of
tuples, we communicate the maximum of this quantity over all nodes.
#+begin_src c++ :tangle (atrip-tuples-h)
const size_t
tuplesPerRankLocal
= nodeTuples.size() / nodeInfos[rank].ranksPerNode
+ size_t(nodeTuples.size() % nodeInfos[rank].ranksPerNode != 0)
;
size_t tuplesPerRankGlobal;
MPI_Reduce(&tuplesPerRankLocal,
&tuplesPerRankGlobal,
1,
MPI_UINT64_T,
MPI_MAX,
0,
universe);
MPI_Bcast(&tuplesPerRankGlobal,
1,
MPI_UINT64_T,
0,
universe);
#+end_src
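As a side remark, the =MPI_Reduce= and =MPI_Bcast= pair above could be
replaced by a single collective; a minimal sketch with the same
buffers (not tangled):
#+begin_src c++
// every rank directly obtains the maximum of tuplesPerRankLocal
MPI_Allreduce(&tuplesPerRankLocal,
              &tuplesPerRankGlobal,
              1,
              MPI_UINT64_T,
              MPI_MAX,
              universe);
#+end_src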
Now we know how many tuples every rank has to hold, namely
=tuplesPerRankGlobal=.
Before distributing them, the tuples in =nodeTuples= have to be sent
from the local root rank of every node to all the other ranks of that
node, and every rank inside a given node has to receive the same
number of tuples, namely =tuplesPerRankLocal=.
The total number of tuples on the node should therefore be
=tuplesPerRankLocal * nodeInfos[rank].ranksPerNode=,
which in general is not yet the case due to divisibility:
so far the node holds exactly =nodeTuples.size()= tuples.
We therefore pad =nodeTuples= with fake tuples at the end until the
condition above is met.
#+begin_src c++ :tangle (atrip-tuples-h)
size_t const totalTuplesLocal
= tuplesPerRankLocal
,* nodeInfos[rank].ranksPerNode;
if (makeDistribution)
nodeTuples.insert(nodeTuples.end(),
totalTuplesLocal - nodeTuples.size(),
FAKE_TUPLE);
#+end_src
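As a concrete illustration with made-up numbers (not tangled): a node
holding 10 tuples with 4 ranks pads up to 12 tuples.
#+begin_src c++
void testPadding() {
  size_t const nTuples = 10, ranksPerNode = 4;
  size_t const perRank
    = nTuples / ranksPerNode + size_t(nTuples % ranksPerNode != 0);
  assert(perRank == 3);                 // every rank has to take 3 tuples
  assert(perRank * ranksPerNode == 12); // so 2 FAKE_TUPLEs are appended
}
#+end_src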
The next step is sending the tuples from the local root rank to the
other ranks of the node, which we do with the MPI function
=MPI_Scatterv=.
Every rank gets =tuplesPerRankLocal= tuples, and after the padding
above the size of =nodeTuples= is an exact multiple of the number of
ranks on the node.
Therefore, the =displacements= are simply the vector
\begin{equation*}
\left\{
  k \cdot \mathrm{tuplesPerRankLocal}
  \mid
  k \in \left\{ 0, \ldots, \#\text{ranks in node} - 1 \right\}
\right\}
\end{equation*}
and the =sendCounts= vector is the constant vector with value
=tuplesPerRankLocal= and length =ranksPerNode=.
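For concreteness, a small stand-alone check of these two vectors with
made-up numbers (not tangled):
#+begin_src c++
void testScatterLayout() {
  // hypothetical: 4 ranks on the node, 10 tuples per rank
  size_t const ranksPerNode = 4, tuplesPerRankLocal = 10;
  std::vector<int> const sendCounts(ranksPerNode, tuplesPerRankLocal);
  std::vector<int> displacements(ranksPerNode);
  for (size_t k = 0; k < displacements.size(); k++)
    displacements[k] = int(k * tuplesPerRankLocal);
  assert((sendCounts == std::vector<int>{10, 10, 10, 10}));
  assert((displacements == std::vector<int>{0, 10, 20, 30}));
}
#+end_src
The tangled code below does the same with the actual node quantities.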
#+begin_src c++ :tangle (atrip-tuples-h)
{
std::vector<int> const
sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal);
std::vector<int>
displacements(nodeInfos[rank].ranksPerNode);
// rank k of the node gets the block starting at k * tuplesPerRankLocal
for (size_t k = 0; k < displacements.size(); k++) {
displacements[k] = int(k * tuplesPerRankLocal);
}
// important!
result.resize(tuplesPerRankLocal);
// construct mpi type for abctuple
MPI_Datatype MPI_ABCTUPLE;
// ABCTuple has a fixed size; do not index nodeTuples here, since it
// is empty on the non-root ranks of the node
MPI_Type_vector(ABCTuple{}.size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE);
MPI_Type_commit(&MPI_ABCTUPLE);
MPI_Scatterv(nodeTuples.data(),
sendCounts.data(),
displacements.data(),
MPI_ABCTUPLE,
result.data(),
tuplesPerRankLocal,
MPI_ABCTUPLE,
0,
INTRA_COMM);
// free type
MPI_Type_free(&MPI_ABCTUPLE);
}
#+end_src
Finally, we have to make sure that the size of the result is the same
on every rank of the universe communicator, inserting fake tuples
where needed:
#+begin_src c++ :tangle (atrip-tuples-h)
result.insert(result.end(),
tuplesPerRankGlobal - result.size(),
FAKE_TUPLE);
return result;
}
#+end_src
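A sketch of how the routine might be called from the rest of the code
(the call site and the value of =Nv= are hypothetical, not part of
this commit):
#+begin_src c++
void distributeTuples(MPI_Comm world) {
  size_t const Nv = 100; // made-up value for Nv
  std::vector<ABCTuple> const tuples = group_and_sort::main(world, Nv);
  // trailing FAKE_TUPLEs are padding and have to be skipped by the caller
}
#+end_src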
**** Epilog :noexport:
#+begin_src c++ :tangle (atrip-tuples-h)
}
#+end_src
*** Epilog :noexport:
#+begin_src c++ :tangle (atrip-tuples-h)
}


@@ -5,6 +5,16 @@
#include <array>
#include <numeric>
// TODO: remove some
#include <stdio.h>
#include <math.h>
#include <algorithm>
#include <map>
#include <cassert>
#include <chrono>
#include <climits>
#include <mpi.h>
#include <atrip/Utils.hpp>
#include <atrip/Debug.hpp>
@@ -68,6 +78,399 @@ getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) {
}
// Naive list:2 ends here
// [[file:../../atrip.org::*Prolog][Prolog:1]]
namespace group_and_sort {
// Prolog:1 ends here
// [[file:../../atrip.org::*Node information][Node information:1]]
std::vector<std::string> getNodeNames(MPI_Comm comm){
int rank, np;
MPI_Comm_rank(comm, &rank);
MPI_Comm_size(comm, &np);
std::vector<std::string> nodeList(np);
char nodeName[MPI_MAX_PROCESSOR_NAME];
// use a std::vector instead of a variable-length array,
// which is not standard C++
std::vector<char> nodeNames(np * MPI_MAX_PROCESSOR_NAME);
std::vector<int> nameLengths(np)
, off(np)
;
int nameLength;
MPI_Get_processor_name(nodeName, &nameLength);
MPI_Allgather(&nameLength,
1,
MPI_INT,
nameLengths.data(),
1,
MPI_INT,
comm);
for (int i(1); i < np; i++)
off[i] = off[i-1] + nameLengths[i-1];
MPI_Allgatherv(nodeName,
nameLengths[rank],
MPI_BYTE,
nodeNames.data(),
nameLengths.data(),
off.data(),
MPI_BYTE,
comm);
for (int i(0); i < np; i++) {
std::string const s(&nodeNames[off[i]], nameLengths[i]);
nodeList[i] = s;
}
return nodeList;
}
// Node information:1 ends here
// [[file:../../atrip.org::*Node information][Node information:2]]
struct RankInfo {
const std::string name;
const size_t nodeId;
const size_t globalRank;
const size_t localRank;
const size_t ranksPerNode;
};
std::vector<RankInfo>
getNodeInfos(std::vector<std::string> const& nodeNames) {
std::vector<RankInfo> result;
auto uniqueNames = nodeNames;
{
std::sort(uniqueNames.begin(), uniqueNames.end());
auto const& last = std::unique(uniqueNames.begin(), uniqueNames.end());
uniqueNames.erase(last, uniqueNames.end());
}
const auto index = [&uniqueNames](std::string const& s) {
auto const& it = std::find(uniqueNames.begin(), uniqueNames.end(), s);
return std::distance(uniqueNames.begin(), it);
};
std::vector<size_t> localRanks(uniqueNames.size(), 0);
size_t rank = 0;
for (auto const& name: nodeNames) {
const size_t nodeId = index(name);
result.push_back({name,
nodeId,
rank++,
localRanks[nodeId]++,
std::count(nodeNames.begin(),
nodeNames.end(),
name)
});
}
return result;
}
// Node information:2 ends here
// [[file:../../atrip.org::*Utils][Utils:1]]
// Provides the node on which the slice-element is found
// Right now we distribute the slices in a round robin fashion
// over the different nodes (NOTE: not mpi ranks but nodes)
size_t isOnNode(size_t tuple, size_t nodes) { return tuple % nodes; }
struct Info {
size_t nNodes;
size_t Nv;
size_t np;
size_t nodeId;
};
// return the node (or all nodes) where the elements of this
// tuple are located
std::vector<size_t> getTupleNodes(ABCTuple t, size_t nNodes) {
std::vector<size_t> result;
ABCTuple nTuple = { isOnNode(t[0], nNodes)
, isOnNode(t[1], nNodes)
, isOnNode(t[2], nNodes)
};
std::sort(nTuple.begin(), nTuple.end());
ABCTuple::iterator it = std::unique(nTuple.begin(), nTuple.end());
result.resize(it - nTuple.begin());
std::copy(nTuple.begin(), it, result.begin());
return result;
}
// Utils:1 ends here
// [[file:../../atrip.org::*Distribution][Distribution:1]]
std::vector<ABCTuple>
specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
std::vector<ABCTuple> nodeTuples;
size_t nNodes(info.nNodes);
size_t np(info.np);
size_t N(allTuples.size());
size_t tuplePerNode( ceil( ((double)N) / nNodes) );
// nodeid tuple list
std::map<size_t, std::vector<ABCTuple> > container1d;
std::map<size_t, std::vector<ABCTuple> > container2d;
std::map<size_t, std::vector<ABCTuple> > container3d;
// build container-n-d's
for (auto t: allTuples) {
// on which node(s) are the tuple elements located?
// put them into the right container
auto nt = getTupleNodes(t, nNodes);
if ( nt.size() == 1) container1d[nt[0]].push_back(t);
if ( nt.size() == 2) container2d[nt[0] + nNodes*nt[1]].push_back(t);
if ( nt.size() == 3)
container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t);
}
// DISTRIBUTE 1-d containers
// every tuple which is only located at one node belongs to this node
{
auto const& tuplesVec = container1d[info.nodeId];
nodeTuples.resize(tuplesVec.size());
std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin());
}
// DISTRIBUTE 2-d containers
//the tuples which are located at two nodes are half/half given to these nodes
for (auto &m: container2d) {
size_t idx = m.first%nNodes;
size_t idy = m.first/nNodes;
size_t myNode = idx;
// either idx or idy is my node
if (idx != info.nodeId && idy != info.nodeId) continue;
if (idy == info.nodeId) myNode = idy;
auto tuplesVec = m.second;
auto n = tuplesVec.size() / 2;
auto size = nodeTuples.size();
if (myNode == idx) {
nodeTuples.resize(size + n);
std::copy(tuplesVec.begin(),
tuplesVec.begin() + n,
nodeTuples.begin() + size);
} else {
auto ny = tuplesVec.size() - n;
nodeTuples.resize(size + ny);
std::copy(tuplesVec.begin() + n,
tuplesVec.end(),
nodeTuples.begin() + size);
}
}
// DISTRIBUTE 3-d containers
// similar game for the tuples which belong to three different nodes
for (auto m: container3d){
auto tuplesVec = m.second;
auto idx = m.first%nNodes;
auto idy = (m.first/nNodes)%nNodes;
auto idz = m.first/nNodes/nNodes;
if (idx != info.nodeId && idy != info.nodeId && idz != info.nodeId) continue;
size_t nx = tuplesVec.size() / 3;
size_t n, nbegin, nend;
if (info.nodeId == idx) {
n = nx;
nbegin = 0;
nend = n;
} else if (info.nodeId == idy) {
n = nx;
nbegin = n;
nend = n + n;
} else {
n = tuplesVec.size() - 2 * nx;
nbegin = 2 * nx;
nend = 2 * nx + n;
}
auto size = nodeTuples.size();
nodeTuples.resize(size + n);
std::copy(tuplesVec.begin() + nbegin,
tuplesVec.begin() + nend,
nodeTuples.begin() + size);
}
// sort part of group-and-sort algorithm
// every tuple on a given node is sorted in a way that
// the 'home elements' are the fastest indices.
// 1:yyy 2:yyn(x) 3:yny(x) 4:ynn(x) 5:nyy 6:nyn(x) 7:nny 8:nnn
size_t n = info.nodeId;
for (auto &nt: nodeTuples){
if ( isOnNode(nt[0], nNodes) == n ){ // 1234
if ( isOnNode(nt[2], nNodes) != n ){ // 24
size_t x(nt[0]); nt[0] = nt[2]; nt[2] = x; // switch first and last
}
else if ( isOnNode(nt[1], nNodes) != n){ // 3
size_t x(nt[0]); nt[0] = nt[1]; nt[1] = x; // switch first two
}
} else {
if ( isOnNode(nt[1], nNodes) == n // 56
&& isOnNode(nt[2], nNodes) != n){ // 6
size_t x(nt[1]); nt[1] = nt[2]; nt[2] = x; // switch last two
}
}
}
//now we sort the list of tuples
std::sort(nodeTuples.begin(), nodeTuples.end());
// we bring the tuples abc back in the order a<b<c
for (auto &t: nodeTuples) std::sort(t.begin(), t.end());
return nodeTuples;
}
//determine which element has to be fetched from sources for the next iteration
std::vector<size_t> fetchElement(ABCTuple cur, ABCTuple suc){
std::vector<size_t> result;
ABCTuple inter;
std::sort(cur.begin(), cur.end());
std::sort(suc.begin(), suc.end());
std::array<size_t,3>::iterator rit, cit, sit;
cit = std::unique(cur.begin(), cur.end());
sit = std::unique(suc.begin(), suc.end());
rit = std::set_difference(suc.begin(), sit, cur.begin(), cit, inter.begin());
result.resize(rit - inter.begin());
std::copy(inter.begin(), rit, result.begin());
return result;
}
// Distribution:1 ends here
// [[file:../../atrip.org::*Main][Main:1]]
std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
int rank, np;
MPI_Comm_rank(universe, &rank);
MPI_Comm_size(universe, &np);
std::vector<ABCTuple> result;
const auto nodeNames(getNodeNames(universe));
auto nodeNamesUnique(nodeNames);
{
// std::unique only removes consecutive duplicates, so sort first
std::sort(nodeNamesUnique.begin(), nodeNamesUnique.end());
const auto& last = std::unique(nodeNamesUnique.begin(),
nodeNamesUnique.end());
nodeNamesUnique.erase(last, nodeNamesUnique.end());
}
// we pick one rank from every node
auto const nodeInfos = getNodeInfos(nodeNames);
size_t const nNodes = nodeNamesUnique.size();
// Only the local root rank of every node (localRank == 0) computes
// the distribution for its node
bool const makeDistribution = nodeInfos[rank].localRank == 0;
std::vector<ABCTuple>
nodeTuples = makeDistribution
? specialDistribution(Info{ nNodes
, Nv
, np
, nodeInfos[rank].nodeId
},
getTuplesList(Nv))
: std::vector<ABCTuple>()
;
// now we have to send the data from **one** rank on each node
// to all other ranks of this node
const
int color = nodeInfos[rank].nodeId
, key = nodeInfos[rank].localRank
;
MPI_Comm INTRA_COMM;
MPI_Comm_split(universe, color, key, &INTRA_COMM);
// Main:1 ends here
// [[file:../../atrip.org::*Main][Main:2]]
const size_t
tuplesPerRankLocal
= nodeTuples.size() / nodeInfos[rank].ranksPerNode
+ size_t(nodeTuples.size() % nodeInfos[rank].ranksPerNode != 0)
;
size_t tuplesPerRankGlobal;
MPI_Reduce(&tuplesPerRankLocal,
&tuplesPerRankGlobal,
1,
MPI_UINT64_T,
MPI_MAX,
0,
universe);
MPI_Bcast(&tuplesPerRankGlobal,
1,
MPI_UINT64_T,
0,
universe);
// Main:2 ends here
// [[file:../../atrip.org::*Main][Main:3]]
size_t const totalTuplesLocal
= tuplesPerRankLocal
* nodeInfos[rank].ranksPerNode;
if (makeDistribution)
nodeTuples.insert(nodeTuples.end(),
totalTuplesLocal - nodeTuples.size(),
FAKE_TUPLE);
// Main:3 ends here
// [[file:../../atrip.org::*Main][Main:4]]
{
std::vector<int> const
sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal);
std::vector<int>
displacements(nodeInfos[rank].ranksPerNode);
// rank k of the node gets the block starting at k * tuplesPerRankLocal
for (size_t k = 0; k < displacements.size(); k++) {
displacements[k] = int(k * tuplesPerRankLocal);
}
// important!
result.resize(tuplesPerRankLocal);
// construct mpi type for abctuple
MPI_Datatype MPI_ABCTUPLE;
// ABCTuple has a fixed size; do not index nodeTuples here, since it
// is empty on the non-root ranks of the node
MPI_Type_vector(ABCTuple{}.size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE);
MPI_Type_commit(&MPI_ABCTUPLE);
MPI_Scatterv(nodeTuples.data(),
sendCounts.data(),
displacements.data(),
MPI_ABCTUPLE,
result.data(),
tuplesPerRankLocal,
MPI_ABCTUPLE,
0,
INTRA_COMM);
// free type
MPI_Type_free(&MPI_ABCTUPLE);
}
// Main:4 ends here
// [[file:../../atrip.org::*Main][Main:5]]
result.insert(result.end(),
tuplesPerRankGlobal - result.size(),
FAKE_TUPLE);
return result;
}
// Main:5 ends here
// [[file:../../atrip.org::*Epilog][Epilog:1]]
}
// Epilog:1 ends here
// [[file:../../atrip.org::*Epilog][Epilog:1]]
}
// Epilog:1 ends here