Add group and sort algorithm, it compiles at least

parent 944e93dc33
commit 65f804e637

.gitignore (vendored) | 4

@@ -1,6 +1,10 @@
.emacs
doc/doxygen/
extern
lib
*.o
*.d
a.out
*~
config.mk

atrip.org | 502

@@ -1400,6 +1400,16 @@ as well as their distribution to nodes and cores.
#include <array>
#include <numeric>

// TODO: remove some
#include <stdio.h>
#include <math.h>
#include <algorithm>
#include <map>
#include <cassert>
#include <chrono>
#include <climits>
#include <mpi.h>

#include <atrip/Utils.hpp>
#include <atrip/Debug.hpp>

@@ -1491,6 +1501,498 @@ getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) {
#+end_src

*** Group and sort list

**** Prolog :noexport:
#+begin_src c++ :tangle (atrip-tuples-h)
namespace group_and_sort {
#+end_src

**** Node information

- nodeList ::
  List of hostnames, one entry per rank, i.e., of size \( N_p \);
  its distinct entries are the \( N_n \) nodes in use.
- nodeInfos ::
  List of (hostname, local rank id) of size \( N_p \), i.e., one entry
  per rank, where the local rank id runs from 0 to the number of ranks
  on the node minus one (e.g., 0 to 47 on a 48-rank node).

=getNodeNames= gathers the processor name of every rank, i.e., the
resulting vector has one entry per rank, and its distinct entries
name the nodes in use.
#+begin_src c++ :tangle (atrip-tuples-h)
std::vector<std::string> getNodeNames(MPI_Comm comm){
  int rank, np;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &np);

  std::vector<std::string> nodeList(np);
  char nodeName[MPI_MAX_PROCESSOR_NAME];
  // concatenated names of all ranks; a std::vector instead of a
  // variable-length array, which is not standard C++
  std::vector<char> nodeNames(np * MPI_MAX_PROCESSOR_NAME);
  std::vector<int> nameLengths(np)
                 , off(np)
                 ;
  int nameLength;
  MPI_Get_processor_name(nodeName, &nameLength);
  MPI_Allgather(&nameLength,
                1,
                MPI_INT,
                nameLengths.data(),
                1,
                MPI_INT,
                comm);
  // exclusive prefix sum of the name lengths gives the offsets
  for (int i(1); i < np; i++)
    off[i] = off[i-1] + nameLengths[i-1];
  MPI_Allgatherv(nodeName,
                 nameLengths[rank],
                 MPI_BYTE,
                 nodeNames.data(),
                 nameLengths.data(),
                 off.data(),
                 MPI_BYTE,
                 comm);
  for (int i(0); i < np; i++) {
    std::string const s(&nodeNames[off[i]], nameLengths[i]);
    nodeList[i] = s;
  }
  return nodeList;
}
#+end_src

=getNodeInfos= computes, for every rank, its node id, its local rank
within the node and the number of ranks on its node.

#+begin_src c++ :tangle (atrip-tuples-h)
struct RankInfo {
  const std::string name;
  const size_t nodeId;
  const size_t globalRank;
  const size_t localRank;
  const size_t ranksPerNode;
};

std::vector<RankInfo>
getNodeInfos(std::vector<std::string> const& nodeNames) {
  std::vector<RankInfo> result;
  auto uniqueNames = nodeNames;
  {
    std::sort(uniqueNames.begin(), uniqueNames.end());
    auto const& last = std::unique(uniqueNames.begin(), uniqueNames.end());
    uniqueNames.erase(last, uniqueNames.end());
  }
  const auto index = [&uniqueNames](std::string const& s) {
    auto const& it = std::find(uniqueNames.begin(), uniqueNames.end(), s);
    return std::distance(uniqueNames.begin(), it);
  };
  std::vector<size_t> localRanks(uniqueNames.size(), 0);
  size_t rank = 0;
  for (auto const& name: nodeNames) {
    const size_t nodeId = index(name);
    result.push_back({name,
                      nodeId,
                      rank++,
                      localRanks[nodeId]++,
                      // std::count returns a signed type; cast to
                      // avoid narrowing in the braced initializer
                      size_t(std::count(nodeNames.begin(),
                                        nodeNames.end(),
                                        name))
                      });
  }
  return result;
}
#+end_src
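
As a small sanity check, the following sketch (illustration only, not
tangled; =checkNodeInfos= is our own helper name) exercises
=getNodeInfos= on a toy hostname list:

#+begin_src c++
#include <cassert>
#include <string>
#include <vector>

// Illustration only: assumes RankInfo and getNodeInfos from above.
void checkNodeInfos() {
  std::vector<std::string> const names = {"nodeA", "nodeB", "nodeA"};
  auto const infos = getNodeInfos(names);
  assert(infos[0].nodeId == 0);        // "nodeA" sorts first
  assert(infos[2].nodeId == 0);        // same node as rank 0
  assert(infos[2].localRank == 1);     // second rank on nodeA
  assert(infos[2].ranksPerNode == 2);  // nodeA hosts two ranks
  assert(infos[1].ranksPerNode == 1);  // nodeB hosts one rank
}
#+end_src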

**** Utils

#+begin_src c++ :tangle (atrip-tuples-h)
// Provides the node on which the slice-element is found.
// Right now we distribute the slices in a round robin fashion
// over the different nodes (NOTE: not mpi ranks but nodes).
size_t isOnNode(size_t tuple, size_t nodes) { return tuple % nodes; }


struct Info {
  size_t nNodes;
  size_t Nv;
  size_t np;
  size_t nodeId;
};


// return the node (or all nodes) where the elements of this
// tuple are located
std::vector<size_t> getTupleNodes(ABCTuple t, size_t nNodes) {
  std::vector<size_t> result;
  ABCTuple nTuple = { isOnNode(t[0], nNodes)
                    , isOnNode(t[1], nNodes)
                    , isOnNode(t[2], nNodes)
                    };
  std::sort(nTuple.begin(), nTuple.end());
  ABCTuple::iterator it = std::unique(nTuple.begin(), nTuple.end());
  result.resize(it - nTuple.begin());
  std::copy(nTuple.begin(), it, result.begin());
  return result;
}
#+end_src
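
For example, with three nodes the round-robin placement and the
deduplication in =getTupleNodes= behave as follows (illustration only,
not tangled; =checkTupleNodes= is ours):

#+begin_src c++
#include <cassert>

// Illustration only: assumes ABCTuple, isOnNode and getTupleNodes
// from above.
void checkTupleNodes() {
  assert(isOnNode(7, 3) == 1);                               // 7 % 3
  assert(getTupleNodes(ABCTuple{0, 3, 6}, 3).size() == 1);   // all on node 0
  assert(getTupleNodes(ABCTuple{0, 1, 5}, 3).size() == 3);   // nodes 0, 1, 2
}
#+end_src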

**** Distribution

Wording: a 'home element' is an element which is located on the given node.

1. we distribute the tuples such that each tuple has at least one 'home element'
2. we sort each tuple in a way that the 'home elements' are the fastest indices
3. we sort the list of tuples on every node
4. we reorder every tuple abc so that a<b<c holds again
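The implementation below groups the tuples by the set of nodes they
touch; a pair or triple of node ids is packed into a single map key in
mixed radix \( N_n \). A tiny sketch of the encoding (illustration
only, not tangled; =checkKeyEncoding= is ours):

#+begin_src c++
#include <cassert>

// Illustration only: container2d keys encode the sorted node pair
// (idx, idy) as idx + nNodes * idy; modulo and division decode it.
void checkKeyEncoding() {
  size_t const nNodes = 3, idx = 0, idy = 2;
  size_t const key = idx + nNodes * idy;  // == 6
  assert(key % nNodes == idx);            // first node id
  assert(key / nNodes == idy);            // second node id
}
#+end_src
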
#+begin_src c++ :tangle (atrip-tuples-h)
std::vector<ABCTuple>
specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {

  std::vector<ABCTuple> nodeTuples;
  size_t nNodes(info.nNodes);
  size_t np(info.np);
  size_t N(allTuples.size());
  size_t tuplePerNode( ceil( ((double)N) / nNodes) );  // currently unused

  // nodeid -> tuple list
  std::map<size_t, std::vector<ABCTuple> > container1d;
  std::map<size_t, std::vector<ABCTuple> > container2d;
  std::map<size_t, std::vector<ABCTuple> > container3d;

  // build container-n-d's
  for (auto t: allTuples) {
    // on which node(s) are the tuple elements located...
    // put them into the right container
    auto nt = getTupleNodes(t, nNodes);
    if ( nt.size() == 1) container1d[nt[0]].push_back(t);
    if ( nt.size() == 2) container2d[nt[0] + nNodes*nt[1]].push_back(t);
    if ( nt.size() == 3)
      container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t);
  }

  // DISTRIBUTE 1-d containers
  // every tuple which is only located at one node belongs to this node
  {
    auto const& tuplesVec = container1d[info.nodeId];
    nodeTuples.resize(tuplesVec.size());
    std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin());
  }

  // DISTRIBUTE 2-d containers
  // the tuples which are located at two nodes are given half/half
  // to these nodes
  for (auto &m: container2d) {
    size_t idx = m.first%nNodes;
    size_t idy = m.first/nNodes;
    size_t myNode = idx;

    // either idx or idy is my node
    if (idx != info.nodeId && idy != info.nodeId) continue;
    if (idy == info.nodeId) myNode = idy;

    auto tuplesVec = m.second;
    auto n = tuplesVec.size() / 2;
    auto size = nodeTuples.size();
    if (myNode == idx) {
      // first half
      nodeTuples.resize(size + n);
      std::copy(tuplesVec.begin(),
                tuplesVec.begin() + n,
                nodeTuples.begin() + size);
    } else {
      // second half, including the odd remainder
      auto ny = tuplesVec.size() - n;
      nodeTuples.resize(size + ny);
      std::copy(tuplesVec.begin() + n,
                tuplesVec.end(),
                nodeTuples.begin() + size);
    }

  }

  // DISTRIBUTE 3-d containers
  // similar game for the tuples which belong to three different nodes
  for (auto m: container3d){
    auto tuplesVec = m.second;
    auto idx = m.first%nNodes;
    auto idy = (m.first/nNodes)%nNodes;
    auto idz = m.first/nNodes/nNodes;
    if (idx != info.nodeId && idy != info.nodeId && idz != info.nodeId) continue;

    size_t nx = tuplesVec.size() / 3;
    size_t n, nbegin, nend;
    if (info.nodeId == idx) {         // first third
      n = nx;
      nbegin = 0;
      nend = n;
    } else if (info.nodeId == idy) {  // second third
      n = nx;
      nbegin = n;
      nend = n + n;
    } else {                          // last third plus the remainder
      n = tuplesVec.size() - 2 * nx;
      nbegin = 2 * nx;
      nend = 2 * nx + n;
    }

    auto size = nodeTuples.size();
    nodeTuples.resize(size + n);
    std::copy(tuplesVec.begin() + nbegin,
              tuplesVec.begin() + nend,
              nodeTuples.begin() + size);

  }


  // Sort part of the group-and-sort algorithm: every tuple on a given
  // node is reordered so that its 'home elements' sit at the back of
  // the tuple, i.e., are the fastest running indices.  The patterns
  // below state for each of the three elements whether it is on this
  // node (y) or not (n); the cases marked (x) need a swap:
  // 1:yyy 2:yyn(x) 3:yny(x) 4:ynn(x) 5:nyy 6:nyn(x) 7:nny 8:nnn
  size_t n = info.nodeId;
  for (auto &nt: nodeTuples){
    if ( isOnNode(nt[0], nNodes) == n ){ // cases 1-4
      if ( isOnNode(nt[2], nNodes) != n ){ // cases 2 and 4
        size_t x(nt[0]); nt[0] = nt[2]; nt[2] = x; // switch first and last
      }
      else if ( isOnNode(nt[1], nNodes) != n){ // case 3
        size_t x(nt[0]); nt[0] = nt[1]; nt[1] = x; // switch first two
      }
    } else {
      if ( isOnNode(nt[1], nNodes) == n // cases 5 and 6
        && isOnNode(nt[2], nNodes) != n){ // case 6
        size_t x(nt[1]); nt[1] = nt[2]; nt[2] = x; // switch last two
      }
    }
  }
  // now we sort the list of tuples
  std::sort(nodeTuples.begin(), nodeTuples.end());
  // we bring the tuples abc back into the order a<b<c
  for (auto &t: nodeTuples) std::sort(t.begin(), t.end());

  return nodeTuples;

}

// determine which elements have to be fetched from the sources for
// the next iteration, i.e., the elements of suc that are not in cur
std::vector<size_t> fetchElement(ABCTuple cur, ABCTuple suc){
  std::vector<size_t> result;
  ABCTuple inter;
  std::sort(cur.begin(), cur.end());
  std::sort(suc.begin(), suc.end());
  std::array<size_t,3>::iterator rit, cit, sit;
  cit = std::unique(cur.begin(), cur.end());
  sit = std::unique(suc.begin(), suc.end());
  rit = std::set_difference(suc.begin(), sit, cur.begin(), cit, inter.begin());
  result.resize(rit - inter.begin());
  std::copy(inter.begin(), rit, result.begin());
  return result;
}
#+end_src
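
A small worked example of =fetchElement= (illustration only, not
tangled; =checkFetchElement= is ours):

#+begin_src c++
#include <cassert>

// Illustration only: assumes ABCTuple and fetchElement from above.
void checkFetchElement() {
  auto const toFetch = fetchElement(ABCTuple{1, 2, 3}, ABCTuple{2, 3, 5});
  // 2 and 3 are already present, only 5 has to be fetched
  assert(toFetch.size() == 1 && toFetch[0] == 5);
}
#+end_src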

**** Main

The main routine should return the list of tuples to be handled by the
current rank.

Let \( N_p \) be the number of ranks or processes.
Let \( N_n \) be the number of nodes or sockets.

Then we have, for instance, the following situation:

#+begin_example
Global rank | 0 1 2 3 4 5 6 7 8
nodeId      | 0 1 0 1 1 0 2 2 2
Local rank  | 0 0 1 1 2 2 0 1 2
intra color | 0 1 0 1 1 0 2 2 2
key         | global rank
#+end_example
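
The nodeId and local rank rows are exactly what =getNodeInfos=
computes; a toy check (illustration only, not tangled; =checkTable= is
ours):

#+begin_src c++
#include <cassert>
#include <string>
#include <vector>

// Illustration only: nine ranks spread over three nodes as in the
// table above; assumes getNodeInfos from above.
void checkTable() {
  std::vector<std::string> const names =
    {"n0", "n1", "n0", "n1", "n1", "n0", "n2", "n2", "n2"};
  auto const infos = getNodeInfos(names);
  assert(infos[4].nodeId == 1);     // global rank 4 sits on node 1
  assert(infos[4].localRank == 2);  // third rank on node 1
  assert(infos[8].localRank == 2);  // third rank on node 2
}
#+end_src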

#+begin_src c++ :tangle (atrip-tuples-h)
std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {

  int rank, np;
  MPI_Comm_rank(universe, &rank);
  MPI_Comm_size(universe, &np);

  std::vector<ABCTuple> result;

  const auto nodeNames(getNodeNames(universe));
  auto nodeNamesUnique(nodeNames);
  {
    // sort first: std::unique only removes *adjacent* duplicates
    std::sort(nodeNamesUnique.begin(), nodeNamesUnique.end());
    const auto& last = std::unique(nodeNamesUnique.begin(),
                                   nodeNamesUnique.end());
    nodeNamesUnique.erase(last, nodeNamesUnique.end());
  }
  // we pick one rank from every node
  auto const nodeInfos = getNodeInfos(nodeNames);
  size_t const nNodes = nodeNamesUnique.size();

  // We want to construct a communicator which contains only one
  // rank per node
  bool const makeDistribution = nodeInfos[rank].localRank == 0;

  std::vector<ABCTuple>
    nodeTuples = makeDistribution
               ? specialDistribution(Info{ nNodes
                                         , Nv
                                         , size_t(np) // np is int
                                         , nodeInfos[rank].nodeId
                                         },
                                     getTuplesList(Nv))
               : std::vector<ABCTuple>()
               ;


  // now we have to send the data from **one** rank on each node
  // to all other ranks of this node
  const
  int color = nodeInfos[rank].nodeId
    , key = nodeInfos[rank].localRank
    ;


  MPI_Comm INTRA_COMM;
  MPI_Comm_split(universe, color, key, &INTRA_COMM);
#+end_src
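
After the split, two ranks share =INTRA_COMM= exactly when they sit on
the same node, and the rank within =INTRA_COMM= coincides with the
local rank, since that is the split key. A sketch of this invariant
(illustration only, not tangled; =checkIntraComm= is ours):

#+begin_src c++
#include <cassert>
#include <mpi.h>

// Illustration only: with key = localRank and distinct keys per
// color, the rank inside the intra-node communicator equals the
// local rank, e.g. checkIntraComm(INTRA_COMM, nodeInfos[rank].localRank).
void checkIntraComm(MPI_Comm intraComm, size_t localRank) {
  int intraRank;
  MPI_Comm_rank(intraComm, &intraRank);
  assert(size_t(intraRank) == localRank);
}
#+end_src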

Every node has to distribute *at least*
=nodeTuples.size() / nodeInfos[rank].ranksPerNode=
tuples among its ranks.

We have to communicate this quantity among all nodes.

#+begin_src c++ :tangle (atrip-tuples-h)

  const size_t
    tuplesPerRankLocal
    = nodeTuples.size() / nodeInfos[rank].ranksPerNode
    + size_t(nodeTuples.size() % nodeInfos[rank].ranksPerNode != 0)
    ;

  size_t tuplesPerRankGlobal;

  MPI_Reduce(&tuplesPerRankLocal,
             &tuplesPerRankGlobal,
             1,
             MPI_UINT64_T,
             MPI_MAX,
             0,
             universe);

  MPI_Bcast(&tuplesPerRankGlobal,
            1,
            MPI_UINT64_T,
            0,
            universe);
#+end_src
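
The two summands above implement a ceiling division: integer division,
plus one whenever there is a remainder. For instance:

#+begin_src c++
// Illustration only: a / b + (a % b != 0) is ceil(a / b) for
// positive integers.
static_assert(10 / 4 + (10 % 4 != 0) == 3, "ceil(10/4) == 3");
static_assert( 8 / 4 + ( 8 % 4 != 0) == 2, "ceil(8/4) == 2");
#+end_src

As a side note, the =MPI_Reduce= plus =MPI_Bcast= pair computes the
same value that a single =MPI_Allreduce= with =MPI_MAX= would.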

Now we know how many tuples every rank has to hold, namely
=tuplesPerRankGlobal=.

Before that, however, the tuples in =nodeTuples= have to be sent from
the local root rank of every node to all the ranks of that node, and
we have to make sure that every rank inside a given node gets the same
amount of tuples, namely =tuplesPerRankLocal=, so that the total
number of tuples on our node is
=tuplesPerRankLocal * nodeInfos[rank].ranksPerNode=.
Up to now this need not be the case due to divisibility issues.

So far we have exactly =nodeTuples.size()= tuples; we enforce the
condition above by resizing, adding some fake tuples at the end as
padding.

#+begin_src c++ :tangle (atrip-tuples-h)
  size_t const totalTuplesLocal
    = tuplesPerRankLocal * nodeInfos[rank].ranksPerNode;

  if (makeDistribution)
    nodeTuples.insert(nodeTuples.end(),
                      totalTuplesLocal - nodeTuples.size(),
                      FAKE_TUPLE);
#+end_src

The next step is sending the tuples from the local root rank to the
other ranks in the node; this we do with the MPI function
=MPI_Scatterv=.
Every rank gets =tuplesPerRankLocal= tuples, and after the padding the
size of the =nodeTuples= vector is divisible by the number of ranks on
our node.
Therefore, the =displacements= are simply the vector

\begin{equation*}
\left\{
  k \cdot \mathrm{tuplesPerRankLocal}
  \mid
  k \in
  \left\{ 0
        , \ldots
        , \#\text{ranks in node} - 1
  \right\}
\right\}
\end{equation*}

and the =sendCounts= vector is simply the constant vector
=tuplesPerRankLocal= of size =ranksPerNode=.
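
For concreteness, a toy layout (illustration only, not tangled;
=checkScatterLayout= is ours):

#+begin_src c++
#include <cassert>
#include <vector>

// Illustration only: scatter layout for 10 tuples on a node hosting
// 4 ranks.
void checkScatterLayout() {
  size_t const nTuples = 10, ranksPerNode = 4;
  size_t const perRank = nTuples / ranksPerNode
                       + size_t(nTuples % ranksPerNode != 0);  // == 3
  std::vector<int> displacements(ranksPerNode);
  for (size_t k = 0; k < ranksPerNode; k++)
    displacements[k] = int(k * perRank);
  assert(perRank == 3);
  assert(displacements == (std::vector<int>{0, 3, 6, 9}));
  // the node pads its list from 10 to perRank * ranksPerNode = 12
  // tuples with FAKE_TUPLE before scattering
}
#+end_src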

#+begin_src c++ :tangle (atrip-tuples-h)
  {
    std::vector<int> const
      sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal);

    std::vector<int>
      displacements(nodeInfos[rank].ranksPerNode);

    // the displacement of local rank k is k * tuplesPerRankLocal, in
    // units of MPI_ABCTUPLE; note that std::iota would produce a
    // stride of one instead
    for (size_t k = 0; k < displacements.size(); k++)
      displacements[k] = int(k * tuplesPerRankLocal);

    // important!
    result.resize(tuplesPerRankLocal);

    // construct an mpi type for ABCTuple: three contiguous uint64's
    // (we do not use nodeTuples[0].size() here, since nodeTuples is
    // empty on the non-root ranks)
    MPI_Datatype MPI_ABCTUPLE;
    MPI_Type_vector(3, 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE);
    MPI_Type_commit(&MPI_ABCTUPLE);

    MPI_Scatterv(nodeTuples.data(),
                 sendCounts.data(),
                 displacements.data(),
                 MPI_ABCTUPLE,
                 result.data(),
                 tuplesPerRankLocal,
                 MPI_ABCTUPLE,
                 0,
                 INTRA_COMM);

    // free type
    MPI_Type_free(&MPI_ABCTUPLE);

  }
#+end_src

Finally we make sure that the size of the result is the same on every
rank in the universe communicator, inserting fake tuples where needed.

#+begin_src c++ :tangle (atrip-tuples-h)

  result.insert(result.end(),
                tuplesPerRankGlobal - result.size(),
                FAKE_TUPLE);

  return result;

}
#+end_src

**** Epilog :noexport:
#+begin_src c++ :tangle (atrip-tuples-h)
}
#+end_src


*** Epilog :noexport:
#+begin_src c++ :tangle (atrip-tuples-h)
}