Add group-and-sort distribution using MPI_Scatter instead of MPI_Scatterv

This commit is contained in:
Alejandro Gallo 2021-11-03 19:46:38 +01:00
parent 12f8c6294e
commit a5619146f0
2 changed files with 99 additions and 62 deletions

View File

@ -1694,6 +1694,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t);
}
if (info.nodeId == 0)
std::cout << "\tBuilding 1-d containers\n";
// DISTRIBUTE 1-d containers
// every tuple which is only located at one node belongs to this node
{
@ -1702,6 +1704,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin());
}
if (info.nodeId == 0)
std::cout << "\tBuilding 2-d containers\n";
// DISTRIBUTE 2-d containers
// the tuples which are located at two nodes are split half/half between these nodes
for (auto &m: container2d) {
@ -1731,6 +1735,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
}
if (info.nodeId == 0)
std::cout << "\tBuilding 3-d containers\n";
// DISTRIBUTE 3-d containers
// similar game for the tuples which belong to three different nodes
for (auto m: container3d){
@ -1765,6 +1771,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
}
if (info.nodeId == 0)
std::cout << "\tsorting...\n";
// sort part of group-and-sort algorithm
// every tuple on a given node is sorted in a way that
// the 'home elements' are the fastest index.
@ -1785,15 +1793,23 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
}
}
}
if (info.nodeId == 0) std::cout << "\tsorting list of tuples...\n";
//now we sort the list of tuples
std::sort(nodeTuples.begin(), nodeTuples.end());
if (info.nodeId == 0) std::cout << "\trestoring tuples...\n";
// we bring the tuples abc back in the order a<b<c
for (auto &t: nodeTuples) std::sort(t.begin(), t.end());
return nodeTuples;
}
#+end_src
TODO: ERASE THIS CODE BLOCK
#+begin_src c++
//determine which element has to be fetched from sources for the next iteration
std::vector<size_t> fetchElement(ABCTuple cur, ABCTuple suc){
std::vector<size_t> result;
@ -1821,10 +1837,10 @@ Then we have the following
#+begin_example
Global rank | 0 1 2 3 4 5 6 7 8
key | global rank
nodeId | 0 1 0 1 1 0 2 2 2
Local rank | 0 0 1 1 2 2 0 1 2
intra color | 0 1 0 1 1 0 2 2 2
key | global rank
#+end_example
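The splitting itself is not shown in this hunk; the following is a minimal sketch of how the local ranks in the table can arise, where the node id plays the role of the intra color and the global rank of the key. The names =exampleIntraComm=, =intraComm= and =localRank= are illustrative, not Atrip's identifiers.
#+begin_src c++
#include <mpi.h>

// Hedged sketch: nodeId is assumed to be known per rank (the "nodeId" row
// of the table above); it is used as the color of the split and the global
// rank as the key, which reproduces the "Local rank" row.
void exampleIntraComm(MPI_Comm universe, int nodeId) {
  int rank;
  MPI_Comm_rank(universe, &rank);

  MPI_Comm intraComm;          // one communicator per node
  MPI_Comm_split(universe,
                 nodeId,       // intra color: ranks on the same node
                 rank,         // key: keep the global ordering
                 &intraComm);

  int localRank;               // corresponds to the "Local rank" row
  MPI_Comm_rank(intraComm, &localRank);
}
#+end_src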
@ -1853,11 +1869,8 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
// We want to construct a communicator which only contains one
// element per node
bool makeDistribution
= nodeInfos[rank].localRank == 0
? true
: false
;
bool const makeDistribution
= nodeInfos[rank].localRank == 0;
std::vector<ABCTuple>
nodeTuples = makeDistribution
@ -1870,6 +1883,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
: std::vector<ABCTuple>()
;
LOG(1,"Atrip") << "got nodeTuples\n";
// now we have to send the data from **one** rank on each node
// to all other ranks of this node
@ -1884,7 +1898,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
#+end_src
Every node has to distribute **at least**
nodeTuples.size() / nodeInfos[rank].ranksPerNode
=nodeTuples.size() / nodeInfos[rank].ranksPerNode=
tuples among the ranks.
We have to communicate this quantity among all nodes.
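The computation of =tuplesPerRankGlobal= is elided in this hunk; as a hedged sketch, one way to agree on such a quantity, assuming the global value should be the maximum of the per-node counts and with illustrative parameter names, is the following.
#+begin_src c++
#include <mpi.h>
#include <cstdint>

// Sketch only, not the tangled implementation: take the maximum of the
// local per-rank counts over all ranks of the universe.
uint64_t agreeOnTuplesPerRank(uint64_t nTuplesOnThisNode,
                              uint64_t ranksPerNode,
                              MPI_Comm universe) {
  // every node has to distribute at least ceil(nTuples / ranksPerNode)
  uint64_t const local
    = (nTuplesOnThisNode + ranksPerNode - 1) / ranksPerNode;
  uint64_t global;
  MPI_Allreduce(&local, &global, 1, MPI_UINT64_T, MPI_MAX, universe);
  return global;
}
#+end_src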
@ -1912,6 +1926,10 @@ We have to communicate this quantity among all nodes.
MPI_UINT64_T,
0,
universe);
LOG(1,"Atrip") << "Tuples per rank: " << tuplesPerRankGlobal << "\n";
LOG(1,"Atrip") << "ranks per node " << nodeInfos[rank].ranksPerNode << "\n";
LOG(1,"Atrip") << "#nodes " << nNodes << "\n";
#+end_src
Now we have the tuples that every rank has to have, i.e.,
@ -1931,16 +1949,44 @@ resizing that the condition above is met, i.e., we can resize
and add some fake tuples at the end as padding.
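As a worked example with made-up numbers: a node holding 7 tuples, with 3 ranks per node and =tuplesPerRankGlobal= equal to 3, has to grow its list to 9 tuples, i.e., append two =FAKE_TUPLE= entries.
#+begin_src c++
#include <cstddef>

// Illustrative numbers only; the identifiers mirror the surrounding code
// but the values are invented for the example.
constexpr std::size_t tuplesPerRankGlobal = 3;
constexpr std::size_t ranksPerNode        = 3;
constexpr std::size_t tuplesOnNode        = 7;
constexpr std::size_t totalTuples = tuplesPerRankGlobal * ranksPerNode; // 9
constexpr std::size_t nFakeTuples = totalTuples - tuplesOnNode;         // 2
static_assert(nFakeTuples == 2, "two padding tuples are appended");
#+end_src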
#+begin_src c++ :tangle (atrip-tuples-h)
size_t const totalTuplesLocal
= tuplesPerRankLocal
,* nodeInfos[rank].ranksPerNode;
size_t const totalTuples
= tuplesPerRankGlobal * nodeInfos[rank].ranksPerNode;
if (makeDistribution)
if (makeDistribution) {
nodeTuples.insert(nodeTuples.end(),
totalTuplesLocal - nodeTuples.size(),
totalTuples - nodeTuples.size(),
FAKE_TUPLE);
}
#+end_src
And now we can simply scatter the tuples in =nodeTuples=, sending
=tuplesPerRankGlobal= tuples to each of the ranks in the node,
#+begin_src c++ :tangle (atrip-tuples-h)
{
// construct mpi type for abctuple
MPI_Datatype MPI_ABCTUPLE;
MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE);
MPI_Type_commit(&MPI_ABCTUPLE);
LOG(1,"Atrip") << "scattering tuples \n";
result.resize(tuplesPerRankGlobal);
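// nodeTuples was padded with FAKE_TUPLEs above, so every rank of the node
// receives exactly tuplesPerRankGlobal tuples and the equal-count
// MPI_Scatter below is sufficient.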
MPI_Scatter(nodeTuples.data(),
tuplesPerRankGlobal,
MPI_ABCTUPLE,
result.data(),
tuplesPerRankGlobal,
MPI_ABCTUPLE,
0,
INTRA_COMM);
MPI_Type_free(&MPI_ABCTUPLE);
}
#+end_src
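Since =ABCTuple= is a fixed-size array of three =uint64_t=, the =MPI_Type_vector= call above (count 3, block length 1, stride 1) describes three contiguous elements. The following standalone sketch shows the same scatter step using =MPI_Type_contiguous= instead; the names =Tuple= and =scatterTuples= are assumptions and not part of the tangled sources.
#+begin_src c++
#include <mpi.h>
#include <array>
#include <cstdint>
#include <vector>

using Tuple = std::array<uint64_t, 3>;

// Hedged sketch, not the tangled code: scatter a padded tuple list so
// that every rank of comm receives exactly tuplesPerRank tuples.
std::vector<Tuple> scatterTuples(std::vector<Tuple> const& padded,
                                 int tuplesPerRank,
                                 MPI_Comm comm) {
  MPI_Datatype MPI_TUPLE;
  MPI_Type_contiguous(3, MPI_UINT64_T, &MPI_TUPLE);
  MPI_Type_commit(&MPI_TUPLE);

  std::vector<Tuple> received(tuplesPerRank);
  // equal send and receive counts, hence plain MPI_Scatter suffices
  MPI_Scatter(padded.data(), tuplesPerRank, MPI_TUPLE,
              received.data(), tuplesPerRank, MPI_TUPLE,
              0, comm);

  MPI_Type_free(&MPI_TUPLE);
  return received;
}
#+end_src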
The next step is sending the tuples from the local root rank
to the other ranks in the node; this we do with the MPI function
=MPI_Scatterv=.
@ -1963,7 +2009,8 @@ Therefore, the =displacements= are simply the vector
and the =sendCounts= vector is simply the constant vector
=tuplesPerRankLocal= of size =ranksPerNode=.
#+begin_src c++ :tangle (atrip-tuples-h)
TODO: Remove
#+begin_src c++
{
std::vector<int> const
sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal);
@ -1983,6 +2030,7 @@ and the =sendCounts= vector is simply the constant vector
MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE);
MPI_Type_commit(&MPI_ABCTUPLE);
LOG(1,"Atrip") << "scattering tuples \n";
MPI_Scatterv(nodeTuples.data(),
sendCounts.data(),
displacements.data(),
@ -2005,9 +2053,7 @@ inserting fake tuples where needed
#+begin_src c++ :tangle (atrip-tuples-h)
result.insert(result.end(),
tuplesPerRankGlobal - result.size(),
FAKE_TUPLE);
LOG(1,"Atrip") << "scattering tuples \n";
return result;

View File

@ -233,6 +233,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t);
}
if (info.nodeId == 0)
std::cout << "\tBuilding 1-d containers\n";
// DISTRIBUTE 1-d containers
// every tuple which is only located at one node belongs to this node
{
@ -241,6 +243,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin());
}
if (info.nodeId == 0)
std::cout << "\tBuilding 2-d containers\n";
// DISTRIBUTE 2-d containers
// the tuples which are located at two nodes are split half/half between these nodes
for (auto &m: container2d) {
@ -270,6 +274,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
}
if (info.nodeId == 0)
std::cout << "\tBuilding 3-d containers\n";
// DISTRIBUTE 3-d containers
// similar game for the tuples which belong to three different nodes
for (auto m: container3d){
@ -304,6 +310,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
}
if (info.nodeId == 0)
std::cout << "\tsorting...\n";
// sort part of group-and-sort algorithm
// every tuple on a given node is sorted in a way that
// the 'home elements' are the fastest index.
@ -324,29 +332,18 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
}
}
}
if (info.nodeId == 0) std::cout << "\tsorting list of tuples...\n";
//now we sort the list of tuples
std::sort(nodeTuples.begin(), nodeTuples.end());
if (info.nodeId == 0) std::cout << "\trestoring tuples...\n";
// we bring the tuples abc back in the order a<b<c
for (auto &t: nodeTuples) std::sort(t.begin(), t.end());
return nodeTuples;
}
//determine which element has to be fetched from sources for the next iteration
std::vector<size_t> fetchElement(ABCTuple cur, ABCTuple suc){
std::vector<size_t> result;
ABCTuple inter;
std::sort(cur.begin(), cur.end());
std::sort(suc.begin(), suc.end());
std::array<size_t,3>::iterator rit, cit, sit;
cit = std::unique(cur.begin(), cur.end());
sit = std::unique(suc.begin(), suc.end());
rit = std::set_difference(suc.begin(), sit, cur.begin(), cit, inter.begin());
result.resize(rit - inter.begin());
std::copy(inter.begin(), rit, result.begin());
return result;
}
// Distribution:1 ends here
// [[file:~/atrip/atrip.org::*Main][Main:1]]
@ -371,11 +368,8 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
// We want to construct a communicator which only contains one
// element per node
bool makeDistribution
= nodeInfos[rank].localRank == 0
? true
: false
;
bool const makeDistribution
= nodeInfos[rank].localRank == 0;
std::vector<ABCTuple>
nodeTuples = makeDistribution
@ -388,6 +382,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
: std::vector<ABCTuple>()
;
LOG(1,"Atrip") << "got nodeTuples\n";
// now we have to send the data from **one** rank on each node
// to all other ranks of this node
@ -423,64 +418,60 @@ MPI_Bcast(&tuplesPerRankGlobal,
MPI_UINT64_T,
0,
universe);
LOG(1,"Atrip") << "Tuples per rank: " << tuplesPerRankGlobal << "\n";
LOG(1,"Atrip") << "ranks per node " << nodeInfos[rank].ranksPerNode << "\n";
LOG(1,"Atrip") << "#nodes " << nNodes << "\n";
// Main:2 ends here
// [[file:~/atrip/atrip.org::*Main][Main:3]]
size_t const totalTuplesLocal
= tuplesPerRankLocal
* nodeInfos[rank].ranksPerNode;
size_t const totalTuples
= tuplesPerRankGlobal * nodeInfos[rank].ranksPerNode;
if (makeDistribution)
if (makeDistribution) {
nodeTuples.insert(nodeTuples.end(),
totalTuplesLocal - nodeTuples.size(),
totalTuples - nodeTuples.size(),
FAKE_TUPLE);
}
// Main:3 ends here
// [[file:~/atrip/atrip.org::*Main][Main:4]]
{
std::vector<int> const
sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal);
std::vector<int>
displacements(nodeInfos[rank].ranksPerNode);
std::iota(displacements.begin(),
displacements.end(),
tuplesPerRankLocal);
// important!
result.resize(tuplesPerRankLocal);
// construct mpi type for abctuple
MPI_Datatype MPI_ABCTUPLE;
MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE);
MPI_Type_commit(&MPI_ABCTUPLE);
MPI_Scatterv(nodeTuples.data(),
sendCounts.data(),
displacements.data(),
LOG(1,"Atrip") << "scattering tuples \n";
result.resize(tuplesPerRankGlobal);
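// nodeTuples was padded with FAKE_TUPLEs above, so every rank of the node
// receives exactly tuplesPerRankGlobal tuples and the equal-count
// MPI_Scatter below is sufficient.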
MPI_Scatter(nodeTuples.data(),
tuplesPerRankGlobal,
MPI_ABCTUPLE,
result.data(),
tuplesPerRankLocal,
tuplesPerRankGlobal,
MPI_ABCTUPLE,
0,
INTRA_COMM);
// free type
MPI_Type_free(&MPI_ABCTUPLE);
}
// Main:4 ends here
// [[file:~/atrip/atrip.org::*Main][Main:5]]
result.insert(result.end(),
// [[file:~/atrip/atrip.org::*Main][Main:6]]
/*
result.insert(result.end(),
tuplesPerRankGlobal - result.size(),
FAKE_TUPLE);
*/
LOG(1,"Atrip") << "scattering tuples \n";
return result;
}
// Main:5 ends here
// Main:6 ends here
// [[file:~/atrip/atrip.org::*Interface][Interface:1]]
struct Distribution : public TuplesDistribution {