Add group-and-sort distribution using MPI_Scatter instead of MPI_Scatterv

This commit is contained in:
Alejandro Gallo 2021-11-03 19:46:38 +01:00
parent 12f8c6294e
commit a5619146f0
2 changed files with 99 additions and 62 deletions

View File

@ -1694,6 +1694,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t); container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t);
} }
if (info.nodeId == 0)
std::cout << "\tBuilding 1-d containers\n";
// DISTRIBUTE 1-d containers // DISTRIBUTE 1-d containers
// every tuple which is only located at one node belongs to this node // every tuple which is only located at one node belongs to this node
{ {
@ -1702,6 +1704,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin()); std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin());
} }
if (info.nodeId == 0)
std::cout << "\tBuilding 2-d containers\n";
// DISTRIBUTE 2-d containers // DISTRIBUTE 2-d containers
//the tuples which are located at two nodes are half/half given to these nodes //the tuples which are located at two nodes are half/half given to these nodes
for (auto &m: container2d) { for (auto &m: container2d) {
@ -1731,6 +1735,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
} }
if (info.nodeId == 0)
std::cout << "\tBuilding 3-d containers\n";
// DISTRIBUTE 3-d containers // DISTRIBUTE 3-d containers
// similar game for the tuples which belong to three different nodes // similar game for the tuples which belong to three different nodes
for (auto m: container3d){ for (auto m: container3d){
@ -1765,6 +1771,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
} }
if (info.nodeId == 0)
std::cout << "\tsorting...\n";
// sort part of group-and-sort algorithm // sort part of group-and-sort algorithm
// every tuple on a given node is sorted in a way that // every tuple on a given node is sorted in a way that
// the 'home elements' are the fastest index. // the 'home elements' are the fastest index.
@ -1785,15 +1793,23 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
} }
} }
} }
if (info.nodeId == 0) std::cout << "\tsorting list of tuples...\n";
//now we sort the list of tuples //now we sort the list of tuples
std::sort(nodeTuples.begin(), nodeTuples.end()); std::sort(nodeTuples.begin(), nodeTuples.end());
if (info.nodeId == 0) std::cout << "\trestoring tuples...\n";
// we bring the tuples abc back in the order a<b<c // we bring the tuples abc back in the order a<b<c
for (auto &t: nodeTuples) std::sort(t.begin(), t.end()); for (auto &t: nodeTuples) std::sort(t.begin(), t.end());
return nodeTuples; return nodeTuples;
} }
#+end_src
TODO: ERASE THIS CODE BLOCK
#+begin_src c++
//determine which element has to be fetched from sources for the next iteration //determine which element has to be fetched from sources for the next iteration
std::vector<size_t> fetchElement(ABCTuple cur, ABCTuple suc){ std::vector<size_t> fetchElement(ABCTuple cur, ABCTuple suc){
std::vector<size_t> result; std::vector<size_t> result;
@ -1821,10 +1837,10 @@ Then we have the following
#+begin_example #+begin_example
Global rank | 0 1 2 3 4 5 6 7 8 Global rank | 0 1 2 3 4 5 6 7 8
key | global rank
nodeId | 0 1 0 1 1 0 2 2 2 nodeId | 0 1 0 1 1 0 2 2 2
Local rank | 0 0 1 1 2 2 0 1 2 Local rank | 0 0 1 1 2 2 0 1 2
intra color | 0 1 0 1 1 0 2 2 2 intra color | 0 1 0 1 1 0 2 2 2
key | global rank
#+end_example #+end_example
@ -1853,11 +1869,8 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
// We want to construct a communicator which only contains of one // We want to construct a communicator which only contains of one
// element per node // element per node
bool makeDistribution bool const makeDistribution
= nodeInfos[rank].localRank == 0 = nodeInfos[rank].localRank == 0;
? true
: false
;
std::vector<ABCTuple> std::vector<ABCTuple>
nodeTuples = makeDistribution nodeTuples = makeDistribution
@ -1870,6 +1883,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
: std::vector<ABCTuple>() : std::vector<ABCTuple>()
; ;
LOG(1,"Atrip") << "got nodeTuples\n";
// now we have to send the data from **one** rank on each node // now we have to send the data from **one** rank on each node
// to all others ranks of this node // to all others ranks of this node
@ -1884,7 +1898,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
#+end_src #+end_src
Every node has to distribute **at least** Every node has to distribute **at least**
nodeTuples.size() / nodeInfos[rank].ranksPerNode =nodeTuples.size() / nodeInfos[rank].ranksPerNode=
tuples among the ranks. tuples among the ranks.
We have to communicate this quantity among all nodes. We have to communicate this quantity among all nodes.
@ -1912,6 +1926,10 @@ We have to communicate this quantity among all nodes.
MPI_UINT64_T, MPI_UINT64_T,
0, 0,
universe); universe);
LOG(1,"Atrip") << "Tuples per rank: " << tuplesPerRankGlobal << "\n";
LOG(1,"Atrip") << "ranks per node " << nodeInfos[rank].ranksPerNode << "\n";
LOG(1,"Atrip") << "#nodes " << nNodes << "\n";
#+end_src #+end_src
Now we have the tuples that every rank has to have, i.e., Now we have the tuples that every rank has to have, i.e.,
@ -1931,16 +1949,44 @@ resizing that the condition above is met, i.e., so we can resize
and add some fake tuples at the end as padding. and add some fake tuples at the end as padding.
#+begin_src c++ :tangle (atrip-tuples-h) #+begin_src c++ :tangle (atrip-tuples-h)
size_t const totalTuplesLocal size_t const totalTuples
= tuplesPerRankLocal = tuplesPerRankGlobal * nodeInfos[rank].ranksPerNode;
,* nodeInfos[rank].ranksPerNode;
if (makeDistribution) if (makeDistribution) {
nodeTuples.insert(nodeTuples.end(), nodeTuples.insert(nodeTuples.end(),
totalTuplesLocal - nodeTuples.size(), totalTuples - nodeTuples.size(),
FAKE_TUPLE); FAKE_TUPLE);
}
#+end_src #+end_src
And now we can simply scatter the tuples in nodeTuples and send
=tuplesPerRankGlobal= to the different ranks in the node,
#+begin_src c++ :tangle (atrip-tuples-h)
{
// construct mpi type for abctuple
MPI_Datatype MPI_ABCTUPLE;
MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE);
MPI_Type_commit(&MPI_ABCTUPLE);
LOG(1,"Atrip") << "scattering tuples \n";
result.resize(tuplesPerRankGlobal);
MPI_Scatter(nodeTuples.data(),
tuplesPerRankGlobal,
MPI_ABCTUPLE,
result.data(),
tuplesPerRankGlobal,
MPI_ABCTUPLE,
0,
INTRA_COMM);
MPI_Type_free(&MPI_ABCTUPLE);
}
#+end_src
The next step is sending the tuples in the local root rank The next step is sending the tuples in the local root rank
to the other ranks in the node, this we do with the MPI function to the other ranks in the node, this we do with the MPI function
=MPI_Scatterv=. =MPI_Scatterv=.
@ -1963,7 +2009,8 @@ Therefore, the =displacements= are simply the vector
and the =sendCounts= vector is simply the constant vector and the =sendCounts= vector is simply the constant vector
=tuplesPerRankLocal= of size =ranksPerNode=. =tuplesPerRankLocal= of size =ranksPerNode=.
#+begin_src c++ :tangle (atrip-tuples-h) TODO: Remove
#+begin_src c++
{ {
std::vector<int> const std::vector<int> const
sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal); sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal);
@ -1983,6 +2030,7 @@ and the =sendCounts= vector is simply the constant vector
MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE); MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE);
MPI_Type_commit(&MPI_ABCTUPLE); MPI_Type_commit(&MPI_ABCTUPLE);
LOG(1,"Atrip") << "scattering tuples \n";
MPI_Scatterv(nodeTuples.data(), MPI_Scatterv(nodeTuples.data(),
sendCounts.data(), sendCounts.data(),
displacements.data(), displacements.data(),
@ -2005,9 +2053,7 @@ inserting fake tuples where needed
#+begin_src c++ :tangle (atrip-tuples-h) #+begin_src c++ :tangle (atrip-tuples-h)
result.insert(result.end(), LOG(1,"Atrip") << "scattering tuples \n";
tuplesPerRankGlobal - result.size(),
FAKE_TUPLE);
return result; return result;

View File

@ -233,6 +233,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t); container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t);
} }
if (info.nodeId == 0)
std::cout << "\tBuilding 1-d containers\n";
// DISTRIBUTE 1-d containers // DISTRIBUTE 1-d containers
// every tuple which is only located at one node belongs to this node // every tuple which is only located at one node belongs to this node
{ {
@ -241,6 +243,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin()); std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin());
} }
if (info.nodeId == 0)
std::cout << "\tBuilding 2-d containers\n";
// DISTRIBUTE 2-d containers // DISTRIBUTE 2-d containers
//the tuples which are located at two nodes are half/half given to these nodes //the tuples which are located at two nodes are half/half given to these nodes
for (auto &m: container2d) { for (auto &m: container2d) {
@ -270,6 +274,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
} }
if (info.nodeId == 0)
std::cout << "\tBuilding 3-d containers\n";
// DISTRIBUTE 3-d containers // DISTRIBUTE 3-d containers
// similar game for the tuples which belong to three different nodes // similar game for the tuples which belong to three different nodes
for (auto m: container3d){ for (auto m: container3d){
@ -304,6 +310,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
} }
if (info.nodeId == 0)
std::cout << "\tsorting...\n";
// sort part of group-and-sort algorithm // sort part of group-and-sort algorithm
// every tuple on a given node is sorted in a way that // every tuple on a given node is sorted in a way that
// the 'home elements' are the fastest index. // the 'home elements' are the fastest index.
@ -324,29 +332,18 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
} }
} }
} }
if (info.nodeId == 0) std::cout << "\tsorting list of tuples...\n";
//now we sort the list of tuples //now we sort the list of tuples
std::sort(nodeTuples.begin(), nodeTuples.end()); std::sort(nodeTuples.begin(), nodeTuples.end());
if (info.nodeId == 0) std::cout << "\trestoring tuples...\n";
// we bring the tuples abc back in the order a<b<c // we bring the tuples abc back in the order a<b<c
for (auto &t: nodeTuples) std::sort(t.begin(), t.end()); for (auto &t: nodeTuples) std::sort(t.begin(), t.end());
return nodeTuples; return nodeTuples;
} }
//determine which element has to be fetched from sources for the next iteration
std::vector<size_t> fetchElement(ABCTuple cur, ABCTuple suc){
std::vector<size_t> result;
ABCTuple inter;
std::sort(cur.begin(), cur.end());
std::sort(suc.begin(), suc.end());
std::array<size_t,3>::iterator rit, cit, sit;
cit = std::unique(cur.begin(), cur.end());
sit = std::unique(suc.begin(), suc.end());
rit = std::set_difference(suc.begin(), sit, cur.begin(), cit, inter.begin());
result.resize(rit - inter.begin());
std::copy(inter.begin(), rit, result.begin());
return result;
}
// Distribution:1 ends here // Distribution:1 ends here
// [[file:~/atrip/atrip.org::*Main][Main:1]] // [[file:~/atrip/atrip.org::*Main][Main:1]]
@ -371,11 +368,8 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
// We want to construct a communicator which only contains of one // We want to construct a communicator which only contains of one
// element per node // element per node
bool makeDistribution bool const makeDistribution
= nodeInfos[rank].localRank == 0 = nodeInfos[rank].localRank == 0;
? true
: false
;
std::vector<ABCTuple> std::vector<ABCTuple>
nodeTuples = makeDistribution nodeTuples = makeDistribution
@ -388,6 +382,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
: std::vector<ABCTuple>() : std::vector<ABCTuple>()
; ;
LOG(1,"Atrip") << "got nodeTuples\n";
// now we have to send the data from **one** rank on each node // now we have to send the data from **one** rank on each node
// to all others ranks of this node // to all others ranks of this node
@ -423,64 +418,60 @@ MPI_Bcast(&tuplesPerRankGlobal,
MPI_UINT64_T, MPI_UINT64_T,
0, 0,
universe); universe);
LOG(1,"Atrip") << "Tuples per rank: " << tuplesPerRankGlobal << "\n";
LOG(1,"Atrip") << "ranks per node " << nodeInfos[rank].ranksPerNode << "\n";
LOG(1,"Atrip") << "#nodes " << nNodes << "\n";
// Main:2 ends here // Main:2 ends here
// [[file:~/atrip/atrip.org::*Main][Main:3]] // [[file:~/atrip/atrip.org::*Main][Main:3]]
size_t const totalTuplesLocal size_t const totalTuples
= tuplesPerRankLocal = tuplesPerRankGlobal * nodeInfos[rank].ranksPerNode;
* nodeInfos[rank].ranksPerNode;
if (makeDistribution) if (makeDistribution) {
nodeTuples.insert(nodeTuples.end(), nodeTuples.insert(nodeTuples.end(),
totalTuplesLocal - nodeTuples.size(), totalTuples - nodeTuples.size(),
FAKE_TUPLE); FAKE_TUPLE);
}
// Main:3 ends here // Main:3 ends here
// [[file:~/atrip/atrip.org::*Main][Main:4]] // [[file:~/atrip/atrip.org::*Main][Main:4]]
{ {
std::vector<int> const
sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal);
std::vector<int>
displacements(nodeInfos[rank].ranksPerNode);
std::iota(displacements.begin(),
displacements.end(),
tuplesPerRankLocal);
// important!
result.resize(tuplesPerRankLocal);
// construct mpi type for abctuple // construct mpi type for abctuple
MPI_Datatype MPI_ABCTUPLE; MPI_Datatype MPI_ABCTUPLE;
MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE); MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE);
MPI_Type_commit(&MPI_ABCTUPLE); MPI_Type_commit(&MPI_ABCTUPLE);
MPI_Scatterv(nodeTuples.data(), LOG(1,"Atrip") << "scattering tuples \n";
sendCounts.data(),
displacements.data(), result.resize(tuplesPerRankGlobal);
MPI_Scatter(nodeTuples.data(),
tuplesPerRankGlobal,
MPI_ABCTUPLE, MPI_ABCTUPLE,
result.data(), result.data(),
tuplesPerRankLocal, tuplesPerRankGlobal,
MPI_ABCTUPLE, MPI_ABCTUPLE,
0, 0,
INTRA_COMM); INTRA_COMM);
// free type
MPI_Type_free(&MPI_ABCTUPLE); MPI_Type_free(&MPI_ABCTUPLE);
} }
// Main:4 ends here // Main:4 ends here
// [[file:~/atrip/atrip.org::*Main][Main:5]] // [[file:~/atrip/atrip.org::*Main][Main:6]]
result.insert(result.end(), /*
result.insert(result.end(),
tuplesPerRankGlobal - result.size(), tuplesPerRankGlobal - result.size(),
FAKE_TUPLE); FAKE_TUPLE);
*/
LOG(1,"Atrip") << "scattering tuples \n";
return result; return result;
} }
// Main:5 ends here // Main:6 ends here
// [[file:~/atrip/atrip.org::*Interface][Interface:1]] // [[file:~/atrip/atrip.org::*Interface][Interface:1]]
struct Distribution : public TuplesDistribution { struct Distribution : public TuplesDistribution {