Add group-and-sort with MPI_Scatter and not MPI_Scatterv

parent 12f8c6294e
commit a5619146f0

atrip.org (78)
@@ -1694,6 +1694,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t);
}

if (info.nodeId == 0)
std::cout << "\tBuilding 1-d containers\n";
// DISTRIBUTE 1-d containers
// every tuple which is only located at one node belongs to this node
{
@@ -1702,6 +1704,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin());
}

if (info.nodeId == 0)
std::cout << "\tBuilding 2-d containers\n";
// DISTRIBUTE 2-d containers
//the tuples which are located at two nodes are half/half given to these nodes
for (auto &m: container2d) {
@@ -1731,6 +1735,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {

}

if (info.nodeId == 0)
std::cout << "\tBuilding 3-d containers\n";
// DISTRIBUTE 3-d containers
// similar game for the tuples which belong to three different nodes
for (auto m: container3d){
@@ -1765,6 +1771,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
}


if (info.nodeId == 0)
std::cout << "\tsorting...\n";
// sort part of group-and-sort algorithm
// every tuple on a given node is sorted in a way that
// the 'home elements' are the fastest index.
@@ -1785,15 +1793,23 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
}
}
}

if (info.nodeId == 0) std::cout << "\tsorting list of tuples...\n";
//now we sort the list of tuples
std::sort(nodeTuples.begin(), nodeTuples.end());

if (info.nodeId == 0) std::cout << "\trestoring tuples...\n";
// we bring the tuples abc back in the order a<b<c
for (auto &t: nodeTuples) std::sort(t.begin(), t.end());

return nodeTuples;

}
#+end_src


TODO: ERASE THIS CODE BLOCK
#+begin_src c++
//determine which element has to be fetched from sources for the next iteration
std::vector<size_t> fetchElement(ABCTuple cur, ABCTuple suc){
std::vector<size_t> result;
@@ -1821,10 +1837,10 @@ Then we have the following

#+begin_example
Global rank | 0 1 2 3 4 5 6 7 8
key         | global rank
nodeId      | 0 1 0 1 1 0 2 2 2
Local rank  | 0 0 1 1 2 2 0 1 2
intra color | 0 1 0 1 1 0 2 2 2
key         | global rank
#+end_example

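For illustration, the intra-node split implied by this table could be expressed as follows. This is only a sketch; the field =nodeInfos[rank].nodeId= used as the color is an assumed name, not something defined in this commit.

#+begin_src c++
// Sketch only: ranks on the same node share a color (their node id,
// here assumed to be stored in nodeInfos[rank].nodeId) and keep the
// global rank as the key, so the local rank becomes the position
// within the node, exactly as in the table above.
MPI_Comm INTRA_COMM;
MPI_Comm_split(universe,
               nodeInfos[rank].nodeId, // color: one per node (assumed field)
               rank,                   // key: global rank
               &INTRA_COMM);
#+end_src
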
@@ -1853,11 +1869,8 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {

// We want to construct a communicator which only contains one
// element per node
bool makeDistribution
= nodeInfos[rank].localRank == 0
? true
: false
;
bool const makeDistribution
= nodeInfos[rank].localRank == 0;

std::vector<ABCTuple>
nodeTuples = makeDistribution
@@ -1870,6 +1883,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
: std::vector<ABCTuple>()
;

LOG(1,"Atrip") << "got nodeTuples\n";

// now we have to send the data from **one** rank on each node
// to all other ranks of this node
@@ -1884,7 +1898,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
#+end_src

Every node has to distribute **at least**
nodeTuples.size() / nodeInfos[rank].ranksPerNode
=nodeTuples.size() / nodeInfos[rank].ranksPerNode=
tuples among the ranks.

We have to communicate this quantity among all nodes.
@@ -1912,6 +1926,10 @@ We have to communicate this quantity among all nodes.
MPI_UINT64_T,
0,
universe);

LOG(1,"Atrip") << "Tuples per rank: " << tuplesPerRankGlobal << "\n";
LOG(1,"Atrip") << "ranks per node " << nodeInfos[rank].ranksPerNode << "\n";
LOG(1,"Atrip") << "#nodes " << nNodes << "\n";
#+end_src

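The snippet above broadcasts =tuplesPerRankGlobal= from rank 0. For illustration only, the same agreement could be reached with a single collective; this sketch is not part of the commit, assumes a 64-bit =size_t= (matching the =MPI_UINT64_T= used above), and uses ceiling division on the node roots:

#+begin_src c++
// Sketch only: node roots contribute the per-rank count they need
// (ceiling division), all other ranks contribute 0, and the maximum
// over the whole universe is the value every rank has to use.
size_t tuplesPerRankLocal
  = makeDistribution
  ? (nodeTuples.size() + nodeInfos[rank].ranksPerNode - 1)
      / nodeInfos[rank].ranksPerNode
  : 0;
size_t tuplesPerRankGlobal;
MPI_Allreduce(&tuplesPerRankLocal,
              &tuplesPerRankGlobal,
              1,
              MPI_UINT64_T,
              MPI_MAX,
              universe);
#+end_src
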
Now we have the tuples that every rank has to have, i.e.,
@@ -1931,16 +1949,44 @@ resizing that the condition above is met, i.e., so we can resize
and add some fake tuples at the end as padding.

#+begin_src c++ :tangle (atrip-tuples-h)
size_t const totalTuplesLocal
= tuplesPerRankLocal
,* nodeInfos[rank].ranksPerNode;
size_t const totalTuples
= tuplesPerRankGlobal * nodeInfos[rank].ranksPerNode;

if (makeDistribution)
if (makeDistribution) {
nodeTuples.insert(nodeTuples.end(),
totalTuplesLocal - nodeTuples.size(),
totalTuples - nodeTuples.size(),
FAKE_TUPLE);
}
#+end_src

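As a quick sanity check (illustrative only, not tangled into the sources, needs =<cassert>=): after the padding the node root holds exactly =ranksPerNode= blocks of =tuplesPerRankGlobal= tuples, which is what allows the plain =MPI_Scatter= with equal counts below.

#+begin_src c++
// Sketch only: the padding with FAKE_TUPLE makes the node-local list
// an exact multiple of the per-rank count.
if (makeDistribution) {
  assert(nodeTuples.size()
         == tuplesPerRankGlobal * nodeInfos[rank].ranksPerNode);
}
#+end_src
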
And now we can simply scatter the tuples in =nodeTuples=, sending
=tuplesPerRankGlobal= tuples to each rank in the node:

#+begin_src c++ :tangle (atrip-tuples-h)
{
// construct mpi type for abctuple
MPI_Datatype MPI_ABCTUPLE;
MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE);
MPI_Type_commit(&MPI_ABCTUPLE);

LOG(1,"Atrip") << "scattering tuples \n";

result.resize(tuplesPerRankGlobal);
MPI_Scatter(nodeTuples.data(),
tuplesPerRankGlobal,
MPI_ABCTUPLE,
result.data(),
tuplesPerRankGlobal,
MPI_ABCTUPLE,
0,
INTRA_COMM);

MPI_Type_free(&MPI_ABCTUPLE);

}
#+end_src

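A side note on the datatype above: an =ABCTuple= is three contiguous 64-bit integers, so =MPI_Type_vector(3, 1, 1, MPI_UINT64_T, ...)= describes the same layout as the simpler =MPI_Type_contiguous=. A sketch of the equivalent construction, for illustration only:

#+begin_src c++
// Sketch only: equivalent, slightly simpler construction of the
// tuple datatype used in the scatter above.
MPI_Datatype MPI_ABCTUPLE;
MPI_Type_contiguous(3, MPI_UINT64_T, &MPI_ABCTUPLE);
MPI_Type_commit(&MPI_ABCTUPLE);
// ... scatter as above ...
MPI_Type_free(&MPI_ABCTUPLE);
#+end_src
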
The next step is sending the tuples from the local root rank
to the other ranks in the node; this we do with the MPI function
=MPI_Scatterv=.
@@ -1963,7 +2009,8 @@ Therefore, the =displacements= are simply the vector
and the =sendCounts= vector is simply the constant vector
=tuplesPerRankLocal= of size =ranksPerNode=.

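For reference, a sketch of how these two vectors can be built (illustrative, not part of the commit); =MPI_Scatterv= takes both counts and displacements in units of the send datatype, so the i-th displacement is =i * tuplesPerRankLocal=:

#+begin_src c++
// Sketch only: counts and displacements for MPI_Scatterv, both in
// units of MPI_ABCTUPLE.
std::vector<int> sendCounts(nodeInfos[rank].ranksPerNode,
                            static_cast<int>(tuplesPerRankLocal));
std::vector<int> displacements(nodeInfos[rank].ranksPerNode);
for (size_t i = 0; i < displacements.size(); ++i)
  displacements[i] = static_cast<int>(i * tuplesPerRankLocal);
#+end_src
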
#+begin_src c++ :tangle (atrip-tuples-h)
TODO: Remove
#+begin_src c++
{
std::vector<int> const
sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal);
@@ -1983,6 +2030,7 @@ and the =sendCounts= vector is simply the constant vector
MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE);
MPI_Type_commit(&MPI_ABCTUPLE);

LOG(1,"Atrip") << "scattering tuples \n";
MPI_Scatterv(nodeTuples.data(),
sendCounts.data(),
displacements.data(),
@@ -2005,9 +2053,7 @@ inserting fake tuples where needed

#+begin_src c++ :tangle (atrip-tuples-h)

result.insert(result.end(),
tuplesPerRankGlobal - result.size(),
FAKE_TUPLE);
LOG(1,"Atrip") << "scattering tuples \n";

return result;

@@ -233,6 +233,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t);
}

if (info.nodeId == 0)
std::cout << "\tBuilding 1-d containers\n";
// DISTRIBUTE 1-d containers
// every tuple which is only located at one node belongs to this node
{
@@ -241,6 +243,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin());
}

if (info.nodeId == 0)
std::cout << "\tBuilding 2-d containers\n";
// DISTRIBUTE 2-d containers
//the tuples which are located at two nodes are half/half given to these nodes
for (auto &m: container2d) {
@@ -270,6 +274,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {

}

if (info.nodeId == 0)
std::cout << "\tBuilding 3-d containers\n";
// DISTRIBUTE 3-d containers
// similar game for the tuples which belong to three different nodes
for (auto m: container3d){
@@ -304,6 +310,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
}


if (info.nodeId == 0)
std::cout << "\tsorting...\n";
// sort part of group-and-sort algorithm
// every tuple on a given node is sorted in a way that
// the 'home elements' are the fastest index.
@@ -324,29 +332,18 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
}
}
}

if (info.nodeId == 0) std::cout << "\tsorting list of tuples...\n";
//now we sort the list of tuples
std::sort(nodeTuples.begin(), nodeTuples.end());

if (info.nodeId == 0) std::cout << "\trestoring tuples...\n";
// we bring the tuples abc back in the order a<b<c
for (auto &t: nodeTuples) std::sort(t.begin(), t.end());

return nodeTuples;

}

//determine which element has to be fetched from sources for the next iteration
std::vector<size_t> fetchElement(ABCTuple cur, ABCTuple suc){
std::vector<size_t> result;
ABCTuple inter;
std::sort(cur.begin(), cur.end());
std::sort(suc.begin(), suc.end());
std::array<size_t,3>::iterator rit, cit, sit;
cit = std::unique(cur.begin(), cur.end());
sit = std::unique(suc.begin(), suc.end());
rit = std::set_difference(suc.begin(), sit, cur.begin(), cit, inter.begin());
result.resize(rit - inter.begin());
std::copy(inter.begin(), rit, result.begin());
return result;
}
// Distribution:1 ends here
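
// Illustrative note (not part of this commit): for consecutive tuples
// cur = {1, 2, 3} and suc = {2, 3, 4}, fetchElement returns {4}, i.e.
// only the slice that is not yet resident from the previous iteration
// has to be fetched.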

// [[file:~/atrip/atrip.org::*Main][Main:1]]
@@ -371,11 +368,8 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {

// We want to construct a communicator which only contains one
// element per node
bool makeDistribution
= nodeInfos[rank].localRank == 0
? true
: false
;
bool const makeDistribution
= nodeInfos[rank].localRank == 0;

std::vector<ABCTuple>
nodeTuples = makeDistribution
@@ -388,6 +382,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
: std::vector<ABCTuple>()
;

LOG(1,"Atrip") << "got nodeTuples\n";

// now we have to send the data from **one** rank on each node
// to all other ranks of this node
@@ -423,64 +418,60 @@ MPI_Bcast(&tuplesPerRankGlobal,
MPI_UINT64_T,
0,
universe);

LOG(1,"Atrip") << "Tuples per rank: " << tuplesPerRankGlobal << "\n";
LOG(1,"Atrip") << "ranks per node " << nodeInfos[rank].ranksPerNode << "\n";
LOG(1,"Atrip") << "#nodes " << nNodes << "\n";
// Main:2 ends here

// [[file:~/atrip/atrip.org::*Main][Main:3]]
size_t const totalTuplesLocal
= tuplesPerRankLocal
* nodeInfos[rank].ranksPerNode;
size_t const totalTuples
= tuplesPerRankGlobal * nodeInfos[rank].ranksPerNode;

if (makeDistribution)
if (makeDistribution) {
nodeTuples.insert(nodeTuples.end(),
totalTuplesLocal - nodeTuples.size(),
totalTuples - nodeTuples.size(),
FAKE_TUPLE);
}
// Main:3 ends here

// [[file:~/atrip/atrip.org::*Main][Main:4]]
{
std::vector<int> const
sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal);

std::vector<int>
displacements(nodeInfos[rank].ranksPerNode);

std::iota(displacements.begin(),
displacements.end(),
tuplesPerRankLocal);

// important!
result.resize(tuplesPerRankLocal);

// construct mpi type for abctuple
MPI_Datatype MPI_ABCTUPLE;
MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE);
MPI_Type_commit(&MPI_ABCTUPLE);

MPI_Scatterv(nodeTuples.data(),
sendCounts.data(),
displacements.data(),
LOG(1,"Atrip") << "scattering tuples \n";

result.resize(tuplesPerRankGlobal);
MPI_Scatter(nodeTuples.data(),
tuplesPerRankGlobal,
MPI_ABCTUPLE,
result.data(),
tuplesPerRankLocal,
tuplesPerRankGlobal,
MPI_ABCTUPLE,
0,
INTRA_COMM);

// free type
MPI_Type_free(&MPI_ABCTUPLE);

}
// Main:4 ends here

// [[file:~/atrip/atrip.org::*Main][Main:5]]
// [[file:~/atrip/atrip.org::*Main][Main:6]]
/*
result.insert(result.end(),
tuplesPerRankGlobal - result.size(),
FAKE_TUPLE);
*/

LOG(1,"Atrip") << "scattering tuples \n";

return result;

}
// Main:5 ends here
// Main:6 ends here

// [[file:~/atrip/atrip.org::*Interface][Interface:1]]
struct Distribution : public TuplesDistribution {
