Update sources

2021-11-08 17:33:13 +01:00 · 2021-11-08 17:33:13 +01:00 · 1d6d14c398
commit 1d6d14c398
parent 7e5feccca9
6 changed files with 273 additions and 123 deletions
--- a/include/atrip/Atrip.hpp
+++ b/include/atrip/Atrip.hpp
@ -22,6 +22,8 @@ namespace atrip {
    static int rank;
    static int np;
    static Timings chrono;
    static size_t networkSend;
    static size_t localSend;
    static void init();
    struct Input {
@ -46,6 +48,7 @@ namespace atrip {
        GROUP_AND_SORT,
      };
      ADD_ATTRIBUTE(bool, rankRoundRobin, false)
      ADD_ATTRIBUTE(bool, chrono, false)
      ADD_ATTRIBUTE(bool, barrier, false)
      ADD_ATTRIBUTE(int, maxIterations, 0)
--- a/include/atrip/RankMap.hpp
+++ b/include/atrip/RankMap.hpp
@ -5,22 +5,36 @@
 #include <algorithm>
 #include <atrip/Slice.hpp>
 #include <atrip/Tuples.hpp>
 namespace atrip {
  struct RankMap {
    static bool RANK_ROUND_ROBIN;
    std::vector<size_t> const lengths;
    size_t const np, size;
    ClusterInfo const clusterInfo;
-    RankMap(std::vector<size_t> lens, size_t np_)
+    RankMap(std::vector<size_t> lens, size_t np_, MPI_Comm comm)
      : lengths(lens)
      , np(np_)
      , size(std::accumulate(lengths.begin(), lengths.end(),
                            1UL, std::multiplies<size_t>()))
      , clusterInfo(getClusterInfo(comm))
    { assert(lengths.size() <= 2); }
    size_t find(Slice::Location const& p) const noexcept {
-      return p.source * np + p.rank;
+      if (RANK_ROUND_ROBIN) {
        return p.source * np + p.rank;
      } else {
        const size_t
          rankPosition = p.source * clusterInfo.ranksPerNode
                       + clusterInfo.rankInfos[p.rank].localRank
                       ;
        return rankPosition * clusterInfo.nNodes
             + clusterInfo.rankInfos[p.rank].nodeId
             ;
      }
    }
    size_t nSources() const noexcept {
@ -40,8 +54,9 @@ namespace atrip {
    }
    Slice::Location
-    find(ABCTuple const& abc, Slice::Type sliceType) const noexcept {
+    find(ABCTuple const& abc, Slice::Type sliceType) const {
      // tuple = {11, 8} when abc = {11, 8, 9} and sliceType = AB
      // tuple = {11, 0} when abc = {11, 8, 9} and sliceType = A
      const auto tuple = Slice::subtupleBySlice(abc, sliceType);
      const size_t index
@ -49,9 +64,51 @@ namespace atrip {
        + tuple[1] * (lengths.size() > 1 ? lengths[0] : 0)
        ;
      size_t rank, source;
      if (RANK_ROUND_ROBIN) {
        rank = index % np;
        source = index / np;
      } else {
        size_t const
          // the node that will be assigned to
            nodeId = index % clusterInfo.nNodes
          // how many times it has been assigned to the node
          , s_n = index / clusterInfo.nNodes
          // which local rank in the node should be
          , localRank = s_n % clusterInfo.ranksPerNode
          // and the local source (how many times we chose this local rank)
          , localSource = s_n / clusterInfo.ranksPerNode
          ;
        // find the localRank-th entry in clusterInfo
        auto const& it =
          std::find_if(clusterInfo.rankInfos.begin(),
                       clusterInfo.rankInfos.end(),
                       [nodeId, localRank](RankInfo const& ri) {
                         return ri.nodeId == nodeId
                             && ri.localRank == localRank
                             ;
                       });
        if (it == clusterInfo.rankInfos.end()) {
          throw "FATAL! Error in node distribution of the slices";
        }
        rank = (*it).globalRank;
        source = localSource;
      }
      return
-        { index % np
+        { rank
-        , index / np
+        , source
        };
    }
--- a/include/atrip/SliceUnion.hpp
+++ b/include/atrip/SliceUnion.hpp
@ -178,8 +178,14 @@ namespace atrip {
          if (blank.info.state == Slice::SelfSufficient) {
            blank.data = sources[from.source].data();
          } else {
-            if (freePointers.size() == 0)
+            if (freePointers.size() == 0) {
-              throw std::domain_error("No more free pointers!");
+              std::stringstream stream;
              stream << "No more free pointers "
                     << "for type " << type
                     << " and name " << name
                      ;
              throw std::domain_error(stream.str());
            }
            auto dataPointer = freePointers.begin();
            freePointers.erase(dataPointer);
            blank.data = *dataPointer;
@ -332,7 +338,7 @@ namespace atrip {
              , Slice::Name name_
              , size_t nSliceBuffers = 4
              )
-              : rankMap(paramLength, np)
+              : rankMap(paramLength, np, global_world)
              , world(child_world)
              , universe(global_world)
              , sliceLength(sliceLength_)
@ -419,16 +425,27 @@ namespace atrip {
     * \brief Send asynchronously only if the state is Fetch
     */
    void send( size_t otherRank
-             , Slice::Info const& info
+             , Slice::LocalDatabaseElement const& el
             , size_t tag) const noexcept {
      MPI_Request request;
      bool sendData_p = false;
      auto const& info = el.info;
      if (info.state == Slice::Fetch) sendData_p = true;
      // TODO: remove this because I have SelfSufficient
      if (otherRank == info.from.rank)      sendData_p = false;
      if (!sendData_p) return;
      switch (el.name) {
        case Slice::Name::TA:
        case Slice::Name::VIJKA:
          if (otherRank / 48 == Atrip::rank / 48) {
            Atrip::localSend++;
          } else {
            Atrip::networkSend++;
          }
      }
      MPI_Isend( sources[info.from.source].data()
               , sources[info.from.source].size()
               , MPI_DOUBLE /* TODO: adapt this with traits */
--- a/include/atrip/Tuples.hpp
+++ b/include/atrip/Tuples.hpp
@ -85,26 +85,30 @@ struct RankInfo {
  const size_t ranksPerNode;
 };
 /**
  * \brief Return the distinct elements of xs in ascending order.
  *
  * The input is left untouched; the work is done on a private copy.
  */
 template <typename A>
 std::vector<A> unique(std::vector<A> const &xs) {
  std::vector<A> distinct(xs);
  // std::unique only collapses neighbouring duplicates, so sort first
  std::sort(distinct.begin(), distinct.end());
  distinct.erase(std::unique(distinct.begin(), distinct.end()),
                 distinct.end());
  return distinct;
 }
 std::vector<RankInfo>
 getNodeInfos(std::vector<string> const& nodeNames) {
  std::vector<RankInfo> result;
-  auto uniqueNames = nodeNames;
+  auto const uniqueNames = unique(nodeNames);
-  {
+  auto const index = [&uniqueNames](std::string const& s) {
    std::sort(uniqueNames.begin(), uniqueNames.end());
    auto const& last = std::unique(uniqueNames.begin(), uniqueNames.end());
    uniqueNames.erase(last, uniqueNames.end());
  }
  const auto index = [&uniqueNames](std::string const& s) {
    auto const& it = std::find(uniqueNames.begin(), uniqueNames.end(), s);
    return std::distance(uniqueNames.begin(), it);
  };
  std::vector<size_t> localRanks(uniqueNames.size(), 0);
-  size_t rank = 0;
+  size_t globalRank = 0;
  for (auto const& name: nodeNames) {
    const size_t nodeId = index(name);
    result.push_back({name,
                      nodeId,
-                      rank++,
+                      globalRank++,
                      localRanks[nodeId]++,
                      std::count(nodeNames.begin(),
                                 nodeNames.end(),
@ -113,6 +117,25 @@ getNodeInfos(std::vector<string> const& nodeNames) {
  }
  return result;
 }
 /**
  * \brief Layout of the ranks over the nodes, as built by getClusterInfo.
  *
  * All members are const; the member order matters because
  * getClusterInfo fills the struct through aggregate initialisation.
  */
 struct ClusterInfo {
  // number of distinct node names
  size_t const nNodes;
  // total number of node names returned by getNodeNames
  size_t const np;
  // ranksPerNode as reported by the first RankInfo entry
  size_t const ranksPerNode;
  // one record per entry of the node name list, from getNodeInfos
  std::vector<RankInfo> const rankInfos;
 };
 /**
  * \brief Build the ClusterInfo for the ranks of a communicator.
  *
  * \param comm communicator handed to getNodeNames
  * \return node count, name count, ranks per node and the per-rank records
  */
 ClusterInfo
 getClusterInfo(MPI_Comm comm) {
  auto const nodeNames = getNodeNames(comm);
  auto const infos = getNodeInfos(nodeNames);
  size_t const nodeCount = unique(nodeNames).size();
  size_t const nameCount = nodeNames.size();
  // only the first entry is consulted for the ranks-per-node value
  size_t const perNode = infos[0].ranksPerNode;
  return ClusterInfo { nodeCount, nameCount, perNode, infos };
 }
 // Node information:2 ends here
 // [[file:~/atrip/atrip.org::*Naive%20list][Naive list:1]]
@ -189,8 +212,6 @@ size_t isOnNode(size_t tuple, size_t nodes) { return tuple % nodes; }
 // Parameters handed to specialDistribution.
 // NOTE(review): the updated call site in this change builds
 // Info{nNodes, nodeId} with two initialisers only, so Nv and np look like
 // fields this commit removes -- confirm against the resulting file.
 struct Info {
  // number of nodes; passed to getTupleNodes and isOnNode
  size_t nNodes;
  size_t Nv;
  size_t np;
  // id of the node whose share of the tuples is being collected
  size_t nodeId;
 };
@ -212,28 +233,33 @@ std::vector<size_t> getTupleNodes(ABCTuple t, size_t nNodes) {
 // Utils:1 ends here
 // [[file:~/atrip/atrip.org::*Distribution][Distribution:1]]
-std::vector<ABCTuple>
+ABCTuples specialDistribution(Info const& info, ABCTuples const& allTuples) {
 specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
-  std::vector<ABCTuple> nodeTuples;
+  ABCTuples nodeTuples;
-  size_t nNodes(info.nNodes);
+  size_t const nNodes(info.nNodes);
  size_t np(info.np);
  size_t N(allTuples.size());
-  //      nodeid          tuple list
+  std::map< size_t /* nodeId */, ABCTuples >
-  std::map<size_t, std::vector<ABCTuple> > container1d;
+    container1d, container2d, container3d;
  std::map<size_t, std::vector<ABCTuple> > container2d;
  std::map<size_t, std::vector<ABCTuple> > container3d;
  // build container-n-d's
-  for (auto t: allTuples) {
+  for (auto const& t: allTuples) {
    // on which node(s) the tuple elements are located...
    // put them into the right container
-    auto nt = getTupleNodes(t, nNodes);
+    auto const _nodes = getTupleNodes(t, nNodes);
-    if ( nt.size() == 1) container1d[nt[0]].push_back(t);
+    switch (_nodes.size()) {
-    if ( nt.size() == 2) container2d[nt[0] + nNodes*nt[1]].push_back(t);
+      case 1:
-    if ( nt.size() == 3)
+        container1d[_nodes[0]].push_back(t);
-      container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t);
+      case 2:
        container2d[ _nodes[0]
                   + nNodes * _nodes[1]
                   ].push_back(t);
      case 3:
        container3d[ _nodes[0]
                   + nNodes * _nodes[1]
                   + nNodes * nNodes * _nodes[2]
                   ].push_back(t);
    }
  }
  if (info.nodeId == 0)
@ -241,97 +267,114 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
  // DISTRIBUTE 1-d containers
  // every tuple which is only located at one node belongs to this node
  {
-    auto const& tuplesVec = container1d[info.nodeId];
+    auto const& _tuplesVec = container1d[info.nodeId];
-    nodeTuples.resize(tuplesVec.size());
+    nodeTuples.resize(_tuplesVec.size());
-    std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin());
+    std::copy(_tuplesVec.begin(), _tuplesVec.end(), nodeTuples.begin());
  }
  if (info.nodeId == 0)
    std::cout << "\tBuilding 2-d containers\n";
  // DISTRIBUTE 2-d containers
  // the tuples located on two nodes are split half/half between those nodes
-  for (auto &m: container2d) {
+  for (auto const& m: container2d) {
    size_t idx = m.first%nNodes;
    size_t idy = m.first/nNodes;
    size_t myNode = idx;
-    // either idx or idy is my node
+    auto const& _tuplesVec = m.second;
-    if (idx != info.nodeId && idy != info.nodeId) continue;
+      const
-    if (idy == info.nodeId) myNode = idy;
+    size_t idx = m.first % nNodes
         // remember: m.first = idy * nNodes + idx
         , idy = m.first / nNodes
         , n_half = _tuplesVec.size() / 2
         , size = nodeTuples.size()
         ;
-    auto tuplesVec = m.second;
+    size_t nextra, nbegin, nend;
-    auto n = tuplesVec.size() / 2;
+    if (info.nodeId == idx) {
-    auto size = nodeTuples.size();
+      nextra = n_half;
-    if (myNode == idx) {
+      nbegin = 0 * n_half;
-      nodeTuples.resize(size + n);
+      nend   = n_half;
-      std::copy(tuplesVec.begin(),
+    } else if (info.nodeId == idy) {
-                tuplesVec.begin() + n,
+      nextra = _tuplesVec.size() - n_half;
-                nodeTuples.begin() + size);
+      nbegin = 1 * n_half;
      nend   = _tuplesVec.size();
    } else {
-      auto ny = tuplesVec.size() - n;
+      // either idx or idy is my node
-      nodeTuples.resize(size + ny);
+      continue;
      std::copy(tuplesVec.begin() + n,
                tuplesVec.end(),
                nodeTuples.begin() + size);
    }
    nodeTuples.resize(size + nextra);
    std::copy(_tuplesVec.begin() + nbegin,
              _tuplesVec.begin() + nend,
              nodeTuples.begin() + size);
  }
  if (info.nodeId == 0)
    std::cout << "\tBuilding 3-d containers\n";
  // DISTRIBUTE 3-d containers
-  // similar game for the tuples which belong to three different nodes
+  for (auto const& m: container3d){
-  for (auto m: container3d){
+    auto const& _tuplesVec = m.second;
    auto tuplesVec = m.second;
    auto idx = m.first%nNodes;
    auto idy = (m.first/nNodes)%nNodes;
    auto idz = m.first/nNodes/nNodes;
    if (idx != info.nodeId && idy != info.nodeId && idz != info.nodeId) continue;
-    size_t nx = tuplesVec.size() / 3;
+      const
-    size_t n, nbegin, nend;
+    size_t idx = m.first % nNodes
         , idy = (m.first / nNodes) % nNodes
         // remember: m.first = idx + idy * nNodes + idz * nNodes^2
         , idz = m.first / nNodes / nNodes
         , n_third = _tuplesVec.size() / 3
         , size = nodeTuples.size()
         ;
    size_t nextra, nbegin, nend;
    if (info.nodeId == idx) {
-      n = nx;
+      nextra = n_third;
-      nbegin = 0;
+      nbegin = 0 * n_third;
-      nend = n;
+      nend   = nextra;
    } else if (info.nodeId == idy) {
-      n = nx;
+      nextra = n_third;
-      nbegin = n;
+      nbegin = 1 * n_third;
-      nend = n + n;
+      nend   = 2 * nextra;
    } else if (info.nodeId == idz) {
      nextra = _tuplesVec.size() - 2 * n_third;
      nbegin = 2 * n_third;
      nend   = _tuplesVec.size();
    } else {
-      n = tuplesVec.size() - 2 * nx;
+      // either idx or idy or idz is my node
-      nbegin = 2 * nx;
+      continue;
      nend = 2 * nx + n;
    }
-    auto size = nodeTuples.size();
+    nodeTuples.resize(size + nextra);
-    nodeTuples.resize(size + n);
+    std::copy(_tuplesVec.begin() + nbegin,
-    std::copy(tuplesVec.begin() + nbegin,
+              _tuplesVec.begin() + nend,
              tuplesVec.begin() + nend,
              nodeTuples.begin() + size);
  }
-  if (info.nodeId == 0)
+  if (info.nodeId == 0) std::cout << "\tswapping tuples...\n";
-    std::cout << "\tsorting...\n";
+  /*
-  // sort part of group-and-sort algorithm
+   *  sort part of group-and-sort algorithm
-  // every tuple on a given node is sorted in a way that
+   *  every tuple on a given node is sorted in a way that
-  // the 'home elements' are the fastest index.
+   *  the 'home elements' are the fastest index.
-  // 1:yyy 2:yyn(x) 3:yny(x) 4:ynn(x) 5:nyy 6:nyn(x) 7:nny 8:nnn
+   *  1:yyy 2:yyn(x) 3:yny(x) 4:ynn(x) 5:nyy 6:nyn(x) 7:nny 8:nnn
-  size_t n = info.nodeId;
+   */
  for (auto &nt: nodeTuples){
-    if ( isOnNode(nt[0], nNodes) == n ){ // 1234
+    if ( isOnNode(nt[0], nNodes) == info.nodeId ){ // 1234
-      if ( isOnNode(nt[2], nNodes) != n ){ // 24
+      if ( isOnNode(nt[2], nNodes) != info.nodeId ){ // 24
-        size_t x(nt[0]); nt[0] = nt[2]; nt[2] = x; // switch first and last
+        size_t const x(nt[0]);
        nt[0] = nt[2];         // switch first and last
        nt[2] = x;
      }
-      else if ( isOnNode(nt[1], nNodes) != n){ // 3
+      else if ( isOnNode(nt[1], nNodes) != info.nodeId){ // 3
-        size_t x(nt[0]); nt[0] = nt[1]; nt[1] = x; // switch first two
+        size_t const x(nt[0]);
        nt[0] = nt[1];         // switch first two
        nt[1] = x;
      }
    } else {
-      if ( isOnNode(nt[1], nNodes) == n   // 56
+      if ( isOnNode(nt[1], nNodes) == info.nodeId   // 56
-        && isOnNode(nt[2], nNodes) != n){ // 6
+        && isOnNode(nt[2], nNodes) != info.nodeId
-        size_t x(nt[1]); nt[1] = nt[2]; nt[2] = x; // switch last two
+        ) { // 6
        size_t const x(nt[1]);
        nt[1] = nt[2];         // switch last two
        nt[2] = x;
      }
    }
  }
@ -358,32 +401,22 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
  std::vector<ABCTuple> result;
-  const auto nodeNames(getNodeNames(universe));
+  auto const nodeNames(getNodeNames(universe));
-  auto nodeNamesUnique(nodeNames);
+  size_t const nNodes = unique(nodeNames).size();
  {
    const auto& last = std::unique(nodeNamesUnique.begin(),
                                   nodeNamesUnique.end());
    nodeNamesUnique.erase(last, nodeNamesUnique.end());
  }
  // we pick one rank from every node
  auto const nodeInfos = getNodeInfos(nodeNames);
  size_t const nNodes = nodeNamesUnique.size();
  // We want to construct a communicator which consists of only one
  // element per node
-  bool const makeDistribution
+  bool const computeDistribution
    = nodeInfos[rank].localRank == 0;
  std::vector<ABCTuple>
-    nodeTuples = makeDistribution
+    nodeTuples
-               ? specialDistribution(Info{ nNodes
+      = computeDistribution
-                                         , Nv
+      ? specialDistribution(Info{nNodes, nodeInfos[rank].nodeId},
-                                         , np
+                            getAllTuplesList(Nv))
-                                         , nodeInfos[rank].nodeId
+      : std::vector<ABCTuple>()
-                                         },
+      ;
                                      getAllTuplesList(Nv))
               : std::vector<ABCTuple>()
               ;
  LOG(1,"Atrip") << "got nodeTuples\n";
@ -400,7 +433,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
 // Main:1 ends here
 // [[file:~/atrip/atrip.org::*Main][Main:2]]
-const size_t
+size_t const
  tuplesPerRankLocal
     = nodeTuples.size() / nodeInfos[rank].ranksPerNode
     + size_t(nodeTuples.size() % nodeInfos[rank].ranksPerNode != 0)
@ -431,7 +464,8 @@ LOG(1,"Atrip") << "#nodes " << nNodes << "\n";
 size_t const totalTuples
  = tuplesPerRankGlobal * nodeInfos[rank].ranksPerNode;
-if (makeDistribution) {
+if (computeDistribution) {
  // pad with FAKE_TUPLEs
  nodeTuples.insert(nodeTuples.end(),
                    totalTuples - nodeTuples.size(),
                    FAKE_TUPLE);
@ -462,13 +496,13 @@ if (makeDistribution) {
 }
 // Main:4 ends here
-// [[file:~/atrip/atrip.org::*Main][Main:6]]
+// [[file:~/atrip/atrip.org::*Main][Main:5]]
 LOG(1,"Atrip") << "scattering tuples \n";
  return result;
 }
-// Main:6 ends here
+// Main:5 ends here
 // [[file:~/atrip/atrip.org::*Interface][Interface:1]]
 struct Distribution : public TuplesDistribution {
--- a/include/atrip/Unions.hpp
+++ b/include/atrip/Unions.hpp
@ -57,7 +57,7 @@ namespace atrip {
                       , child_world
                       , global_world
                       , Slice::TA
-                       , 4) {
+                       , 5) {
           init(sourceTensor);
         }
@ -94,7 +94,7 @@ namespace atrip {
                      , child_world
                      , global_world
                      , Slice::VIJKA
-                      , 4) {
+                      , 5) {
           init(sourceTensor);
         }
--- a/src/atrip/Atrip.cxx
+++ b/src/atrip/Atrip.cxx
@ -9,13 +9,18 @@
 using namespace atrip;
 // Selects the slice-to-rank mapping used by RankMap::find; assigned from
 // the rankRoundRobin input attribute in Atrip::run.
 bool RankMap::RANK_ROUND_ROBIN;
 // Rank of this process and number of processes in MPI_COMM_WORLD,
 // both filled in by Atrip::init.
 int Atrip::rank;
 int Atrip::np;
 Timings Atrip::chrono;
 // Send counters: reset in Atrip::init, incremented in send() for TA and
 // VIJKA slices, and summed onto rank 0 with MPI_Reduce in Atrip::run.
 size_t Atrip::networkSend;
 size_t Atrip::localSend;
 // Reset both send counters and query this process' rank and the number
 // of processes on MPI_COMM_WORLD.
 void Atrip::init()  {
  // the counters do not depend on MPI, so clear them first
  Atrip::networkSend = Atrip::localSend = 0;
  MPI_Comm_size(MPI_COMM_WORLD, &Atrip::np);
  MPI_Comm_rank(MPI_COMM_WORLD, &Atrip::rank);
 }
 Atrip::Output Atrip::run(Atrip::Input const& in) {
@ -43,6 +48,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
  in.ea->read_all(epsa.data());
  in.Tph->read_all(Tai.data());
  RankMap::RANK_ROUND_ROBIN = in.rankRoundRobin;
  if (RankMap::RANK_ROUND_ROBIN) {
    LOG(0,"Atrip") << "Doing rank round robin slices distribution" << "\n";
  } else {
    LOG(0,"Atrip")
      << "Doing node > local rank round robin slices distribution" << "\n";
  }
  // COMMUNICATOR CONSTRUCTION ========================================={{{1
  //
  // Construct a new communicator living only on a single rank
@ -224,7 +238,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
          ;
        WITH_CHRONO("db:io:send",
-          u.send(otherRank, el.info, sendTag);
+          u.send(otherRank, el, sendTag);
        )
      } // send phase
@ -268,6 +282,25 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
    ))
    if (iteration % in.iterationMod == 0) {
      size_t networkSend;
      MPI_Reduce(&Atrip::networkSend,
                 &networkSend,
                 1,
                 MPI_UINT64_T,
                 MPI_SUM,
                 0,
                 universe);
      size_t localSend;
      MPI_Reduce(&Atrip::localSend,
                 &localSend,
                 1,
                 MPI_UINT64_T,
                 MPI_SUM,
                 0,
                 universe);
      LOG(0,"Atrip")
        << "iteration " << iteration
        << " [" << 100 * iteration / nIterations << "%]"
@ -275,7 +308,12 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
        << "GF)"
        << " (" << doublesFlops * iteration / Atrip::chrono["iterations"].count()
        << "GF)"
-        << " ===========================\n";
+        << " :net " << networkSend
        << " :loc " << localSend
        << " :loc/net " << (double(localSend) / double(networkSend))
        //<< " ===========================\n"
        << "\n";
      // PRINT TIMINGS
      if (in.chrono)
@ -415,6 +453,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
    }
    // TODO: remove this
    if (isFakeTuple(i)) {
      // fake iterations should also unwrap whatever they got
      WITH_RANK << iteration