diff --git a/atrip.org b/atrip.org index 1c07750..6b4f2c8 100644 --- a/atrip.org +++ b/atrip.org @@ -786,22 +786,36 @@ rank. #include #include +#include namespace atrip { struct RankMap { + static bool RANK_ROUND_ROBIN; std::vector const lengths; size_t const np, size; + ClusterInfo const clusterInfo; - RankMap(std::vector lens, size_t np_) + RankMap(std::vector lens, size_t np_, MPI_Comm comm) : lengths(lens) , np(np_) , size(std::accumulate(lengths.begin(), lengths.end(), 1UL, std::multiplies())) + , clusterInfo(getClusterInfo(comm)) { assert(lengths.size() <= 2); } size_t find(Slice::Location const& p) const noexcept { - return p.source * np + p.rank; + if (RANK_ROUND_ROBIN) { + return p.source * np + p.rank; + } else { + const size_t + rankPosition = p.source * clusterInfo.ranksPerNode + + clusterInfo.rankInfos[p.rank].localRank + ; + return rankPosition * clusterInfo.nNodes + + clusterInfo.rankInfos[p.rank].nodeId + ; + } } size_t nSources() const noexcept { @@ -821,8 +835,9 @@ namespace atrip { } Slice::Location - find(ABCTuple const& abc, Slice::Type sliceType) const noexcept { + find(ABCTuple const& abc, Slice::Type sliceType) const { // tuple = {11, 8} when abc = {11, 8, 9} and sliceType = AB + // tuple = {11, 0} when abc = {11, 8, 9} and sliceType = A const auto tuple = Slice::subtupleBySlice(abc, sliceType); const size_t index @@ -830,9 +845,50 @@ namespace atrip { + tuple[1] * (lengths.size() > 1 ? lengths[0] : 0) ; + size_t rank, source; + + if (RANK_ROUND_ROBIN) { + + rank = index % np; + source = index / np; + + } else { + + size_t const + + // the node that will be assigned to + nodeId = index % clusterInfo.nNodes + + // how many times it has been assigned to the node + , s_n = index / clusterInfo.nNodes + + // which local rank in the node should be + , localRank = s_n % clusterInfo.ranksPerNode + + // and the local source (how many times we chose this local rank) + , localSource = s_n / clusterInfo.ranksPerNode + ; + + source = localSource; + // find the localRank-th entry in clusterInfo + auto const& it = + std::find_if(clusterInfo.rankInfos.begin(), + clusterInfo.rankInfos.end(), + [nodeId, localRank](RankInfo const& ri) { + return ri.nodeId == nodeId + && ri.localRank == localRank + ; + }); + if (it == clusterInfo.rankInfos.end()) { + throw "FATAL! Error in node distribution of the slices"; + } + rank = std::distance(clusterInfo.rankInfos.begin(), it); + + } + return - { index % np - , index / np + { rank + , source }; } @@ -1022,8 +1078,14 @@ namespace atrip { if (blank.info.state == Slice::SelfSufficient) { blank.data = sources[from.source].data(); } else { - if (freePointers.size() == 0) - throw std::domain_error("No more free pointers!"); + if (freePointers.size() == 0) { + std::stringstream stream; + stream << "No more free pointers " + << "for type " << type + << " and name " << name + ; + throw std::domain_error(stream.str()); + } auto dataPointer = freePointers.begin(); freePointers.erase(dataPointer); blank.data = *dataPointer; @@ -1176,7 +1238,7 @@ namespace atrip { , Slice::Name name_ , size_t nSliceBuffers = 4 ) - : rankMap(paramLength, np) + : rankMap(paramLength, np, global_world) , world(child_world) , universe(global_world) , sliceLength(sliceLength_) @@ -1513,9 +1575,19 @@ struct RankInfo { const size_t ranksPerNode; }; +template +std::vector unique(std::vector const &xs) { + auto result = xs; + std::sort(result.begin(), result.end()); + auto const& last = std::unique(result.begin(), result.end()); + result.erase(last, result.end()); + return result; +} + std::vector getNodeInfos(std::vector const& nodeNames) { std::vector result; + // TODO: replace it with unique call auto uniqueNames = nodeNames; { std::sort(uniqueNames.begin(), uniqueNames.end()); @@ -1541,6 +1613,25 @@ getNodeInfos(std::vector const& nodeNames) { } return result; } + +struct ClusterInfo { + const size_t nNodes, np, ranksPerNode; + const std::vector rankInfos; +}; + +ClusterInfo +getClusterInfo(MPI_Comm comm) { + auto const names = getNodeNames(comm); + auto const rankInfos = getNodeInfos(names); + + return ClusterInfo { + unique(names).size(), + names.size(), + rankInfos[0].ranksPerNode, + rankInfos + }; + +} #+end_src *** Naive list @@ -2740,6 +2831,7 @@ namespace atrip { GROUP_AND_SORT, }; + ADD_ATTRIBUTE(bool, rankRoundRobin, false) ADD_ATTRIBUTE(bool, chrono, false) ADD_ATTRIBUTE(bool, barrier, false) ADD_ATTRIBUTE(int, maxIterations, 0) @@ -2773,6 +2865,7 @@ namespace atrip { using namespace atrip; +bool RankMap::RANK_ROUND_ROBIN; int Atrip::rank; int Atrip::np; Timings Atrip::chrono; @@ -2807,6 +2900,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { in.ea->read_all(epsa.data()); in.Tph->read_all(Tai.data()); + RankMap::RANK_ROUND_ROBIN = in.rankRoundRobin; + if (RankMap::RANK_ROUND_ROBIN) { + LOG(0,"Atrip") << "Doing rank round robin slices distribution" << "\n"; + } else { + LOG(0,"Atrip") + << "Doing node > local rank round robin slices distribution" << "\n"; + } + + // COMMUNICATOR CONSTRUCTION ========================================={{{1 // // Construct a new communicator living only on a single rank