Implement RANK_ROUND_ROBIN

This commit is contained in:
Alejandro Gallo 2021-11-08 11:48:06 +01:00
parent 7b617930a6
commit cc4029a3f9

118
atrip.org
View File

@ -786,22 +786,36 @@ rank.
#include <algorithm> #include <algorithm>
#include <atrip/Slice.hpp> #include <atrip/Slice.hpp>
#include <atrip/Tuples.hpp>
namespace atrip { namespace atrip {
struct RankMap { struct RankMap {
static bool RANK_ROUND_ROBIN;
std::vector<size_t> const lengths; std::vector<size_t> const lengths;
size_t const np, size; size_t const np, size;
ClusterInfo const clusterInfo;
RankMap(std::vector<size_t> lens, size_t np_) RankMap(std::vector<size_t> lens, size_t np_, MPI_Comm comm)
: lengths(lens) : lengths(lens)
, np(np_) , np(np_)
, size(std::accumulate(lengths.begin(), lengths.end(), , size(std::accumulate(lengths.begin(), lengths.end(),
1UL, std::multiplies<size_t>())) 1UL, std::multiplies<size_t>()))
, clusterInfo(getClusterInfo(comm))
{ assert(lengths.size() <= 2); } { assert(lengths.size() <= 2); }
size_t find(Slice::Location const& p) const noexcept { size_t find(Slice::Location const& p) const noexcept {
return p.source * np + p.rank; if (RANK_ROUND_ROBIN) {
return p.source * np + p.rank;
} else {
const size_t
rankPosition = p.source * clusterInfo.ranksPerNode
+ clusterInfo.rankInfos[p.rank].localRank
;
return rankPosition * clusterInfo.nNodes
+ clusterInfo.rankInfos[p.rank].nodeId
;
}
} }
size_t nSources() const noexcept { size_t nSources() const noexcept {
@ -821,8 +835,9 @@ namespace atrip {
} }
Slice::Location Slice::Location
find(ABCTuple const& abc, Slice::Type sliceType) const noexcept { find(ABCTuple const& abc, Slice::Type sliceType) const {
// tuple = {11, 8} when abc = {11, 8, 9} and sliceType = AB // tuple = {11, 8} when abc = {11, 8, 9} and sliceType = AB
// tuple = {11, 0} when abc = {11, 8, 9} and sliceType = A
const auto tuple = Slice::subtupleBySlice(abc, sliceType); const auto tuple = Slice::subtupleBySlice(abc, sliceType);
const size_t index const size_t index
@ -830,9 +845,50 @@ namespace atrip {
+ tuple[1] * (lengths.size() > 1 ? lengths[0] : 0) + tuple[1] * (lengths.size() > 1 ? lengths[0] : 0)
; ;
size_t rank, source;
if (RANK_ROUND_ROBIN) {
rank = index % np;
source = index / np;
} else {
size_t const
// the node that will be assigned to
nodeId = index % clusterInfo.nNodes
// how many times it has been assigned to the node
, s_n = index / clusterInfo.nNodes
// which local rank in the node should be
, localRank = s_n % clusterInfo.ranksPerNode
// and the local source (how many times we chose this local rank)
, localSource = s_n / clusterInfo.ranksPerNode
;
source = localSource;
// find the localRank-th entry in clusterInfo
auto const& it =
std::find_if(clusterInfo.rankInfos.begin(),
clusterInfo.rankInfos.end(),
[nodeId, localRank](RankInfo const& ri) {
return ri.nodeId == nodeId
&& ri.localRank == localRank
;
});
if (it == clusterInfo.rankInfos.end()) {
throw "FATAL! Error in node distribution of the slices";
}
rank = std::distance(clusterInfo.rankInfos.begin(), it);
}
return return
{ index % np { rank
, index / np , source
}; };
} }
@ -1022,8 +1078,14 @@ namespace atrip {
if (blank.info.state == Slice::SelfSufficient) { if (blank.info.state == Slice::SelfSufficient) {
blank.data = sources[from.source].data(); blank.data = sources[from.source].data();
} else { } else {
if (freePointers.size() == 0) if (freePointers.size() == 0) {
throw std::domain_error("No more free pointers!"); std::stringstream stream;
stream << "No more free pointers "
<< "for type " << type
<< " and name " << name
;
throw std::domain_error(stream.str());
}
auto dataPointer = freePointers.begin(); auto dataPointer = freePointers.begin();
freePointers.erase(dataPointer); freePointers.erase(dataPointer);
blank.data = *dataPointer; blank.data = *dataPointer;
@ -1176,7 +1238,7 @@ namespace atrip {
, Slice::Name name_ , Slice::Name name_
, size_t nSliceBuffers = 4 , size_t nSliceBuffers = 4
) )
: rankMap(paramLength, np) : rankMap(paramLength, np, global_world)
, world(child_world) , world(child_world)
, universe(global_world) , universe(global_world)
, sliceLength(sliceLength_) , sliceLength(sliceLength_)
@ -1513,9 +1575,19 @@ struct RankInfo {
const size_t ranksPerNode; const size_t ranksPerNode;
}; };
// Return a sorted copy of `xs` in which every value appears exactly once.
// The argument itself is left untouched.
// Requires `A` to be orderable with operator< (for the sort) and
// comparable with operator== (for the duplicate removal).
template <typename A>
std::vector<A> unique(std::vector<A> const &xs) {
  std::vector<A> sorted(xs.begin(), xs.end());
  // std::unique only collapses *adjacent* duplicates, hence the sort first
  std::sort(sorted.begin(), sorted.end());
  sorted.erase(std::unique(sorted.begin(), sorted.end()), sorted.end());
  return sorted;
}
std::vector<RankInfo> std::vector<RankInfo>
getNodeInfos(std::vector<string> const& nodeNames) { getNodeInfos(std::vector<string> const& nodeNames) {
std::vector<RankInfo> result; std::vector<RankInfo> result;
// TODO: replace it with unique call
auto uniqueNames = nodeNames; auto uniqueNames = nodeNames;
{ {
std::sort(uniqueNames.begin(), uniqueNames.end()); std::sort(uniqueNames.begin(), uniqueNames.end());
@ -1541,6 +1613,25 @@ getNodeInfos(std::vector<string> const& nodeNames) {
} }
return result; return result;
} }
// Description of how the ranks of a communicator are spread over nodes.
// It is brace-initialised by getClusterInfo, so the declaration order
// below (nNodes, np, ranksPerNode, rankInfos) must be preserved.
struct ClusterInfo {
  // number of distinct node names found in the communicator
  size_t const nNodes;
  // total number of entries returned by getNodeNames
  // (presumably one per rank -- confirm against getNodeNames)
  size_t const np;
  // ranks per node, taken from the first RankInfo entry
  size_t const ranksPerNode;
  // per-rank node information; RankMap indexes this vector by rank
  std::vector<RankInfo> const rankInfos;
};
// Build the ClusterInfo for the communicator `comm` from the node names
// returned by getNodeNames and the per-rank data returned by getNodeInfos.
ClusterInfo
getClusterInfo(MPI_Comm comm) {
  auto const nodeNames = getNodeNames(comm);
  auto const perRankInfos = getNodeInfos(nodeNames);

  // distinct names <=> distinct nodes
  size_t const nodeCount = unique(nodeNames).size();
  size_t const entryCount = nodeNames.size();
  // NOTE(review): this reads the first entry only, so it assumes that
  // perRankInfos is not empty and that every node hosts the same number
  // of ranks -- confirm against getNodeInfos.
  size_t const ranksOnNode = perRankInfos[0].ranksPerNode;

  return ClusterInfo{nodeCount, entryCount, ranksOnNode, perRankInfos};
}
#+end_src #+end_src
*** Naive list *** Naive list
@ -2740,6 +2831,7 @@ namespace atrip {
GROUP_AND_SORT, GROUP_AND_SORT,
}; };
ADD_ATTRIBUTE(bool, rankRoundRobin, false)
ADD_ATTRIBUTE(bool, chrono, false) ADD_ATTRIBUTE(bool, chrono, false)
ADD_ATTRIBUTE(bool, barrier, false) ADD_ATTRIBUTE(bool, barrier, false)
ADD_ATTRIBUTE(int, maxIterations, 0) ADD_ATTRIBUTE(int, maxIterations, 0)
@ -2773,6 +2865,7 @@ namespace atrip {
using namespace atrip; using namespace atrip;
bool RankMap::RANK_ROUND_ROBIN;
int Atrip::rank; int Atrip::rank;
int Atrip::np; int Atrip::np;
Timings Atrip::chrono; Timings Atrip::chrono;
@ -2807,6 +2900,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
in.ea->read_all(epsa.data()); in.ea->read_all(epsa.data());
in.Tph->read_all(Tai.data()); in.Tph->read_all(Tai.data());
RankMap::RANK_ROUND_ROBIN = in.rankRoundRobin;
if (RankMap::RANK_ROUND_ROBIN) {
LOG(0,"Atrip") << "Doing rank round robin slices distribution" << "\n";
} else {
LOG(0,"Atrip")
<< "Doing node > local rank round robin slices distribution" << "\n";
}
// COMMUNICATOR CONSTRUCTION ========================================={{{1 // COMMUNICATOR CONSTRUCTION ========================================={{{1
// //
// Construct a new communicator living only on a single rank // Construct a new communicator living only on a single rank