Implement RANK_ROUND_ROBIN
This commit is contained in:
parent
7b617930a6
commit
cc4029a3f9
118
atrip.org
118
atrip.org
@ -786,22 +786,36 @@ rank.
|
||||
#include <algorithm>
|
||||
|
||||
#include <atrip/Slice.hpp>
|
||||
#include <atrip/Tuples.hpp>
|
||||
|
||||
namespace atrip {
|
||||
struct RankMap {
|
||||
|
||||
static bool RANK_ROUND_ROBIN;
|
||||
std::vector<size_t> const lengths;
|
||||
size_t const np, size;
|
||||
ClusterInfo const clusterInfo;
|
||||
|
||||
RankMap(std::vector<size_t> lens, size_t np_)
|
||||
RankMap(std::vector<size_t> lens, size_t np_, MPI_Comm comm)
|
||||
: lengths(lens)
|
||||
, np(np_)
|
||||
, size(std::accumulate(lengths.begin(), lengths.end(),
|
||||
1UL, std::multiplies<size_t>()))
|
||||
, clusterInfo(getClusterInfo(comm))
|
||||
{ assert(lengths.size() <= 2); }
|
||||
|
||||
/**
 * Flatten a slice location (rank, source) into a single position.
 *
 * - With RANK_ROUND_ROBIN set, the position is `source * np + rank`.
 * - Otherwise the position is derived from the node id and the local
 *   rank that clusterInfo records for `p.rank`:
 *     (source * ranksPerNode + localRank) * nNodes + nodeId
 *
 * NOTE(review): `clusterInfo.rankInfos[p.rank]` is an unchecked index;
 * this assumes p.rank < rankInfos.size() -- confirm at the call sites.
 */
size_t find(Slice::Location const& p) const noexcept {
  if (RANK_ROUND_ROBIN) {
    return p.source * np + p.rank;
  } else {
    const size_t
      rankPosition = p.source * clusterInfo.ranksPerNode
                   + clusterInfo.rankInfos[p.rank].localRank
                   ;
    return rankPosition * clusterInfo.nNodes
         + clusterInfo.rankInfos[p.rank].nodeId
         ;
  }
}
|
||||
|
||||
size_t nSources() const noexcept {
|
||||
@ -821,8 +835,9 @@ namespace atrip {
|
||||
}
|
||||
|
||||
Slice::Location
|
||||
find(ABCTuple const& abc, Slice::Type sliceType) const noexcept {
|
||||
find(ABCTuple const& abc, Slice::Type sliceType) const {
|
||||
// tuple = {11, 8} when abc = {11, 8, 9} and sliceType = AB
|
||||
// tuple = {11, 0} when abc = {11, 8, 9} and sliceType = A
|
||||
const auto tuple = Slice::subtupleBySlice(abc, sliceType);
|
||||
|
||||
const size_t index
|
||||
@ -830,9 +845,50 @@ namespace atrip {
|
||||
+ tuple[1] * (lengths.size() > 1 ? lengths[0] : 0)
|
||||
;
|
||||
|
||||
size_t rank, source;
|
||||
|
||||
if (RANK_ROUND_ROBIN) {
|
||||
|
||||
rank = index % np;
|
||||
source = index / np;
|
||||
|
||||
} else {
|
||||
|
||||
size_t const
|
||||
|
||||
// the node that will be assigned to
|
||||
nodeId = index % clusterInfo.nNodes
|
||||
|
||||
// how many times it has been assigned to the node
|
||||
, s_n = index / clusterInfo.nNodes
|
||||
|
||||
// which local rank in the node should be
|
||||
, localRank = s_n % clusterInfo.ranksPerNode
|
||||
|
||||
// and the local source (how many times we chose this local rank)
|
||||
, localSource = s_n / clusterInfo.ranksPerNode
|
||||
;
|
||||
|
||||
source = localSource;
|
||||
// find the localRank-th entry in clusterInfo
|
||||
auto const& it =
|
||||
std::find_if(clusterInfo.rankInfos.begin(),
|
||||
clusterInfo.rankInfos.end(),
|
||||
[nodeId, localRank](RankInfo const& ri) {
|
||||
return ri.nodeId == nodeId
|
||||
&& ri.localRank == localRank
|
||||
;
|
||||
});
|
||||
if (it == clusterInfo.rankInfos.end()) {
|
||||
throw "FATAL! Error in node distribution of the slices";
|
||||
}
|
||||
rank = std::distance(clusterInfo.rankInfos.begin(), it);
|
||||
|
||||
}
|
||||
|
||||
return
|
||||
{ index % np
|
||||
, index / np
|
||||
{ rank
|
||||
, source
|
||||
};
|
||||
}
|
||||
|
||||
@ -1022,8 +1078,14 @@ namespace atrip {
|
||||
if (blank.info.state == Slice::SelfSufficient) {
|
||||
blank.data = sources[from.source].data();
|
||||
} else {
|
||||
if (freePointers.size() == 0)
|
||||
throw std::domain_error("No more free pointers!");
|
||||
if (freePointers.size() == 0) {
|
||||
std::stringstream stream;
|
||||
stream << "No more free pointers "
|
||||
<< "for type " << type
|
||||
<< " and name " << name
|
||||
;
|
||||
throw std::domain_error(stream.str());
|
||||
}
|
||||
auto dataPointer = freePointers.begin();
|
||||
freePointers.erase(dataPointer);
|
||||
blank.data = *dataPointer;
|
||||
@ -1176,7 +1238,7 @@ namespace atrip {
|
||||
, Slice::Name name_
|
||||
, size_t nSliceBuffers = 4
|
||||
)
|
||||
: rankMap(paramLength, np)
|
||||
: rankMap(paramLength, np, global_world)
|
||||
, world(child_world)
|
||||
, universe(global_world)
|
||||
, sliceLength(sliceLength_)
|
||||
@ -1513,9 +1575,19 @@ struct RankInfo {
|
||||
const size_t ranksPerNode;
|
||||
};
|
||||
|
||||
/**
 * Return the distinct elements of `xs`, sorted in ascending order.
 *
 * The input vector is not modified; the work is done on a copy.
 * `A` must be usable with std::sort (operator<) and std::unique
 * (operator==).
 */
template <typename A>
std::vector<A> unique(std::vector<A> const &xs) {
  auto result = xs;
  std::sort(result.begin(), result.end());
  // erase-unique idiom: std::unique compacts the distinct values to the
  // front and returns the new logical end; erase drops the leftover tail.
  result.erase(std::unique(result.begin(), result.end()), result.end());
  return result;
}
|
||||
|
||||
std::vector<RankInfo>
|
||||
getNodeInfos(std::vector<string> const& nodeNames) {
|
||||
std::vector<RankInfo> result;
|
||||
// TODO: replace it with unique call
|
||||
auto uniqueNames = nodeNames;
|
||||
{
|
||||
std::sort(uniqueNames.begin(), uniqueNames.end());
|
||||
@ -1541,6 +1613,25 @@ getNodeInfos(std::vector<string> const& nodeNames) {
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
struct ClusterInfo {
|
||||
const size_t nNodes, np, ranksPerNode;
|
||||
const std::vector<RankInfo> rankInfos;
|
||||
};
|
||||
|
||||
// Collect the node layout of `comm` into a ClusterInfo: the node names
// come from getNodeNames(), the RankInfo entries from getNodeInfos().
// NOTE(review): reads perRank[0] unchecked -- assumes getNodeInfos()
// returns at least one entry; confirm.
ClusterInfo
getClusterInfo(MPI_Comm comm) {
  auto const nodeNames = getNodeNames(comm);
  auto const perRank = getNodeInfos(nodeNames);

  // Computed in the same order as the aggregate's members.
  size_t const nodeCount = unique(nodeNames).size();
  size_t const rankCount = nodeNames.size();
  size_t const perNode = perRank[0].ranksPerNode;

  return ClusterInfo { nodeCount, rankCount, perNode, perRank };
}
|
||||
#+end_src
|
||||
|
||||
*** Naive list
|
||||
@ -2740,6 +2831,7 @@ namespace atrip {
|
||||
GROUP_AND_SORT,
|
||||
};
|
||||
|
||||
ADD_ATTRIBUTE(bool, rankRoundRobin, false)
|
||||
ADD_ATTRIBUTE(bool, chrono, false)
|
||||
ADD_ATTRIBUTE(bool, barrier, false)
|
||||
ADD_ATTRIBUTE(int, maxIterations, 0)
|
||||
@ -2773,6 +2865,7 @@ namespace atrip {
|
||||
|
||||
using namespace atrip;
|
||||
|
||||
bool RankMap::RANK_ROUND_ROBIN;
|
||||
int Atrip::rank;
|
||||
int Atrip::np;
|
||||
Timings Atrip::chrono;
|
||||
@ -2807,6 +2900,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
in.ea->read_all(epsa.data());
|
||||
in.Tph->read_all(Tai.data());
|
||||
|
||||
RankMap::RANK_ROUND_ROBIN = in.rankRoundRobin;
|
||||
if (RankMap::RANK_ROUND_ROBIN) {
|
||||
LOG(0,"Atrip") << "Doing rank round robin slices distribution" << "\n";
|
||||
} else {
|
||||
LOG(0,"Atrip")
|
||||
<< "Doing node > local rank round robin slices distribution" << "\n";
|
||||
}
|
||||
|
||||
|
||||
// COMMUNICATOR CONSTRUCTION ========================================={{{1
|
||||
//
|
||||
// Construct a new communicator living only on a single rank
|
||||
|
||||
Loading…
Reference in New Issue
Block a user