Implement RANK_ROUND_ROBIN
This commit is contained in:
parent
7b617930a6
commit
cc4029a3f9
118
atrip.org
118
atrip.org
@ -786,22 +786,36 @@ rank.
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
|
||||||
#include <atrip/Slice.hpp>
|
#include <atrip/Slice.hpp>
|
||||||
|
#include <atrip/Tuples.hpp>
|
||||||
|
|
||||||
namespace atrip {
|
namespace atrip {
|
||||||
struct RankMap {
|
struct RankMap {
|
||||||
|
|
||||||
|
static bool RANK_ROUND_ROBIN;
|
||||||
std::vector<size_t> const lengths;
|
std::vector<size_t> const lengths;
|
||||||
size_t const np, size;
|
size_t const np, size;
|
||||||
|
ClusterInfo const clusterInfo;
|
||||||
|
|
||||||
RankMap(std::vector<size_t> lens, size_t np_)
|
RankMap(std::vector<size_t> lens, size_t np_, MPI_Comm comm)
|
||||||
: lengths(lens)
|
: lengths(lens)
|
||||||
, np(np_)
|
, np(np_)
|
||||||
, size(std::accumulate(lengths.begin(), lengths.end(),
|
, size(std::accumulate(lengths.begin(), lengths.end(),
|
||||||
1UL, std::multiplies<size_t>()))
|
1UL, std::multiplies<size_t>()))
|
||||||
|
, clusterInfo(getClusterInfo(comm))
|
||||||
{ assert(lengths.size() <= 2); }
|
{ assert(lengths.size() <= 2); }
|
||||||
|
|
||||||
size_t find(Slice::Location const& p) const noexcept {
|
size_t find(Slice::Location const& p) const noexcept {
|
||||||
return p.source * np + p.rank;
|
if (RANK_ROUND_ROBIN) {
|
||||||
|
return p.source * np + p.rank;
|
||||||
|
} else {
|
||||||
|
const size_t
|
||||||
|
rankPosition = p.source * clusterInfo.ranksPerNode
|
||||||
|
+ clusterInfo.rankInfos[p.rank].localRank
|
||||||
|
;
|
||||||
|
return rankPosition * clusterInfo.nNodes
|
||||||
|
+ clusterInfo.rankInfos[p.rank].nodeId
|
||||||
|
;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t nSources() const noexcept {
|
size_t nSources() const noexcept {
|
||||||
@ -821,8 +835,9 @@ namespace atrip {
|
|||||||
}
|
}
|
||||||
|
|
||||||
Slice::Location
|
Slice::Location
|
||||||
find(ABCTuple const& abc, Slice::Type sliceType) const noexcept {
|
find(ABCTuple const& abc, Slice::Type sliceType) const {
|
||||||
// tuple = {11, 8} when abc = {11, 8, 9} and sliceType = AB
|
// tuple = {11, 8} when abc = {11, 8, 9} and sliceType = AB
|
||||||
|
// tuple = {11, 0} when abc = {11, 8, 9} and sliceType = A
|
||||||
const auto tuple = Slice::subtupleBySlice(abc, sliceType);
|
const auto tuple = Slice::subtupleBySlice(abc, sliceType);
|
||||||
|
|
||||||
const size_t index
|
const size_t index
|
||||||
@ -830,9 +845,50 @@ namespace atrip {
|
|||||||
+ tuple[1] * (lengths.size() > 1 ? lengths[0] : 0)
|
+ tuple[1] * (lengths.size() > 1 ? lengths[0] : 0)
|
||||||
;
|
;
|
||||||
|
|
||||||
|
size_t rank, source;
|
||||||
|
|
||||||
|
if (RANK_ROUND_ROBIN) {
|
||||||
|
|
||||||
|
rank = index % np;
|
||||||
|
source = index / np;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
|
||||||
|
size_t const
|
||||||
|
|
||||||
|
// the node that will be assigned to
|
||||||
|
nodeId = index % clusterInfo.nNodes
|
||||||
|
|
||||||
|
// how many times it has been assigned to the node
|
||||||
|
, s_n = index / clusterInfo.nNodes
|
||||||
|
|
||||||
|
// which local rank in the node should be
|
||||||
|
, localRank = s_n % clusterInfo.ranksPerNode
|
||||||
|
|
||||||
|
// and the local source (how many times we chose this local rank)
|
||||||
|
, localSource = s_n / clusterInfo.ranksPerNode
|
||||||
|
;
|
||||||
|
|
||||||
|
source = localSource;
|
||||||
|
// find the localRank-th entry in clusterInfo
|
||||||
|
auto const& it =
|
||||||
|
std::find_if(clusterInfo.rankInfos.begin(),
|
||||||
|
clusterInfo.rankInfos.end(),
|
||||||
|
[nodeId, localRank](RankInfo const& ri) {
|
||||||
|
return ri.nodeId == nodeId
|
||||||
|
&& ri.localRank == localRank
|
||||||
|
;
|
||||||
|
});
|
||||||
|
if (it == clusterInfo.rankInfos.end()) {
|
||||||
|
throw "FATAL! Error in node distribution of the slices";
|
||||||
|
}
|
||||||
|
rank = std::distance(clusterInfo.rankInfos.begin(), it);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
return
|
return
|
||||||
{ index % np
|
{ rank
|
||||||
, index / np
|
, source
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1022,8 +1078,14 @@ namespace atrip {
|
|||||||
if (blank.info.state == Slice::SelfSufficient) {
|
if (blank.info.state == Slice::SelfSufficient) {
|
||||||
blank.data = sources[from.source].data();
|
blank.data = sources[from.source].data();
|
||||||
} else {
|
} else {
|
||||||
if (freePointers.size() == 0)
|
if (freePointers.size() == 0) {
|
||||||
throw std::domain_error("No more free pointers!");
|
std::stringstream stream;
|
||||||
|
stream << "No more free pointers "
|
||||||
|
<< "for type " << type
|
||||||
|
<< " and name " << name
|
||||||
|
;
|
||||||
|
throw std::domain_error(stream.str());
|
||||||
|
}
|
||||||
auto dataPointer = freePointers.begin();
|
auto dataPointer = freePointers.begin();
|
||||||
freePointers.erase(dataPointer);
|
freePointers.erase(dataPointer);
|
||||||
blank.data = *dataPointer;
|
blank.data = *dataPointer;
|
||||||
@ -1176,7 +1238,7 @@ namespace atrip {
|
|||||||
, Slice::Name name_
|
, Slice::Name name_
|
||||||
, size_t nSliceBuffers = 4
|
, size_t nSliceBuffers = 4
|
||||||
)
|
)
|
||||||
: rankMap(paramLength, np)
|
: rankMap(paramLength, np, global_world)
|
||||||
, world(child_world)
|
, world(child_world)
|
||||||
, universe(global_world)
|
, universe(global_world)
|
||||||
, sliceLength(sliceLength_)
|
, sliceLength(sliceLength_)
|
||||||
@ -1513,9 +1575,19 @@ struct RankInfo {
|
|||||||
const size_t ranksPerNode;
|
const size_t ranksPerNode;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename A>
|
||||||
|
std::vector<A> unique(std::vector<A> const &xs) {
|
||||||
|
auto result = xs;
|
||||||
|
std::sort(result.begin(), result.end());
|
||||||
|
auto const& last = std::unique(result.begin(), result.end());
|
||||||
|
result.erase(last, result.end());
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<RankInfo>
|
std::vector<RankInfo>
|
||||||
getNodeInfos(std::vector<string> const& nodeNames) {
|
getNodeInfos(std::vector<string> const& nodeNames) {
|
||||||
std::vector<RankInfo> result;
|
std::vector<RankInfo> result;
|
||||||
|
// TODO: replace it with unique call
|
||||||
auto uniqueNames = nodeNames;
|
auto uniqueNames = nodeNames;
|
||||||
{
|
{
|
||||||
std::sort(uniqueNames.begin(), uniqueNames.end());
|
std::sort(uniqueNames.begin(), uniqueNames.end());
|
||||||
@ -1541,6 +1613,25 @@ getNodeInfos(std::vector<string> const& nodeNames) {
|
|||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ClusterInfo {
|
||||||
|
const size_t nNodes, np, ranksPerNode;
|
||||||
|
const std::vector<RankInfo> rankInfos;
|
||||||
|
};
|
||||||
|
|
||||||
|
ClusterInfo
|
||||||
|
getClusterInfo(MPI_Comm comm) {
|
||||||
|
auto const names = getNodeNames(comm);
|
||||||
|
auto const rankInfos = getNodeInfos(names);
|
||||||
|
|
||||||
|
return ClusterInfo {
|
||||||
|
unique(names).size(),
|
||||||
|
names.size(),
|
||||||
|
rankInfos[0].ranksPerNode,
|
||||||
|
rankInfos
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
*** Naive list
|
*** Naive list
|
||||||
@ -2740,6 +2831,7 @@ namespace atrip {
|
|||||||
GROUP_AND_SORT,
|
GROUP_AND_SORT,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
ADD_ATTRIBUTE(bool, rankRoundRobin, false)
|
||||||
ADD_ATTRIBUTE(bool, chrono, false)
|
ADD_ATTRIBUTE(bool, chrono, false)
|
||||||
ADD_ATTRIBUTE(bool, barrier, false)
|
ADD_ATTRIBUTE(bool, barrier, false)
|
||||||
ADD_ATTRIBUTE(int, maxIterations, 0)
|
ADD_ATTRIBUTE(int, maxIterations, 0)
|
||||||
@ -2773,6 +2865,7 @@ namespace atrip {
|
|||||||
|
|
||||||
using namespace atrip;
|
using namespace atrip;
|
||||||
|
|
||||||
|
bool RankMap::RANK_ROUND_ROBIN;
|
||||||
int Atrip::rank;
|
int Atrip::rank;
|
||||||
int Atrip::np;
|
int Atrip::np;
|
||||||
Timings Atrip::chrono;
|
Timings Atrip::chrono;
|
||||||
@ -2807,6 +2900,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
in.ea->read_all(epsa.data());
|
in.ea->read_all(epsa.data());
|
||||||
in.Tph->read_all(Tai.data());
|
in.Tph->read_all(Tai.data());
|
||||||
|
|
||||||
|
RankMap::RANK_ROUND_ROBIN = in.rankRoundRobin;
|
||||||
|
if (RankMap::RANK_ROUND_ROBIN) {
|
||||||
|
LOG(0,"Atrip") << "Doing rank round robin slices distribution" << "\n";
|
||||||
|
} else {
|
||||||
|
LOG(0,"Atrip")
|
||||||
|
<< "Doing node > local rank round robin slices distribution" << "\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// COMMUNICATOR CONSTRUCTION ========================================={{{1
|
// COMMUNICATOR CONSTRUCTION ========================================={{{1
|
||||||
//
|
//
|
||||||
// Construct a new communicator living only on a single rank
|
// Construct a new communicator living only on a single rank
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user