Add tangled sources

This commit is contained in:
Alejandro Gallo 2022-02-18 12:54:59 +01:00
parent 3dc38a43b5
commit bbbfb30c6f
10 changed files with 1298 additions and 727 deletions

View File

@ -7,12 +7,22 @@
#include <ctf.hpp> #include <ctf.hpp>
#include <atrip/Utils.hpp>
// Expands, inside the body of `Input`, to a data member NAME of type TYPE
// initialised to DEFAULT plus a chainable setter `with_NAME(value)` that
// stores the value and returns the `Input` it was called on.
#define ADD_ATTRIBUTE(TYPE, NAME, DEFAULT)          \
  TYPE NAME = DEFAULT;                              \
  Input& with_ ## NAME(TYPE value) {                \
    this->NAME = value;                             \
    return *this;                                   \
  }
namespace atrip { namespace atrip {
struct Atrip { struct Atrip {
static int rank; static int rank;
static int np; static int np;
static Timings chrono;
static void init(); static void init();
template <typename F=double> template <typename F=double>
@ -25,9 +35,6 @@ namespace atrip {
, *Vhhhp = nullptr , *Vhhhp = nullptr
, *Vppph = nullptr , *Vppph = nullptr
; ;
int maxIterations = 0, iterationMod = -1, percentageMod = -1;
bool barrier = false;
bool chrono = false;
Input& with_epsilon_i(CTF::Tensor<F> * t) { ei = t; return *this; } Input& with_epsilon_i(CTF::Tensor<F> * t) { ei = t; return *this; }
Input& with_epsilon_a(CTF::Tensor<F> * t) { ea = t; return *this; } Input& with_epsilon_a(CTF::Tensor<F> * t) { ea = t; return *this; }
Input& with_Tai(CTF::Tensor<F> * t) { Tph = t; return *this; } Input& with_Tai(CTF::Tensor<F> * t) { Tph = t; return *this; }
@ -35,11 +42,20 @@ namespace atrip {
Input& with_Vabij(CTF::Tensor<F> * t) { Vpphh = t; return *this; } Input& with_Vabij(CTF::Tensor<F> * t) { Vpphh = t; return *this; }
Input& with_Vijka(CTF::Tensor<F> * t) { Vhhhp = t; return *this; } Input& with_Vijka(CTF::Tensor<F> * t) { Vhhhp = t; return *this; }
Input& with_Vabci(CTF::Tensor<F> * t) { Vppph = t; return *this; } Input& with_Vabci(CTF::Tensor<F> * t) { Vppph = t; return *this; }
Input& with_maxIterations(int i) { maxIterations = i; return *this; }
Input& with_iterationMod(int i) { iterationMod = i; return *this; } enum TuplesDistribution {
Input& with_percentageMod(int i) { percentageMod = i; return *this; } NAIVE,
Input& with_barrier(bool i) { barrier = i; return *this; } GROUP_AND_SORT,
Input& with_chrono(bool i) { chrono = i; return *this; } };
ADD_ATTRIBUTE(bool, rankRoundRobin, false)
ADD_ATTRIBUTE(bool, chrono, false)
ADD_ATTRIBUTE(bool, barrier, false)
ADD_ATTRIBUTE(int, maxIterations, 0)
ADD_ATTRIBUTE(int, iterationMod, -1)
ADD_ATTRIBUTE(int, percentageMod, -1)
ADD_ATTRIBUTE(TuplesDistribution, tuplesDistribution, NAIVE)
}; };
struct Output { struct Output {

View File

@ -41,7 +41,6 @@
# define DBG(...) dbg(__VA_ARGS__) # define DBG(...) dbg(__VA_ARGS__)
#elif ATRIP_DEBUG == 2 #elif ATRIP_DEBUG == 2
# pragma message("WARNING: You have some debugging info for ABC triples") # pragma message("WARNING: You have some debugging info for ABC triples")
# include <dbg.h>
# define OCD_Barrier(com) # define OCD_Barrier(com)
# define WITH_OCD if (false) # define WITH_OCD if (false)
# define WITH_ROOT if (atrip::Atrip::rank == 0) # define WITH_ROOT if (atrip::Atrip::rank == 0)

View File

@ -40,12 +40,12 @@ namespace atrip {
, X(Zijk_[j + No*k + No*No*i]) , X(Zijk_[j + No*k + No*No*i])
, Y(Zijk_[k + No*i + No*No*j]) , Y(Zijk_[k + No*i + No*No*j])
, Z(Zijk_[k + No*j + No*No*i]) , Z(Zijk_[k + No*j + No*No*i])
, A(std::conj(Tijk_[i + No*j + No*No*k])) , A(maybeConjugate<F>(Tijk_[i + No*j + No*No*k]))
, B(std::conj(Tijk_[i + No*k + No*No*j])) , B(maybeConjugate<F>(Tijk_[i + No*k + No*No*j]))
, C(std::conj(Tijk_[j + No*i + No*No*k])) , C(maybeConjugate<F>(Tijk_[j + No*i + No*No*k]))
, D(std::conj(Tijk_[j + No*k + No*No*i])) , D(maybeConjugate<F>(Tijk_[j + No*k + No*No*i]))
, E(std::conj(Tijk_[k + No*i + No*No*j])) , E(maybeConjugate<F>(Tijk_[k + No*i + No*No*j]))
, F(std::conj(Tijk_[k + No*j + No*No*i])) , F(maybeConjugate<F>(Tijk_[k + No*j + No*No*i]))
, value , value
= 3.0 * ( A * U = 3.0 * ( A * U
+ B * V + B * V
@ -102,9 +102,9 @@ namespace atrip {
, U(Zijk_[i + No*j + No*No*k]) , U(Zijk_[i + No*j + No*No*k])
, V(Zijk_[j + No*k + No*No*i]) , V(Zijk_[j + No*k + No*No*i])
, W(Zijk_[k + No*i + No*No*j]) , W(Zijk_[k + No*i + No*No*j])
, A(std::conj(Tijk_[i + No*j + No*No*k])) , A(maybeConjugate<F>(Tijk_[i + No*j + No*No*k]))
, B(std::conj(Tijk_[j + No*k + No*No*i])) , B(maybeConjugate<F>(Tijk_[j + No*k + No*No*i]))
, C(std::conj(Tijk_[k + No*i + No*No*j])) , C(maybeConjugate<F>(Tijk_[k + No*i + No*No*j]))
, value , value
= F(3.0) * ( A * U = F(3.0) * ( A * U
+ B * V + B * V
@ -172,10 +172,8 @@ namespace atrip {
, F const* TBChh , F const* TBChh
// -- TIJK // -- TIJK
, F *Tijk , F *Tijk
, atrip::Timings& chrono
) { ) {
auto& t_reorder = chrono["doubles:reorder"];
const size_t a = abc[0], b = abc[1], c = abc[2] const size_t a = abc[0], b = abc[1], c = abc[2]
, NoNo = No*No, NoNv = No*Nv , NoNo = No*No, NoNv = No*Nv
; ;
@ -183,13 +181,13 @@ namespace atrip {
#if defined(ATRIP_USE_DGEMM) #if defined(ATRIP_USE_DGEMM)
#define _IJK_(i, j, k) i + j*No + k*NoNo #define _IJK_(i, j, k) i + j*No + k*NoNo
#define REORDER(__II, __JJ, __KK) \ #define REORDER(__II, __JJ, __KK) \
t_reorder.start(); \ WITH_CHRONO("doubles:reorder", \
for (size_t k = 0; k < No; k++) \ for (size_t k = 0; k < No; k++) \
for (size_t j = 0; j < No; j++) \ for (size_t j = 0; j < No; j++) \
for (size_t i = 0; i < No; i++) { \ for (size_t i = 0; i < No; i++) { \
Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \ Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \
} \ } \
t_reorder.stop(); )
#define DGEMM_PARTICLES(__A, __B) \ #define DGEMM_PARTICLES(__A, __B) \
atrip::xgemm<F>( "T" \ atrip::xgemm<F>( "T" \
, "N" \ , "N" \
@ -220,106 +218,100 @@ namespace atrip {
, _t_buffer.data() \ , _t_buffer.data() \
, (int const*)&NoNo \ , (int const*)&NoNo \
); );
#define MAYBE_CONJ(_conj, _buffer) \ #define MAYBE_CONJ(_conj, _buffer) \
if (traits::isComplex<F>()) { \ for (size_t __i = 0; __i < NoNoNo; ++__i) \
for (size_t __i = 0; __i < NoNoNo; ++__i) \ _conj[__i] = maybeConjugate<F>(_buffer[__i]); \
_conj[__i] = std::conj(_buffer[__i]); \
} else { \
for (size_t __i = 0; __i < NoNoNo; ++__i) \
_conj[__i] = _buffer[__i]; \
}
const size_t NoNoNo = No*NoNo; const size_t NoNoNo = No*NoNo;
std::vector<F> _t_buffer; std::vector<F> _t_buffer;
_t_buffer.reserve(NoNoNo); _t_buffer.reserve(NoNoNo);
F one{1.0}, m_one{-1.0}, zero{0.0}; F one{1.0}, m_one{-1.0}, zero{0.0};
t_reorder.start(); WITH_CHRONO("double:reorder",
for (size_t k = 0; k < NoNoNo; k++) { for (size_t k = 0; k < NoNoNo; k++) {
// zero the Tijk Tijk[k] = 0.0;
Tijk[k] = 0.0; })
}
t_reorder.stop();
chrono["doubles:holes"].start(); // TOMERGE: replace chronos
{ // Holes part ============================================================ WITH_CHRONO("doubles:holes",
{ // Holes part ========================================================
std::vector<F> _vhhh(NoNoNo); std::vector<F> _vhhh(NoNoNo);
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1 // VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
MAYBE_CONJ(_vhhh, VhhhC) MAYBE_CONJ(_vhhh, VhhhC)
chrono["doubles:holes:1"].start(); WITH_CHRONO("doubles:holes:1",
DGEMM_HOLES(_vhhh.data(), TABhh, "N") DGEMM_HOLES(_vhhh.data(), TABhh, "N")
REORDER(i, k, j) REORDER(i, k, j)
chrono["doubles:holes:1"].stop(); )
// VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0 // VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0
chrono["doubles:holes:2"].start(); WITH_CHRONO("doubles:holes:2",
DGEMM_HOLES(_vhhh.data(), TABhh, "T") DGEMM_HOLES(_vhhh.data(), TABhh, "T")
REORDER(j, k, i) REORDER(j, k, i)
chrono["doubles:holes:2"].stop(); )
// VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5 // VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5
MAYBE_CONJ(_vhhh, VhhhB) MAYBE_CONJ(_vhhh, VhhhB)
chrono["doubles:holes:3"].start(); WITH_CHRONO("doubles:holes:3",
DGEMM_HOLES(_vhhh.data(), TAChh, "N") DGEMM_HOLES(_vhhh.data(), TAChh, "N")
REORDER(i, j, k) REORDER(i, j, k)
chrono["doubles:holes:3"].stop(); )
// VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3 // VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3
chrono["doubles:holes:4"].start(); WITH_CHRONO("doubles:holes:4",
DGEMM_HOLES(_vhhh.data(), TAChh, "T") DGEMM_HOLES(_vhhh.data(), TAChh, "T")
REORDER(k, j, i) REORDER(k, j, i)
chrono["doubles:holes:4"].stop(); )
// VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1 // VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1
MAYBE_CONJ(_vhhh, VhhhA) MAYBE_CONJ(_vhhh, VhhhA)
chrono["doubles:holes:5"].start(); WITH_CHRONO("doubles:holes:5",
DGEMM_HOLES(_vhhh.data(), TBChh, "N") DGEMM_HOLES(_vhhh.data(), TBChh, "N")
REORDER(j, i, k) REORDER(j, i, k)
chrono["doubles:holes:5"].stop(); )
// VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4 // VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4
chrono["doubles:holes:6"].start(); WITH_CHRONO("doubles:holes:6",
DGEMM_HOLES(_vhhh.data(), TBChh, "T") DGEMM_HOLES(_vhhh.data(), TBChh, "T")
REORDER(k, i, j) REORDER(k, i, j)
chrono["doubles:holes:6"].stop(); )
} }
chrono["doubles:holes"].stop(); )
#undef MAYBE_CONJ #undef MAYBE_CONJ
chrono["doubles:particles"].start(); WITH_CHRONO("doubles:particles",
{ // Particle part ========================================================= { // Particle part =====================================================
// TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0 // TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0
chrono["doubles:particles:1"].start(); WITH_CHRONO("doubles:particles:1",
DGEMM_PARTICLES(TAphh, VBCph) DGEMM_PARTICLES(TAphh, VBCph)
REORDER(i, j, k) REORDER(i, j, k)
chrono["doubles:particles:1"].stop(); )
// TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3 // TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3
chrono["doubles:particles:2"].start(); WITH_CHRONO("doubles:particles:2",
DGEMM_PARTICLES(TAphh, VCBph) DGEMM_PARTICLES(TAphh, VCBph)
REORDER(i, k, j) REORDER(i, k, j)
chrono["doubles:particles:2"].stop(); )
// TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5 // TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5
chrono["doubles:particles:3"].start(); WITH_CHRONO("doubles:particles:3",
DGEMM_PARTICLES(TCphh, VABph) DGEMM_PARTICLES(TCphh, VABph)
REORDER(k, i, j) REORDER(k, i, j)
chrono["doubles:particles:3"].stop(); )
// TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2 // TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2
chrono["doubles:particles:4"].start(); WITH_CHRONO("doubles:particles:4",
DGEMM_PARTICLES(TCphh, VBAph) DGEMM_PARTICLES(TCphh, VBAph)
REORDER(k, j, i) REORDER(k, j, i)
chrono["doubles:particles:4"].stop(); )
// TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1 // TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1
chrono["doubles:particles:5"].start(); WITH_CHRONO("doubles:particles:5",
DGEMM_PARTICLES(TBphh, VACph) DGEMM_PARTICLES(TBphh, VACph)
REORDER(j, i, k) REORDER(j, i, k)
chrono["doubles:particles:5"].stop(); )
// TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4 // TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4
chrono["doubles:particles:6"].start(); WITH_CHRONO("doubles:particles:6",
DGEMM_PARTICLES(TBphh, VCAph) DGEMM_PARTICLES(TBphh, VCAph)
REORDER(j, k, i) REORDER(j, k, i)
chrono["doubles:particles:6"].stop(); )
} }
chrono["doubles:particles"].stop(); )
#undef REORDER #undef REORDER
#undef DGEMM_HOLES #undef DGEMM_HOLES

View File

@ -5,24 +5,38 @@
#include <algorithm> #include <algorithm>
#include <atrip/Slice.hpp> #include <atrip/Slice.hpp>
#include <atrip/Tuples.hpp>
namespace atrip { namespace atrip {
template <typename F=double> template <typename F=double>
struct RankMap { struct RankMap {
static bool RANK_ROUND_ROBIN;
std::vector<size_t> const lengths; std::vector<size_t> const lengths;
size_t const np, size; size_t const np, size;
ClusterInfo const clusterInfo;
RankMap(std::vector<size_t> lens, size_t np_) RankMap(std::vector<size_t> lens, size_t np_, MPI_Comm comm)
: lengths(lens) : lengths(lens)
, np(np_) , np(np_)
, size(std::accumulate(lengths.begin(), lengths.end(), , size(std::accumulate(lengths.begin(), lengths.end(),
1UL, std::multiplies<size_t>())) 1UL, std::multiplies<size_t>()))
, clusterInfo(getClusterInfo(comm))
{ assert(lengths.size() <= 2); } { assert(lengths.size() <= 2); }
size_t find(typename Slice<F>::Location const& p) const noexcept { size_t find(typename Slice<F>::Location const& p) const noexcept {
return p.source * np + p.rank; if (RANK_ROUND_ROBIN) {
return p.source * np + p.rank;
} else {
const size_t
rankPosition = p.source * clusterInfo.ranksPerNode
+ clusterInfo.rankInfos[p.rank].localRank
;
return rankPosition * clusterInfo.nNodes
+ clusterInfo.rankInfos[p.rank].nodeId
;
}
} }
size_t nSources() const noexcept { size_t nSources() const noexcept {
@ -42,8 +56,9 @@ namespace atrip {
} }
typename Slice<F>::Location typename Slice<F>::Location
find(ABCTuple const& abc, typename Slice<F>::Type sliceType) const noexcept { find(ABCTuple const& abc, typename Slice<F>::Type sliceType) const {
// tuple = {11, 8} when abc = {11, 8, 9} and sliceType = AB // tuple = {11, 8} when abc = {11, 8, 9} and sliceType = AB
// tuple = {11, 0} when abc = {11, 8, 9} and sliceType = A
const auto tuple = Slice<F>::subtupleBySlice(abc, sliceType); const auto tuple = Slice<F>::subtupleBySlice(abc, sliceType);
const size_t index const size_t index
@ -51,9 +66,51 @@ namespace atrip {
+ tuple[1] * (lengths.size() > 1 ? lengths[0] : 0) + tuple[1] * (lengths.size() > 1 ? lengths[0] : 0)
; ;
size_t rank, source;
if (RANK_ROUND_ROBIN) {
rank = index % np;
source = index / np;
} else {
size_t const
// the node that will be assigned to
nodeId = index % clusterInfo.nNodes
// how many times it has been assigned to the node
, s_n = index / clusterInfo.nNodes
// which local rank in the node should be
, localRank = s_n % clusterInfo.ranksPerNode
// and the local source (how many times we chose this local rank)
, localSource = s_n / clusterInfo.ranksPerNode
;
// find the localRank-th entry in clusterInfo
auto const& it =
std::find_if(clusterInfo.rankInfos.begin(),
clusterInfo.rankInfos.end(),
[nodeId, localRank](RankInfo const& ri) {
return ri.nodeId == nodeId
&& ri.localRank == localRank
;
});
if (it == clusterInfo.rankInfos.end()) {
throw "FATAL! Error in node distribution of the slices";
}
rank = (*it).globalRank;
source = localSource;
}
return return
{ index % np { rank
, index / np , source
}; };
} }

View File

@ -1,4 +1,4 @@
// [[file:~/cc4s/src/atrip/complex/atrip.org::*The%20slice][The slice:1]] // [[file:~/cc4s/src/atrip/complex/atrip.org::*Prolog][Prolog:1]]
#pragma once #pragma once
#include <iostream> #include <iostream>
#include <algorithm> #include <algorithm>
@ -11,6 +11,9 @@
namespace atrip { namespace atrip {
/// Generic case: returns `a` unchanged. Used for every scalar type that has
/// no dedicated specialization below.
template <typename FF> FF maybeConjugate(const FF a) { return a; }
/// Complex case: returns std::conj(a).
/// An explicit specialization of a function template is NOT implicitly
/// inline; since this file is a header (`#pragma once`), the definition must
/// be marked `inline` so that it can be included from several translation
/// units without producing multiple-definition link errors.
template <> inline Complex maybeConjugate<Complex>(const Complex a) { return std::conj(a); }
namespace traits { namespace traits {
template <typename FF> bool isComplex() { return false; }; template <typename FF> bool isComplex() { return false; };
template <> bool isComplex<Complex>() { return true; }; template <> bool isComplex<Complex>() { return true; };
@ -24,401 +27,409 @@ namespace mpi {
template <typename F=double> template <typename F=double>
struct Slice { struct Slice {
// The slice:1 ends here // Prolog:1 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*The%20slice][The slice:2]] // [[file:~/cc4s/src/atrip/complex/atrip.org::*Location][Location:1]]
// ASSOCIATED TYPES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% struct Location { size_t rank; size_t source; };
// Location:1 ends here
struct Location { size_t rank; size_t source; }; // [[file:~/cc4s/src/atrip/complex/atrip.org::*Type][Type:1]]
enum Type
enum Type { A = 10
{ A = 10 , B
, B , C
, C // Two-parameter slices
// Two-parameter slices , AB = 20
, AB = 20 , BC
, BC , AC
, AC // for abci and the doubles
// for abci and the doubles , CB
, CB , BA
, BA , CA
, CA // The non-typed slice
// The non-typed slice , Blank = 404
, Blank = 404
};
enum State {
// Fetch represents the state where a slice is to be fetched
// and has a valid data pointer that can be written to
Fetch = 0,
// Dispatches represents the state that an MPI call has been
// dispatched in order to get the data, but the data has not been
// yet unwrapped, the data might be there or we might have to wait.
Dispatched = 2,
// Ready means that the data pointer can be read from
Ready = 1,
// Self sufficient is a slice when its contents are located
// in the same rank that it lives, so that it does not have to
// fetch from no one else.
SelfSufficient = 911,
// Recycled means that this slice gets its data pointer from another
// slice, so it should not be written to
Recycled = 123,
// Acceptor means that the Slice can accept a new Slice, it is
// the counterpart of the Blank type, but for states
Acceptor = 405
}; };
// Type:1 ends here
struct Info { // [[file:~/cc4s/src/atrip/complex/atrip.org::*State][State:1]]
// which part of a,b,c the slice holds enum State {
PartialTuple tuple; Fetch = 0,
// The type of slice for the user to retrieve the correct one Dispatched = 2,
Type type; Ready = 1,
// What is the state of the slice SelfSufficient = 911,
State state; Recycled = 123,
// Where the slice is to be retrieved Acceptor = 405
// NOTE: this can actually be computed from tuple };
Location from; // State:1 ends here
// If the data are actually to be found in this other slice
Type recycling;
Info() : tuple{0,0} // [[file:~/cc4s/src/atrip/complex/atrip.org::*The%20Info%20structure][The Info structure:1]]
, type{Blank} struct Info {
, state{Acceptor} // which part of a,b,c the slice holds
, from{0,0} PartialTuple tuple;
, recycling{Blank} // The type of slice for the user to retrieve the correct one
{} Type type;
// What is the state of the slice
State state;
// Where the slice is to be retrieved
Location from;
// If the data are actually to be found in this other slice
Type recycling;
Info() : tuple{0,0}
, type{Blank}
, state{Acceptor}
, from{0,0}
, recycling{Blank}
{}
};
using Ty_x_Tu = std::pair< Type, PartialTuple >;
// The Info structure:1 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Name][Name:1]]
// Names of the tensors that a slice may hold; the numeric values are fixed
// explicitly for every enumerator.
enum Name {
  TA    = 100,
  VIJKA = 101,
  VABCI = 200,
  TABIJ = 201,
  VABIJ = 202
};
// Name:1 ends here
using Ty_x_Tu = std::pair< Type, PartialTuple >; // [[file:~/cc4s/src/atrip/complex/atrip.org::*Database][Database:1]]
struct LocalDatabaseElement {
Slice<F>::Name name;
Slice<F>::Info info;
};
// Database:1 ends here
// Names of the integrals that are considered in CCSD(T) // [[file:~/cc4s/src/atrip/complex/atrip.org::*Database][Database:2]]
enum Name using LocalDatabase = std::vector<LocalDatabaseElement>;
{ TA = 100 using Database = LocalDatabase;
, VIJKA = 101 // Database:2 ends here
, VABCI = 200
, TABIJ = 201
, VABIJ = 202
};
// DATABASE ==========================================================={{{1 // [[file:~/cc4s/src/atrip/complex/atrip.org::*MPI%20Types][MPI Types:1]]
struct LocalDatabaseElement { struct mpi {
Slice<F>::Name name;
Slice<F>::Info info;
};
using LocalDatabase = std::vector<LocalDatabaseElement>;
using Database = LocalDatabase;
static MPI_Datatype vector(size_t n, MPI_Datatype const& DT) {
// STATIC METHODS =========================================================== MPI_Datatype dt;
// MPI_Type_vector(n, 1, 1, DT, &dt);
// They are useful to organize the structure of slices MPI_Type_commit(&dt);
return dt;
struct mpi {
static MPI_Datatype vector(size_t n, MPI_Datatype const& DT) {
MPI_Datatype dt;
MPI_Type_vector(n, 1, 1, DT, &dt);
MPI_Type_commit(&dt);
return dt;
}
static MPI_Datatype sliceLocation () {
constexpr int n = 2;
// create a sliceLocation to measure in the current architecture
// the packing of the struct
Slice<F>::Location measure;
MPI_Datatype dt;
const std::vector<int> lengths(n, 1);
const MPI_Datatype types[n] = {usizeDt(), usizeDt()};
// measure the displacements in the struct
size_t j = 0;
MPI_Aint displacements[n];
MPI_Get_address(&measure.rank, &displacements[j++]);
MPI_Get_address(&measure.source, &displacements[j++]);
for (size_t i = 1; i < n; i++) displacements[i] -= displacements[0];
displacements[0] = 0;
MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt);
MPI_Type_commit(&dt);
return dt;
}
static MPI_Datatype enumDt() { return MPI_INT; }
static MPI_Datatype usizeDt() { return MPI_UINT64_T; }
static MPI_Datatype sliceInfo () {
constexpr int n = 5;
MPI_Datatype dt;
Slice<F>::Info measure;
const std::vector<int> lengths(n, 1);
const MPI_Datatype types[n]
= { vector(2, usizeDt())
, enumDt()
, enumDt()
, sliceLocation()
, enumDt()
};
// create the displacements from the info measurement struct
size_t j = 0;
MPI_Aint displacements[n];
MPI_Get_address(measure.tuple.data(), &displacements[j++]);
MPI_Get_address(&measure.type, &displacements[j++]);
MPI_Get_address(&measure.state, &displacements[j++]);
MPI_Get_address(&measure.from, &displacements[j++]);
MPI_Get_address(&measure.recycling, &displacements[j++]);
for (size_t i = 1; i < n; i++) displacements[i] -= displacements[0];
displacements[0] = 0;
MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt);
MPI_Type_commit(&dt);
return dt;
}
static MPI_Datatype localDatabaseElement () {
constexpr int n = 2;
MPI_Datatype dt;
LocalDatabaseElement measure;
const std::vector<int> lengths(n, 1);
const MPI_Datatype types[n]
= { enumDt()
, sliceInfo()
};
// measure the displacements in the struct
size_t j = 0;
MPI_Aint displacements[n];
MPI_Get_address(&measure.name, &displacements[j++]);
MPI_Get_address(&measure.info, &displacements[j++]);
for (size_t i = 1; i < n; i++) displacements[i] -= displacements[0];
displacements[0] = 0;
MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt);
MPI_Type_commit(&dt);
return dt;
}
};
static
PartialTuple subtupleBySlice(ABCTuple abc, Type sliceType) {
switch (sliceType) {
case AB: return {abc[0], abc[1]};
case BC: return {abc[1], abc[2]};
case AC: return {abc[0], abc[2]};
case CB: return {abc[2], abc[1]};
case BA: return {abc[1], abc[0]};
case CA: return {abc[2], abc[0]};
case A: return {abc[0], 0};
case B: return {abc[1], 0};
case C: return {abc[2], 0};
default: throw "Switch statement not exhaustive!";
}
} }
static MPI_Datatype sliceLocation () {
constexpr int n = 2;
// create a sliceLocation to measure in the current architecture
// the packing of the struct
Slice<F>::Location measure;
MPI_Datatype dt;
const std::vector<int> lengths(n, 1);
const MPI_Datatype types[n] = {usizeDt(), usizeDt()};
/** static_assert(sizeof(Slice<F>::Location) == 2 * sizeof(size_t),
* It is important here to return a reference to a Slice "The Location packing is wrong in your compiler");
* not to accidentally copy the associated buffer of the slice.
*/
static Slice<F>& findOneByType(std::vector<Slice<F>> &slices, Slice<F>::Type type) {
const auto sliceIt
= std::find_if(slices.begin(), slices.end(),
[&type](Slice<F> const& s) {
return type == s.info.type;
});
WITH_CRAZY_DEBUG
WITH_RANK
<< "\t__ looking for " << type << "\n";
if (sliceIt == slices.end())
throw std::domain_error("Slice by type not found!");
return *sliceIt;
}
/* // measure the displacements in the struct
* Check if an info has size_t j = 0;
* MPI_Aint base_address, displacements[n];
*/ MPI_Get_address(&measure, &base_address);
static std::vector<Slice<F>*> hasRecycledReferencingToIt MPI_Get_address(&measure.rank, &displacements[j++]);
( std::vector<Slice<F>> &slices MPI_Get_address(&measure.source, &displacements[j++]);
, Info const& info for (size_t i = 0; i < n; i++)
) { displacements[i] = MPI_Aint_diff(displacements[i], base_address);
std::vector<Slice<F>*> result;
for (auto& s: slices) MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt);
if ( s.info.recycling == info.type MPI_Type_commit(&dt);
&& s.info.tuple == info.tuple return dt;
&& s.info.state == Recycled }
) result.push_back(&s);
return result; static MPI_Datatype usizeDt() { return MPI_UINT64_T; }
}
static Slice<F>& static MPI_Datatype sliceInfo () {
findRecycledSource (std::vector<Slice<F>> &slices, Slice<F>::Info info) { constexpr int n = 5;
const auto sliceIt MPI_Datatype dt;
= std::find_if(slices.begin(), slices.end(), Slice<F>::Info measure;
[&info](Slice<F> const& s) { const std::vector<int> lengths(n, 1);
return info.recycling == s.info.type const MPI_Datatype types[n]
&& info.tuple == s.info.tuple = { vector(2, usizeDt())
&& State::Recycled != s.info.state , vector(sizeof(enum Type), MPI_CHAR)
; , vector(sizeof(enum State), MPI_CHAR)
}); , sliceLocation()
, vector(sizeof(enum Type), MPI_CHAR)
// TODO: Why this does not work on intel mpi?
/*, MPI_UINT64_T*/
};
WITH_CRAZY_DEBUG static_assert(sizeof(enum Type) == 4, "Enum type not 4 bytes long");
WITH_RANK << "__slice__:find: recycling source of " static_assert(sizeof(enum State) == 4, "Enum State not 4 bytes long");
<< pretty_print(info) << "\n"; static_assert(sizeof(enum Name) == 4, "Enum Name not 4 bytes long");
if (sliceIt == slices.end())
throw std::domain_error( "Slice not found: "
+ pretty_print(info)
+ " rank: "
+ pretty_print(Atrip::rank)
);
WITH_RANK << "__slice__:find: " << pretty_print(sliceIt->info) << "\n";
return *sliceIt;
}
static Slice<F>& findByTypeAbc // create the displacements from the info measurement struct
( std::vector<Slice<F>> &slices size_t j = 0;
, Slice<F>::Type type MPI_Aint base_address, displacements[n];
, ABCTuple const& abc MPI_Get_address(&measure, &base_address);
) { MPI_Get_address(&measure.tuple[0], &displacements[j++]);
const auto tuple = Slice<F>::subtupleBySlice(abc, type); MPI_Get_address(&measure.type, &displacements[j++]);
const auto sliceIt MPI_Get_address(&measure.state, &displacements[j++]);
= std::find_if(slices.begin(), slices.end(), MPI_Get_address(&measure.from, &displacements[j++]);
[&type, &tuple](Slice<F> const& s) { MPI_Get_address(&measure.recycling, &displacements[j++]);
return type == s.info.type for (size_t i = 0; i < n; i++)
&& tuple == s.info.tuple displacements[i] = MPI_Aint_diff(displacements[i], base_address);
;
});
WITH_CRAZY_DEBUG
WITH_RANK << "__slice__:find:" << type << " and tuple "
<< pretty_print(tuple)
<< "\n";
if (sliceIt == slices.end())
throw std::domain_error( "Slice not found: "
+ pretty_print(tuple)
+ ", "
+ pretty_print(type)
+ " rank: "
+ pretty_print(Atrip::rank)
);
return *sliceIt;
}
static Slice<F>& findByInfo(std::vector<Slice<F>> &slices, MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt);
Slice<F>::Info const& info) { MPI_Type_commit(&dt);
const auto sliceIt return dt;
= std::find_if(slices.begin(), slices.end(), }
[&info](Slice<F> const& s) {
// TODO: maybe implement comparison in Info struct
return info.type == s.info.type
&& info.state == s.info.state
&& info.tuple == s.info.tuple
&& info.from.rank == s.info.from.rank
&& info.from.source == s.info.from.source
;
});
WITH_CRAZY_DEBUG
WITH_RANK << "__slice__:find:looking for " << pretty_print(info) << "\n";
if (sliceIt == slices.end())
throw std::domain_error( "Slice by info not found: "
+ pretty_print(info));
return *sliceIt;
}
// SLICE DEFINITION =================================================={{{1 static MPI_Datatype localDatabaseElement () {
constexpr int n = 2;
MPI_Datatype dt;
LocalDatabaseElement measure;
const std::vector<int> lengths(n, 1);
const MPI_Datatype types[n]
= { vector(sizeof(enum Name), MPI_CHAR)
, sliceInfo()
};
// ATTRIBUTES ============================================================ // measure the displacements in the struct
Info info; size_t j = 0;
F *data; MPI_Aint base_address, displacements[n];
MPI_Request request; MPI_Get_address(&measure, &base_address);
const size_t size; MPI_Get_address(&measure.name, &displacements[j++]);
MPI_Get_address(&measure.info, &displacements[j++]);
for (size_t i = 0; i < n; i++)
displacements[i] = MPI_Aint_diff(displacements[i], base_address);
void markReady() noexcept { static_assert( sizeof(LocalDatabaseElement) == sizeof(measure)
info.state = Ready; , "Measure has bad size");
info.recycling = Blank;
}
/* MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt);
* This means that the data is there MPI_Type_commit(&dt);
*/ return vector(sizeof(LocalDatabaseElement), MPI_CHAR);
bool isUnwrapped() const noexcept { // TODO: write tests in order to know if this works
return info.state == Ready return dt;
|| info.state == SelfSufficient }
;
}
bool isUnwrappable() const noexcept { };
return isUnwrapped() // MPI Types:1 ends here
|| info.state == Recycled
|| info.state == Dispatched
;
}
inline bool isDirectlyFetchable() const noexcept { // [[file:~/cc4s/src/atrip/complex/atrip.org::*Static%20utilities][Static utilities:1]]
return info.state == Ready || info.state == Dispatched; static
} PartialTuple subtupleBySlice(ABCTuple abc, Type sliceType) {
switch (sliceType) {
case AB: return {abc[0], abc[1]};
case BC: return {abc[1], abc[2]};
case AC: return {abc[0], abc[2]};
case CB: return {abc[2], abc[1]};
case BA: return {abc[1], abc[0]};
case CA: return {abc[2], abc[0]};
case A: return {abc[0], 0};
case B: return {abc[1], 0};
case C: return {abc[2], 0};
default: throw "Switch statement not exhaustive!";
}
}
// Static utilities:1 ends here
void free() noexcept { // [[file:~/cc4s/src/atrip/complex/atrip.org::*Static%20utilities][Static utilities:2]]
info.tuple = {0, 0}; static std::vector<Slice<F>*> hasRecycledReferencingToIt
info.type = Blank; ( std::vector<Slice<F>> &slices
info.state = Acceptor; , Info const& info
info.from = {0, 0}; ) {
info.recycling = Blank; std::vector<Slice<F>*> result;
data = nullptr;
}
inline bool isFree() const noexcept { for (auto& s: slices)
return info.tuple == PartialTuple{0, 0} if ( s.info.recycling == info.type
&& info.type == Blank && s.info.tuple == info.tuple
&& info.state == Acceptor && s.info.state == Recycled
&& info.from.rank == 0 ) result.push_back(&s);
&& info.from.source == 0
&& info.recycling == Blank
&& data == nullptr
;
}
return result;
}
// Static utilities:2 ends here
/* // [[file:~/cc4s/src/atrip/complex/atrip.org::*Static%20utilities][Static utilities:3]]
* This function answers the question, which slices can be recycled. static Slice<F>& findOneByType(std::vector<Slice<F>> &slices, Slice<F>::Type type) {
* const auto sliceIt
* A slice can only be recycled if it is Fetch or Ready and has = std::find_if(slices.begin(), slices.end(),
* a valid datapointer. [&type](Slice<F> const& s) {
* return type == s.info.type;
* In particular, SelfSufficient are not recyclable, since it is easier });
* just to create a SelfSufficient slice than deal with data dependencies. WITH_CRAZY_DEBUG
* WITH_RANK
* Furthermore, a recycled slice is not recyclable, if this is the case << "\t__ looking for " << type << "\n";
* then it is either bad design or a bug. if (sliceIt == slices.end())
*/ throw std::domain_error("Slice by type not found!");
inline bool isRecyclable() const noexcept { return *sliceIt;
return ( info.state == Dispatched }
|| info.state == Ready // Static utilities:3 ends here
|| info.state == Fetch
)
&& hasValidDataPointer()
;
}
/* // [[file:~/cc4s/src/atrip/complex/atrip.org::*Static%20utilities][Static utilities:4]]
* This function describes if a slice has a valid data pointer. static Slice<F>&
* findRecycledSource (std::vector<Slice<F>> &slices, Slice<F>::Info info) {
* This is important to know if the slice has some data to it, also const auto sliceIt
* some structural checks are done, so that it should not be Acceptor = std::find_if(slices.begin(), slices.end(),
* or Blank, if this is the case then it is a bug. [&info](Slice<F> const& s) {
*/ return info.recycling == s.info.type
inline bool hasValidDataPointer() const noexcept { && info.tuple == s.info.tuple
return data != nullptr && State::Recycled != s.info.state
&& info.state != Acceptor ;
&& info.type != Blank });
;
}
void unwrapAndMarkReady() { WITH_CRAZY_DEBUG
WITH_RANK << "__slice__:find: recycling source of "
<< pretty_print(info) << "\n";
if (sliceIt == slices.end())
throw std::domain_error( "Slice not found: "
+ pretty_print(info)
+ " rank: "
+ pretty_print(Atrip::rank)
);
WITH_RANK << "__slice__:find: " << pretty_print(sliceIt->info) << "\n";
return *sliceIt;
}
// Static utilities:4 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Static%20utilities][Static utilities:5]]
static Slice<F>& findByTypeAbc
  ( std::vector<Slice<F>> &slices
  , Slice<F>::Type type
  , ABCTuple const& abc
  ) {
  // Look up the slice of the given type whose tuple equals the sub-tuple
  // that Slice<F>::subtupleBySlice derives from abc for that type.
  // Throws std::domain_error if no slice matches.
  const auto wanted = Slice<F>::subtupleBySlice(abc, type);

  // linear scan, stops at the first slice matching both type and tuple
  auto found = slices.begin();
  for (; found != slices.end(); ++found) {
    if (!(type == found->info.type)) continue;
    if (!(wanted == found->info.tuple)) continue;
    break;
  }

  WITH_CRAZY_DEBUG
  WITH_RANK << "__slice__:find:" << type << " and tuple "
            << pretty_print(wanted)
            << "\n";

  if (found == slices.end()) {
    std::string message = "Slice not found: ";
    message += pretty_print(wanted);
    message += ", ";
    message += pretty_print(type);
    message += " rank: ";
    message += pretty_print(Atrip::rank);
    throw std::domain_error(message);
  }

  return *found;
}
// Static utilities:5 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Static%20utilities][Static utilities:6]]
static Slice<F>& findByInfo(std::vector<Slice<F>> &slices,
                            Slice<F>::Info const& info) {
  // Return the first slice whose type, state, tuple and origin
  // (from.rank, from.source) all equal those of info.
  // Throws std::domain_error if there is none.
  // TODO: maybe implement comparison in Info struct
  auto match = slices.begin();
  for (; match != slices.end(); ++match) {
    auto const& other = match->info;
    if (!(info.type == other.type)) continue;
    if (!(info.state == other.state)) continue;
    if (!(info.tuple == other.tuple)) continue;
    if (!(info.from.rank == other.from.rank)) continue;
    if (!(info.from.source == other.from.source)) continue;
    break;
  }

  WITH_CRAZY_DEBUG
  WITH_RANK << "__slice__:find:looking for " << pretty_print(info) << "\n";

  if (match == slices.end()) {
    std::string message = "Slice by info not found: ";
    message += pretty_print(info);
    throw std::domain_error(message);
  }

  return *match;
}
// Static utilities:6 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Attributes][Attributes:1]]
// Descriptive metadata of the slice; the member functions read and write
// its type, state, tuple, from and recycling fields.
Info info;
// Attributes:1 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Attributes][Attributes:2]]
// Pointer to the slice's data; free() resets it to nullptr and
// hasValidDataPointer() requires it to be non-null.
F *data;
// Attributes:2 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Attributes][Attributes:3]]
// MPI request handle associated with this slice.
// NOTE(review): presumably the pending non-blocking transfer of `data` —
// its use is not visible in this part of the file, confirm.
MPI_Request request;
// Attributes:3 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Attributes][Attributes:4]]
// Size given at construction time; never changes afterwards.
// NOTE(review): presumably the number of F elements behind `data` — confirm.
const size_t size;
// Attributes:4 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Member%20functions][Member functions:1]]
void markReady() noexcept {
  // Clear the recycling reference and switch the state to Ready.
  info.recycling = Blank;
  info.state = Ready;
}
// Member functions:1 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Member%20functions][Member functions:2]]
bool isUnwrapped() const noexcept {
  // Only the Ready and SelfSufficient states count as unwrapped.
  switch (info.state) {
    case Ready:
    case SelfSufficient:
      return true;
    default:
      return false;
  }
}
// Member functions:2 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Member%20functions][Member functions:3]]
bool isUnwrappable() const noexcept {
  // Already-unwrapped slices qualify trivially; otherwise the slice has to
  // be in the Recycled or Dispatched state.
  if (isUnwrapped()) return true;
  if (info.state == Recycled) return true;
  return info.state == Dispatched;
}
inline bool isDirectlyFetchable() const noexcept {
  // True exactly for the Ready and Dispatched states.
  const auto current = info.state;
  if (current == Ready) return true;
  return current == Dispatched;
}
void free() noexcept {
  // Reset the slice to the blank configuration that isFree() tests for.
  // Only the pointer is dropped here; nothing is deallocated.
  data = nullptr;

  info.type      = Blank;
  info.state     = Acceptor;
  info.recycling = Blank;
  info.tuple     = {0, 0};
  info.from      = {0, 0};
}
inline bool isFree() const noexcept {
return info.tuple == PartialTuple{0, 0}
&& info.type == Blank
&& info.state == Acceptor
&& info.from.rank == 0
&& info.from.source == 0
&& info.recycling == Blank
&& data == nullptr
;
}
// Member functions:3 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Member%20functions][Member functions:4]]
inline bool isRecyclable() const noexcept {
  // Recyclable means: a valid data pointer and a state of Dispatched,
  // Ready or Fetch.
  if (!hasValidDataPointer()) return false;
  switch (info.state) {
    case Dispatched:
    case Ready:
    case Fetch:
      return true;
    default:
      return false;
  }
}
// Member functions:4 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Member%20functions][Member functions:5]]
inline bool hasValidDataPointer() const noexcept {
  // The pointer must be set, and the slice must be neither in the Acceptor
  // state nor of Blank type.
  if (data == nullptr) return false;
  return !(info.state == Acceptor || info.type == Blank);
}
// Member functions:5 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Member%20functions][Member functions:6]]
void unwrapAndMarkReady() {
if (info.state == Ready) return; if (info.state == Ready) return;
if (info.state != Dispatched) if (info.state != Dispatched)
throw throw
@ -447,17 +458,20 @@ struct Slice {
<< "\n"; << "\n";
#endif #endif
} }
// Member functions:6 ends here
Slice(size_t size_) // [[file:~/cc4s/src/atrip/complex/atrip.org::*Epilog][Epilog:1]]
: info({}) Slice(size_t size_)
, data(nullptr) : info({})
, size(size_) , data(nullptr)
{} , size(size_)
{}
}; // struct Slice }; // struct Slice
// Epilog:1 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Debug][Debug:1]]
template <typename F=double> template <typename F=double>
std::ostream& operator<<(std::ostream& out, typename Slice<F>::Location const& v) { std::ostream& operator<<(std::ostream& out, typename Slice<F>::Location const& v) {
// TODO: remove me // TODO: remove me
@ -476,4 +490,4 @@ std::ostream& operator<<(std::ostream& out, typename Slice<F>::Info const& i) {
} }
} // namespace atrip } // namespace atrip
// The slice:2 ends here // Debug:1 ends here

View File

@ -179,8 +179,14 @@ namespace atrip {
if (blank.info.state == Slice<F>::SelfSufficient) { if (blank.info.state == Slice<F>::SelfSufficient) {
blank.data = sources[from.source].data(); blank.data = sources[from.source].data();
} else { } else {
if (freePointers.size() == 0) if (freePointers.size() == 0) {
throw std::domain_error("No more free pointers!"); std::stringstream stream;
stream << "No more free pointers "
<< "for type " << type
<< " and name " << name
;
throw std::domain_error(stream.str());
}
auto dataPointer = freePointers.begin(); auto dataPointer = freePointers.begin();
freePointers.erase(dataPointer); freePointers.erase(dataPointer);
blank.data = *dataPointer; blank.data = *dataPointer;
@ -314,7 +320,8 @@ namespace atrip {
// at this point, let us blank the slice // at this point, let us blank the slice
WITH_RANK << "~~~:cl(" << name << ")" WITH_RANK << "~~~:cl(" << name << ")"
<< " freeing up slice " << " freeing up slice "
// TODO: make this possible // TODO: make this possible because of Templates
// TODO: there is a deduction error here
// << " info " << slice.info // << " info " << slice.info
<< "\n"; << "\n";
slice.free(); slice.free();
@ -334,7 +341,7 @@ namespace atrip {
, typename Slice<F>::Name name_ , typename Slice<F>::Name name_
, size_t nSliceBuffers = 4 , size_t nSliceBuffers = 4
) )
: rankMap(paramLength, np) : rankMap(paramLength, np, global_world)
, world(child_world) , world(child_world)
, universe(global_world) , universe(global_world)
, sliceLength(sliceLength_) , sliceLength(sliceLength_)
@ -353,7 +360,7 @@ namespace atrip {
slices slices
= std::vector<Slice<F>>(2 * sliceTypes.size(), { sources[0].size() }); = std::vector<Slice<F>>(2 * sliceTypes.size(), { sources[0].size() });
// TODO: think exactly ^------------------- about this number // TODO: think exactly ^------------------- about this number
// initialize the freePointers with the pointers to the buffers // initialize the freePointers with the pointers to the buffers
std::transform(sliceBuffers.begin(), sliceBuffers.end(), std::transform(sliceBuffers.begin(), sliceBuffers.end(),
@ -421,10 +428,11 @@ namespace atrip {
* \brief Send asynchronously only if the state is Fetch * \brief Send asynchronously only if the state is Fetch
*/ */
void send( size_t otherRank void send( size_t otherRank
, typename Slice<F>::Info const& info , typename Slice<F>::LocalDatabaseElement const& el
, size_t tag) const noexcept { , size_t tag) const noexcept {
MPI_Request request; MPI_Request request;
bool sendData_p = false; bool sendData_p = false;
auto const& info = el.info;
if (info.state == Slice<F>::Fetch) sendData_p = true; if (info.state == Slice<F>::Fetch) sendData_p = true;
// TODO: remove this because I have SelfSufficient // TODO: remove this because I have SelfSufficient
@ -539,8 +547,11 @@ namespace atrip {
[&name](SliceUnion<F> const* s) { [&name](SliceUnion<F> const* s) {
return name == s->name; return name == s->name;
}); });
if (sliceUnionIt == unions.end()) if (sliceUnionIt == unions.end()) {
throw std::domain_error("SliceUnion not found!"); std::stringstream stream;
stream << "SliceUnion(" << name << ") not found!";
throw std::domain_error(stream.str());
}
return **sliceUnionIt; return **sliceUnionIt;
} }

View File

@ -1,75 +1,538 @@
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Tuples][Tuples:1]] // [[file:~/cc4s/src/atrip/complex/atrip.org::*Prolog][Prolog:1]]
#pragma once #pragma once
#include <vector> #include <vector>
#include <array> #include <array>
#include <numeric> #include <numeric>
// TODO: remove some
#include <stdio.h>
#include <math.h>
#include <algorithm>
#include <map>
#include <cassert>
#include <chrono>
#include <climits>
#include <mpi.h>
#include <atrip/Utils.hpp> #include <atrip/Utils.hpp>
#include <atrip/Debug.hpp> #include <atrip/Debug.hpp>
namespace atrip { namespace atrip {
// Prolog:1 ends here
using ABCTuple = std::array<size_t, 3>; // [[file:~/cc4s/src/atrip/complex/atrip.org::*Tuples%20types][Tuples types:1]]
using PartialTuple = std::array<size_t, 2>; using ABCTuple = std::array<size_t, 3>;
using ABCTuples = std::vector<ABCTuple>; using PartialTuple = std::array<size_t, 2>;
using ABCTuples = std::vector<ABCTuple>;
ABCTuples getTuplesList(size_t Nv) { constexpr ABCTuple FAKE_TUPLE = {0, 0, 0};
const size_t n = Nv * (Nv + 1) * (Nv + 2) / 6 - Nv; constexpr ABCTuple INVALID_TUPLE = {1, 1, 1};
ABCTuples result(n); // Tuples types:1 ends here
size_t u(0);
for (size_t a(0); a < Nv; a++) // [[file:~/cc4s/src/atrip/complex/atrip.org::*Distributing%20the%20tuples][Distributing the tuples:1]]
for (size_t b(a); b < Nv; b++) struct TuplesDistribution {
for (size_t c(b); c < Nv; c++){ virtual ABCTuples getTuples(size_t Nv, MPI_Comm universe) = 0;
if ( a == b && b == c ) continue; virtual bool tupleIsFake(ABCTuple const& t) { return t == FAKE_TUPLE; }
result[u++] = {a, b, c}; };
} // Distributing the tuples:1 ends here
return result; // [[file:~/cc4s/src/atrip/complex/atrip.org::*Node%20information][Node information:1]]
// Gather the processor name of every rank in comm.
//
// Returns a vector with one entry per rank of comm, indexed by rank, holding
// the name that MPI_Get_processor_name reported on that rank.
// Collective: every rank of comm has to call it.
std::vector<std::string> getNodeNames(MPI_Comm comm){
  int np;
  MPI_Comm_size(comm, &np);

  // name of the processor this rank runs on
  char nodeName[MPI_MAX_PROCESSOR_NAME];
  int nameLength;
  MPI_Get_processor_name(nodeName, &nameLength);

  // every rank learns the name length of every other rank
  std::vector<int> nameLengths(np)
                 , off(np)
                 ;
  MPI_Allgather(&nameLength,
                1,
                MPI_INT,
                nameLengths.data(),
                1,
                MPI_INT,
                comm);

  // displacement of each name inside the packed receive buffer
  for (int i(1); i < np; i++)
    off[i] = off[i-1] + nameLengths[i-1];

  // Heap buffer instead of a variable-length stack array: VLAs are not
  // standard C++ and the size grows linearly with the number of ranks.
  std::vector<char>
    nodeNames(static_cast<size_t>(np) * MPI_MAX_PROCESSOR_NAME);

  // nameLength is this rank's own entry of nameLengths
  MPI_Allgatherv(nodeName,
                 nameLength,
                 MPI_BYTE,
                 nodeNames.data(),
                 nameLengths.data(),
                 off.data(),
                 MPI_BYTE,
                 comm);

  // cut the packed buffer back into one string per rank
  std::vector<std::string> nodeList(np);
  for (int i(0); i < np; i++) {
    nodeList[i] = std::string(nodeNames.data() + off[i],
                              static_cast<size_t>(nameLengths[i]));
  }

  return nodeList;
}
// Node information:1 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Node%20information][Node information:2]]
// Placement of one rank within the cluster, as filled in by getNodeInfos().
struct RankInfo {
// node name of this rank
const std::string name;
// position of `name` in the sorted list of distinct node names
const size_t nodeId;
// position of this rank in the list of node names handed to getNodeInfos()
const size_t globalRank;
// running index of this rank among the ranks sharing the same node name
const size_t localRank;
// number of ranks that share this rank's node name
const size_t ranksPerNode;
};
std::pair<size_t, size_t> template <typename A>
getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) { A unique(A const &xs) {
auto result = xs;
std::vector<size_t> n_tuples_per_rank(np, tuplesList.size()/np); std::sort(std::begin(result), std::end(result));
const size_t auto const& last = std::unique(std::begin(result), std::end(result));
// how many valid tuples should we still verteilen to nodes result.erase(last, std::end(result));
// since the number of tuples is not divisible by the number of nodes return result;
nRoundRobin = tuplesList.size() % np }
// every node must have the sanme amount of tuples in order for the
// other nodes to receive and send somewhere, therefore
// some nodes will get extra tuples but that are dummy tuples
, nExtraInvalid = (np - nRoundRobin) % np
;
if (nRoundRobin) for (int i = 0; i < np; i++) n_tuples_per_rank[i]++;
#if defined(TODO)
assert( tuplesList.size()
==
( std::accumulate(n_tuples_per_rank.begin(),
n_tuples_per_rank.end(),
0UL,
std::plus<size_t>())
+ nExtraInvalid
));
#endif
WITH_RANK << "nRoundRobin = " << nRoundRobin << "\n";
WITH_RANK << "nExtraInvalid = " << nExtraInvalid << "\n";
WITH_RANK << "ntuples = " << n_tuples_per_rank[rank] << "\n";
auto const& it = n_tuples_per_rank.begin();
return
{ std::accumulate(it, it + rank , 0)
, std::accumulate(it, it + rank + 1, 0)
};
std::vector<RankInfo>
getNodeInfos(std::vector<string> const& nodeNames) {
std::vector<RankInfo> result;
auto const uniqueNames = unique(nodeNames);
auto const index = [&uniqueNames](std::string const& s) {
auto const& it = std::find(uniqueNames.begin(), uniqueNames.end(), s);
return std::distance(uniqueNames.begin(), it);
};
std::vector<size_t> localRanks(uniqueNames.size(), 0);
size_t globalRank = 0;
for (auto const& name: nodeNames) {
const size_t nodeId = index(name);
result.push_back({name,
nodeId,
globalRank++,
localRanks[nodeId]++,
std::count(nodeNames.begin(),
nodeNames.end(),
name)
});
} }
return result;
}
// Summary of the cluster layout, as assembled by getClusterInfo().
struct ClusterInfo {
// nNodes:       number of distinct node names
// np:           number of ranks (one node name per rank)
// ranksPerNode: the ranksPerNode value of the first rank (rankInfos[0])
const size_t nNodes, np, ranksPerNode;
// placement information for every rank, see getNodeInfos()
const std::vector<RankInfo> rankInfos;
};
// Collect the node names of all ranks in comm and condense them into a
// ClusterInfo.
ClusterInfo
getClusterInfo(MPI_Comm comm) {
  auto const nodeNames = getNodeNames(comm);
  auto const infos = getNodeInfos(nodeNames);

  size_t const nNodes = unique(nodeNames).size();
  size_t const np = nodeNames.size();
  // taken from the first rank, as before
  size_t const ranksPerNode = infos[0].ranksPerNode;

  return ClusterInfo{nNodes, np, ranksPerNode, infos};
}
// Tuples:1 ends here // Node information:2 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Naive%20list][Naive list:1]]
// Naive distribution: the rank-th contiguous chunk of the global tuple list.
//
// The global list enumerates all a <= b <= c < Nv except a == b == c. Every
// rank gets the same number of entries; positions that are not backed by a
// real tuple keep the FAKE_TUPLE value.
ABCTuples getTuplesList(size_t Nv, size_t rank, size_t np) {

  // total number of tuples for the problem
  const size_t n = Nv * (Nv + 1) * (Nv + 2) / 6 - Nv;
  // all ranks should have the same number of tuples_per_rank
  const size_t tuples_per_rank = n / np + (n % np != 0 ? 1 : 0);
  // half-open index range [start, end) of this rank in the global list
  const size_t start = tuples_per_rank * rank;
  const size_t end = tuples_per_rank * (rank + 1);

  LOG(1,"Atrip") << "tuples_per_rank = " << tuples_per_rank << "\n";
  WITH_RANK << "start, end = " << start << ", " << end << "\n";

  ABCTuples result(tuples_per_rank, FAKE_TUPLE);

  size_t globalIndex = 0;  // position in the global tuple list
  size_t localIndex = 0;   // next position to fill in result
  for (size_t a = 0; a < Nv; ++a) {
    for (size_t b = a; b < Nv; ++b) {
      for (size_t c = b; c < Nv; ++c) {
        if (a == b && b == c) continue;
        const bool mine = start <= globalIndex && globalIndex < end;
        if (mine) result[localIndex++] = {a, b, c};
        ++globalIndex;
      }
    }
  }

  return result;

}
// Naive list:1 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Naive%20list][Naive list:2]]
// Full list of tuples (a, b, c) with a <= b <= c < Nv, leaving out the
// diagonal a == b == c, in lexicographic order.
ABCTuples getAllTuplesList(const size_t Nv) {
  ABCTuples result(Nv * (Nv + 1) * (Nv + 2) / 6 - Nv);

  auto next = result.begin();
  for (size_t a = 0; a < Nv; ++a) {
    for (size_t b = a; b < Nv; ++b) {
      for (size_t c = b; c < Nv; ++c) {
        if (!(a == b && b == c)) *next++ = {a, b, c};
      }
    }
  }

  return result;
}
// Naive list:2 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Naive%20list][Naive list:3]]
// Distribution that gives every rank of the communicator a contiguous chunk
// of the global tuple list via getTuplesList.
struct NaiveDistribution : public TuplesDistribution {
  ABCTuples getTuples(size_t Nv, MPI_Comm universe) override {
    int np, rank;
    MPI_Comm_size(universe, &np);
    MPI_Comm_rank(universe, &rank);
    return getTuplesList(Nv,
                         static_cast<size_t>(rank),
                         static_cast<size_t>(np));
  }
};
// Naive list:3 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Prolog][Prolog:1]]
namespace group_and_sort {
// Prolog:1 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Utils][Utils:1]]
// Provides the node on which the slice-element is found
// Right now we distribute the slices in a round robin fashion
// over the different nodes (NOTE: not mpi ranks but nodes)
inline
size_t isOnNode(size_t tuple, size_t nNodes) {
  // round-robin placement: the owning node is the index modulo nNodes
  const size_t owner = tuple % nNodes;
  return owner;
}
// return the node (or all nodes) where the elements of this
// tuple are located
std::vector<size_t> getTupleNodes(ABCTuple const& t, size_t nNodes) {
  // owning node of each of the three tuple elements ...
  std::vector<size_t> owners;
  owners.reserve(t.size());
  for (auto const element: t)
    owners.push_back(isOnNode(element, nNodes));
  // ... reduced to the distinct node ids
  return unique(owners);
}
// Node-level parameters handed to specialDistribution.
struct Info {
// number of nodes the tuple elements are distributed over
size_t nNodes;
// id of the node whose share of the tuples is computed
size_t nodeId;
};
// Utils:1 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Distribution][Distribution:1]]
// Compute the tuples that belong to node info.nodeId (group-and-sort).
//
// Every tuple of allTuples is grouped by the set of nodes that own its
// elements (see getTupleNodes). Groups living on one node go entirely to
// that node; groups living on two or three nodes are cut into halves or
// thirds, one piece per participating node. The tuples of this node are
// then reordered, sorted and finally restored to ascending element order.
//
// Progress messages are printed to std::cout when info.nodeId == 0.
// With ATRIP_DEBUG > 1 a string literal is thrown if the result still
// contains an INVALID_TUPLE.
ABCTuples specialDistribution(Info const& info, ABCTuples const& allTuples) {
ABCTuples nodeTuples;
size_t const nNodes(info.nNodes);
// one bucket per node, per pair of nodes and per triple of nodes
std::vector<ABCTuples>
container1d(nNodes)
, container2d(nNodes * nNodes)
, container3d(nNodes * nNodes * nNodes)
;
if (info.nodeId == 0)
std::cout << "\tGoing through all "
<< allTuples.size()
<< " tuples in "
<< nNodes
<< " nodes\n";
// build container-n-d's
for (auto const& t: allTuples) {
// on which node(s) are the tuple elements located...
// put them into the right container
auto const _nodes = getTupleNodes(t, nNodes);
switch (_nodes.size()) {
case 1:
container1d[_nodes[0]].push_back(t);
break;
case 2:
container2d[ _nodes[0]
+ _nodes[1] * nNodes
].push_back(t);
break;
case 3:
container3d[ _nodes[0]
+ _nodes[1] * nNodes
+ _nodes[2] * nNodes * nNodes
].push_back(t);
break;
}
}
if (info.nodeId == 0)
std::cout << "\tBuilding 1-d containers\n";
// DISTRIBUTE 1-d containers
// every tuple which is only located at one node belongs to this node
{
auto const& _tuples = container1d[info.nodeId];
nodeTuples.resize(_tuples.size(), INVALID_TUPLE);
std::copy(_tuples.begin(), _tuples.end(), nodeTuples.begin());
}
if (info.nodeId == 0)
std::cout << "\tBuilding 2-d containers\n";
// DISTRIBUTE 2-d containers
// the tuples which are located at two nodes are given half/half to these nodes
for (size_t yx = 0; yx < container2d.size(); yx++) {
auto const& _tuples = container2d[yx];
const
size_t idx = yx % nNodes
// remember: yx = idy * nNodes + idx
, idy = yx / nNodes
, n_half = _tuples.size() / 2
, size = nodeTuples.size()
;
size_t nbeg, nend;
if (info.nodeId == idx) {
nbeg = 0 * n_half;
nend = n_half;
} else if (info.nodeId == idy) {
// the second node also takes the odd leftover tuple, if any
nbeg = 1 * n_half;
nend = _tuples.size();
} else {
// neither idx nor idy is my node: nothing to take from this bucket
continue;
}
size_t const nextra = nend - nbeg;
nodeTuples.resize(size + nextra, INVALID_TUPLE);
std::copy(_tuples.begin() + nbeg,
_tuples.begin() + nend,
nodeTuples.begin() + size);
}
if (info.nodeId == 0)
std::cout << "\tBuilding 3-d containers\n";
// DISTRIBUTE 3-d containers
// the tuples which are located at three nodes are split into thirds
for (size_t zyx = 0; zyx < container3d.size(); zyx++) {
auto const& _tuples = container3d[zyx];
const
size_t idx = zyx % nNodes
, idy = (zyx / nNodes) % nNodes
// remember: zyx = idx + idy * nNodes + idz * nNodes^2
, idz = zyx / nNodes / nNodes
, n_third = _tuples.size() / 3
, size = nodeTuples.size()
;
size_t nbeg, nend;
if (info.nodeId == idx) {
nbeg = 0 * n_third;
nend = 1 * n_third;
} else if (info.nodeId == idy) {
nbeg = 1 * n_third;
nend = 2 * n_third;
} else if (info.nodeId == idz) {
// the third node also takes the leftover tuples, if any
nbeg = 2 * n_third;
nend = _tuples.size();
} else {
// none of idx, idy, idz is my node: nothing to take from this bucket
continue;
}
size_t const nextra = nend - nbeg;
nodeTuples.resize(size + nextra, INVALID_TUPLE);
std::copy(_tuples.begin() + nbeg,
_tuples.begin() + nend,
nodeTuples.begin() + size);
}
if (info.nodeId == 0) std::cout << "\tswapping tuples...\n";
/*
* sort part of group-and-sort algorithm
* every tuple on a given node is sorted in a way that
* the 'home elements' are the fastest index.
* 1:yyy 2:yyn(x) 3:yny(x) 4:ynn(x) 5:nyy 6:nyn(x) 7:nny 8:nnn
*/
for (auto &nt: nodeTuples){
if ( isOnNode(nt[0], nNodes) == info.nodeId ){ // 1234
if ( isOnNode(nt[2], nNodes) != info.nodeId ){ // 24
size_t const x(nt[0]);
nt[0] = nt[2]; // switch first and last
nt[2] = x;
}
else if ( isOnNode(nt[1], nNodes) != info.nodeId){ // 3
size_t const x(nt[0]);
nt[0] = nt[1]; // switch first two
nt[1] = x;
}
} else {
if ( isOnNode(nt[1], nNodes) == info.nodeId // 56
&& isOnNode(nt[2], nNodes) != info.nodeId
) { // 6
size_t const x(nt[1]);
nt[1] = nt[2]; // switch last two
nt[2] = x;
}
}
}
if (info.nodeId == 0) std::cout << "\tsorting list of tuples...\n";
// now we sort the list of (reordered) tuples lexicographically
std::sort(nodeTuples.begin(), nodeTuples.end());
if (info.nodeId == 0) std::cout << "\trestoring tuples...\n";
// we bring the elements of each tuple abc back into ascending order
for (auto &t: nodeTuples) std::sort(t.begin(), t.end());
#if ATRIP_DEBUG > 1
if (info.nodeId == 0)
std::cout << "checking for validity of " << nodeTuples.size() << std::endl;
const bool anyInvalid
= std::any_of(nodeTuples.begin(),
nodeTuples.end(),
[](ABCTuple const& t) { return t == INVALID_TUPLE; });
if (anyInvalid) throw "Some tuple is invalid in group-and-sort algorithm";
#endif
if (info.nodeId == 0) std::cout << "\treturning tuples...\n";
return nodeTuples;
}
// Distribution:1 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Main][Main:1]]
// Group-and-sort driver: compute the tuples of the calling rank.
//
// The first rank of every node (localRank == 0) computes the tuples of its
// node with specialDistribution, pads the list with FAKE_TUPLEs so that
// every rank of the universe ends up with the same number of tuples, and
// scatters equal chunks to the ranks of its node.
//
// Collective over universe. Returns the chunk of the calling rank.
std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {

  int rank, np;
  MPI_Comm_rank(universe, &rank);
  MPI_Comm_size(universe, &np);

  std::vector<ABCTuple> result;

  auto const nodeNames(getNodeNames(universe));
  size_t const nNodes = unique(nodeNames).size();
  auto const nodeInfos = getNodeInfos(nodeNames);

  // We want to construct a communicator which only contains of one
  // element per node
  bool const computeDistribution
    = nodeInfos[rank].localRank == 0;

  std::vector<ABCTuple>
    nodeTuples
      = computeDistribution
      ? specialDistribution(Info{nNodes, nodeInfos[rank].nodeId},
                            getAllTuplesList(Nv))
      : std::vector<ABCTuple>()
      ;

  LOG(1,"Atrip") << "got nodeTuples\n";

  // now we have to send the data from **one** rank on each node
  // to all others ranks of this node
  const
  int color = nodeInfos[rank].nodeId
    , key = nodeInfos[rank].localRank
    ;

  MPI_Comm INTRA_COMM;
  MPI_Comm_split(universe, color, key, &INTRA_COMM);
// Main:1 ends here

// [[file:~/cc4s/src/atrip/complex/atrip.org::*Main][Main:2]]
  size_t const
    tuplesPerRankLocal
       = nodeTuples.size() / nodeInfos[rank].ranksPerNode
       + size_t(nodeTuples.size() % nodeInfos[rank].ranksPerNode != 0)
       ;

  // the largest per-rank share over the whole universe, known on every rank
  size_t tuplesPerRankGlobal;

  MPI_Allreduce(&tuplesPerRankLocal,
                &tuplesPerRankGlobal,
                1,
                MPI_UINT64_T,
                MPI_MAX,
                universe);

  LOG(1,"Atrip") << "Tuples per rank: " << tuplesPerRankGlobal << "\n";
  LOG(1,"Atrip") << "ranks per node " << nodeInfos[rank].ranksPerNode << "\n";
  LOG(1,"Atrip") << "#nodes " << nNodes << "\n";
// Main:2 ends here

// [[file:~/cc4s/src/atrip/complex/atrip.org::*Main][Main:3]]
  size_t const totalTuples
    = tuplesPerRankGlobal * nodeInfos[rank].ranksPerNode;

  if (computeDistribution) {
    // pad with FAKE_TUPLEs
    nodeTuples.insert(nodeTuples.end(),
                      totalTuples - nodeTuples.size(),
                      FAKE_TUPLE);
  }
// Main:3 ends here

// [[file:~/cc4s/src/atrip/complex/atrip.org::*Main][Main:4]]
  {
    // construct mpi type for abctuple
    // The element count comes from the tuple type itself: nodeTuples is
    // empty on every rank that did not compute the distribution, so it must
    // not be indexed here.
    MPI_Datatype MPI_ABCTUPLE;
    MPI_Type_vector(static_cast<int>(ABCTuple().size()),
                    1,
                    1,
                    MPI_UINT64_T,
                    &MPI_ABCTUPLE);
    MPI_Type_commit(&MPI_ABCTUPLE);

    LOG(1,"Atrip") << "scattering tuples \n";

    result.resize(tuplesPerRankGlobal);
    // the send buffer is only read on the root (local rank 0) of INTRA_COMM
    MPI_Scatter(nodeTuples.data(),
                static_cast<int>(tuplesPerRankGlobal),
                MPI_ABCTUPLE,
                result.data(),
                static_cast<int>(tuplesPerRankGlobal),
                MPI_ABCTUPLE,
                0,
                INTRA_COMM);

    MPI_Type_free(&MPI_ABCTUPLE);

  }

  // the per-node communicator is not needed any longer
  MPI_Comm_free(&INTRA_COMM);
// Main:4 ends here

// [[file:~/cc4s/src/atrip/complex/atrip.org::*Main][Main:5]]
  return result;

}
// Main:5 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Interface][Interface:1]]
// TuplesDistribution front end of the group-and-sort algorithm; the work is
// done by group_and_sort::main.
struct Distribution : public TuplesDistribution {
  ABCTuples getTuples(size_t Nv, MPI_Comm universe) override {
    ABCTuples tuples = main(universe, Nv);
    return tuples;
  }
};
// Interface:1 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Epilog][Epilog:1]]
} // namespace group_and_sort
// Epilog:1 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Epilog][Epilog:1]]
}
// Epilog:1 ends here

View File

@ -59,7 +59,7 @@ namespace atrip {
, child_world , child_world
, global_world , global_world
, Slice<F>::TA , Slice<F>::TA
, 4) { , 6) {
init(sourceTensor); init(sourceTensor);
} }
@ -97,7 +97,7 @@ namespace atrip {
, child_world , child_world
, global_world , global_world
, Slice<F>::VIJKA , Slice<F>::VIJKA
, 4) { , 6) {
init(sourceTensor); init(sourceTensor);
} }

View File

@ -1,4 +1,4 @@
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Utils][Utils:1]] // [[file:~/cc4s/src/atrip/complex/atrip.org::*Prolog][Prolog:1]]
#pragma once #pragma once
#include <sstream> #include <sstream>
#include <string> #include <string>
@ -6,32 +6,41 @@
#include <chrono> #include <chrono>
#include <ctf.hpp> #include <ctf.hpp>
#include <atrip/Debug.hpp>
namespace atrip { namespace atrip {
// Prolog:1 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Pretty%20printing][Pretty printing:1]]
template <typename T> template <typename T>
std::string pretty_print(T&& value) { std::string pretty_print(T&& value) {
std::stringstream stream; std::stringstream stream;
#if ATRIP_DEBUG > 1 #if ATRIP_DEBUG > 2
dbg::pretty_print(stream, std::forward<T>(value)); dbg::pretty_print(stream, std::forward<T>(value));
#endif #endif
return stream.str(); return stream.str();
} }
// Pretty printing:1 ends here
#define WITH_CHRONO(__chrono, ...) \ // [[file:~/cc4s/src/atrip/complex/atrip.org::*Chrono][Chrono:1]]
__chrono.start(); __VA_ARGS__ __chrono.stop(); #define WITH_CHRONO(__chrono_name, ...) \
Atrip::chrono[__chrono_name].start(); \
__VA_ARGS__ \
Atrip::chrono[__chrono_name].stop();
struct Timer { struct Timer {
using Clock = std::chrono::high_resolution_clock; using Clock = std::chrono::high_resolution_clock;
using Event = std::chrono::time_point<Clock>; using Event = std::chrono::time_point<Clock>;
std::chrono::duration<double> duration; std::chrono::duration<double> duration;
Event _start; Event _start;
inline void start() noexcept { _start = Clock::now(); } inline void start() noexcept { _start = Clock::now(); }
inline void stop() noexcept { duration += Clock::now() - _start; } inline void stop() noexcept { duration += Clock::now() - _start; }
inline void clear() noexcept { duration *= 0; } inline void clear() noexcept { duration *= 0; }
inline double count() const noexcept { return duration.count(); } inline double count() const noexcept { return duration.count(); }
}; };
using Timings = std::map<std::string, Timer>; using Timings = std::map<std::string, Timer>;
// Chrono:1 ends here
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Epilog][Epilog:1]]
} }
// Utils:1 ends here // Epilog:1 ends here

View File

@ -9,8 +9,11 @@
using namespace atrip; using namespace atrip;
bool RankMap<Complex>::RANK_ROUND_ROBIN;
bool RankMap<double>::RANK_ROUND_ROBIN;
int Atrip::rank; int Atrip::rank;
int Atrip::np; int Atrip::np;
Timings Atrip::chrono;
// user printing block // user printing block
IterationDescriptor IterationDescription::descriptor; IterationDescriptor IterationDescription::descriptor;
@ -30,28 +33,35 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
const int rank = Atrip::rank; const int rank = Atrip::rank;
MPI_Comm universe = in.ei->wrld->comm; MPI_Comm universe = in.ei->wrld->comm;
// Timings in seconds ================================================{{{1
Timings chrono{};
const size_t No = in.ei->lens[0]; const size_t No = in.ei->lens[0];
const size_t Nv = in.ea->lens[0]; const size_t Nv = in.ea->lens[0];
LOG(0,"Atrip") << "No: " << No << "\n"; LOG(0,"Atrip") << "No: " << No << "\n";
LOG(0,"Atrip") << "Nv: " << Nv << "\n"; LOG(0,"Atrip") << "Nv: " << Nv << "\n";
LOG(0,"Atrip") << "np: " << np << "\n";
// allocate the three scratches, see piecuch // allocate the three scratches, see piecuch
std::vector<F> Tijk(No*No*No) // doubles only (see piecuch) std::vector<F> Tijk(No*No*No) // doubles only (see piecuch)
, Zijk(No*No*No) // singles + doubles (see piecuch) , Zijk(No*No*No) // singles + doubles (see piecuch)
// we need local copies of the following tensors on every // we need local copies of the following tensors on every
// rank // rank
, epsi(No) , epsi(No)
, epsa(Nv) , epsa(Nv)
, Tai(No * Nv) , Tai(No * Nv)
; ;
in.ei->read_all(epsi.data()); in.ei->read_all(epsi.data());
in.ea->read_all(epsa.data()); in.ea->read_all(epsa.data());
in.Tph->read_all(Tai.data()); in.Tph->read_all(Tai.data());
RankMap<F>::RANK_ROUND_ROBIN = in.rankRoundRobin;
if (RankMap<F>::RANK_ROUND_ROBIN) {
LOG(0,"Atrip") << "Doing rank round robin slices distribution" << "\n";
} else {
LOG(0,"Atrip")
<< "Doing node > local rank round robin slices distribution" << "\n";
}
// COMMUNICATOR CONSTRUCTION ========================================={{{1 // COMMUNICATOR CONSTRUCTION ========================================={{{1
// //
// Construct a new communicator living only on a single rank // Construct a new communicator living only on a single rank
@ -72,41 +82,49 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
} }
chrono["nv-slices"].start();
// BUILD SLICES PARAMETRIZED BY NV ==================================={{{1 // BUILD SLICES PARAMETRIZED BY NV ==================================={{{1
LOG(0,"Atrip") << "BUILD NV-SLICES\n"; WITH_CHRONO("nv-slices",
TAPHH<F> taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); LOG(0,"Atrip") << "BUILD NV-SLICES\n";
HHHA<F> hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); TAPHH<F> taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
chrono["nv-slices"].stop(); HHHA<F> hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
)
chrono["nv-nv-slices"].start();
// BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1 // BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1
LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n"; WITH_CHRONO("nv-nv-slices",
ABPH<F> abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n";
ABHH<F> abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); ABPH<F> abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
TABHH<F> tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); ABHH<F> abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
chrono["nv-nv-slices"].stop(); TABHH<F> tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
)
// all tensors // all tensors
std::vector< SliceUnion<F>* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh}; std::vector< SliceUnion<F>* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh};
//CONSTRUCT TUPLE LIST ==============================================={{{1 // get tuples for the current rank
LOG(0,"Atrip") << "BUILD TUPLE LIST\n"; TuplesDistribution *distribution;
const auto tuplesList = std::move(getTuplesList(Nv));
WITH_RANK << "tupList.size() = " << tuplesList.size() << "\n";
// GET ABC INDEX RANGE FOR RANK ======================================{{{1 if (in.tuplesDistribution == Atrip::Input<F>::TuplesDistribution::NAIVE) {
auto abcIndex = getABCRange(np, rank, tuplesList); LOG(0,"Atrip") << "Using the naive distribution\n";
size_t nIterations = abcIndex.second - abcIndex.first; distribution = new NaiveDistribution();
} else {
LOG(0,"Atrip") << "Using the group-and-sort distribution\n";
distribution = new group_and_sort::Distribution();
}
WITH_RANK << "abcIndex = " << pretty_print(abcIndex) << "\n"; LOG(0,"Atrip") << "BUILDING TUPLE LIST\n";
LOG(0,"Atrip") << "#iterations: " << nIterations << "\n"; WITH_CHRONO("tuples:build",
auto const tuplesList = distribution->getTuples(Nv, universe);
)
const size_t nIterations = tuplesList.size();
// first abc {
const ABCTuple firstAbc = tuplesList[abcIndex.first]; const size_t _all_tuples = Nv * (Nv + 1) * (Nv + 2) / 6 - Nv;
LOG(0,"Atrip") << "#iterations: "
<< nIterations
double energy(0.); << "/"
<< nIterations * np
<< "\n";
}
const size_t const size_t
iterationMod = (in.percentageMod > 0) iterationMod = (in.percentageMod > 0)
@ -119,7 +137,9 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
auto const isFakeTuple auto const isFakeTuple
= [&tuplesList](size_t const i) { return i >= tuplesList.size(); }; = [&tuplesList, distribution](size_t const i) {
return distribution->tupleIsFake(tuplesList[i]);
};
using Database = typename Slice<F>::Database; using Database = typename Slice<F>::Database;
@ -127,45 +147,42 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
auto communicateDatabase auto communicateDatabase
= [ &unions = [ &unions
, np , np
, &chrono
] (ABCTuple const& abc, MPI_Comm const& c) -> Database { ] (ABCTuple const& abc, MPI_Comm const& c) -> Database {
chrono["db:comm:type:do"].start(); WITH_CHRONO("db:comm:type:do",
auto MPI_LDB_ELEMENT = Slice<F>::mpi::localDatabaseElement(); auto MPI_LDB_ELEMENT = Slice<F>::mpi::localDatabaseElement();
chrono["db:comm:type:do"].stop(); )
chrono["db:comm:ldb"].start(); WITH_CHRONO("db:comm:ldb",
LocalDatabase ldb; typename Slice<F>::LocalDatabase ldb;
for (auto const& tensor: unions) {
for (auto const& tensor: unions) { auto const& tensorDb = tensor->buildLocalDatabase(abc);
auto const& tensorDb = tensor->buildLocalDatabase(abc); ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end());
ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end()); }
} )
chrono["db:comm:ldb"].stop();
Database db(np * ldb.size(), ldb[0]); Database db(np * ldb.size(), ldb[0]);
chrono["oneshot-db:comm:allgather"].start(); WITH_CHRONO("oneshot-db:comm:allgather",
chrono["db:comm:allgather"].start(); WITH_CHRONO("db:comm:allgather",
MPI_Allgather( ldb.data() MPI_Allgather( ldb.data()
, ldb.size() , ldb.size()
, MPI_LDB_ELEMENT , MPI_LDB_ELEMENT
, db.data() , db.data()
, ldb.size() , ldb.size()
, MPI_LDB_ELEMENT , MPI_LDB_ELEMENT
, c); , c);
chrono["db:comm:allgather"].stop(); ))
chrono["oneshot-db:comm:allgather"].stop();
chrono["db:comm:type:free"].start(); WITH_CHRONO("db:comm:type:free",
MPI_Type_free(&MPI_LDB_ELEMENT); MPI_Type_free(&MPI_LDB_ELEMENT);
chrono["db:comm:type:free"].stop(); )
return db; return db;
}; };
auto doIOPhase auto doIOPhase
= [&unions, &rank, &np, &universe, &chrono] (Database const& db) { = [&unions, &rank, &np, &universe] (Database const& db) {
const size_t localDBLength = db.size() / np; const size_t localDBLength = db.size() / np;
@ -201,9 +218,9 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
<< "\n" << "\n"
; ;
chrono["db:io:recv"].start(); WITH_CHRONO("db:io:recv",
u.receive(el.info, recvTag); u.receive(el.info, recvTag);
chrono["db:io:recv"].stop(); )
} // recv } // recv
} }
@ -237,9 +254,9 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
<< "\n" << "\n"
; ;
chrono["db:io:send"].start(); WITH_CHRONO("db:io:send",
u.send(otherRank, el.info, sendTag); u.send(otherRank, el, sendTag);
chrono["db:io:send"].stop(); )
} // send phase } // send phase
@ -257,31 +274,30 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
* double(No) * double(No)
* double(No) * double(No)
* (double(No) + double(Nv)) * (double(No) + double(Nv))
* 2 * 2.0
* 6 * (traits::isComplex<F>() ? 2.0 : 1.0)
* 6.0
/ 1e9 / 1e9
; ;
// START MAIN LOOP ======================================================{{{1 // START MAIN LOOP ======================================================{{{1
for ( size_t i = abcIndex.first, iteration = 1 double energy(0.);
; i < abcIndex.second
for ( size_t i = 0, iteration = 1
; i < tuplesList.size()
; i++, iteration++ ; i++, iteration++
) { ) {
chrono["iterations"].start(); Atrip::chrono["iterations"].start();
// check overhead from chrono over all iterations // check overhead from chrono over all iterations
chrono["start:stop"].start(); chrono["start:stop"].stop(); WITH_CHRONO("start:stop", {})
// check overhead of doing a barrier at the beginning // check overhead of doing a barrier at the beginning
chrono["oneshot-mpi:barrier"].start(); WITH_CHRONO("oneshot-mpi:barrier",
chrono["mpi:barrier"].start(); WITH_CHRONO("mpi:barrier",
// TODO: REMOVE if (in.barrier) MPI_Barrier(universe);
if (in.barrier == 1) ))
MPI_Barrier(universe);
chrono["mpi:barrier"].stop();
chrono["oneshot-mpi:barrier"].stop();
if (iteration % iterationMod == 0 || iteration == iteration1Percent) { if (iteration % iterationMod == 0 || iteration == iteration1Percent) {
@ -289,22 +305,22 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
IterationDescription::descriptor({ IterationDescription::descriptor({
iteration, iteration,
nIterations, nIterations,
chrono["iterations"].count() Atrip::chrono["iterations"].count()
}); });
} }
LOG(0,"Atrip") LOG(0,"Atrip")
<< "iteration " << iteration << "iteration " << iteration
<< " [" << 100 * iteration / nIterations << "%]" << " [" << 100 * iteration / nIterations << "%]"
<< " (" << doublesFlops * iteration / chrono["doubles"].count() << " (" << doublesFlops * iteration / Atrip::chrono["doubles"].count()
<< "GF)" << "GF)"
<< " (" << doublesFlops * iteration / chrono["iterations"].count() << " (" << doublesFlops * iteration / Atrip::chrono["iterations"].count()
<< "GF)" << "GF)"
<< " ===========================\n"; << " ===========================\n";
// PRINT TIMINGS // PRINT TIMINGS
if (in.chrono) if (in.chrono)
for (auto const& pair: chrono) for (auto const& pair: Atrip::chrono)
LOG(1, " ") << pair.first << " :: " LOG(1, " ") << pair.first << " :: "
<< pair.second.count() << pair.second.count()
<< std::endl; << std::endl;
@ -314,46 +330,43 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
const ABCTuple abc = isFakeTuple(i) const ABCTuple abc = isFakeTuple(i)
? tuplesList[tuplesList.size() - 1] ? tuplesList[tuplesList.size() - 1]
: tuplesList[i] : tuplesList[i]
, *abcNext = i == (abcIndex.second - 1) , *abcNext = i == (tuplesList.size() - 1)
? nullptr ? nullptr
: isFakeTuple(i + 1)
? &tuplesList[tuplesList.size() - 1]
: &tuplesList[i + 1] : &tuplesList[i + 1]
; ;
chrono["with_rank"].start(); WITH_CHRONO("with_rank",
WITH_RANK << " :it " << iteration WITH_RANK << " :it " << iteration
<< " :abc " << pretty_print(abc) << " :abc " << pretty_print(abc)
<< " :abcN " << " :abcN "
<< (abcNext ? pretty_print(*abcNext) : "None") << (abcNext ? pretty_print(*abcNext) : "None")
<< "\n"; << "\n";
chrono["with_rank"].stop(); )
// COMM FIRST DATABASE ================================================{{{1 // COMM FIRST DATABASE ================================================{{{1
if (i == abcIndex.first) { if (i == 0) {
WITH_RANK << "__first__:first database ............ \n"; WITH_RANK << "__first__:first database ............ \n";
const auto __db = communicateDatabase(abc, universe); const auto db = communicateDatabase(abc, universe);
WITH_RANK << "__first__:first database communicated \n"; WITH_RANK << "__first__:first database communicated \n";
WITH_RANK << "__first__:first database io phase \n"; WITH_RANK << "__first__:first database io phase \n";
doIOPhase(__db); doIOPhase(db);
WITH_RANK << "__first__:first database io phase DONE\n"; WITH_RANK << "__first__:first database io phase DONE\n";
WITH_RANK << "__first__::::Unwrapping all slices for first database\n"; WITH_RANK << "__first__::::Unwrapping all slices for first database\n";
for (auto& u: unions) u->unwrapAll(abc); for (auto& u: unions) u->unwrapAll(abc);
WITH_RANK << "__first__::::Unwrapping all slices for first database DONE\n"; WITH_RANK << "__first__::::Unwrapping slices for first database DONE\n";
MPI_Barrier(universe); MPI_Barrier(universe);
} }
// COMM NEXT DATABASE ================================================={{{1 // COMM NEXT DATABASE ================================================={{{1
if (abcNext) { if (abcNext) {
WITH_RANK << "__comm__:" << iteration << "th communicating database\n"; WITH_RANK << "__comm__:" << iteration << "th communicating database\n";
chrono["db:comm"].start(); WITH_CHRONO("db:comm",
//const auto db = communicateDatabase(*abcNext, universe); const auto db = communicateDatabase(*abcNext, universe);
Database db = communicateDatabase(*abcNext, universe); )
chrono["db:comm"].stop(); WITH_CHRONO("db:io",
chrono["db:io"].start(); doIOPhase(db);
doIOPhase(db); )
chrono["db:io"].stop();
WITH_RANK << "__comm__:" << iteration << "th database io phase DONE\n"; WITH_RANK << "__comm__:" << iteration << "th database io phase DONE\n";
} }
@ -361,63 +374,61 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
OCD_Barrier(universe); OCD_Barrier(universe);
if (!isFakeTuple(i)) { if (!isFakeTuple(i)) {
WITH_RANK << iteration << "-th doubles\n"; WITH_RANK << iteration << "-th doubles\n";
WITH_CHRONO(chrono["oneshot-unwrap"], WITH_CHRONO("oneshot-unwrap",
WITH_CHRONO(chrono["unwrap"], WITH_CHRONO("unwrap",
WITH_CHRONO(chrono["unwrap:doubles"], WITH_CHRONO("unwrap:doubles",
for (auto& u: decltype(unions){&abph, &hhha, &taphh, &tabhh}) { for (auto& u: decltype(unions){&abph, &hhha, &taphh, &tabhh}) {
u->unwrapAll(abc); u->unwrapAll(abc);
} }
))) )))
chrono["oneshot-doubles"].start(); WITH_CHRONO("oneshot-doubles",
chrono["doubles"].start(); WITH_CHRONO("doubles",
doublesContribution<F>( abc, (size_t)No, (size_t)Nv doublesContribution<F>( abc, (size_t)No, (size_t)Nv
// -- VABCI // -- VABCI
, abph.unwrapSlice(Slice<F>::AB, abc) , abph.unwrapSlice(Slice<F>::AB, abc)
, abph.unwrapSlice(Slice<F>::AC, abc) , abph.unwrapSlice(Slice<F>::AC, abc)
, abph.unwrapSlice(Slice<F>::BC, abc) , abph.unwrapSlice(Slice<F>::BC, abc)
, abph.unwrapSlice(Slice<F>::BA, abc) , abph.unwrapSlice(Slice<F>::BA, abc)
, abph.unwrapSlice(Slice<F>::CA, abc) , abph.unwrapSlice(Slice<F>::CA, abc)
, abph.unwrapSlice(Slice<F>::CB, abc) , abph.unwrapSlice(Slice<F>::CB, abc)
// -- VHHHA // -- VHHHA
, hhha.unwrapSlice(Slice<F>::A, abc) , hhha.unwrapSlice(Slice<F>::A, abc)
, hhha.unwrapSlice(Slice<F>::B, abc) , hhha.unwrapSlice(Slice<F>::B, abc)
, hhha.unwrapSlice(Slice<F>::C, abc) , hhha.unwrapSlice(Slice<F>::C, abc)
// -- TA // -- TA
, taphh.unwrapSlice(Slice<F>::A, abc) , taphh.unwrapSlice(Slice<F>::A, abc)
, taphh.unwrapSlice(Slice<F>::B, abc) , taphh.unwrapSlice(Slice<F>::B, abc)
, taphh.unwrapSlice(Slice<F>::C, abc) , taphh.unwrapSlice(Slice<F>::C, abc)
// -- TABIJ // -- TABIJ
, tabhh.unwrapSlice(Slice<F>::AB, abc) , tabhh.unwrapSlice(Slice<F>::AB, abc)
, tabhh.unwrapSlice(Slice<F>::AC, abc) , tabhh.unwrapSlice(Slice<F>::AC, abc)
, tabhh.unwrapSlice(Slice<F>::BC, abc) , tabhh.unwrapSlice(Slice<F>::BC, abc)
// -- TIJK // -- TIJK
, Tijk.data() , Tijk.data()
, chrono );
); WITH_RANK << iteration << "-th doubles done\n";
WITH_RANK << iteration << "-th doubles done\n"; ))
chrono["doubles"].stop();
chrono["oneshot-doubles"].stop();
} }
// COMPUTE SINGLES =================================================== {{{1 // COMPUTE SINGLES =================================================== {{{1
OCD_Barrier(universe); OCD_Barrier(universe);
if (!isFakeTuple(i)) { if (!isFakeTuple(i)) {
WITH_CHRONO(chrono["oneshot-unwrap"], WITH_CHRONO("oneshot-unwrap",
WITH_CHRONO(chrono["unwrap"], WITH_CHRONO("unwrap",
WITH_CHRONO(chrono["unwrap:singles"], WITH_CHRONO("unwrap:singles",
abhh.unwrapAll(abc); abhh.unwrapAll(abc);
))) )))
chrono["reorder"].start(); WITH_CHRONO("reorder",
for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I]; for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
chrono["reorder"].stop(); )
chrono["singles"].start(); WITH_CHRONO("singles",
singlesContribution<F>( No, Nv, abc singlesContribution<F>( No, Nv, abc
, Tai.data() , Tai.data()
, abhh.unwrapSlice(Slice<F>::AB, abc) , abhh.unwrapSlice(Slice<F>::AB, abc)
, abhh.unwrapSlice(Slice<F>::AC, abc) , abhh.unwrapSlice(Slice<F>::AC, abc)
, abhh.unwrapSlice(Slice<F>::BC, abc) , abhh.unwrapSlice(Slice<F>::BC, abc)
, Zijk.data()); , Zijk.data());
chrono["singles"].stop(); )
} }
@ -430,12 +441,12 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
if (abc[1] == abc[2]) distinct--; if (abc[1] == abc[2]) distinct--;
const F epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]); const F epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]);
chrono["energy"].start(); WITH_CHRONO("energy",
if ( distinct == 0) if ( distinct == 0)
tupleEnergy = getEnergyDistinct<F>(epsabc, epsi, Tijk, Zijk); tupleEnergy = getEnergyDistinct<F>(epsabc, epsi, Tijk, Zijk);
else else
tupleEnergy = getEnergySame<F>(epsabc, epsi, Tijk, Zijk); tupleEnergy = getEnergySame<F>(epsabc, epsi, Tijk, Zijk);
chrono["energy"].stop(); )
#if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES) #if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES)
tupleEnergies[abc] = tupleEnergy; tupleEnergies[abc] = tupleEnergy;
@ -445,6 +456,7 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
} }
// TODO: remove this
if (isFakeTuple(i)) { if (isFakeTuple(i)) {
// fake iterations should also unwrap whatever they got // fake iterations should also unwrap whatever they got
WITH_RANK << iteration WITH_RANK << iteration
@ -466,7 +478,6 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
// CLEANUP UNIONS ===================================================={{{1 // CLEANUP UNIONS ===================================================={{{1
OCD_Barrier(universe); OCD_Barrier(universe);
if (abcNext) { if (abcNext) {
chrono["gc"].start();
WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n"; WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n";
for (auto& u: unions) { for (auto& u: unions) {
@ -500,12 +511,11 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
} }
chrono["gc"].stop();
} }
WITH_RANK << iteration << "-th cleaning up....... DONE\n"; WITH_RANK << iteration << "-th cleaning up....... DONE\n";
chrono["iterations"].stop(); Atrip::chrono["iterations"].stop();
// ITERATION END ====================================================={{{1 // ITERATION END ====================================================={{{1
} }
@ -543,15 +553,15 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
// PRINT TIMINGS {{{1 // PRINT TIMINGS {{{1
if (in.chrono) if (in.chrono)
for (auto const& pair: chrono) for (auto const& pair: Atrip::chrono)
LOG(0,"atrip:chrono") << pair.first << " " LOG(0,"atrip:chrono") << pair.first << " "
<< pair.second.count() << std::endl; << pair.second.count() << std::endl;
LOG(0, "atrip:flops(doubles)") LOG(0, "atrip:flops(doubles)")
<< nIterations * doublesFlops / chrono["doubles"].count() << "\n"; << nIterations * doublesFlops / Atrip::chrono["doubles"].count() << "\n";
LOG(0, "atrip:flops(iterations)") LOG(0, "atrip:flops(iterations)")
<< nIterations * doublesFlops / chrono["iterations"].count() << "\n"; << nIterations * doublesFlops / Atrip::chrono["iterations"].count() << "\n";
// TODO: change the sign in the getEnergy routines // TODO: change the sign in the getEnergy routines
return { - globalEnergy }; return { - globalEnergy };