Update all chronos to use the static chrono

This commit is contained in:
Alejandro Gallo 2021-10-21 15:25:01 +02:00
parent 2823fa3699
commit 79a3f99cb3
13 changed files with 553 additions and 571 deletions

503
atrip.org
View File

@ -748,10 +748,10 @@ The chrono is just a simple wrapper for a high resolution clock
that can be found in the =std::chrono= namespace of the standard library. that can be found in the =std::chrono= namespace of the standard library.
#+begin_src c++ :tangle (atrip-utils-h) #+begin_src c++ :tangle (atrip-utils-h)
#define WITH_CHRONO(__chrono, ...) \ #define WITH_CHRONO(__chrono_name, ...) \
__chrono.start(); \ Atrip::chrono[__chrono_name].start(); \
__VA_ARGS__ \ __VA_ARGS__ \
__chrono.stop(); Atrip::chrono[__chrono_name].stop();
struct Timer { struct Timer {
using Clock = std::chrono::high_resolution_clock; using Clock = std::chrono::high_resolution_clock;
@ -1549,12 +1549,18 @@ struct NaiveDistribution : public TuplesDistribution {
WITH_RANK << "range = " WITH_RANK << "range = "
<< range.first << " -> " << range.second << range.first << " -> " << range.second
<< std::endl; << std::endl;
std::vector<ABCTuple> result(range.second - range.first + 1, FAKE_TUPLE);
WITH_RANK << "number of global tuples = " << all.size() << std::endl;
WITH_RANK << "number of local tuples = " << result.size() << std::endl;
std::vector<ABCTuple> result(range.second - range.first, FAKE_TUPLE); std::copy(range.first >= all.size()
std::copy(all.begin() + range.first, ? all.end()
: all.begin() + range.first,
// --
range.second >= all.size() range.second >= all.size()
? all.end() ? all.end()
: all.begin() + range.first + range.second, : all.begin() + range.first + range.second,
// --
result.begin()); result.begin());
return result; return result;
} }
@ -2467,54 +2473,51 @@ namespace atrip {
, double const* TBChh , double const* TBChh
// -- TIJK // -- TIJK
, double *Tijk , double *Tijk
, atrip::Timings& chrono
) { ) {
auto& t_reorder = chrono["doubles:reorder"];
const size_t a = abc[0], b = abc[1], c = abc[2] const size_t a = abc[0], b = abc[1], c = abc[2]
, NoNo = No*No, NoNv = No*Nv , NoNo = No*No, NoNv = No*Nv
; ;
#if defined(ATRIP_USE_DGEMM) #if defined(ATRIP_USE_DGEMM)
#define _IJK_(i, j, k) i + j*No + k*NoNo #define _IJK_(i, j, k) i + j*No + k*NoNo
#define REORDER(__II, __JJ, __KK) \ #define REORDER(__II, __JJ, __KK) \
t_reorder.start(); \ WITH_CHRONO("double:reorder", \
for (size_t k = 0; k < No; k++) \ for (size_t k = 0; k < No; k++) \
for (size_t j = 0; j < No; j++) \ for (size_t j = 0; j < No; j++) \
for (size_t i = 0; i < No; i++) { \ for (size_t i = 0; i < No; i++) { \
Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \ Tijk[_IJK_(i, j, k)] \
} \ += _t_buffer[_IJK_(__II, __JJ, __KK)]; \
t_reorder.stop(); } \
#define DGEMM_PARTICLES(__A, __B) \ )
atrip::dgemm_( "T" \ #define DGEMM_PARTICLES(__A, __B) \
, "N" \ atrip::dgemm_("T", \
, (int const*)&NoNo \ "N", \
, (int const*)&No \ (int const*)&NoNo, \
, (int const*)&Nv \ (int const*)&No, \
, &one \ (int const*)&Nv, \
, __A \ &one, \
, (int const*)&Nv \ __A, \
, __B \ (int const*)&Nv, \
, (int const*)&Nv \ __B, \
, &zero \ (int const*)&Nv, \
, _t_buffer.data() \ &zero, \
, (int const*)&NoNo \ _t_buffer.data(), \
); (int const*)&NoNo);
#define DGEMM_HOLES(__A, __B, __TRANSB) \ #define DGEMM_HOLES(__A, __B, __TRANSB) \
atrip::dgemm_( "N" \ atrip::dgemm_("N", \
, __TRANSB \ __TRANSB, \
, (int const*)&NoNo \ (int const*)&NoNo, \
, (int const*)&No \ (int const*)&No, \
, (int const*)&No \ (int const*)&No, \
, &m_one \ &m_one, \
, __A \ __A, \
, (int const*)&NoNo \ (int const*)&NoNo, \
, __B \ __B, \
, (int const*)&No \ (int const*)&No, \
, &zero \ &zero, \
, _t_buffer.data() \ _t_buffer.data(), \
, (int const*)&NoNo \ (int const*)&NoNo);
);
using F = double; using F = double;
const size_t NoNoNo = No*NoNo; const size_t NoNoNo = No*NoNo;
@ -2522,88 +2525,86 @@ namespace atrip {
_t_buffer.reserve(NoNoNo); _t_buffer.reserve(NoNoNo);
F one{1.0}, m_one{-1.0}, zero{0.0}; F one{1.0}, m_one{-1.0}, zero{0.0};
t_reorder.start(); WITH_CHRONO("double:reorder",
for (size_t k = 0; k < NoNoNo; k++) { for (size_t k = 0; k < NoNoNo; k++) {
// zero the Tijk Tijk[k] = 0.0;
Tijk[k] = 0.0; })
}
t_reorder.stop();
chrono["doubles:holes"].start(); WITH_CHRONO("doubles:holes",
{ // Holes part ============================================================ { // Holes part ================================================
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1 // VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
chrono["doubles:holes:1"].start(); WITH_CHRONO("doubles:holes:1",
DGEMM_HOLES(VhhhC, TABhh, "N") DGEMM_HOLES(VhhhC, TABhh, "N")
REORDER(i, k, j) REORDER(i, k, j)
chrono["doubles:holes:1"].stop(); )
// VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0 // VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0
chrono["doubles:holes:2"].start(); WITH_CHRONO("doubles:holes:2",
DGEMM_HOLES(VhhhC, TABhh, "T") DGEMM_HOLES(VhhhC, TABhh, "T")
REORDER(j, k, i) REORDER(j, k, i)
chrono["doubles:holes:2"].stop(); )
// VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5 // VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5
chrono["doubles:holes:3"].start(); WITH_CHRONO("doubles:holes:3",
DGEMM_HOLES(VhhhB, TAChh, "N") DGEMM_HOLES(VhhhB, TAChh, "N")
REORDER(i, j, k) REORDER(i, j, k)
chrono["doubles:holes:3"].stop(); )
// VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3 // VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3
chrono["doubles:holes:4"].start(); WITH_CHRONO("doubles:holes:4",
DGEMM_HOLES(VhhhB, TAChh, "T") DGEMM_HOLES(VhhhB, TAChh, "T")
REORDER(k, j, i) REORDER(k, j, i)
chrono["doubles:holes:4"].stop(); )
// VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1 // VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1
chrono["doubles:holes:5"].start(); WITH_CHRONO("doubles:holes:5",
DGEMM_HOLES(VhhhA, TBChh, "N") DGEMM_HOLES(VhhhA, TBChh, "N")
REORDER(j, i, k) REORDER(j, i, k)
chrono["doubles:holes:5"].stop(); )
// VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4 // VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4
chrono["doubles:holes:6"].start(); WITH_CHRONO("doubles:holes:6",
DGEMM_HOLES(VhhhA, TBChh, "T") DGEMM_HOLES(VhhhA, TBChh, "T")
REORDER(k, i, j) REORDER(k, i, j)
chrono["doubles:holes:6"].stop(); )
} }
chrono["doubles:holes"].stop(); )
chrono["doubles:particles"].start(); WITH_CHRONO("doubles:particles",
{ // Particle part ========================================================= { // Particle part ===========================================
// TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0 // TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0
chrono["doubles:particles:1"].start(); WITH_CHRONO("doubles:particles:1",
DGEMM_PARTICLES(TAphh, VBCph) DGEMM_PARTICLES(TAphh, VBCph)
REORDER(i, j, k) REORDER(i, j, k)
chrono["doubles:particles:1"].stop(); )
// TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3 // TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3
chrono["doubles:particles:2"].start(); WITH_CHRONO("doubles:particles:2",
DGEMM_PARTICLES(TAphh, VCBph) DGEMM_PARTICLES(TAphh, VCBph)
REORDER(i, k, j) REORDER(i, k, j)
chrono["doubles:particles:2"].stop(); )
// TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5 // TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5
chrono["doubles:particles:3"].start(); WITH_CHRONO("doubles:particles:3",
DGEMM_PARTICLES(TCphh, VABph) DGEMM_PARTICLES(TCphh, VABph)
REORDER(k, i, j) REORDER(k, i, j)
chrono["doubles:particles:3"].stop(); )
// TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2 // TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2
chrono["doubles:particles:4"].start(); WITH_CHRONO("doubles:particles:4",
DGEMM_PARTICLES(TCphh, VBAph) DGEMM_PARTICLES(TCphh, VBAph)
REORDER(k, j, i) REORDER(k, j, i)
chrono["doubles:particles:4"].stop(); )
// TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1 // TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1
chrono["doubles:particles:5"].start(); WITH_CHRONO("doubles:particles:5",
DGEMM_PARTICLES(TBphh, VACph) DGEMM_PARTICLES(TBphh, VACph)
REORDER(j, i, k) REORDER(j, i, k)
chrono["doubles:particles:5"].stop(); )
// TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4 // TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4
chrono["doubles:particles:6"].start(); WITH_CHRONO("doubles:particles:6",
DGEMM_PARTICLES(TBphh, VCAph) DGEMM_PARTICLES(TBphh, VCAph)
REORDER(j, k, i) REORDER(j, k, i)
chrono["doubles:particles:6"].stop(); )
} }
chrono["doubles:particles"].stop(); )
#undef REORDER #undef REORDER
#undef DGEMM_HOLES #undef DGEMM_HOLES
#undef DGEMM_PARTICLES #undef DGEMM_PARTICLES
#undef _IJK_ #undef _IJK_
#else #else
for (size_t k = 0; k < No; k++) for (size_t k = 0; k < No; k++)
for (size_t j = 0; j < No; j++) for (size_t j = 0; j < No; j++)
for (size_t i = 0; i < No; i++){ for (size_t i = 0; i < No; i++){
@ -2647,7 +2648,7 @@ namespace atrip {
} }
} }
#endif #endif
} }
} }
@ -2777,9 +2778,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
const int rank = Atrip::rank; const int rank = Atrip::rank;
MPI_Comm universe = in.ei->wrld->comm; MPI_Comm universe = in.ei->wrld->comm;
// Timings in seconds ================================================{{{1
Timings chrono{};
const size_t No = in.ei->lens[0]; const size_t No = in.ei->lens[0];
const size_t Nv = in.ea->lens[0]; const size_t Nv = in.ea->lens[0];
LOG(0,"Atrip") << "No: " << No << "\n"; LOG(0,"Atrip") << "No: " << No << "\n";
@ -2819,20 +2817,20 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
} }
chrono["nv-slices"].start();
// BUILD SLICES PARAMETRIZED BY NV ==================================={{{1 // BUILD SLICES PARAMETRIZED BY NV ==================================={{{1
LOG(0,"Atrip") << "BUILD NV-SLICES\n"; WITH_CHRONO("nv-slices",
TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); LOG(0,"Atrip") << "BUILD NV-SLICES\n";
HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
chrono["nv-slices"].stop(); HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
)
chrono["nv-nv-slices"].start();
// BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1 // BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1
LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n"; WITH_CHRONO("nv-nv-slices",
ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n";
ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
chrono["nv-nv-slices"].stop(); TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
)
// all tensors // all tensors
std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh}; std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh};
@ -2849,7 +2847,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
} }
LOG(0,"Atrip") << "BUILDING TUPLE LIST\n"; LOG(0,"Atrip") << "BUILDING TUPLE LIST\n";
WITH_CHRONO(chrono["tuples:build"], WITH_CHRONO("tuples:build",
auto const tuplesList = distribution->getTuples(Nv, universe); auto const tuplesList = distribution->getTuples(Nv, universe);
) )
size_t nIterations = tuplesList.size(); size_t nIterations = tuplesList.size();
@ -2872,45 +2870,42 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
auto communicateDatabase auto communicateDatabase
= [ &unions = [ &unions
, np , np
, &chrono
] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database { ] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database {
chrono["db:comm:type:do"].start(); WITH_CHRONO("db:comm:type:do",
auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement(); auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement();
chrono["db:comm:type:do"].stop(); )
chrono["db:comm:ldb"].start(); WITH_CHRONO("db:comm:ldb",
Slice::LocalDatabase ldb; Slice::LocalDatabase ldb;
for (auto const& tensor: unions) {
for (auto const& tensor: unions) { auto const& tensorDb = tensor->buildLocalDatabase(abc);
auto const& tensorDb = tensor->buildLocalDatabase(abc); ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end());
ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end()); }
} )
chrono["db:comm:ldb"].stop();
Slice::Database db(np * ldb.size(), ldb[0]); Slice::Database db(np * ldb.size(), ldb[0]);
chrono["oneshot-db:comm:allgather"].start(); WITH_CHRONO("oneshot-db:comm:allgather",
chrono["db:comm:allgather"].start(); WITH_CHRONO("db:comm:allgather",
MPI_Allgather( ldb.data() MPI_Allgather( ldb.data()
, ldb.size() , ldb.size()
, MPI_LDB_ELEMENT , MPI_LDB_ELEMENT
, db.data() , db.data()
, ldb.size() , ldb.size()
, MPI_LDB_ELEMENT , MPI_LDB_ELEMENT
, c); , c);
chrono["db:comm:allgather"].stop(); ))
chrono["oneshot-db:comm:allgather"].stop();
chrono["db:comm:type:free"].start(); WITH_CHRONO("db:comm:type:free",
MPI_Type_free(&MPI_LDB_ELEMENT); MPI_Type_free(&MPI_LDB_ELEMENT);
chrono["db:comm:type:free"].stop(); )
return db; return db;
}; };
auto doIOPhase auto doIOPhase
= [&unions, &rank, &np, &universe, &chrono] (Slice::Database const& db) { = [&unions, &rank, &np, &universe] (Slice::Database const& db) {
const size_t localDBLength = db.size() / np; const size_t localDBLength = db.size() / np;
@ -2946,9 +2941,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
<< "\n" << "\n"
; ;
chrono["db:io:recv"].start(); WITH_CHRONO("db:io:recv",
u.receive(el.info, recvTag); u.receive(el.info, recvTag);
chrono["db:io:recv"].stop(); )
} // recv } // recv
} }
@ -2982,9 +2977,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
<< "\n" << "\n"
; ;
chrono["db:io:send"].start(); WITH_CHRONO("db:io:send",
u.send(otherRank, el.info, sendTag); u.send(otherRank, el.info, sendTag);
chrono["db:io:send"].stop(); )
} // send phase } // send phase
@ -3015,14 +3010,14 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
; i < tuplesList.size() ; i < tuplesList.size()
; i++, iteration++ ; i++, iteration++
) { ) {
chrono["iterations"].start(); Atrip::chrono["iterations"].start();
// check overhead from chrono over all iterations // check overhead from chrono over all iterations
chrono["start:stop"].start(); chrono["start:stop"].stop(); WITH_CHRONO("start:stop", {})
// check overhead of doing a barrier at the beginning // check overhead of doing a barrier at the beginning
WITH_CHRONO(chrono["oneshot-mpi:barrier"], WITH_CHRONO("oneshot-mpi:barrier",
WITH_CHRONO(chrono["mpi:barrier"], WITH_CHRONO("mpi:barrier",
if (in.barrier) MPI_Barrier(universe); if (in.barrier) MPI_Barrier(universe);
)) ))
@ -3030,15 +3025,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
LOG(0,"Atrip") LOG(0,"Atrip")
<< "iteration " << iteration << "iteration " << iteration
<< " [" << 100 * iteration / nIterations << "%]" << " [" << 100 * iteration / nIterations << "%]"
<< " (" << doublesFlops * iteration / chrono["doubles"].count() << " (" << doublesFlops * iteration / Atrip::chrono["doubles"].count()
<< "GF)" << "GF)"
<< " (" << doublesFlops * iteration / chrono["iterations"].count() << " (" << doublesFlops * iteration / Atrip::chrono["iterations"].count()
<< "GF)" << "GF)"
<< " ===========================\n"; << " ===========================\n";
// PRINT TIMINGS // PRINT TIMINGS
if (in.chrono) if (in.chrono)
for (auto const& pair: chrono) for (auto const& pair: Atrip::chrono)
LOG(1, " ") << pair.first << " :: " LOG(1, " ") << pair.first << " :: "
<< pair.second.count() << pair.second.count()
<< std::endl; << std::endl;
@ -3055,13 +3050,13 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
: &tuplesList[i + 1] : &tuplesList[i + 1]
; ;
chrono["with_rank"].start(); WITH_CHRONO("with_rank",
WITH_RANK << " :it " << iteration WITH_RANK << " :it " << iteration
<< " :abc " << pretty_print(abc) << " :abc " << pretty_print(abc)
<< " :abcN " << " :abcN "
<< (abcNext ? pretty_print(*abcNext) : "None") << (abcNext ? pretty_print(*abcNext) : "None")
<< "\n"; << "\n";
chrono["with_rank"].stop(); )
// COMM FIRST DATABASE ================================================{{{1 // COMM FIRST DATABASE ================================================{{{1
@ -3074,19 +3069,19 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
WITH_RANK << "__first__:first database io phase DONE\n"; WITH_RANK << "__first__:first database io phase DONE\n";
WITH_RANK << "__first__::::Unwrapping all slices for first database\n"; WITH_RANK << "__first__::::Unwrapping all slices for first database\n";
for (auto& u: unions) u->unwrapAll(abc); for (auto& u: unions) u->unwrapAll(abc);
WITH_RANK << "__first__::::Unwrapping all slices for first database DONE\n"; WITH_RANK << "__first__::::Unwrapping slices for first database DONE\n";
MPI_Barrier(universe); MPI_Barrier(universe);
} }
// COMM NEXT DATABASE ================================================={{{1 // COMM NEXT DATABASE ================================================={{{1
if (abcNext) { if (abcNext) {
WITH_RANK << "__comm__:" << iteration << "th communicating database\n"; WITH_RANK << "__comm__:" << iteration << "th communicating database\n";
chrono["db:comm"].start(); WITH_CHRONO("db:comm",
const auto db = communicateDatabase(*abcNext, universe); const auto db = communicateDatabase(*abcNext, universe);
chrono["db:comm"].stop(); )
chrono["db:io"].start(); WITH_CHRONO("db:io",
doIOPhase(db); doIOPhase(db);
chrono["db:io"].stop(); )
WITH_RANK << "__comm__:" << iteration << "th database io phase DONE\n"; WITH_RANK << "__comm__:" << iteration << "th database io phase DONE\n";
} }
@ -3094,63 +3089,61 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
OCD_Barrier(universe); OCD_Barrier(universe);
if (!isFakeTuple(i)) { if (!isFakeTuple(i)) {
WITH_RANK << iteration << "-th doubles\n"; WITH_RANK << iteration << "-th doubles\n";
WITH_CHRONO(chrono["oneshot-unwrap"], WITH_CHRONO("oneshot-unwrap",
WITH_CHRONO(chrono["unwrap"], WITH_CHRONO("unwrap",
WITH_CHRONO(chrono["unwrap:doubles"], WITH_CHRONO("unwrap:doubles",
for (auto& u: decltype(unions){&abph, &hhha, &taphh, &tabhh}) { for (auto& u: decltype(unions){&abph, &hhha, &taphh, &tabhh}) {
u->unwrapAll(abc); u->unwrapAll(abc);
} }
))) )))
chrono["oneshot-doubles"].start(); WITH_CHRONO("oneshot-doubles",
chrono["doubles"].start(); WITH_CHRONO("doubles",
doublesContribution( abc, (size_t)No, (size_t)Nv doublesContribution( abc, (size_t)No, (size_t)Nv
// -- VABCI // -- VABCI
, abph.unwrapSlice(Slice::AB, abc) , abph.unwrapSlice(Slice::AB, abc)
, abph.unwrapSlice(Slice::AC, abc) , abph.unwrapSlice(Slice::AC, abc)
, abph.unwrapSlice(Slice::BC, abc) , abph.unwrapSlice(Slice::BC, abc)
, abph.unwrapSlice(Slice::BA, abc) , abph.unwrapSlice(Slice::BA, abc)
, abph.unwrapSlice(Slice::CA, abc) , abph.unwrapSlice(Slice::CA, abc)
, abph.unwrapSlice(Slice::CB, abc) , abph.unwrapSlice(Slice::CB, abc)
// -- VHHHA // -- VHHHA
, hhha.unwrapSlice(Slice::A, abc) , hhha.unwrapSlice(Slice::A, abc)
, hhha.unwrapSlice(Slice::B, abc) , hhha.unwrapSlice(Slice::B, abc)
, hhha.unwrapSlice(Slice::C, abc) , hhha.unwrapSlice(Slice::C, abc)
// -- TA // -- TA
, taphh.unwrapSlice(Slice::A, abc) , taphh.unwrapSlice(Slice::A, abc)
, taphh.unwrapSlice(Slice::B, abc) , taphh.unwrapSlice(Slice::B, abc)
, taphh.unwrapSlice(Slice::C, abc) , taphh.unwrapSlice(Slice::C, abc)
// -- TABIJ // -- TABIJ
, tabhh.unwrapSlice(Slice::AB, abc) , tabhh.unwrapSlice(Slice::AB, abc)
, tabhh.unwrapSlice(Slice::AC, abc) , tabhh.unwrapSlice(Slice::AC, abc)
, tabhh.unwrapSlice(Slice::BC, abc) , tabhh.unwrapSlice(Slice::BC, abc)
// -- TIJK // -- TIJK
, Tijk.data() , Tijk.data()
, chrono );
); WITH_RANK << iteration << "-th doubles done\n";
WITH_RANK << iteration << "-th doubles done\n"; ))
chrono["doubles"].stop();
chrono["oneshot-doubles"].stop();
} }
// COMPUTE SINGLES =================================================== {{{1 // COMPUTE SINGLES =================================================== {{{1
OCD_Barrier(universe); OCD_Barrier(universe);
if (!isFakeTuple(i)) { if (!isFakeTuple(i)) {
WITH_CHRONO(chrono["oneshot-unwrap"], WITH_CHRONO("oneshot-unwrap",
WITH_CHRONO(chrono["unwrap"], WITH_CHRONO("unwrap",
WITH_CHRONO(chrono["unwrap:singles"], WITH_CHRONO("unwrap:singles",
abhh.unwrapAll(abc); abhh.unwrapAll(abc);
))) )))
chrono["reorder"].start(); WITH_CHRONO("reorder",
for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I]; for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
chrono["reorder"].stop(); )
chrono["singles"].start(); WITH_CHRONO("singles",
singlesContribution( No, Nv, abc singlesContribution( No, Nv, abc
, Tai.data() , Tai.data()
, abhh.unwrapSlice(Slice::AB, abc) , abhh.unwrapSlice(Slice::AB, abc)
, abhh.unwrapSlice(Slice::AC, abc) , abhh.unwrapSlice(Slice::AC, abc)
, abhh.unwrapSlice(Slice::BC, abc) , abhh.unwrapSlice(Slice::BC, abc)
, Zijk.data()); , Zijk.data());
chrono["singles"].stop(); )
} }
@ -3163,12 +3156,12 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
if (abc[1] == abc[2]) distinct--; if (abc[1] == abc[2]) distinct--;
const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]); const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]);
chrono["energy"].start(); WITH_CHRONO("energy",
if ( distinct == 0) if ( distinct == 0)
tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk); tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk);
else else
tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk); tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk);
chrono["energy"].stop(); )
#if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES) #if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES)
tupleEnergies[abc] = tupleEnergy; tupleEnergies[abc] = tupleEnergy;
@ -3199,7 +3192,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
// CLEANUP UNIONS ===================================================={{{1 // CLEANUP UNIONS ===================================================={{{1
OCD_Barrier(universe); OCD_Barrier(universe);
if (abcNext) { if (abcNext) {
chrono["gc"].start();
WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n"; WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n";
for (auto& u: unions) { for (auto& u: unions) {
@ -3233,12 +3225,11 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
} }
chrono["gc"].stop();
} }
WITH_RANK << iteration << "-th cleaning up....... DONE\n"; WITH_RANK << iteration << "-th cleaning up....... DONE\n";
chrono["iterations"].stop(); Atrip::chrono["iterations"].stop();
// ITERATION END ====================================================={{{1 // ITERATION END ====================================================={{{1
} }
@ -3276,15 +3267,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
// PRINT TIMINGS {{{1 // PRINT TIMINGS {{{1
if (in.chrono) if (in.chrono)
for (auto const& pair: chrono) for (auto const& pair: Atrip::chrono)
LOG(0,"atrip:chrono") << pair.first << " " LOG(0,"atrip:chrono") << pair.first << " "
<< pair.second.count() << std::endl; << pair.second.count() << std::endl;
LOG(0, "atrip:flops(doubles)") LOG(0, "atrip:flops(doubles)")
<< nIterations * doublesFlops / chrono["doubles"].count() << "\n"; << nIterations * doublesFlops / Atrip::chrono["doubles"].count() << "\n";
LOG(0, "atrip:flops(iterations)") LOG(0, "atrip:flops(iterations)")
<< nIterations * doublesFlops / chrono["iterations"].count() << "\n"; << nIterations * doublesFlops / Atrip::chrono["iterations"].count() << "\n";
// TODO: change the sign in the getEnergy routines // TODO: change the sign in the getEnergy routines
return { - globalEnergy }; return { - globalEnergy };

View File

@ -1,4 +1,4 @@
// [[file:../atrip.org::*Include header][Include header:1]] // [[file:~/atrip/atrip.org::*Include%20header][Include header:1]]
#pragma once #pragma once
#include <atrip/Atrip.hpp> #include <atrip/Atrip.hpp>

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Atrip][Atrip:1]] // [[file:~/atrip/atrip.org::*Atrip][Atrip:1]]
#pragma once #pragma once
#include <sstream> #include <sstream>
#include <string> #include <string>

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Blas][Blas:1]] // [[file:~/atrip/atrip.org::*Blas][Blas:1]]
#pragma once #pragma once
namespace atrip { namespace atrip {
extern "C" { extern "C" {

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Debug][Debug:1]] // [[file:~/atrip/atrip.org::*Debug][Debug:1]]
#pragma once #pragma once
#define ATRIP_BENCHMARK #define ATRIP_BENCHMARK
//#define ATRIP_DONT_SLICE //#define ATRIP_DONT_SLICE

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Equations][Equations:1]] // [[file:~/atrip/atrip.org::*Equations][Equations:1]]
#pragma once #pragma once
#include<atrip/Slice.hpp> #include<atrip/Slice.hpp>
@ -150,54 +150,51 @@ namespace atrip {
, double const* TBChh , double const* TBChh
// -- TIJK // -- TIJK
, double *Tijk , double *Tijk
, atrip::Timings& chrono
) { ) {
auto& t_reorder = chrono["doubles:reorder"];
const size_t a = abc[0], b = abc[1], c = abc[2] const size_t a = abc[0], b = abc[1], c = abc[2]
, NoNo = No*No, NoNv = No*Nv , NoNo = No*No, NoNv = No*Nv
; ;
#if defined(ATRIP_USE_DGEMM) #if defined(ATRIP_USE_DGEMM)
#define _IJK_(i, j, k) i + j*No + k*NoNo #define _IJK_(i, j, k) i + j*No + k*NoNo
#define REORDER(__II, __JJ, __KK) \ #define REORDER(__II, __JJ, __KK) \
t_reorder.start(); \ WITH_CHRONO("double:reorder", \
for (size_t k = 0; k < No; k++) \ for (size_t k = 0; k < No; k++) \
for (size_t j = 0; j < No; j++) \ for (size_t j = 0; j < No; j++) \
for (size_t i = 0; i < No; i++) { \ for (size_t i = 0; i < No; i++) { \
Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \ Tijk[_IJK_(i, j, k)] \
} \ += _t_buffer[_IJK_(__II, __JJ, __KK)]; \
t_reorder.stop(); } \
#define DGEMM_PARTICLES(__A, __B) \ )
atrip::dgemm_( "T" \ #define DGEMM_PARTICLES(__A, __B) \
, "N" \ atrip::dgemm_("T", \
, (int const*)&NoNo \ "N", \
, (int const*)&No \ (int const*)&NoNo, \
, (int const*)&Nv \ (int const*)&No, \
, &one \ (int const*)&Nv, \
, __A \ &one, \
, (int const*)&Nv \ __A, \
, __B \ (int const*)&Nv, \
, (int const*)&Nv \ __B, \
, &zero \ (int const*)&Nv, \
, _t_buffer.data() \ &zero, \
, (int const*)&NoNo \ _t_buffer.data(), \
); (int const*)&NoNo);
#define DGEMM_HOLES(__A, __B, __TRANSB) \ #define DGEMM_HOLES(__A, __B, __TRANSB) \
atrip::dgemm_( "N" \ atrip::dgemm_("N", \
, __TRANSB \ __TRANSB, \
, (int const*)&NoNo \ (int const*)&NoNo, \
, (int const*)&No \ (int const*)&No, \
, (int const*)&No \ (int const*)&No, \
, &m_one \ &m_one, \
, __A \ __A, \
, (int const*)&NoNo \ (int const*)&NoNo, \
, __B \ __B, \
, (int const*)&No \ (int const*)&No, \
, &zero \ &zero, \
, _t_buffer.data() \ _t_buffer.data(), \
, (int const*)&NoNo \ (int const*)&NoNo);
);
using F = double; using F = double;
const size_t NoNoNo = No*NoNo; const size_t NoNoNo = No*NoNo;
@ -205,88 +202,86 @@ namespace atrip {
_t_buffer.reserve(NoNoNo); _t_buffer.reserve(NoNoNo);
F one{1.0}, m_one{-1.0}, zero{0.0}; F one{1.0}, m_one{-1.0}, zero{0.0};
t_reorder.start(); WITH_CHRONO("double:reorder",
for (size_t k = 0; k < NoNoNo; k++) { for (size_t k = 0; k < NoNoNo; k++) {
// zero the Tijk Tijk[k] = 0.0;
Tijk[k] = 0.0; })
}
t_reorder.stop();
chrono["doubles:holes"].start(); WITH_CHRONO("doubles:holes",
{ // Holes part ============================================================ { // Holes part ================================================
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1 // VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
chrono["doubles:holes:1"].start(); WITH_CHRONO("doubles:holes:1",
DGEMM_HOLES(VhhhC, TABhh, "N") DGEMM_HOLES(VhhhC, TABhh, "N")
REORDER(i, k, j) REORDER(i, k, j)
chrono["doubles:holes:1"].stop(); )
// VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0 // VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0
chrono["doubles:holes:2"].start(); WITH_CHRONO("doubles:holes:2",
DGEMM_HOLES(VhhhC, TABhh, "T") DGEMM_HOLES(VhhhC, TABhh, "T")
REORDER(j, k, i) REORDER(j, k, i)
chrono["doubles:holes:2"].stop(); )
// VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5 // VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5
chrono["doubles:holes:3"].start(); WITH_CHRONO("doubles:holes:3",
DGEMM_HOLES(VhhhB, TAChh, "N") DGEMM_HOLES(VhhhB, TAChh, "N")
REORDER(i, j, k) REORDER(i, j, k)
chrono["doubles:holes:3"].stop(); )
// VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3 // VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3
chrono["doubles:holes:4"].start(); WITH_CHRONO("doubles:holes:4",
DGEMM_HOLES(VhhhB, TAChh, "T") DGEMM_HOLES(VhhhB, TAChh, "T")
REORDER(k, j, i) REORDER(k, j, i)
chrono["doubles:holes:4"].stop(); )
// VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1 // VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1
chrono["doubles:holes:5"].start(); WITH_CHRONO("doubles:holes:5",
DGEMM_HOLES(VhhhA, TBChh, "N") DGEMM_HOLES(VhhhA, TBChh, "N")
REORDER(j, i, k) REORDER(j, i, k)
chrono["doubles:holes:5"].stop(); )
// VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4 // VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4
chrono["doubles:holes:6"].start(); WITH_CHRONO("doubles:holes:6",
DGEMM_HOLES(VhhhA, TBChh, "T") DGEMM_HOLES(VhhhA, TBChh, "T")
REORDER(k, i, j) REORDER(k, i, j)
chrono["doubles:holes:6"].stop(); )
} }
chrono["doubles:holes"].stop(); )
chrono["doubles:particles"].start(); WITH_CHRONO("doubles:particles",
{ // Particle part ========================================================= { // Particle part ===========================================
// TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0 // TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0
chrono["doubles:particles:1"].start(); WITH_CHRONO("doubles:particles:1",
DGEMM_PARTICLES(TAphh, VBCph) DGEMM_PARTICLES(TAphh, VBCph)
REORDER(i, j, k) REORDER(i, j, k)
chrono["doubles:particles:1"].stop(); )
// TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3 // TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3
chrono["doubles:particles:2"].start(); WITH_CHRONO("doubles:particles:2",
DGEMM_PARTICLES(TAphh, VCBph) DGEMM_PARTICLES(TAphh, VCBph)
REORDER(i, k, j) REORDER(i, k, j)
chrono["doubles:particles:2"].stop(); )
// TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5 // TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5
chrono["doubles:particles:3"].start(); WITH_CHRONO("doubles:particles:3",
DGEMM_PARTICLES(TCphh, VABph) DGEMM_PARTICLES(TCphh, VABph)
REORDER(k, i, j) REORDER(k, i, j)
chrono["doubles:particles:3"].stop(); )
// TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2 // TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2
chrono["doubles:particles:4"].start(); WITH_CHRONO("doubles:particles:4",
DGEMM_PARTICLES(TCphh, VBAph) DGEMM_PARTICLES(TCphh, VBAph)
REORDER(k, j, i) REORDER(k, j, i)
chrono["doubles:particles:4"].stop(); )
// TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1 // TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1
chrono["doubles:particles:5"].start(); WITH_CHRONO("doubles:particles:5",
DGEMM_PARTICLES(TBphh, VACph) DGEMM_PARTICLES(TBphh, VACph)
REORDER(j, i, k) REORDER(j, i, k)
chrono["doubles:particles:5"].stop(); )
// TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4 // TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4
chrono["doubles:particles:6"].start(); WITH_CHRONO("doubles:particles:6",
DGEMM_PARTICLES(TBphh, VCAph) DGEMM_PARTICLES(TBphh, VCAph)
REORDER(j, k, i) REORDER(j, k, i)
chrono["doubles:particles:6"].stop(); )
} }
chrono["doubles:particles"].stop(); )
#undef REORDER #undef REORDER
#undef DGEMM_HOLES #undef DGEMM_HOLES
#undef DGEMM_PARTICLES #undef DGEMM_PARTICLES
#undef _IJK_ #undef _IJK_
#else #else
for (size_t k = 0; k < No; k++) for (size_t k = 0; k < No; k++)
for (size_t j = 0; j < No; j++) for (size_t j = 0; j < No; j++)
for (size_t i = 0; i < No; i++){ for (size_t i = 0; i < No; i++){
@ -330,7 +325,7 @@ namespace atrip {
} }
} }
#endif #endif
} }
} }

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*The rank mapping][The rank mapping:1]] // [[file:~/atrip/atrip.org::*The%20rank%20mapping][The rank mapping:1]]
#pragma once #pragma once
#include <vector> #include <vector>

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Prolog][Prolog:1]] // [[file:~/atrip/atrip.org::*Prolog][Prolog:1]]
#pragma once #pragma once
#include <iostream> #include <iostream>
#include <algorithm> #include <algorithm>
@ -16,11 +16,11 @@ struct Slice {
using F = double; using F = double;
// Prolog:1 ends here // Prolog:1 ends here
// [[file:../../atrip.org::*Location][Location:1]] // [[file:~/atrip/atrip.org::*Location][Location:1]]
struct Location { size_t rank; size_t source; }; struct Location { size_t rank; size_t source; };
// Location:1 ends here // Location:1 ends here
// [[file:../../atrip.org::*Type][Type:1]] // [[file:~/atrip/atrip.org::*Type][Type:1]]
enum Type enum Type
{ A = 10 { A = 10
, B , B
@ -38,7 +38,7 @@ enum Type
}; };
// Type:1 ends here // Type:1 ends here
// [[file:../../atrip.org::*State][State:1]] // [[file:~/atrip/atrip.org::*State][State:1]]
enum State { enum State {
Fetch = 0, Fetch = 0,
Dispatched = 2, Dispatched = 2,
@ -49,7 +49,7 @@ enum State {
}; };
// State:1 ends here // State:1 ends here
// [[file:../../atrip.org::*The Info structure][The Info structure:1]] // [[file:~/atrip/atrip.org::*The%20Info%20structure][The Info structure:1]]
struct Info { struct Info {
// which part of a,b,c the slice holds // which part of a,b,c the slice holds
PartialTuple tuple; PartialTuple tuple;
@ -73,7 +73,7 @@ struct Info {
using Ty_x_Tu = std::pair< Type, PartialTuple >; using Ty_x_Tu = std::pair< Type, PartialTuple >;
// The Info structure:1 ends here // The Info structure:1 ends here
// [[file:../../atrip.org::*Name][Name:1]] // [[file:~/atrip/atrip.org::*Name][Name:1]]
enum Name enum Name
{ TA = 100 { TA = 100
, VIJKA = 101 , VIJKA = 101
@ -83,19 +83,19 @@ enum Name
}; };
// Name:1 ends here // Name:1 ends here
// [[file:../../atrip.org::*Database][Database:1]] // [[file:~/atrip/atrip.org::*Database][Database:1]]
struct LocalDatabaseElement { struct LocalDatabaseElement {
Slice::Name name; Slice::Name name;
Slice::Info info; Slice::Info info;
}; };
// Database:1 ends here // Database:1 ends here
// [[file:../../atrip.org::*Database][Database:2]] // [[file:~/atrip/atrip.org::*Database][Database:2]]
using LocalDatabase = std::vector<LocalDatabaseElement>; using LocalDatabase = std::vector<LocalDatabaseElement>;
using Database = LocalDatabase; using Database = LocalDatabase;
// Database:2 ends here // Database:2 ends here
// [[file:../../atrip.org::*MPI Types][MPI Types:1]] // [[file:~/atrip/atrip.org::*MPI%20Types][MPI Types:1]]
struct mpi { struct mpi {
static MPI_Datatype vector(size_t n, MPI_Datatype const& DT) { static MPI_Datatype vector(size_t n, MPI_Datatype const& DT) {
@ -185,7 +185,7 @@ struct mpi {
}; };
// MPI Types:1 ends here // MPI Types:1 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:1]] // [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:1]]
static static
PartialTuple subtupleBySlice(ABCTuple abc, Type sliceType) { PartialTuple subtupleBySlice(ABCTuple abc, Type sliceType) {
switch (sliceType) { switch (sliceType) {
@ -203,7 +203,7 @@ PartialTuple subtupleBySlice(ABCTuple abc, Type sliceType) {
} }
// Static utilities:1 ends here // Static utilities:1 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:2]] // [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:2]]
static std::vector<Slice*> hasRecycledReferencingToIt static std::vector<Slice*> hasRecycledReferencingToIt
( std::vector<Slice> &slices ( std::vector<Slice> &slices
, Info const& info , Info const& info
@ -220,7 +220,7 @@ static std::vector<Slice*> hasRecycledReferencingToIt
} }
// Static utilities:2 ends here // Static utilities:2 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:3]] // [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:3]]
static Slice& findOneByType(std::vector<Slice> &slices, Slice::Type type) { static Slice& findOneByType(std::vector<Slice> &slices, Slice::Type type) {
const auto sliceIt const auto sliceIt
= std::find_if(slices.begin(), slices.end(), = std::find_if(slices.begin(), slices.end(),
@ -236,7 +236,7 @@ static Slice& findOneByType(std::vector<Slice> &slices, Slice::Type type) {
} }
// Static utilities:3 ends here // Static utilities:3 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:4]] // [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:4]]
static Slice& static Slice&
findRecycledSource (std::vector<Slice> &slices, Slice::Info info) { findRecycledSource (std::vector<Slice> &slices, Slice::Info info) {
const auto sliceIt const auto sliceIt
@ -262,7 +262,7 @@ findRecycledSource (std::vector<Slice> &slices, Slice::Info info) {
} }
// Static utilities:4 ends here // Static utilities:4 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:5]] // [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:5]]
static Slice& findByTypeAbc static Slice& findByTypeAbc
( std::vector<Slice> &slices ( std::vector<Slice> &slices
, Slice::Type type , Slice::Type type
@ -292,7 +292,7 @@ static Slice& findByTypeAbc
} }
// Static utilities:5 ends here // Static utilities:5 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:6]] // [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:6]]
static Slice& findByInfo(std::vector<Slice> &slices, static Slice& findByInfo(std::vector<Slice> &slices,
Slice::Info const& info) { Slice::Info const& info) {
const auto sliceIt const auto sliceIt
@ -315,30 +315,30 @@ static Slice& findByInfo(std::vector<Slice> &slices,
} }
// Static utilities:6 ends here // Static utilities:6 ends here
// [[file:../../atrip.org::*Attributes][Attributes:1]] // [[file:~/atrip/atrip.org::*Attributes][Attributes:1]]
Info info; Info info;
// Attributes:1 ends here // Attributes:1 ends here
// [[file:../../atrip.org::*Attributes][Attributes:2]] // [[file:~/atrip/atrip.org::*Attributes][Attributes:2]]
F *data; F *data;
// Attributes:2 ends here // Attributes:2 ends here
// [[file:../../atrip.org::*Attributes][Attributes:3]] // [[file:~/atrip/atrip.org::*Attributes][Attributes:3]]
MPI_Request request; MPI_Request request;
// Attributes:3 ends here // Attributes:3 ends here
// [[file:../../atrip.org::*Attributes][Attributes:4]] // [[file:~/atrip/atrip.org::*Attributes][Attributes:4]]
const size_t size; const size_t size;
// Attributes:4 ends here // Attributes:4 ends here
// [[file:../../atrip.org::*Member functions][Member functions:1]] // [[file:~/atrip/atrip.org::*Member%20functions][Member functions:1]]
void markReady() noexcept { void markReady() noexcept {
info.state = Ready; info.state = Ready;
info.recycling = Blank; info.recycling = Blank;
} }
// Member functions:1 ends here // Member functions:1 ends here
// [[file:../../atrip.org::*Member functions][Member functions:2]] // [[file:~/atrip/atrip.org::*Member%20functions][Member functions:2]]
bool isUnwrapped() const noexcept { bool isUnwrapped() const noexcept {
return info.state == Ready return info.state == Ready
|| info.state == SelfSufficient || info.state == SelfSufficient
@ -346,7 +346,7 @@ bool isUnwrapped() const noexcept {
} }
// Member functions:2 ends here // Member functions:2 ends here
// [[file:../../atrip.org::*Member functions][Member functions:3]] // [[file:~/atrip/atrip.org::*Member%20functions][Member functions:3]]
bool isUnwrappable() const noexcept { bool isUnwrappable() const noexcept {
return isUnwrapped() return isUnwrapped()
|| info.state == Recycled || info.state == Recycled
@ -379,7 +379,7 @@ inline bool isFree() const noexcept {
} }
// Member functions:3 ends here // Member functions:3 ends here
// [[file:../../atrip.org::*Member functions][Member functions:4]] // [[file:~/atrip/atrip.org::*Member%20functions][Member functions:4]]
inline bool isRecyclable() const noexcept { inline bool isRecyclable() const noexcept {
return ( info.state == Dispatched return ( info.state == Dispatched
|| info.state == Ready || info.state == Ready
@ -390,7 +390,7 @@ inline bool isRecyclable() const noexcept {
} }
// Member functions:4 ends here // Member functions:4 ends here
// [[file:../../atrip.org::*Member functions][Member functions:5]] // [[file:~/atrip/atrip.org::*Member%20functions][Member functions:5]]
inline bool hasValidDataPointer() const noexcept { inline bool hasValidDataPointer() const noexcept {
return data != nullptr return data != nullptr
&& info.state != Acceptor && info.state != Acceptor
@ -399,7 +399,7 @@ inline bool hasValidDataPointer() const noexcept {
} }
// Member functions:5 ends here // Member functions:5 ends here
// [[file:../../atrip.org::*Member functions][Member functions:6]] // [[file:~/atrip/atrip.org::*Member%20functions][Member functions:6]]
void unwrapAndMarkReady() { void unwrapAndMarkReady() {
if (info.state == Ready) return; if (info.state == Ready) return;
if (info.state != Dispatched) if (info.state != Dispatched)
@ -431,7 +431,7 @@ void unwrapAndMarkReady() {
} }
// Member functions:6 ends here // Member functions:6 ends here
// [[file:../../atrip.org::*Epilog][Epilog:1]] // [[file:~/atrip/atrip.org::*Epilog][Epilog:1]]
Slice(size_t size_) Slice(size_t size_)
: info({}) : info({})
, data(nullptr) , data(nullptr)
@ -442,7 +442,7 @@ Slice(size_t size_)
}; // struct Slice }; // struct Slice
// Epilog:1 ends here // Epilog:1 ends here
// [[file:../../atrip.org::*Debug][Debug:1]] // [[file:~/atrip/atrip.org::*Debug][Debug:1]]
std::ostream& operator<<(std::ostream& out, Slice::Location const& v) { std::ostream& operator<<(std::ostream& out, Slice::Location const& v) {
// TODO: remove me // TODO: remove me
out << "{.r(" << v.rank << "), .s(" << v.source << ")};"; out << "{.r(" << v.rank << "), .s(" << v.source << ")};";

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*The slice union][The slice union:1]] // [[file:~/atrip/atrip.org::*The%20slice%20union][The slice union:1]]
#pragma once #pragma once
#include <atrip/Debug.hpp> #include <atrip/Debug.hpp>
#include <atrip/Slice.hpp> #include <atrip/Slice.hpp>

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Prolog][Prolog:1]] // [[file:~/atrip/atrip.org::*Prolog][Prolog:1]]
#pragma once #pragma once
#include <vector> #include <vector>
@ -21,7 +21,7 @@
namespace atrip { namespace atrip {
// Prolog:1 ends here // Prolog:1 ends here
// [[file:../../atrip.org::*Tuples types][Tuples types:1]] // [[file:~/atrip/atrip.org::*Tuples%20types][Tuples types:1]]
using ABCTuple = std::array<size_t, 3>; using ABCTuple = std::array<size_t, 3>;
using PartialTuple = std::array<size_t, 2>; using PartialTuple = std::array<size_t, 2>;
using ABCTuples = std::vector<ABCTuple>; using ABCTuples = std::vector<ABCTuple>;
@ -29,14 +29,14 @@ using ABCTuples = std::vector<ABCTuple>;
constexpr ABCTuple FAKE_TUPLE = {0, 0, 0}; constexpr ABCTuple FAKE_TUPLE = {0, 0, 0};
// Tuples types:1 ends here // Tuples types:1 ends here
// [[file:../../atrip.org::*Distributing the tuples][Distributing the tuples:1]] // [[file:~/atrip/atrip.org::*Distributing%20the%20tuples][Distributing the tuples:1]]
struct TuplesDistribution { struct TuplesDistribution {
virtual ABCTuples getTuples(size_t Nv, MPI_Comm universe) = 0; virtual ABCTuples getTuples(size_t Nv, MPI_Comm universe) = 0;
virtual bool tupleIsFake(ABCTuple const& t) { return t == FAKE_TUPLE; } virtual bool tupleIsFake(ABCTuple const& t) { return t == FAKE_TUPLE; }
}; };
// Distributing the tuples:1 ends here // Distributing the tuples:1 ends here
// [[file:../../atrip.org::*Naive list][Naive list:1]] // [[file:~/atrip/atrip.org::*Naive%20list][Naive list:1]]
ABCTuples getTuplesList(size_t Nv) { ABCTuples getTuplesList(size_t Nv) {
const size_t n = Nv * (Nv + 1) * (Nv + 2) / 6 - Nv; const size_t n = Nv * (Nv + 1) * (Nv + 2) / 6 - Nv;
ABCTuples result(n); ABCTuples result(n);
@ -54,7 +54,7 @@ ABCTuples getTuplesList(size_t Nv) {
} }
// Naive list:1 ends here // Naive list:1 ends here
// [[file:../../atrip.org::*Naive list][Naive list:2]] // [[file:~/atrip/atrip.org::*Naive%20list][Naive list:2]]
std::pair<size_t, size_t> std::pair<size_t, size_t>
getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) { getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) {
@ -91,7 +91,7 @@ getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) {
} }
// Naive list:2 ends here // Naive list:2 ends here
// [[file:../../atrip.org::*Naive list][Naive list:3]] // [[file:~/atrip/atrip.org::*Naive%20list][Naive list:3]]
struct NaiveDistribution : public TuplesDistribution { struct NaiveDistribution : public TuplesDistribution {
ABCTuples getTuples(size_t Nv, MPI_Comm universe) override { ABCTuples getTuples(size_t Nv, MPI_Comm universe) override {
int rank, np; int rank, np;
@ -113,23 +113,29 @@ struct NaiveDistribution : public TuplesDistribution {
WITH_RANK << "range = " WITH_RANK << "range = "
<< range.first << " -> " << range.second << range.first << " -> " << range.second
<< std::endl; << std::endl;
std::vector<ABCTuple> result(range.second - range.first + 1, FAKE_TUPLE);
WITH_RANK << "number of global tuples = " << all.size() << std::endl;
WITH_RANK << "number of local tuples = " << result.size() << std::endl;
std::vector<ABCTuple> result(range.second - range.first, FAKE_TUPLE); std::copy(range.first >= all.size()
std::copy(all.begin() + range.first, ? all.end()
: all.begin() + range.first,
// --
range.second >= all.size() range.second >= all.size()
? all.end() ? all.end()
: all.begin() + range.first + range.second, : all.begin() + range.first + range.second,
// --
result.begin()); result.begin());
return result; return result;
} }
}; };
// Naive list:3 ends here // Naive list:3 ends here
// [[file:../../atrip.org::*Prolog][Prolog:1]] // [[file:~/atrip/atrip.org::*Prolog][Prolog:1]]
namespace group_and_sort { namespace group_and_sort {
// Prolog:1 ends here // Prolog:1 ends here
// [[file:../../atrip.org::*Node information][Node information:1]] // [[file:~/atrip/atrip.org::*Node%20information][Node information:1]]
std::vector<std::string> getNodeNames(MPI_Comm comm){ std::vector<std::string> getNodeNames(MPI_Comm comm){
int rank, np; int rank, np;
MPI_Comm_rank(comm, &rank); MPI_Comm_rank(comm, &rank);
@ -169,7 +175,7 @@ std::vector<std::string> getNodeNames(MPI_Comm comm){
} }
// Node information:1 ends here // Node information:1 ends here
// [[file:../../atrip.org::*Node information][Node information:2]] // [[file:~/atrip/atrip.org::*Node%20information][Node information:2]]
struct RankInfo { struct RankInfo {
const std::string name; const std::string name;
const size_t nodeId; const size_t nodeId;
@ -208,7 +214,7 @@ getNodeInfos(std::vector<string> const& nodeNames) {
} }
// Node information:2 ends here // Node information:2 ends here
// [[file:../../atrip.org::*Utils][Utils:1]] // [[file:~/atrip/atrip.org::*Utils][Utils:1]]
// Provides the node on which the slice-element is found // Provides the node on which the slice-element is found
// Right now we distribute the slices in a round robin fashion // Right now we distribute the slices in a round robin fashion
// over the different nodes (NOTE: not mpi ranks but nodes) // over the different nodes (NOTE: not mpi ranks but nodes)
@ -239,7 +245,7 @@ std::vector<size_t> getTupleNodes(ABCTuple t, size_t nNodes) {
} }
// Utils:1 ends here // Utils:1 ends here
// [[file:../../atrip.org::*Distribution][Distribution:1]] // [[file:~/atrip/atrip.org::*Distribution][Distribution:1]]
std::vector<ABCTuple> std::vector<ABCTuple>
specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) { specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
@ -380,7 +386,7 @@ std::vector<size_t> fetchElement(ABCTuple cur, ABCTuple suc){
} }
// Distribution:1 ends here // Distribution:1 ends here
// [[file:../../atrip.org::*Main][Main:1]] // [[file:~/atrip/atrip.org::*Main][Main:1]]
std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) { std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
int rank, np; int rank, np;
@ -432,7 +438,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
MPI_Comm_split(universe, color, key, &INTRA_COMM); MPI_Comm_split(universe, color, key, &INTRA_COMM);
// Main:1 ends here // Main:1 ends here
// [[file:../../atrip.org::*Main][Main:2]] // [[file:~/atrip/atrip.org::*Main][Main:2]]
const size_t const size_t
tuplesPerRankLocal tuplesPerRankLocal
= nodeTuples.size() / nodeInfos[rank].ranksPerNode = nodeTuples.size() / nodeInfos[rank].ranksPerNode
@ -456,7 +462,7 @@ MPI_Bcast(&tuplesPerRankGlobal,
universe); universe);
// Main:2 ends here // Main:2 ends here
// [[file:../../atrip.org::*Main][Main:3]] // [[file:~/atrip/atrip.org::*Main][Main:3]]
size_t const totalTuplesLocal size_t const totalTuplesLocal
= tuplesPerRankLocal = tuplesPerRankLocal
* nodeInfos[rank].ranksPerNode; * nodeInfos[rank].ranksPerNode;
@ -467,7 +473,7 @@ if (makeDistribution)
FAKE_TUPLE); FAKE_TUPLE);
// Main:3 ends here // Main:3 ends here
// [[file:../../atrip.org::*Main][Main:4]] // [[file:~/atrip/atrip.org::*Main][Main:4]]
{ {
std::vector<int> const std::vector<int> const
sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal); sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal);
@ -503,7 +509,7 @@ if (makeDistribution)
} }
// Main:4 ends here // Main:4 ends here
// [[file:../../atrip.org::*Main][Main:5]] // [[file:~/atrip/atrip.org::*Main][Main:5]]
result.insert(result.end(), result.insert(result.end(),
tuplesPerRankGlobal - result.size(), tuplesPerRankGlobal - result.size(),
FAKE_TUPLE); FAKE_TUPLE);
@ -513,7 +519,7 @@ result.insert(result.end(),
} }
// Main:5 ends here // Main:5 ends here
// [[file:../../atrip.org::*Interface][Interface:1]] // [[file:~/atrip/atrip.org::*Interface][Interface:1]]
struct Distribution : public TuplesDistribution { struct Distribution : public TuplesDistribution {
ABCTuples getTuples(size_t Nv, MPI_Comm universe) override { ABCTuples getTuples(size_t Nv, MPI_Comm universe) override {
return main(universe, Nv); return main(universe, Nv);
@ -521,10 +527,10 @@ struct Distribution : public TuplesDistribution {
}; };
// Interface:1 ends here // Interface:1 ends here
// [[file:../../atrip.org::*Epilog][Epilog:1]] // [[file:~/atrip/atrip.org::*Epilog][Epilog:1]]
} // namespace group_and_sort } // namespace group_and_sort
// Epilog:1 ends here // Epilog:1 ends here
// [[file:../../atrip.org::*Epilog][Epilog:1]] // [[file:~/atrip/atrip.org::*Epilog][Epilog:1]]
} }
// Epilog:1 ends here // Epilog:1 ends here

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Unions][Unions:1]] // [[file:~/atrip/atrip.org::*Unions][Unions:1]]
#pragma once #pragma once
#include <atrip/SliceUnion.hpp> #include <atrip/SliceUnion.hpp>

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Prolog][Prolog:1]] // [[file:~/atrip/atrip.org::*Prolog][Prolog:1]]
#pragma once #pragma once
#include <sstream> #include <sstream>
#include <string> #include <string>
@ -11,7 +11,7 @@
namespace atrip { namespace atrip {
// Prolog:1 ends here // Prolog:1 ends here
// [[file:../../atrip.org::*Pretty printing][Pretty printing:1]] // [[file:~/atrip/atrip.org::*Pretty%20printing][Pretty printing:1]]
template <typename T> template <typename T>
std::string pretty_print(T&& value) { std::string pretty_print(T&& value) {
std::stringstream stream; std::stringstream stream;
@ -22,11 +22,11 @@ template <typename T>
} }
// Pretty printing:1 ends here // Pretty printing:1 ends here
// [[file:../../atrip.org::*Chrono][Chrono:1]] // [[file:~/atrip/atrip.org::*Chrono][Chrono:1]]
#define WITH_CHRONO(__chrono, ...) \ #define WITH_CHRONO(__chrono_name, ...) \
__chrono.start(); \ Atrip::chrono[__chrono_name].start(); \
__VA_ARGS__ \ __VA_ARGS__ \
__chrono.stop(); Atrip::chrono[__chrono_name].stop();
struct Timer { struct Timer {
using Clock = std::chrono::high_resolution_clock; using Clock = std::chrono::high_resolution_clock;
@ -41,6 +41,6 @@ struct Timer {
using Timings = std::map<std::string, Timer>; using Timings = std::map<std::string, Timer>;
// Chrono:1 ends here // Chrono:1 ends here
// [[file:../../atrip.org::*Epilog][Epilog:1]] // [[file:~/atrip/atrip.org::*Epilog][Epilog:1]]
} }
// Epilog:1 ends here // Epilog:1 ends here

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Main][Main:1]] // [[file:~/atrip/atrip.org::*Main][Main:1]]
#include <iomanip> #include <iomanip>
#include <atrip/Atrip.hpp> #include <atrip/Atrip.hpp>
@ -24,9 +24,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
const int rank = Atrip::rank; const int rank = Atrip::rank;
MPI_Comm universe = in.ei->wrld->comm; MPI_Comm universe = in.ei->wrld->comm;
// Timings in seconds ================================================{{{1
Timings chrono{};
const size_t No = in.ei->lens[0]; const size_t No = in.ei->lens[0];
const size_t Nv = in.ea->lens[0]; const size_t Nv = in.ea->lens[0];
LOG(0,"Atrip") << "No: " << No << "\n"; LOG(0,"Atrip") << "No: " << No << "\n";
@ -66,20 +63,20 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
} }
chrono["nv-slices"].start();
// BUILD SLICES PARAMETRIZED BY NV ==================================={{{1 // BUILD SLICES PARAMETRIZED BY NV ==================================={{{1
LOG(0,"Atrip") << "BUILD NV-SLICES\n"; WITH_CHRONO("nv-slices",
TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); LOG(0,"Atrip") << "BUILD NV-SLICES\n";
HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
chrono["nv-slices"].stop(); HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
)
chrono["nv-nv-slices"].start();
// BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1 // BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1
LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n"; WITH_CHRONO("nv-nv-slices",
ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n";
ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
chrono["nv-nv-slices"].stop(); TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
)
// all tensors // all tensors
std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh}; std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh};
@ -96,7 +93,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
} }
LOG(0,"Atrip") << "BUILDING TUPLE LIST\n"; LOG(0,"Atrip") << "BUILDING TUPLE LIST\n";
WITH_CHRONO(chrono["tuples:build"], WITH_CHRONO("tuples:build",
auto const tuplesList = distribution->getTuples(Nv, universe); auto const tuplesList = distribution->getTuples(Nv, universe);
) )
size_t nIterations = tuplesList.size(); size_t nIterations = tuplesList.size();
@ -119,45 +116,42 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
auto communicateDatabase auto communicateDatabase
= [ &unions = [ &unions
, np , np
, &chrono
] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database { ] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database {
chrono["db:comm:type:do"].start(); WITH_CHRONO("db:comm:type:do",
auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement(); auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement();
chrono["db:comm:type:do"].stop(); )
chrono["db:comm:ldb"].start(); WITH_CHRONO("db:comm:ldb",
Slice::LocalDatabase ldb; Slice::LocalDatabase ldb;
for (auto const& tensor: unions) {
for (auto const& tensor: unions) { auto const& tensorDb = tensor->buildLocalDatabase(abc);
auto const& tensorDb = tensor->buildLocalDatabase(abc); ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end());
ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end()); }
} )
chrono["db:comm:ldb"].stop();
Slice::Database db(np * ldb.size(), ldb[0]); Slice::Database db(np * ldb.size(), ldb[0]);
chrono["oneshot-db:comm:allgather"].start(); WITH_CHRONO("oneshot-db:comm:allgather",
chrono["db:comm:allgather"].start(); WITH_CHRONO("db:comm:allgather",
MPI_Allgather( ldb.data() MPI_Allgather( ldb.data()
, ldb.size() , ldb.size()
, MPI_LDB_ELEMENT , MPI_LDB_ELEMENT
, db.data() , db.data()
, ldb.size() , ldb.size()
, MPI_LDB_ELEMENT , MPI_LDB_ELEMENT
, c); , c);
chrono["db:comm:allgather"].stop(); ))
chrono["oneshot-db:comm:allgather"].stop();
chrono["db:comm:type:free"].start(); WITH_CHRONO("db:comm:type:free",
MPI_Type_free(&MPI_LDB_ELEMENT); MPI_Type_free(&MPI_LDB_ELEMENT);
chrono["db:comm:type:free"].stop(); )
return db; return db;
}; };
auto doIOPhase auto doIOPhase
= [&unions, &rank, &np, &universe, &chrono] (Slice::Database const& db) { = [&unions, &rank, &np, &universe] (Slice::Database const& db) {
const size_t localDBLength = db.size() / np; const size_t localDBLength = db.size() / np;
@ -193,9 +187,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
<< "\n" << "\n"
; ;
chrono["db:io:recv"].start(); WITH_CHRONO("db:io:recv",
u.receive(el.info, recvTag); u.receive(el.info, recvTag);
chrono["db:io:recv"].stop(); )
} // recv } // recv
} }
@ -229,9 +223,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
<< "\n" << "\n"
; ;
chrono["db:io:send"].start(); WITH_CHRONO("db:io:send",
u.send(otherRank, el.info, sendTag); u.send(otherRank, el.info, sendTag);
chrono["db:io:send"].stop(); )
} // send phase } // send phase
@ -262,14 +256,14 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
; i < tuplesList.size() ; i < tuplesList.size()
; i++, iteration++ ; i++, iteration++
) { ) {
chrono["iterations"].start(); Atrip::chrono["iterations"].start();
// check overhead from chrono over all iterations // check overhead from chrono over all iterations
chrono["start:stop"].start(); chrono["start:stop"].stop(); WITH_CHRONO("start:stop", {})
// check overhead of doing a barrier at the beginning // check overhead of doing a barrier at the beginning
WITH_CHRONO(chrono["oneshot-mpi:barrier"], WITH_CHRONO("oneshot-mpi:barrier",
WITH_CHRONO(chrono["mpi:barrier"], WITH_CHRONO("mpi:barrier",
if (in.barrier) MPI_Barrier(universe); if (in.barrier) MPI_Barrier(universe);
)) ))
@ -277,15 +271,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
LOG(0,"Atrip") LOG(0,"Atrip")
<< "iteration " << iteration << "iteration " << iteration
<< " [" << 100 * iteration / nIterations << "%]" << " [" << 100 * iteration / nIterations << "%]"
<< " (" << doublesFlops * iteration / chrono["doubles"].count() << " (" << doublesFlops * iteration / Atrip::chrono["doubles"].count()
<< "GF)" << "GF)"
<< " (" << doublesFlops * iteration / chrono["iterations"].count() << " (" << doublesFlops * iteration / Atrip::chrono["iterations"].count()
<< "GF)" << "GF)"
<< " ===========================\n"; << " ===========================\n";
// PRINT TIMINGS // PRINT TIMINGS
if (in.chrono) if (in.chrono)
for (auto const& pair: chrono) for (auto const& pair: Atrip::chrono)
LOG(1, " ") << pair.first << " :: " LOG(1, " ") << pair.first << " :: "
<< pair.second.count() << pair.second.count()
<< std::endl; << std::endl;
@ -302,13 +296,13 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
: &tuplesList[i + 1] : &tuplesList[i + 1]
; ;
chrono["with_rank"].start(); WITH_CHRONO("with_rank",
WITH_RANK << " :it " << iteration WITH_RANK << " :it " << iteration
<< " :abc " << pretty_print(abc) << " :abc " << pretty_print(abc)
<< " :abcN " << " :abcN "
<< (abcNext ? pretty_print(*abcNext) : "None") << (abcNext ? pretty_print(*abcNext) : "None")
<< "\n"; << "\n";
chrono["with_rank"].stop(); )
// COMM FIRST DATABASE ================================================{{{1 // COMM FIRST DATABASE ================================================{{{1
@ -321,19 +315,19 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
WITH_RANK << "__first__:first database io phase DONE\n"; WITH_RANK << "__first__:first database io phase DONE\n";
WITH_RANK << "__first__::::Unwrapping all slices for first database\n"; WITH_RANK << "__first__::::Unwrapping all slices for first database\n";
for (auto& u: unions) u->unwrapAll(abc); for (auto& u: unions) u->unwrapAll(abc);
WITH_RANK << "__first__::::Unwrapping all slices for first database DONE\n"; WITH_RANK << "__first__::::Unwrapping slices for first database DONE\n";
MPI_Barrier(universe); MPI_Barrier(universe);
} }
// COMM NEXT DATABASE ================================================={{{1 // COMM NEXT DATABASE ================================================={{{1
if (abcNext) { if (abcNext) {
WITH_RANK << "__comm__:" << iteration << "th communicating database\n"; WITH_RANK << "__comm__:" << iteration << "th communicating database\n";
chrono["db:comm"].start(); WITH_CHRONO("db:comm",
const auto db = communicateDatabase(*abcNext, universe); const auto db = communicateDatabase(*abcNext, universe);
chrono["db:comm"].stop(); )
chrono["db:io"].start(); WITH_CHRONO("db:io",
doIOPhase(db); doIOPhase(db);
chrono["db:io"].stop(); )
WITH_RANK << "__comm__:" << iteration << "th database io phase DONE\n"; WITH_RANK << "__comm__:" << iteration << "th database io phase DONE\n";
} }
@ -341,63 +335,61 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
OCD_Barrier(universe); OCD_Barrier(universe);
if (!isFakeTuple(i)) { if (!isFakeTuple(i)) {
WITH_RANK << iteration << "-th doubles\n"; WITH_RANK << iteration << "-th doubles\n";
WITH_CHRONO(chrono["oneshot-unwrap"], WITH_CHRONO("oneshot-unwrap",
WITH_CHRONO(chrono["unwrap"], WITH_CHRONO("unwrap",
WITH_CHRONO(chrono["unwrap:doubles"], WITH_CHRONO("unwrap:doubles",
for (auto& u: decltype(unions){&abph, &hhha, &taphh, &tabhh}) { for (auto& u: decltype(unions){&abph, &hhha, &taphh, &tabhh}) {
u->unwrapAll(abc); u->unwrapAll(abc);
} }
))) )))
chrono["oneshot-doubles"].start(); WITH_CHRONO("oneshot-doubles",
chrono["doubles"].start(); WITH_CHRONO("doubles",
doublesContribution( abc, (size_t)No, (size_t)Nv doublesContribution( abc, (size_t)No, (size_t)Nv
// -- VABCI // -- VABCI
, abph.unwrapSlice(Slice::AB, abc) , abph.unwrapSlice(Slice::AB, abc)
, abph.unwrapSlice(Slice::AC, abc) , abph.unwrapSlice(Slice::AC, abc)
, abph.unwrapSlice(Slice::BC, abc) , abph.unwrapSlice(Slice::BC, abc)
, abph.unwrapSlice(Slice::BA, abc) , abph.unwrapSlice(Slice::BA, abc)
, abph.unwrapSlice(Slice::CA, abc) , abph.unwrapSlice(Slice::CA, abc)
, abph.unwrapSlice(Slice::CB, abc) , abph.unwrapSlice(Slice::CB, abc)
// -- VHHHA // -- VHHHA
, hhha.unwrapSlice(Slice::A, abc) , hhha.unwrapSlice(Slice::A, abc)
, hhha.unwrapSlice(Slice::B, abc) , hhha.unwrapSlice(Slice::B, abc)
, hhha.unwrapSlice(Slice::C, abc) , hhha.unwrapSlice(Slice::C, abc)
// -- TA // -- TA
, taphh.unwrapSlice(Slice::A, abc) , taphh.unwrapSlice(Slice::A, abc)
, taphh.unwrapSlice(Slice::B, abc) , taphh.unwrapSlice(Slice::B, abc)
, taphh.unwrapSlice(Slice::C, abc) , taphh.unwrapSlice(Slice::C, abc)
// -- TABIJ // -- TABIJ
, tabhh.unwrapSlice(Slice::AB, abc) , tabhh.unwrapSlice(Slice::AB, abc)
, tabhh.unwrapSlice(Slice::AC, abc) , tabhh.unwrapSlice(Slice::AC, abc)
, tabhh.unwrapSlice(Slice::BC, abc) , tabhh.unwrapSlice(Slice::BC, abc)
// -- TIJK // -- TIJK
, Tijk.data() , Tijk.data()
, chrono );
); WITH_RANK << iteration << "-th doubles done\n";
WITH_RANK << iteration << "-th doubles done\n"; ))
chrono["doubles"].stop();
chrono["oneshot-doubles"].stop();
} }
// COMPUTE SINGLES =================================================== {{{1 // COMPUTE SINGLES =================================================== {{{1
OCD_Barrier(universe); OCD_Barrier(universe);
if (!isFakeTuple(i)) { if (!isFakeTuple(i)) {
WITH_CHRONO(chrono["oneshot-unwrap"], WITH_CHRONO("oneshot-unwrap",
WITH_CHRONO(chrono["unwrap"], WITH_CHRONO("unwrap",
WITH_CHRONO(chrono["unwrap:singles"], WITH_CHRONO("unwrap:singles",
abhh.unwrapAll(abc); abhh.unwrapAll(abc);
))) )))
chrono["reorder"].start(); WITH_CHRONO("reorder",
for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I]; for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
chrono["reorder"].stop(); )
chrono["singles"].start(); WITH_CHRONO("singles",
singlesContribution( No, Nv, abc singlesContribution( No, Nv, abc
, Tai.data() , Tai.data()
, abhh.unwrapSlice(Slice::AB, abc) , abhh.unwrapSlice(Slice::AB, abc)
, abhh.unwrapSlice(Slice::AC, abc) , abhh.unwrapSlice(Slice::AC, abc)
, abhh.unwrapSlice(Slice::BC, abc) , abhh.unwrapSlice(Slice::BC, abc)
, Zijk.data()); , Zijk.data());
chrono["singles"].stop(); )
} }
@ -410,12 +402,12 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
if (abc[1] == abc[2]) distinct--; if (abc[1] == abc[2]) distinct--;
const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]); const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]);
chrono["energy"].start(); WITH_CHRONO("energy",
if ( distinct == 0) if ( distinct == 0)
tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk); tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk);
else else
tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk); tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk);
chrono["energy"].stop(); )
#if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES) #if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES)
tupleEnergies[abc] = tupleEnergy; tupleEnergies[abc] = tupleEnergy;
@ -446,7 +438,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
// CLEANUP UNIONS ===================================================={{{1 // CLEANUP UNIONS ===================================================={{{1
OCD_Barrier(universe); OCD_Barrier(universe);
if (abcNext) { if (abcNext) {
chrono["gc"].start();
WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n"; WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n";
for (auto& u: unions) { for (auto& u: unions) {
@ -480,12 +471,11 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
} }
chrono["gc"].stop();
} }
WITH_RANK << iteration << "-th cleaning up....... DONE\n"; WITH_RANK << iteration << "-th cleaning up....... DONE\n";
chrono["iterations"].stop(); Atrip::chrono["iterations"].stop();
// ITERATION END ====================================================={{{1 // ITERATION END ====================================================={{{1
} }
@ -523,15 +513,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
// PRINT TIMINGS {{{1 // PRINT TIMINGS {{{1
if (in.chrono) if (in.chrono)
for (auto const& pair: chrono) for (auto const& pair: Atrip::chrono)
LOG(0,"atrip:chrono") << pair.first << " " LOG(0,"atrip:chrono") << pair.first << " "
<< pair.second.count() << std::endl; << pair.second.count() << std::endl;
LOG(0, "atrip:flops(doubles)") LOG(0, "atrip:flops(doubles)")
<< nIterations * doublesFlops / chrono["doubles"].count() << "\n"; << nIterations * doublesFlops / Atrip::chrono["doubles"].count() << "\n";
LOG(0, "atrip:flops(iterations)") LOG(0, "atrip:flops(iterations)")
<< nIterations * doublesFlops / chrono["iterations"].count() << "\n"; << nIterations * doublesFlops / Atrip::chrono["iterations"].count() << "\n";
// TODO: change the sign in the getEnergy routines // TODO: change the sign in the getEnergy routines
return { - globalEnergy }; return { - globalEnergy };