Update all chronos to use the static chrono

This commit is contained in:
Alejandro Gallo 2021-10-21 15:25:01 +02:00
parent 2823fa3699
commit 79a3f99cb3
13 changed files with 553 additions and 571 deletions

503
atrip.org
View File

@ -748,10 +748,10 @@ The chrono is just a simple wrapper for a high resolution clock
that can be found in the =std::chrono= namespace of the standard library.
#+begin_src c++ :tangle (atrip-utils-h)
#define WITH_CHRONO(__chrono, ...) \
__chrono.start(); \
#define WITH_CHRONO(__chrono_name, ...) \
Atrip::chrono[__chrono_name].start(); \
__VA_ARGS__ \
__chrono.stop();
Atrip::chrono[__chrono_name].stop();
struct Timer {
using Clock = std::chrono::high_resolution_clock;
@ -1549,12 +1549,18 @@ struct NaiveDistribution : public TuplesDistribution {
WITH_RANK << "range = "
<< range.first << " -> " << range.second
<< std::endl;
std::vector<ABCTuple> result(range.second - range.first + 1, FAKE_TUPLE);
WITH_RANK << "number of global tuples = " << all.size() << std::endl;
WITH_RANK << "number of local tuples = " << result.size() << std::endl;
std::vector<ABCTuple> result(range.second - range.first, FAKE_TUPLE);
std::copy(all.begin() + range.first,
std::copy(range.first >= all.size()
? all.end()
: all.begin() + range.first,
// --
range.second >= all.size()
? all.end()
: all.begin() + range.first + range.second,
? all.end()
: all.begin() + range.first + range.second,
// --
result.begin());
return result;
}
@ -2467,54 +2473,51 @@ namespace atrip {
, double const* TBChh
// -- TIJK
, double *Tijk
, atrip::Timings& chrono
) {
auto& t_reorder = chrono["doubles:reorder"];
const size_t a = abc[0], b = abc[1], c = abc[2]
, NoNo = No*No, NoNv = No*Nv
;
#if defined(ATRIP_USE_DGEMM)
#define _IJK_(i, j, k) i + j*No + k*NoNo
#define REORDER(__II, __JJ, __KK) \
t_reorder.start(); \
for (size_t k = 0; k < No; k++) \
for (size_t j = 0; j < No; j++) \
for (size_t i = 0; i < No; i++) { \
Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \
} \
t_reorder.stop();
#define DGEMM_PARTICLES(__A, __B) \
atrip::dgemm_( "T" \
, "N" \
, (int const*)&NoNo \
, (int const*)&No \
, (int const*)&Nv \
, &one \
, __A \
, (int const*)&Nv \
, __B \
, (int const*)&Nv \
, &zero \
, _t_buffer.data() \
, (int const*)&NoNo \
);
#define DGEMM_HOLES(__A, __B, __TRANSB) \
atrip::dgemm_( "N" \
, __TRANSB \
, (int const*)&NoNo \
, (int const*)&No \
, (int const*)&No \
, &m_one \
, __A \
, (int const*)&NoNo \
, __B \
, (int const*)&No \
, &zero \
, _t_buffer.data() \
, (int const*)&NoNo \
);
#if defined(ATRIP_USE_DGEMM)
// Linear offset of element (i, j, k) in an No x No x No block whose first
// index runs fastest (i + j*No + k*NoNo). The arguments and the whole
// expansion are parenthesised so the macro stays correct when it is given
// compound arguments or used inside a larger expression. `No` and `NoNo`
// must be in scope where the macro is expanded.
#define _IJK_(i, j, k) ((i) + (j)*No + (k)*NoNo)
// REORDER(II, JJ, KK): accumulate _t_buffer into Tijk with its three indices
// permuted. For every (i, j, k) in [0, No)^3 the element of _t_buffer at
// _IJK_(II, JJ, KK) is added to Tijk at _IJK_(i, j, k); the call sites pass a
// permutation of the loop variables, e.g. REORDER(i, k, j).
// The whole loop nest is timed through WITH_CHRONO.
// NOTE(review): the timer key is "double:reorder", while every sibling timer
// in this function uses the "doubles:" prefix -- confirm whether the key was
// meant to be "doubles:reorder" (the zero-fill loop uses the same key).
#define REORDER(__II, __JJ, __KK) \
WITH_CHRONO("double:reorder", \
for (size_t k = 0; k < No; k++) \
for (size_t j = 0; j < No; j++) \
for (size_t i = 0; i < No; i++) { \
Tijk[_IJK_(i, j, k)] \
+= _t_buffer[_IJK_(__II, __JJ, __KK)]; \
} \
)
// DGEMM_PARTICLES(A, B): one atrip::dgemm_ call for the particle terms.
// Arguments passed, in order: "T", "N", NoNo, No, Nv, &one, A, Nv, B, Nv,
// &zero, _t_buffer.data(), NoNo.
// Assuming dgemm_ follows the reference BLAS DGEMM argument order (TODO
// confirm against its declaration), this stores A^T * B, contracted over Nv,
// into _t_buffer as an NoNo x No matrix.
// NOTE(review): the dimensions are passed by casting the address of a size_t
// object (NoNo is declared size_t) to `int const*`; in practice this yields
// the intended value only on little-endian targets and while the value fits
// in an int -- consider passing real int variables.
// The expansion ends with its own `;`, so call sites do not add one.
#define DGEMM_PARTICLES(__A, __B) \
atrip::dgemm_("T", \
"N", \
(int const*)&NoNo, \
(int const*)&No, \
(int const*)&Nv, \
&one, \
__A, \
(int const*)&Nv, \
__B, \
(int const*)&Nv, \
&zero, \
_t_buffer.data(), \
(int const*)&NoNo);
// DGEMM_HOLES(A, B, TRANSB): one atrip::dgemm_ call for the hole terms.
// Arguments passed, in order: "N", TRANSB, NoNo, No, No, &m_one, A, NoNo, B,
// No, &zero, _t_buffer.data(), NoNo. TRANSB is the transposition flag for B
// ("N" or "T" at the call sites).
// Assuming dgemm_ follows the reference BLAS DGEMM argument order (TODO
// confirm against its declaration), this stores -A * op(B), contracted over
// No, into _t_buffer as an NoNo x No matrix (m_one is -1.0).
// NOTE(review): the dimensions are passed by casting the address of a size_t
// object (NoNo is declared size_t) to `int const*`; in practice this yields
// the intended value only on little-endian targets and while the value fits
// in an int -- consider passing real int variables.
// The expansion ends with its own `;`, so call sites do not add one.
#define DGEMM_HOLES(__A, __B, __TRANSB) \
atrip::dgemm_("N", \
__TRANSB, \
(int const*)&NoNo, \
(int const*)&No, \
(int const*)&No, \
&m_one, \
__A, \
(int const*)&NoNo, \
__B, \
(int const*)&No, \
&zero, \
_t_buffer.data(), \
(int const*)&NoNo);
using F = double;
const size_t NoNoNo = No*NoNo;
@ -2522,88 +2525,86 @@ namespace atrip {
_t_buffer.reserve(NoNoNo);
F one{1.0}, m_one{-1.0}, zero{0.0};
t_reorder.start();
for (size_t k = 0; k < NoNoNo; k++) {
// zero the Tijk
Tijk[k] = 0.0;
}
t_reorder.stop();
WITH_CHRONO("double:reorder",
for (size_t k = 0; k < NoNoNo; k++) {
Tijk[k] = 0.0;
})
chrono["doubles:holes"].start();
{ // Holes part ============================================================
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
chrono["doubles:holes:1"].start();
DGEMM_HOLES(VhhhC, TABhh, "N")
REORDER(i, k, j)
chrono["doubles:holes:1"].stop();
// VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0
chrono["doubles:holes:2"].start();
DGEMM_HOLES(VhhhC, TABhh, "T")
REORDER(j, k, i)
chrono["doubles:holes:2"].stop();
// VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5
chrono["doubles:holes:3"].start();
DGEMM_HOLES(VhhhB, TAChh, "N")
REORDER(i, j, k)
chrono["doubles:holes:3"].stop();
// VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3
chrono["doubles:holes:4"].start();
DGEMM_HOLES(VhhhB, TAChh, "T")
REORDER(k, j, i)
chrono["doubles:holes:4"].stop();
// VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1
chrono["doubles:holes:5"].start();
DGEMM_HOLES(VhhhA, TBChh, "N")
REORDER(j, i, k)
chrono["doubles:holes:5"].stop();
// VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4
chrono["doubles:holes:6"].start();
DGEMM_HOLES(VhhhA, TBChh, "T")
REORDER(k, i, j)
chrono["doubles:holes:6"].stop();
}
chrono["doubles:holes"].stop();
WITH_CHRONO("doubles:holes",
{ // Holes part ================================================
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
WITH_CHRONO("doubles:holes:1",
DGEMM_HOLES(VhhhC, TABhh, "N")
REORDER(i, k, j)
)
// VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0
WITH_CHRONO("doubles:holes:2",
DGEMM_HOLES(VhhhC, TABhh, "T")
REORDER(j, k, i)
)
// VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5
WITH_CHRONO("doubles:holes:3",
DGEMM_HOLES(VhhhB, TAChh, "N")
REORDER(i, j, k)
)
// VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3
WITH_CHRONO("doubles:holes:4",
DGEMM_HOLES(VhhhB, TAChh, "T")
REORDER(k, j, i)
)
// VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1
WITH_CHRONO("doubles:holes:5",
DGEMM_HOLES(VhhhA, TBChh, "N")
REORDER(j, i, k)
)
// VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4
WITH_CHRONO("doubles:holes:6",
DGEMM_HOLES(VhhhA, TBChh, "T")
REORDER(k, i, j)
)
}
)
chrono["doubles:particles"].start();
{ // Particle part =========================================================
// TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0
chrono["doubles:particles:1"].start();
DGEMM_PARTICLES(TAphh, VBCph)
REORDER(i, j, k)
chrono["doubles:particles:1"].stop();
// TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3
chrono["doubles:particles:2"].start();
DGEMM_PARTICLES(TAphh, VCBph)
REORDER(i, k, j)
chrono["doubles:particles:2"].stop();
// TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5
chrono["doubles:particles:3"].start();
DGEMM_PARTICLES(TCphh, VABph)
REORDER(k, i, j)
chrono["doubles:particles:3"].stop();
// TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2
chrono["doubles:particles:4"].start();
DGEMM_PARTICLES(TCphh, VBAph)
REORDER(k, j, i)
chrono["doubles:particles:4"].stop();
// TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1
chrono["doubles:particles:5"].start();
DGEMM_PARTICLES(TBphh, VACph)
REORDER(j, i, k)
chrono["doubles:particles:5"].stop();
// TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4
chrono["doubles:particles:6"].start();
DGEMM_PARTICLES(TBphh, VCAph)
REORDER(j, k, i)
chrono["doubles:particles:6"].stop();
}
chrono["doubles:particles"].stop();
WITH_CHRONO("doubles:particles",
{ // Particle part ===========================================
// TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0
WITH_CHRONO("doubles:particles:1",
DGEMM_PARTICLES(TAphh, VBCph)
REORDER(i, j, k)
)
// TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3
WITH_CHRONO("doubles:particles:2",
DGEMM_PARTICLES(TAphh, VCBph)
REORDER(i, k, j)
)
// TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5
WITH_CHRONO("doubles:particles:3",
DGEMM_PARTICLES(TCphh, VABph)
REORDER(k, i, j)
)
// TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2
WITH_CHRONO("doubles:particles:4",
DGEMM_PARTICLES(TCphh, VBAph)
REORDER(k, j, i)
)
// TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1
WITH_CHRONO("doubles:particles:5",
DGEMM_PARTICLES(TBphh, VACph)
REORDER(j, i, k)
)
// TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4
WITH_CHRONO("doubles:particles:6",
DGEMM_PARTICLES(TBphh, VCAph)
REORDER(j, k, i)
)
}
)
#undef REORDER
#undef DGEMM_HOLES
#undef DGEMM_PARTICLES
#undef _IJK_
#else
#undef REORDER
#undef DGEMM_HOLES
#undef DGEMM_PARTICLES
#undef _IJK_
#else
for (size_t k = 0; k < No; k++)
for (size_t j = 0; j < No; j++)
for (size_t i = 0; i < No; i++){
@ -2647,7 +2648,7 @@ namespace atrip {
}
}
#endif
#endif
}
}
@ -2777,9 +2778,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
const int rank = Atrip::rank;
MPI_Comm universe = in.ei->wrld->comm;
// Timings in seconds ================================================{{{1
Timings chrono{};
const size_t No = in.ei->lens[0];
const size_t Nv = in.ea->lens[0];
LOG(0,"Atrip") << "No: " << No << "\n";
@ -2819,20 +2817,20 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
}
chrono["nv-slices"].start();
// BUILD SLICES PARAMETRIZED BY NV ==================================={{{1
LOG(0,"Atrip") << "BUILD NV-SLICES\n";
TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
chrono["nv-slices"].stop();
WITH_CHRONO("nv-slices",
LOG(0,"Atrip") << "BUILD NV-SLICES\n";
TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
)
chrono["nv-nv-slices"].start();
// BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1
LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n";
ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
chrono["nv-nv-slices"].stop();
WITH_CHRONO("nv-nv-slices",
LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n";
ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
)
// all tensors
std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh};
@ -2849,7 +2847,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
}
LOG(0,"Atrip") << "BUILDING TUPLE LIST\n";
WITH_CHRONO(chrono["tuples:build"],
WITH_CHRONO("tuples:build",
auto const tuplesList = distribution->getTuples(Nv, universe);
)
size_t nIterations = tuplesList.size();
@ -2872,45 +2870,42 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
auto communicateDatabase
= [ &unions
, np
, &chrono
] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database {
chrono["db:comm:type:do"].start();
auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement();
chrono["db:comm:type:do"].stop();
WITH_CHRONO("db:comm:type:do",
auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement();
)
chrono["db:comm:ldb"].start();
Slice::LocalDatabase ldb;
for (auto const& tensor: unions) {
auto const& tensorDb = tensor->buildLocalDatabase(abc);
ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end());
}
chrono["db:comm:ldb"].stop();
WITH_CHRONO("db:comm:ldb",
Slice::LocalDatabase ldb;
for (auto const& tensor: unions) {
auto const& tensorDb = tensor->buildLocalDatabase(abc);
ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end());
}
)
Slice::Database db(np * ldb.size(), ldb[0]);
chrono["oneshot-db:comm:allgather"].start();
chrono["db:comm:allgather"].start();
MPI_Allgather( ldb.data()
, ldb.size()
, MPI_LDB_ELEMENT
, db.data()
, ldb.size()
, MPI_LDB_ELEMENT
, c);
chrono["db:comm:allgather"].stop();
chrono["oneshot-db:comm:allgather"].stop();
WITH_CHRONO("oneshot-db:comm:allgather",
WITH_CHRONO("db:comm:allgather",
MPI_Allgather( ldb.data()
, ldb.size()
, MPI_LDB_ELEMENT
, db.data()
, ldb.size()
, MPI_LDB_ELEMENT
, c);
))
chrono["db:comm:type:free"].start();
MPI_Type_free(&MPI_LDB_ELEMENT);
chrono["db:comm:type:free"].stop();
WITH_CHRONO("db:comm:type:free",
MPI_Type_free(&MPI_LDB_ELEMENT);
)
return db;
};
auto doIOPhase
= [&unions, &rank, &np, &universe, &chrono] (Slice::Database const& db) {
= [&unions, &rank, &np, &universe] (Slice::Database const& db) {
const size_t localDBLength = db.size() / np;
@ -2946,9 +2941,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
<< "\n"
;
chrono["db:io:recv"].start();
u.receive(el.info, recvTag);
chrono["db:io:recv"].stop();
WITH_CHRONO("db:io:recv",
u.receive(el.info, recvTag);
)
} // recv
}
@ -2982,9 +2977,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
<< "\n"
;
chrono["db:io:send"].start();
u.send(otherRank, el.info, sendTag);
chrono["db:io:send"].stop();
WITH_CHRONO("db:io:send",
u.send(otherRank, el.info, sendTag);
)
} // send phase
@ -3015,14 +3010,14 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
; i < tuplesList.size()
; i++, iteration++
) {
chrono["iterations"].start();
Atrip::chrono["iterations"].start();
// check overhead from chrono over all iterations
chrono["start:stop"].start(); chrono["start:stop"].stop();
WITH_CHRONO("start:stop", {})
// check overhead of doing a barrier at the beginning
WITH_CHRONO(chrono["oneshot-mpi:barrier"],
WITH_CHRONO(chrono["mpi:barrier"],
WITH_CHRONO("oneshot-mpi:barrier",
WITH_CHRONO("mpi:barrier",
if (in.barrier) MPI_Barrier(universe);
))
@ -3030,15 +3025,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
LOG(0,"Atrip")
<< "iteration " << iteration
<< " [" << 100 * iteration / nIterations << "%]"
<< " (" << doublesFlops * iteration / chrono["doubles"].count()
<< " (" << doublesFlops * iteration / Atrip::chrono["doubles"].count()
<< "GF)"
<< " (" << doublesFlops * iteration / chrono["iterations"].count()
<< " (" << doublesFlops * iteration / Atrip::chrono["iterations"].count()
<< "GF)"
<< " ===========================\n";
// PRINT TIMINGS
if (in.chrono)
for (auto const& pair: chrono)
for (auto const& pair: Atrip::chrono)
LOG(1, " ") << pair.first << " :: "
<< pair.second.count()
<< std::endl;
@ -3055,13 +3050,13 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
: &tuplesList[i + 1]
;
chrono["with_rank"].start();
WITH_RANK << " :it " << iteration
<< " :abc " << pretty_print(abc)
<< " :abcN "
<< (abcNext ? pretty_print(*abcNext) : "None")
<< "\n";
chrono["with_rank"].stop();
WITH_CHRONO("with_rank",
WITH_RANK << " :it " << iteration
<< " :abc " << pretty_print(abc)
<< " :abcN "
<< (abcNext ? pretty_print(*abcNext) : "None")
<< "\n";
)
// COMM FIRST DATABASE ================================================{{{1
@ -3074,19 +3069,19 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
WITH_RANK << "__first__:first database io phase DONE\n";
WITH_RANK << "__first__::::Unwrapping all slices for first database\n";
for (auto& u: unions) u->unwrapAll(abc);
WITH_RANK << "__first__::::Unwrapping all slices for first database DONE\n";
WITH_RANK << "__first__::::Unwrapping slices for first database DONE\n";
MPI_Barrier(universe);
}
// COMM NEXT DATABASE ================================================={{{1
if (abcNext) {
WITH_RANK << "__comm__:" << iteration << "th communicating database\n";
chrono["db:comm"].start();
const auto db = communicateDatabase(*abcNext, universe);
chrono["db:comm"].stop();
chrono["db:io"].start();
doIOPhase(db);
chrono["db:io"].stop();
WITH_CHRONO("db:comm",
const auto db = communicateDatabase(*abcNext, universe);
)
WITH_CHRONO("db:io",
doIOPhase(db);
)
WITH_RANK << "__comm__:" << iteration << "th database io phase DONE\n";
}
@ -3094,63 +3089,61 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
OCD_Barrier(universe);
if (!isFakeTuple(i)) {
WITH_RANK << iteration << "-th doubles\n";
WITH_CHRONO(chrono["oneshot-unwrap"],
WITH_CHRONO(chrono["unwrap"],
WITH_CHRONO(chrono["unwrap:doubles"],
WITH_CHRONO("oneshot-unwrap",
WITH_CHRONO("unwrap",
WITH_CHRONO("unwrap:doubles",
for (auto& u: decltype(unions){&abph, &hhha, &taphh, &tabhh}) {
u->unwrapAll(abc);
}
)))
chrono["oneshot-doubles"].start();
chrono["doubles"].start();
doublesContribution( abc, (size_t)No, (size_t)Nv
// -- VABCI
, abph.unwrapSlice(Slice::AB, abc)
, abph.unwrapSlice(Slice::AC, abc)
, abph.unwrapSlice(Slice::BC, abc)
, abph.unwrapSlice(Slice::BA, abc)
, abph.unwrapSlice(Slice::CA, abc)
, abph.unwrapSlice(Slice::CB, abc)
// -- VHHHA
, hhha.unwrapSlice(Slice::A, abc)
, hhha.unwrapSlice(Slice::B, abc)
, hhha.unwrapSlice(Slice::C, abc)
// -- TA
, taphh.unwrapSlice(Slice::A, abc)
, taphh.unwrapSlice(Slice::B, abc)
, taphh.unwrapSlice(Slice::C, abc)
// -- TABIJ
, tabhh.unwrapSlice(Slice::AB, abc)
, tabhh.unwrapSlice(Slice::AC, abc)
, tabhh.unwrapSlice(Slice::BC, abc)
// -- TIJK
, Tijk.data()
, chrono
);
WITH_RANK << iteration << "-th doubles done\n";
chrono["doubles"].stop();
chrono["oneshot-doubles"].stop();
WITH_CHRONO("oneshot-doubles",
WITH_CHRONO("doubles",
doublesContribution( abc, (size_t)No, (size_t)Nv
// -- VABCI
, abph.unwrapSlice(Slice::AB, abc)
, abph.unwrapSlice(Slice::AC, abc)
, abph.unwrapSlice(Slice::BC, abc)
, abph.unwrapSlice(Slice::BA, abc)
, abph.unwrapSlice(Slice::CA, abc)
, abph.unwrapSlice(Slice::CB, abc)
// -- VHHHA
, hhha.unwrapSlice(Slice::A, abc)
, hhha.unwrapSlice(Slice::B, abc)
, hhha.unwrapSlice(Slice::C, abc)
// -- TA
, taphh.unwrapSlice(Slice::A, abc)
, taphh.unwrapSlice(Slice::B, abc)
, taphh.unwrapSlice(Slice::C, abc)
// -- TABIJ
, tabhh.unwrapSlice(Slice::AB, abc)
, tabhh.unwrapSlice(Slice::AC, abc)
, tabhh.unwrapSlice(Slice::BC, abc)
// -- TIJK
, Tijk.data()
);
WITH_RANK << iteration << "-th doubles done\n";
))
}
// COMPUTE SINGLES =================================================== {{{1
OCD_Barrier(universe);
if (!isFakeTuple(i)) {
WITH_CHRONO(chrono["oneshot-unwrap"],
WITH_CHRONO(chrono["unwrap"],
WITH_CHRONO(chrono["unwrap:singles"],
WITH_CHRONO("oneshot-unwrap",
WITH_CHRONO("unwrap",
WITH_CHRONO("unwrap:singles",
abhh.unwrapAll(abc);
)))
chrono["reorder"].start();
for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
chrono["reorder"].stop();
chrono["singles"].start();
singlesContribution( No, Nv, abc
, Tai.data()
, abhh.unwrapSlice(Slice::AB, abc)
, abhh.unwrapSlice(Slice::AC, abc)
, abhh.unwrapSlice(Slice::BC, abc)
, Zijk.data());
chrono["singles"].stop();
WITH_CHRONO("reorder",
for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
)
WITH_CHRONO("singles",
singlesContribution( No, Nv, abc
, Tai.data()
, abhh.unwrapSlice(Slice::AB, abc)
, abhh.unwrapSlice(Slice::AC, abc)
, abhh.unwrapSlice(Slice::BC, abc)
, Zijk.data());
)
}
@ -3163,12 +3156,12 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
if (abc[1] == abc[2]) distinct--;
const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]);
chrono["energy"].start();
if ( distinct == 0)
tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk);
else
tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk);
chrono["energy"].stop();
WITH_CHRONO("energy",
if ( distinct == 0)
tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk);
else
tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk);
)
#if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES)
tupleEnergies[abc] = tupleEnergy;
@ -3199,7 +3192,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
// CLEANUP UNIONS ===================================================={{{1
OCD_Barrier(universe);
if (abcNext) {
chrono["gc"].start();
WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n";
for (auto& u: unions) {
@ -3233,12 +3225,11 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
}
chrono["gc"].stop();
}
WITH_RANK << iteration << "-th cleaning up....... DONE\n";
chrono["iterations"].stop();
Atrip::chrono["iterations"].stop();
// ITERATION END ====================================================={{{1
}
@ -3276,15 +3267,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
// PRINT TIMINGS {{{1
if (in.chrono)
for (auto const& pair: chrono)
for (auto const& pair: Atrip::chrono)
LOG(0,"atrip:chrono") << pair.first << " "
<< pair.second.count() << std::endl;
LOG(0, "atrip:flops(doubles)")
<< nIterations * doublesFlops / chrono["doubles"].count() << "\n";
<< nIterations * doublesFlops / Atrip::chrono["doubles"].count() << "\n";
LOG(0, "atrip:flops(iterations)")
<< nIterations * doublesFlops / chrono["iterations"].count() << "\n";
<< nIterations * doublesFlops / Atrip::chrono["iterations"].count() << "\n";
// TODO: change the sign in the getEnergy routines
return { - globalEnergy };

View File

@ -1,4 +1,4 @@
// [[file:../atrip.org::*Include header][Include header:1]]
// [[file:~/atrip/atrip.org::*Include%20header][Include header:1]]
#pragma once
#include <atrip/Atrip.hpp>

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Atrip][Atrip:1]]
// [[file:~/atrip/atrip.org::*Atrip][Atrip:1]]
#pragma once
#include <sstream>
#include <string>

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Blas][Blas:1]]
// [[file:~/atrip/atrip.org::*Blas][Blas:1]]
#pragma once
namespace atrip {
extern "C" {

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Debug][Debug:1]]
// [[file:~/atrip/atrip.org::*Debug][Debug:1]]
#pragma once
#define ATRIP_BENCHMARK
//#define ATRIP_DONT_SLICE

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Equations][Equations:1]]
// [[file:~/atrip/atrip.org::*Equations][Equations:1]]
#pragma once
#include<atrip/Slice.hpp>
@ -150,54 +150,51 @@ namespace atrip {
, double const* TBChh
// -- TIJK
, double *Tijk
, atrip::Timings& chrono
) {
auto& t_reorder = chrono["doubles:reorder"];
const size_t a = abc[0], b = abc[1], c = abc[2]
, NoNo = No*No, NoNv = No*Nv
;
#if defined(ATRIP_USE_DGEMM)
#define _IJK_(i, j, k) i + j*No + k*NoNo
#define REORDER(__II, __JJ, __KK) \
t_reorder.start(); \
for (size_t k = 0; k < No; k++) \
for (size_t j = 0; j < No; j++) \
for (size_t i = 0; i < No; i++) { \
Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \
} \
t_reorder.stop();
#define DGEMM_PARTICLES(__A, __B) \
atrip::dgemm_( "T" \
, "N" \
, (int const*)&NoNo \
, (int const*)&No \
, (int const*)&Nv \
, &one \
, __A \
, (int const*)&Nv \
, __B \
, (int const*)&Nv \
, &zero \
, _t_buffer.data() \
, (int const*)&NoNo \
);
#define DGEMM_HOLES(__A, __B, __TRANSB) \
atrip::dgemm_( "N" \
, __TRANSB \
, (int const*)&NoNo \
, (int const*)&No \
, (int const*)&No \
, &m_one \
, __A \
, (int const*)&NoNo \
, __B \
, (int const*)&No \
, &zero \
, _t_buffer.data() \
, (int const*)&NoNo \
);
#if defined(ATRIP_USE_DGEMM)
// Linear offset of element (i, j, k) in an No x No x No block whose first
// index runs fastest (i + j*No + k*NoNo). The arguments and the whole
// expansion are parenthesised so the macro stays correct when it is given
// compound arguments or used inside a larger expression. `No` and `NoNo`
// must be in scope where the macro is expanded.
#define _IJK_(i, j, k) ((i) + (j)*No + (k)*NoNo)
// REORDER(II, JJ, KK): accumulate _t_buffer into Tijk with its three indices
// permuted. For every (i, j, k) in [0, No)^3 the element of _t_buffer at
// _IJK_(II, JJ, KK) is added to Tijk at _IJK_(i, j, k); the call sites pass a
// permutation of the loop variables, e.g. REORDER(i, k, j).
// The whole loop nest is timed through WITH_CHRONO.
// NOTE(review): the timer key is "double:reorder", while every sibling timer
// in this function uses the "doubles:" prefix -- confirm whether the key was
// meant to be "doubles:reorder" (the zero-fill loop uses the same key).
#define REORDER(__II, __JJ, __KK) \
WITH_CHRONO("double:reorder", \
for (size_t k = 0; k < No; k++) \
for (size_t j = 0; j < No; j++) \
for (size_t i = 0; i < No; i++) { \
Tijk[_IJK_(i, j, k)] \
+= _t_buffer[_IJK_(__II, __JJ, __KK)]; \
} \
)
// DGEMM_PARTICLES(A, B): one atrip::dgemm_ call for the particle terms.
// Arguments passed, in order: "T", "N", NoNo, No, Nv, &one, A, Nv, B, Nv,
// &zero, _t_buffer.data(), NoNo.
// Assuming dgemm_ follows the reference BLAS DGEMM argument order (TODO
// confirm against its declaration), this stores A^T * B, contracted over Nv,
// into _t_buffer as an NoNo x No matrix.
// NOTE(review): the dimensions are passed by casting the address of a size_t
// object (NoNo is declared size_t) to `int const*`; in practice this yields
// the intended value only on little-endian targets and while the value fits
// in an int -- consider passing real int variables.
// The expansion ends with its own `;`, so call sites do not add one.
#define DGEMM_PARTICLES(__A, __B) \
atrip::dgemm_("T", \
"N", \
(int const*)&NoNo, \
(int const*)&No, \
(int const*)&Nv, \
&one, \
__A, \
(int const*)&Nv, \
__B, \
(int const*)&Nv, \
&zero, \
_t_buffer.data(), \
(int const*)&NoNo);
// DGEMM_HOLES(A, B, TRANSB): one atrip::dgemm_ call for the hole terms.
// Arguments passed, in order: "N", TRANSB, NoNo, No, No, &m_one, A, NoNo, B,
// No, &zero, _t_buffer.data(), NoNo. TRANSB is the transposition flag for B
// ("N" or "T" at the call sites).
// Assuming dgemm_ follows the reference BLAS DGEMM argument order (TODO
// confirm against its declaration), this stores -A * op(B), contracted over
// No, into _t_buffer as an NoNo x No matrix (m_one is -1.0).
// NOTE(review): the dimensions are passed by casting the address of a size_t
// object (NoNo is declared size_t) to `int const*`; in practice this yields
// the intended value only on little-endian targets and while the value fits
// in an int -- consider passing real int variables.
// The expansion ends with its own `;`, so call sites do not add one.
#define DGEMM_HOLES(__A, __B, __TRANSB) \
atrip::dgemm_("N", \
__TRANSB, \
(int const*)&NoNo, \
(int const*)&No, \
(int const*)&No, \
&m_one, \
__A, \
(int const*)&NoNo, \
__B, \
(int const*)&No, \
&zero, \
_t_buffer.data(), \
(int const*)&NoNo);
using F = double;
const size_t NoNoNo = No*NoNo;
@ -205,88 +202,86 @@ namespace atrip {
_t_buffer.reserve(NoNoNo);
F one{1.0}, m_one{-1.0}, zero{0.0};
t_reorder.start();
for (size_t k = 0; k < NoNoNo; k++) {
// zero the Tijk
Tijk[k] = 0.0;
}
t_reorder.stop();
WITH_CHRONO("double:reorder",
for (size_t k = 0; k < NoNoNo; k++) {
Tijk[k] = 0.0;
})
chrono["doubles:holes"].start();
{ // Holes part ============================================================
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
chrono["doubles:holes:1"].start();
DGEMM_HOLES(VhhhC, TABhh, "N")
REORDER(i, k, j)
chrono["doubles:holes:1"].stop();
// VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0
chrono["doubles:holes:2"].start();
DGEMM_HOLES(VhhhC, TABhh, "T")
REORDER(j, k, i)
chrono["doubles:holes:2"].stop();
// VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5
chrono["doubles:holes:3"].start();
DGEMM_HOLES(VhhhB, TAChh, "N")
REORDER(i, j, k)
chrono["doubles:holes:3"].stop();
// VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3
chrono["doubles:holes:4"].start();
DGEMM_HOLES(VhhhB, TAChh, "T")
REORDER(k, j, i)
chrono["doubles:holes:4"].stop();
// VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1
chrono["doubles:holes:5"].start();
DGEMM_HOLES(VhhhA, TBChh, "N")
REORDER(j, i, k)
chrono["doubles:holes:5"].stop();
// VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4
chrono["doubles:holes:6"].start();
DGEMM_HOLES(VhhhA, TBChh, "T")
REORDER(k, i, j)
chrono["doubles:holes:6"].stop();
}
chrono["doubles:holes"].stop();
WITH_CHRONO("doubles:holes",
{ // Holes part ================================================
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
WITH_CHRONO("doubles:holes:1",
DGEMM_HOLES(VhhhC, TABhh, "N")
REORDER(i, k, j)
)
// VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0
WITH_CHRONO("doubles:holes:2",
DGEMM_HOLES(VhhhC, TABhh, "T")
REORDER(j, k, i)
)
// VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5
WITH_CHRONO("doubles:holes:3",
DGEMM_HOLES(VhhhB, TAChh, "N")
REORDER(i, j, k)
)
// VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3
WITH_CHRONO("doubles:holes:4",
DGEMM_HOLES(VhhhB, TAChh, "T")
REORDER(k, j, i)
)
// VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1
WITH_CHRONO("doubles:holes:5",
DGEMM_HOLES(VhhhA, TBChh, "N")
REORDER(j, i, k)
)
// VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4
WITH_CHRONO("doubles:holes:6",
DGEMM_HOLES(VhhhA, TBChh, "T")
REORDER(k, i, j)
)
}
)
chrono["doubles:particles"].start();
{ // Particle part =========================================================
// TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0
chrono["doubles:particles:1"].start();
DGEMM_PARTICLES(TAphh, VBCph)
REORDER(i, j, k)
chrono["doubles:particles:1"].stop();
// TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3
chrono["doubles:particles:2"].start();
DGEMM_PARTICLES(TAphh, VCBph)
REORDER(i, k, j)
chrono["doubles:particles:2"].stop();
// TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5
chrono["doubles:particles:3"].start();
DGEMM_PARTICLES(TCphh, VABph)
REORDER(k, i, j)
chrono["doubles:particles:3"].stop();
// TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2
chrono["doubles:particles:4"].start();
DGEMM_PARTICLES(TCphh, VBAph)
REORDER(k, j, i)
chrono["doubles:particles:4"].stop();
// TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1
chrono["doubles:particles:5"].start();
DGEMM_PARTICLES(TBphh, VACph)
REORDER(j, i, k)
chrono["doubles:particles:5"].stop();
// TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4
chrono["doubles:particles:6"].start();
DGEMM_PARTICLES(TBphh, VCAph)
REORDER(j, k, i)
chrono["doubles:particles:6"].stop();
}
chrono["doubles:particles"].stop();
WITH_CHRONO("doubles:particles",
{ // Particle part ===========================================
// TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0
WITH_CHRONO("doubles:particles:1",
DGEMM_PARTICLES(TAphh, VBCph)
REORDER(i, j, k)
)
// TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3
WITH_CHRONO("doubles:particles:2",
DGEMM_PARTICLES(TAphh, VCBph)
REORDER(i, k, j)
)
// TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5
WITH_CHRONO("doubles:particles:3",
DGEMM_PARTICLES(TCphh, VABph)
REORDER(k, i, j)
)
// TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2
WITH_CHRONO("doubles:particles:4",
DGEMM_PARTICLES(TCphh, VBAph)
REORDER(k, j, i)
)
// TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1
WITH_CHRONO("doubles:particles:5",
DGEMM_PARTICLES(TBphh, VACph)
REORDER(j, i, k)
)
// TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4
WITH_CHRONO("doubles:particles:6",
DGEMM_PARTICLES(TBphh, VCAph)
REORDER(j, k, i)
)
}
)
#undef REORDER
#undef DGEMM_HOLES
#undef DGEMM_PARTICLES
#undef _IJK_
#else
#undef REORDER
#undef DGEMM_HOLES
#undef DGEMM_PARTICLES
#undef _IJK_
#else
for (size_t k = 0; k < No; k++)
for (size_t j = 0; j < No; j++)
for (size_t i = 0; i < No; i++){
@ -330,7 +325,7 @@ namespace atrip {
}
}
#endif
#endif
}
}

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*The rank mapping][The rank mapping:1]]
// [[file:~/atrip/atrip.org::*The%20rank%20mapping][The rank mapping:1]]
#pragma once
#include <vector>

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Prolog][Prolog:1]]
// [[file:~/atrip/atrip.org::*Prolog][Prolog:1]]
#pragma once
#include <iostream>
#include <algorithm>
@ -16,11 +16,11 @@ struct Slice {
using F = double;
// Prolog:1 ends here
// [[file:../../atrip.org::*Location][Location:1]]
// [[file:~/atrip/atrip.org::*Location][Location:1]]
// Pair of indices locating a slice.
// NOTE(review): presumably `rank` is the MPI rank holding the slice and
// `source` an index into that rank's storage -- confirm against the users.
struct Location {
  size_t rank;
  size_t source;
};
// Location:1 ends here
// [[file:../../atrip.org::*Type][Type:1]]
// [[file:~/atrip/atrip.org::*Type][Type:1]]
enum Type
{ A = 10
, B
@ -38,7 +38,7 @@ enum Type
};
// Type:1 ends here
// [[file:../../atrip.org::*State][State:1]]
// [[file:~/atrip/atrip.org::*State][State:1]]
enum State {
Fetch = 0,
Dispatched = 2,
@ -49,7 +49,7 @@ enum State {
};
// State:1 ends here
// [[file:../../atrip.org::*The Info structure][The Info structure:1]]
// [[file:~/atrip/atrip.org::*The%20Info%20structure][The Info structure:1]]
struct Info {
// which part of a,b,c the slice holds
PartialTuple tuple;
@ -73,7 +73,7 @@ struct Info {
using Ty_x_Tu = std::pair< Type, PartialTuple >;
// The Info structure:1 ends here
// [[file:../../atrip.org::*Name][Name:1]]
// [[file:~/atrip/atrip.org::*Name][Name:1]]
enum Name
{ TA = 100
, VIJKA = 101
@ -83,19 +83,19 @@ enum Name
};
// Name:1 ends here
// [[file:../../atrip.org::*Database][Database:1]]
// [[file:~/atrip/atrip.org::*Database][Database:1]]
// One entry of a slice database: the Slice::Name identifying a tensor paired
// with a Slice::Info record.
// Vectors of these elements are exchanged between ranks with MPI_Allgather
// using the datatype returned by Slice::mpi::localDatabaseElement().
// NOTE(review): presumably that datatype mirrors this member layout, so the
// two must be kept in sync -- confirm in Slice::mpi.
struct LocalDatabaseElement {
Slice::Name name;
Slice::Info info;
};
// Database:1 ends here
// [[file:../../atrip.org::*Database][Database:2]]
// [[file:~/atrip/atrip.org::*Database][Database:2]]
// LocalDatabase: list of database elements built on one rank.
// Database: the same container type; it is the name used for the list
// gathered from all ranks (np * local size elements in communicateDatabase).
using LocalDatabase = std::vector<LocalDatabaseElement>;
using Database = LocalDatabase;
// Database:2 ends here
// [[file:../../atrip.org::*MPI Types][MPI Types:1]]
// [[file:~/atrip/atrip.org::*MPI%20Types][MPI Types:1]]
struct mpi {
static MPI_Datatype vector(size_t n, MPI_Datatype const& DT) {
@ -185,7 +185,7 @@ struct mpi {
};
// MPI Types:1 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:1]]
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:1]]
static
PartialTuple subtupleBySlice(ABCTuple abc, Type sliceType) {
switch (sliceType) {
@ -203,7 +203,7 @@ PartialTuple subtupleBySlice(ABCTuple abc, Type sliceType) {
}
// Static utilities:1 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:2]]
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:2]]
static std::vector<Slice*> hasRecycledReferencingToIt
( std::vector<Slice> &slices
, Info const& info
@ -220,7 +220,7 @@ static std::vector<Slice*> hasRecycledReferencingToIt
}
// Static utilities:2 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:3]]
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:3]]
static Slice& findOneByType(std::vector<Slice> &slices, Slice::Type type) {
const auto sliceIt
= std::find_if(slices.begin(), slices.end(),
@ -236,7 +236,7 @@ static Slice& findOneByType(std::vector<Slice> &slices, Slice::Type type) {
}
// Static utilities:3 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:4]]
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:4]]
static Slice&
findRecycledSource (std::vector<Slice> &slices, Slice::Info info) {
const auto sliceIt
@ -262,7 +262,7 @@ findRecycledSource (std::vector<Slice> &slices, Slice::Info info) {
}
// Static utilities:4 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:5]]
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:5]]
static Slice& findByTypeAbc
( std::vector<Slice> &slices
, Slice::Type type
@ -292,7 +292,7 @@ static Slice& findByTypeAbc
}
// Static utilities:5 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:6]]
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:6]]
static Slice& findByInfo(std::vector<Slice> &slices,
Slice::Info const& info) {
const auto sliceIt
@ -315,30 +315,30 @@ static Slice& findByInfo(std::vector<Slice> &slices,
}
// Static utilities:6 ends here
// [[file:../../atrip.org::*Attributes][Attributes:1]]
// [[file:~/atrip/atrip.org::*Attributes][Attributes:1]]
Info info;
// Attributes:1 ends here
// [[file:../../atrip.org::*Attributes][Attributes:2]]
// [[file:~/atrip/atrip.org::*Attributes][Attributes:2]]
F *data;
// Attributes:2 ends here
// [[file:../../atrip.org::*Attributes][Attributes:3]]
// [[file:~/atrip/atrip.org::*Attributes][Attributes:3]]
MPI_Request request;
// Attributes:3 ends here
// [[file:../../atrip.org::*Attributes][Attributes:4]]
// [[file:~/atrip/atrip.org::*Attributes][Attributes:4]]
const size_t size;
// Attributes:4 ends here
// [[file:../../atrip.org::*Member functions][Member functions:1]]
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:1]]
// Put this slice into the Ready state: info.recycling is reset to Blank
// and info.state is set to Ready. Never throws.
void markReady() noexcept {
  info.recycling = Blank;
  info.state = Ready;
}
// Member functions:1 ends here
// [[file:../../atrip.org::*Member functions][Member functions:2]]
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:2]]
bool isUnwrapped() const noexcept {
return info.state == Ready
|| info.state == SelfSufficient
@ -346,7 +346,7 @@ bool isUnwrapped() const noexcept {
}
// Member functions:2 ends here
// [[file:../../atrip.org::*Member functions][Member functions:3]]
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:3]]
bool isUnwrappable() const noexcept {
return isUnwrapped()
|| info.state == Recycled
@ -379,7 +379,7 @@ inline bool isFree() const noexcept {
}
// Member functions:3 ends here
// [[file:../../atrip.org::*Member functions][Member functions:4]]
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:4]]
inline bool isRecyclable() const noexcept {
return ( info.state == Dispatched
|| info.state == Ready
@ -390,7 +390,7 @@ inline bool isRecyclable() const noexcept {
}
// Member functions:4 ends here
// [[file:../../atrip.org::*Member functions][Member functions:5]]
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:5]]
inline bool hasValidDataPointer() const noexcept {
return data != nullptr
&& info.state != Acceptor
@ -399,7 +399,7 @@ inline bool hasValidDataPointer() const noexcept {
}
// Member functions:5 ends here
// [[file:../../atrip.org::*Member functions][Member functions:6]]
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:6]]
void unwrapAndMarkReady() {
if (info.state == Ready) return;
if (info.state != Dispatched)
@ -431,7 +431,7 @@ void unwrapAndMarkReady() {
}
// Member functions:6 ends here
// [[file:../../atrip.org::*Epilog][Epilog:1]]
// [[file:~/atrip/atrip.org::*Epilog][Epilog:1]]
Slice(size_t size_)
: info({})
, data(nullptr)
@ -442,7 +442,7 @@ Slice(size_t size_)
}; // struct Slice
// Epilog:1 ends here
// [[file:../../atrip.org::*Debug][Debug:1]]
// [[file:~/atrip/atrip.org::*Debug][Debug:1]]
std::ostream& operator<<(std::ostream& out, Slice::Location const& v) {
// TODO: remove me
out << "{.r(" << v.rank << "), .s(" << v.source << ")};";

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*The slice union][The slice union:1]]
// [[file:~/atrip/atrip.org::*The%20slice%20union][The slice union:1]]
#pragma once
#include <atrip/Debug.hpp>
#include <atrip/Slice.hpp>

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Prolog][Prolog:1]]
// [[file:~/atrip/atrip.org::*Prolog][Prolog:1]]
#pragma once
#include <vector>
@ -21,7 +21,7 @@
namespace atrip {
// Prolog:1 ends here
// [[file:../../atrip.org::*Tuples types][Tuples types:1]]
// [[file:~/atrip/atrip.org::*Tuples%20types][Tuples types:1]]
// A triple of indices; elsewhere in the project its components are read
// as a = abc[0], b = abc[1], c = abc[2].
using ABCTuple = std::array<size_t, 3>;
// A pair of indices (presumably a two-index projection of an ABCTuple;
// see subtupleBySlice, which returns this type -- TODO confirm).
using PartialTuple = std::array<size_t, 2>;
// A list of ABC tuples.
using ABCTuples = std::vector<ABCTuple>;
@ -29,14 +29,14 @@ using ABCTuples = std::vector<ABCTuple>;
constexpr ABCTuple FAKE_TUPLE = {0, 0, 0};
// Tuples types:1 ends here
// [[file:../../atrip.org::*Distributing the tuples][Distributing the tuples:1]]
// [[file:~/atrip/atrip.org::*Distributing%20the%20tuples][Distributing the tuples:1]]
// Interface for strategies that produce the list of ABC tuples.
// Concrete strategies derive from this struct and override getTuples.
struct TuplesDistribution {
  // Polymorphic base: a virtual destructor is required so that derived
  // distributions can be destroyed safely through a TuplesDistribution*.
  virtual ~TuplesDistribution() = default;

  // Builds and returns the tuples for the given Nv using the communicator
  // `universe`. Pure virtual: every concrete distribution must implement it.
  virtual ABCTuples getTuples(size_t Nv, MPI_Comm universe) = 0;

  // Returns true when `t` compares equal to the FAKE_TUPLE sentinel.
  // Virtual so that a distribution may supply its own criterion.
  virtual bool tupleIsFake(ABCTuple const& t) { return t == FAKE_TUPLE; }
};
// Distributing the tuples:1 ends here
// [[file:../../atrip.org::*Naive list][Naive list:1]]
// [[file:~/atrip/atrip.org::*Naive%20list][Naive list:1]]
ABCTuples getTuplesList(size_t Nv) {
const size_t n = Nv * (Nv + 1) * (Nv + 2) / 6 - Nv;
ABCTuples result(n);
@ -54,7 +54,7 @@ ABCTuples getTuplesList(size_t Nv) {
}
// Naive list:1 ends here
// [[file:../../atrip.org::*Naive list][Naive list:2]]
// [[file:~/atrip/atrip.org::*Naive%20list][Naive list:2]]
std::pair<size_t, size_t>
getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) {
@ -91,7 +91,7 @@ getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) {
}
// Naive list:2 ends here
// [[file:../../atrip.org::*Naive list][Naive list:3]]
// [[file:~/atrip/atrip.org::*Naive%20list][Naive list:3]]
struct NaiveDistribution : public TuplesDistribution {
ABCTuples getTuples(size_t Nv, MPI_Comm universe) override {
int rank, np;
@ -113,23 +113,29 @@ struct NaiveDistribution : public TuplesDistribution {
WITH_RANK << "range = "
<< range.first << " -> " << range.second
<< std::endl;
std::vector<ABCTuple> result(range.second - range.first + 1, FAKE_TUPLE);
WITH_RANK << "number of global tuples = " << all.size() << std::endl;
WITH_RANK << "number of local tuples = " << result.size() << std::endl;
std::vector<ABCTuple> result(range.second - range.first, FAKE_TUPLE);
std::copy(all.begin() + range.first,
std::copy(range.first >= all.size()
? all.end()
: all.begin() + range.first,
// --
range.second >= all.size()
? all.end()
: all.begin() + range.first + range.second,
? all.end()
: all.begin() + range.first + range.second,
// --
result.begin());
return result;
}
};
// Naive list:3 ends here
// [[file:../../atrip.org::*Prolog][Prolog:1]]
// [[file:~/atrip/atrip.org::*Prolog][Prolog:1]]
namespace group_and_sort {
// Prolog:1 ends here
// [[file:../../atrip.org::*Node information][Node information:1]]
// [[file:~/atrip/atrip.org::*Node%20information][Node information:1]]
std::vector<std::string> getNodeNames(MPI_Comm comm){
int rank, np;
MPI_Comm_rank(comm, &rank);
@ -169,7 +175,7 @@ std::vector<std::string> getNodeNames(MPI_Comm comm){
}
// Node information:1 ends here
// [[file:../../atrip.org::*Node information][Node information:2]]
// [[file:~/atrip/atrip.org::*Node%20information][Node information:2]]
struct RankInfo {
const std::string name;
const size_t nodeId;
@ -208,7 +214,7 @@ getNodeInfos(std::vector<string> const& nodeNames) {
}
// Node information:2 ends here
// [[file:../../atrip.org::*Utils][Utils:1]]
// [[file:~/atrip/atrip.org::*Utils][Utils:1]]
// Provides the node on which the slice-element is found
// Right now we distribute the slices in a round robin fashion
// over the different nodes (NOTE: not mpi ranks but nodes)
@ -239,7 +245,7 @@ std::vector<size_t> getTupleNodes(ABCTuple t, size_t nNodes) {
}
// Utils:1 ends here
// [[file:../../atrip.org::*Distribution][Distribution:1]]
// [[file:~/atrip/atrip.org::*Distribution][Distribution:1]]
std::vector<ABCTuple>
specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
@ -380,7 +386,7 @@ std::vector<size_t> fetchElement(ABCTuple cur, ABCTuple suc){
}
// Distribution:1 ends here
// [[file:../../atrip.org::*Main][Main:1]]
// [[file:~/atrip/atrip.org::*Main][Main:1]]
std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
int rank, np;
@ -432,7 +438,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
MPI_Comm_split(universe, color, key, &INTRA_COMM);
// Main:1 ends here
// [[file:../../atrip.org::*Main][Main:2]]
// [[file:~/atrip/atrip.org::*Main][Main:2]]
const size_t
tuplesPerRankLocal
= nodeTuples.size() / nodeInfos[rank].ranksPerNode
@ -456,7 +462,7 @@ MPI_Bcast(&tuplesPerRankGlobal,
universe);
// Main:2 ends here
// [[file:../../atrip.org::*Main][Main:3]]
// [[file:~/atrip/atrip.org::*Main][Main:3]]
size_t const totalTuplesLocal
= tuplesPerRankLocal
* nodeInfos[rank].ranksPerNode;
@ -467,7 +473,7 @@ if (makeDistribution)
FAKE_TUPLE);
// Main:3 ends here
// [[file:../../atrip.org::*Main][Main:4]]
// [[file:~/atrip/atrip.org::*Main][Main:4]]
{
std::vector<int> const
sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal);
@ -503,7 +509,7 @@ if (makeDistribution)
}
// Main:4 ends here
// [[file:../../atrip.org::*Main][Main:5]]
// [[file:~/atrip/atrip.org::*Main][Main:5]]
result.insert(result.end(),
tuplesPerRankGlobal - result.size(),
FAKE_TUPLE);
@ -513,7 +519,7 @@ result.insert(result.end(),
}
// Main:5 ends here
// [[file:../../atrip.org::*Interface][Interface:1]]
// [[file:~/atrip/atrip.org::*Interface][Interface:1]]
struct Distribution : public TuplesDistribution {
ABCTuples getTuples(size_t Nv, MPI_Comm universe) override {
return main(universe, Nv);
@ -521,10 +527,10 @@ struct Distribution : public TuplesDistribution {
};
// Interface:1 ends here
// [[file:../../atrip.org::*Epilog][Epilog:1]]
// [[file:~/atrip/atrip.org::*Epilog][Epilog:1]]
} // namespace group_and_sort
// Epilog:1 ends here
// [[file:../../atrip.org::*Epilog][Epilog:1]]
// [[file:~/atrip/atrip.org::*Epilog][Epilog:1]]
}
// Epilog:1 ends here

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Unions][Unions:1]]
// [[file:~/atrip/atrip.org::*Unions][Unions:1]]
#pragma once
#include <atrip/SliceUnion.hpp>

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Prolog][Prolog:1]]
// [[file:~/atrip/atrip.org::*Prolog][Prolog:1]]
#pragma once
#include <sstream>
#include <string>
@ -11,7 +11,7 @@
namespace atrip {
// Prolog:1 ends here
// [[file:../../atrip.org::*Pretty printing][Pretty printing:1]]
// [[file:~/atrip/atrip.org::*Pretty%20printing][Pretty printing:1]]
template <typename T>
std::string pretty_print(T&& value) {
std::stringstream stream;
@ -22,11 +22,11 @@ template <typename T>
}
// Pretty printing:1 ends here
// [[file:../../atrip.org::*Chrono][Chrono:1]]
#define WITH_CHRONO(__chrono, ...) \
__chrono.start(); \
// [[file:~/atrip/atrip.org::*Chrono][Chrono:1]]
#define WITH_CHRONO(__chrono_name, ...) \
Atrip::chrono[__chrono_name].start(); \
__VA_ARGS__ \
__chrono.stop();
Atrip::chrono[__chrono_name].stop();
struct Timer {
using Clock = std::chrono::high_resolution_clock;
@ -41,6 +41,6 @@ struct Timer {
using Timings = std::map<std::string, Timer>;
// Chrono:1 ends here
// [[file:../../atrip.org::*Epilog][Epilog:1]]
// [[file:~/atrip/atrip.org::*Epilog][Epilog:1]]
}
// Epilog:1 ends here

View File

@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Main][Main:1]]
// [[file:~/atrip/atrip.org::*Main][Main:1]]
#include <iomanip>
#include <atrip/Atrip.hpp>
@ -24,9 +24,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
const int rank = Atrip::rank;
MPI_Comm universe = in.ei->wrld->comm;
// Timings in seconds ================================================{{{1
Timings chrono{};
const size_t No = in.ei->lens[0];
const size_t Nv = in.ea->lens[0];
LOG(0,"Atrip") << "No: " << No << "\n";
@ -66,20 +63,20 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
}
chrono["nv-slices"].start();
// BUILD SLICES PARAMETRIZED BY NV ==================================={{{1
LOG(0,"Atrip") << "BUILD NV-SLICES\n";
TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
chrono["nv-slices"].stop();
WITH_CHRONO("nv-slices",
LOG(0,"Atrip") << "BUILD NV-SLICES\n";
TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
)
chrono["nv-nv-slices"].start();
// BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1
LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n";
ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
chrono["nv-nv-slices"].stop();
WITH_CHRONO("nv-nv-slices",
LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n";
ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
)
// all tensors
std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh};
@ -96,7 +93,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
}
LOG(0,"Atrip") << "BUILDING TUPLE LIST\n";
WITH_CHRONO(chrono["tuples:build"],
WITH_CHRONO("tuples:build",
auto const tuplesList = distribution->getTuples(Nv, universe);
)
size_t nIterations = tuplesList.size();
@ -119,45 +116,42 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
auto communicateDatabase
= [ &unions
, np
, &chrono
] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database {
chrono["db:comm:type:do"].start();
auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement();
chrono["db:comm:type:do"].stop();
WITH_CHRONO("db:comm:type:do",
auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement();
)
chrono["db:comm:ldb"].start();
Slice::LocalDatabase ldb;
for (auto const& tensor: unions) {
auto const& tensorDb = tensor->buildLocalDatabase(abc);
ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end());
}
chrono["db:comm:ldb"].stop();
WITH_CHRONO("db:comm:ldb",
Slice::LocalDatabase ldb;
for (auto const& tensor: unions) {
auto const& tensorDb = tensor->buildLocalDatabase(abc);
ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end());
}
)
Slice::Database db(np * ldb.size(), ldb[0]);
chrono["oneshot-db:comm:allgather"].start();
chrono["db:comm:allgather"].start();
MPI_Allgather( ldb.data()
, ldb.size()
, MPI_LDB_ELEMENT
, db.data()
, ldb.size()
, MPI_LDB_ELEMENT
, c);
chrono["db:comm:allgather"].stop();
chrono["oneshot-db:comm:allgather"].stop();
WITH_CHRONO("oneshot-db:comm:allgather",
WITH_CHRONO("db:comm:allgather",
MPI_Allgather( ldb.data()
, ldb.size()
, MPI_LDB_ELEMENT
, db.data()
, ldb.size()
, MPI_LDB_ELEMENT
, c);
))
chrono["db:comm:type:free"].start();
MPI_Type_free(&MPI_LDB_ELEMENT);
chrono["db:comm:type:free"].stop();
WITH_CHRONO("db:comm:type:free",
MPI_Type_free(&MPI_LDB_ELEMENT);
)
return db;
};
auto doIOPhase
= [&unions, &rank, &np, &universe, &chrono] (Slice::Database const& db) {
= [&unions, &rank, &np, &universe] (Slice::Database const& db) {
const size_t localDBLength = db.size() / np;
@ -193,9 +187,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
<< "\n"
;
chrono["db:io:recv"].start();
u.receive(el.info, recvTag);
chrono["db:io:recv"].stop();
WITH_CHRONO("db:io:recv",
u.receive(el.info, recvTag);
)
} // recv
}
@ -229,9 +223,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
<< "\n"
;
chrono["db:io:send"].start();
u.send(otherRank, el.info, sendTag);
chrono["db:io:send"].stop();
WITH_CHRONO("db:io:send",
u.send(otherRank, el.info, sendTag);
)
} // send phase
@ -262,14 +256,14 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
; i < tuplesList.size()
; i++, iteration++
) {
chrono["iterations"].start();
Atrip::chrono["iterations"].start();
// check overhead from chrono over all iterations
chrono["start:stop"].start(); chrono["start:stop"].stop();
WITH_CHRONO("start:stop", {})
// check overhead of doing a barrier at the beginning
WITH_CHRONO(chrono["oneshot-mpi:barrier"],
WITH_CHRONO(chrono["mpi:barrier"],
WITH_CHRONO("oneshot-mpi:barrier",
WITH_CHRONO("mpi:barrier",
if (in.barrier) MPI_Barrier(universe);
))
@ -277,15 +271,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
LOG(0,"Atrip")
<< "iteration " << iteration
<< " [" << 100 * iteration / nIterations << "%]"
<< " (" << doublesFlops * iteration / chrono["doubles"].count()
<< " (" << doublesFlops * iteration / Atrip::chrono["doubles"].count()
<< "GF)"
<< " (" << doublesFlops * iteration / chrono["iterations"].count()
<< " (" << doublesFlops * iteration / Atrip::chrono["iterations"].count()
<< "GF)"
<< " ===========================\n";
// PRINT TIMINGS
if (in.chrono)
for (auto const& pair: chrono)
for (auto const& pair: Atrip::chrono)
LOG(1, " ") << pair.first << " :: "
<< pair.second.count()
<< std::endl;
@ -302,13 +296,13 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
: &tuplesList[i + 1]
;
chrono["with_rank"].start();
WITH_RANK << " :it " << iteration
<< " :abc " << pretty_print(abc)
<< " :abcN "
<< (abcNext ? pretty_print(*abcNext) : "None")
<< "\n";
chrono["with_rank"].stop();
WITH_CHRONO("with_rank",
WITH_RANK << " :it " << iteration
<< " :abc " << pretty_print(abc)
<< " :abcN "
<< (abcNext ? pretty_print(*abcNext) : "None")
<< "\n";
)
// COMM FIRST DATABASE ================================================{{{1
@ -321,19 +315,19 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
WITH_RANK << "__first__:first database io phase DONE\n";
WITH_RANK << "__first__::::Unwrapping all slices for first database\n";
for (auto& u: unions) u->unwrapAll(abc);
WITH_RANK << "__first__::::Unwrapping all slices for first database DONE\n";
WITH_RANK << "__first__::::Unwrapping slices for first database DONE\n";
MPI_Barrier(universe);
}
// COMM NEXT DATABASE ================================================={{{1
if (abcNext) {
WITH_RANK << "__comm__:" << iteration << "th communicating database\n";
chrono["db:comm"].start();
const auto db = communicateDatabase(*abcNext, universe);
chrono["db:comm"].stop();
chrono["db:io"].start();
doIOPhase(db);
chrono["db:io"].stop();
WITH_CHRONO("db:comm",
const auto db = communicateDatabase(*abcNext, universe);
)
WITH_CHRONO("db:io",
doIOPhase(db);
)
WITH_RANK << "__comm__:" << iteration << "th database io phase DONE\n";
}
@ -341,63 +335,61 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
OCD_Barrier(universe);
if (!isFakeTuple(i)) {
WITH_RANK << iteration << "-th doubles\n";
WITH_CHRONO(chrono["oneshot-unwrap"],
WITH_CHRONO(chrono["unwrap"],
WITH_CHRONO(chrono["unwrap:doubles"],
WITH_CHRONO("oneshot-unwrap",
WITH_CHRONO("unwrap",
WITH_CHRONO("unwrap:doubles",
for (auto& u: decltype(unions){&abph, &hhha, &taphh, &tabhh}) {
u->unwrapAll(abc);
}
)))
chrono["oneshot-doubles"].start();
chrono["doubles"].start();
doublesContribution( abc, (size_t)No, (size_t)Nv
// -- VABCI
, abph.unwrapSlice(Slice::AB, abc)
, abph.unwrapSlice(Slice::AC, abc)
, abph.unwrapSlice(Slice::BC, abc)
, abph.unwrapSlice(Slice::BA, abc)
, abph.unwrapSlice(Slice::CA, abc)
, abph.unwrapSlice(Slice::CB, abc)
// -- VHHHA
, hhha.unwrapSlice(Slice::A, abc)
, hhha.unwrapSlice(Slice::B, abc)
, hhha.unwrapSlice(Slice::C, abc)
// -- TA
, taphh.unwrapSlice(Slice::A, abc)
, taphh.unwrapSlice(Slice::B, abc)
, taphh.unwrapSlice(Slice::C, abc)
// -- TABIJ
, tabhh.unwrapSlice(Slice::AB, abc)
, tabhh.unwrapSlice(Slice::AC, abc)
, tabhh.unwrapSlice(Slice::BC, abc)
// -- TIJK
, Tijk.data()
, chrono
);
WITH_RANK << iteration << "-th doubles done\n";
chrono["doubles"].stop();
chrono["oneshot-doubles"].stop();
WITH_CHRONO("oneshot-doubles",
WITH_CHRONO("doubles",
doublesContribution( abc, (size_t)No, (size_t)Nv
// -- VABCI
, abph.unwrapSlice(Slice::AB, abc)
, abph.unwrapSlice(Slice::AC, abc)
, abph.unwrapSlice(Slice::BC, abc)
, abph.unwrapSlice(Slice::BA, abc)
, abph.unwrapSlice(Slice::CA, abc)
, abph.unwrapSlice(Slice::CB, abc)
// -- VHHHA
, hhha.unwrapSlice(Slice::A, abc)
, hhha.unwrapSlice(Slice::B, abc)
, hhha.unwrapSlice(Slice::C, abc)
// -- TA
, taphh.unwrapSlice(Slice::A, abc)
, taphh.unwrapSlice(Slice::B, abc)
, taphh.unwrapSlice(Slice::C, abc)
// -- TABIJ
, tabhh.unwrapSlice(Slice::AB, abc)
, tabhh.unwrapSlice(Slice::AC, abc)
, tabhh.unwrapSlice(Slice::BC, abc)
// -- TIJK
, Tijk.data()
);
WITH_RANK << iteration << "-th doubles done\n";
))
}
// COMPUTE SINGLES =================================================== {{{1
OCD_Barrier(universe);
if (!isFakeTuple(i)) {
WITH_CHRONO(chrono["oneshot-unwrap"],
WITH_CHRONO(chrono["unwrap"],
WITH_CHRONO(chrono["unwrap:singles"],
WITH_CHRONO("oneshot-unwrap",
WITH_CHRONO("unwrap",
WITH_CHRONO("unwrap:singles",
abhh.unwrapAll(abc);
)))
chrono["reorder"].start();
for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
chrono["reorder"].stop();
chrono["singles"].start();
singlesContribution( No, Nv, abc
, Tai.data()
, abhh.unwrapSlice(Slice::AB, abc)
, abhh.unwrapSlice(Slice::AC, abc)
, abhh.unwrapSlice(Slice::BC, abc)
, Zijk.data());
chrono["singles"].stop();
WITH_CHRONO("reorder",
for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
)
WITH_CHRONO("singles",
singlesContribution( No, Nv, abc
, Tai.data()
, abhh.unwrapSlice(Slice::AB, abc)
, abhh.unwrapSlice(Slice::AC, abc)
, abhh.unwrapSlice(Slice::BC, abc)
, Zijk.data());
)
}
@ -410,12 +402,12 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
if (abc[1] == abc[2]) distinct--;
const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]);
chrono["energy"].start();
if ( distinct == 0)
tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk);
else
tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk);
chrono["energy"].stop();
WITH_CHRONO("energy",
if ( distinct == 0)
tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk);
else
tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk);
)
#if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES)
tupleEnergies[abc] = tupleEnergy;
@ -446,7 +438,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
// CLEANUP UNIONS ===================================================={{{1
OCD_Barrier(universe);
if (abcNext) {
chrono["gc"].start();
WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n";
for (auto& u: unions) {
@ -480,12 +471,11 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
}
chrono["gc"].stop();
}
WITH_RANK << iteration << "-th cleaning up....... DONE\n";
chrono["iterations"].stop();
Atrip::chrono["iterations"].stop();
// ITERATION END ====================================================={{{1
}
@ -523,15 +513,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
// PRINT TIMINGS {{{1
if (in.chrono)
for (auto const& pair: chrono)
for (auto const& pair: Atrip::chrono)
LOG(0,"atrip:chrono") << pair.first << " "
<< pair.second.count() << std::endl;
LOG(0, "atrip:flops(doubles)")
<< nIterations * doublesFlops / chrono["doubles"].count() << "\n";
<< nIterations * doublesFlops / Atrip::chrono["doubles"].count() << "\n";
LOG(0, "atrip:flops(iterations)")
<< nIterations * doublesFlops / chrono["iterations"].count() << "\n";
<< nIterations * doublesFlops / Atrip::chrono["iterations"].count() << "\n";
// TODO: change the sign in the getEnergy routines
return { - globalEnergy };