Update all chronos to use the static chrono
This commit is contained in:
parent
2823fa3699
commit
79a3f99cb3
259
atrip.org
259
atrip.org
@ -748,10 +748,10 @@ The chrono is just a simple wrapper for a high resolution clock
|
||||
that can be found in the =std::chrono= namespace of the standard library.
|
||||
|
||||
#+begin_src c++ :tangle (atrip-utils-h)
|
||||
#define WITH_CHRONO(__chrono, ...) \
|
||||
__chrono.start(); \
|
||||
#define WITH_CHRONO(__chrono_name, ...) \
|
||||
Atrip::chrono[__chrono_name].start(); \
|
||||
__VA_ARGS__ \
|
||||
__chrono.stop();
|
||||
Atrip::chrono[__chrono_name].stop();
|
||||
|
||||
struct Timer {
|
||||
using Clock = std::chrono::high_resolution_clock;
|
||||
@ -1549,12 +1549,18 @@ struct NaiveDistribution : public TuplesDistribution {
|
||||
WITH_RANK << "range = "
|
||||
<< range.first << " -> " << range.second
|
||||
<< std::endl;
|
||||
std::vector<ABCTuple> result(range.second - range.first + 1, FAKE_TUPLE);
|
||||
WITH_RANK << "number of global tuples = " << all.size() << std::endl;
|
||||
WITH_RANK << "number of local tuples = " << result.size() << std::endl;
|
||||
|
||||
std::vector<ABCTuple> result(range.second - range.first, FAKE_TUPLE);
|
||||
std::copy(all.begin() + range.first,
|
||||
std::copy(range.first >= all.size()
|
||||
? all.end()
|
||||
: all.begin() + range.first,
|
||||
// --
|
||||
range.second >= all.size()
|
||||
? all.end()
|
||||
: all.begin() + range.first + range.second,
|
||||
// --
|
||||
result.begin());
|
||||
return result;
|
||||
}
|
||||
@ -2467,10 +2473,8 @@ namespace atrip {
|
||||
, double const* TBChh
|
||||
// -- TIJK
|
||||
, double *Tijk
|
||||
, atrip::Timings& chrono
|
||||
) {
|
||||
|
||||
auto& t_reorder = chrono["doubles:reorder"];
|
||||
const size_t a = abc[0], b = abc[1], c = abc[2]
|
||||
, NoNo = No*No, NoNv = No*Nv
|
||||
;
|
||||
@ -2478,43 +2482,42 @@ namespace atrip {
|
||||
#if defined(ATRIP_USE_DGEMM)
|
||||
#define _IJK_(i, j, k) i + j*No + k*NoNo
|
||||
#define REORDER(__II, __JJ, __KK) \
|
||||
t_reorder.start(); \
|
||||
WITH_CHRONO("double:reorder", \
|
||||
for (size_t k = 0; k < No; k++) \
|
||||
for (size_t j = 0; j < No; j++) \
|
||||
for (size_t i = 0; i < No; i++) { \
|
||||
Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \
|
||||
Tijk[_IJK_(i, j, k)] \
|
||||
+= _t_buffer[_IJK_(__II, __JJ, __KK)]; \
|
||||
} \
|
||||
t_reorder.stop();
|
||||
)
|
||||
#define DGEMM_PARTICLES(__A, __B) \
|
||||
atrip::dgemm_( "T" \
|
||||
, "N" \
|
||||
, (int const*)&NoNo \
|
||||
, (int const*)&No \
|
||||
, (int const*)&Nv \
|
||||
, &one \
|
||||
, __A \
|
||||
, (int const*)&Nv \
|
||||
, __B \
|
||||
, (int const*)&Nv \
|
||||
, &zero \
|
||||
, _t_buffer.data() \
|
||||
, (int const*)&NoNo \
|
||||
);
|
||||
atrip::dgemm_("T", \
|
||||
"N", \
|
||||
(int const*)&NoNo, \
|
||||
(int const*)&No, \
|
||||
(int const*)&Nv, \
|
||||
&one, \
|
||||
__A, \
|
||||
(int const*)&Nv, \
|
||||
__B, \
|
||||
(int const*)&Nv, \
|
||||
&zero, \
|
||||
_t_buffer.data(), \
|
||||
(int const*)&NoNo);
|
||||
#define DGEMM_HOLES(__A, __B, __TRANSB) \
|
||||
atrip::dgemm_( "N" \
|
||||
, __TRANSB \
|
||||
, (int const*)&NoNo \
|
||||
, (int const*)&No \
|
||||
, (int const*)&No \
|
||||
, &m_one \
|
||||
, __A \
|
||||
, (int const*)&NoNo \
|
||||
, __B \
|
||||
, (int const*)&No \
|
||||
, &zero \
|
||||
, _t_buffer.data() \
|
||||
, (int const*)&NoNo \
|
||||
);
|
||||
atrip::dgemm_("N", \
|
||||
__TRANSB, \
|
||||
(int const*)&NoNo, \
|
||||
(int const*)&No, \
|
||||
(int const*)&No, \
|
||||
&m_one, \
|
||||
__A, \
|
||||
(int const*)&NoNo, \
|
||||
__B, \
|
||||
(int const*)&No, \
|
||||
&zero, \
|
||||
_t_buffer.data(), \
|
||||
(int const*)&NoNo);
|
||||
|
||||
using F = double;
|
||||
const size_t NoNoNo = No*NoNo;
|
||||
@ -2522,82 +2525,80 @@ namespace atrip {
|
||||
_t_buffer.reserve(NoNoNo);
|
||||
F one{1.0}, m_one{-1.0}, zero{0.0};
|
||||
|
||||
t_reorder.start();
|
||||
WITH_CHRONO("double:reorder",
|
||||
for (size_t k = 0; k < NoNoNo; k++) {
|
||||
// zero the Tijk
|
||||
Tijk[k] = 0.0;
|
||||
}
|
||||
t_reorder.stop();
|
||||
})
|
||||
|
||||
chrono["doubles:holes"].start();
|
||||
{ // Holes part ============================================================
|
||||
WITH_CHRONO("doubles:holes",
|
||||
{ // Holes part ================================================
|
||||
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
|
||||
chrono["doubles:holes:1"].start();
|
||||
WITH_CHRONO("doubles:holes:1",
|
||||
DGEMM_HOLES(VhhhC, TABhh, "N")
|
||||
REORDER(i, k, j)
|
||||
chrono["doubles:holes:1"].stop();
|
||||
)
|
||||
// VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0
|
||||
chrono["doubles:holes:2"].start();
|
||||
WITH_CHRONO("doubles:holes:2",
|
||||
DGEMM_HOLES(VhhhC, TABhh, "T")
|
||||
REORDER(j, k, i)
|
||||
chrono["doubles:holes:2"].stop();
|
||||
)
|
||||
// VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5
|
||||
chrono["doubles:holes:3"].start();
|
||||
WITH_CHRONO("doubles:holes:3",
|
||||
DGEMM_HOLES(VhhhB, TAChh, "N")
|
||||
REORDER(i, j, k)
|
||||
chrono["doubles:holes:3"].stop();
|
||||
)
|
||||
// VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3
|
||||
chrono["doubles:holes:4"].start();
|
||||
WITH_CHRONO("doubles:holes:4",
|
||||
DGEMM_HOLES(VhhhB, TAChh, "T")
|
||||
REORDER(k, j, i)
|
||||
chrono["doubles:holes:4"].stop();
|
||||
)
|
||||
// VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1
|
||||
chrono["doubles:holes:5"].start();
|
||||
WITH_CHRONO("doubles:holes:5",
|
||||
DGEMM_HOLES(VhhhA, TBChh, "N")
|
||||
REORDER(j, i, k)
|
||||
chrono["doubles:holes:5"].stop();
|
||||
)
|
||||
// VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4
|
||||
chrono["doubles:holes:6"].start();
|
||||
WITH_CHRONO("doubles:holes:6",
|
||||
DGEMM_HOLES(VhhhA, TBChh, "T")
|
||||
REORDER(k, i, j)
|
||||
chrono["doubles:holes:6"].stop();
|
||||
)
|
||||
}
|
||||
chrono["doubles:holes"].stop();
|
||||
)
|
||||
|
||||
chrono["doubles:particles"].start();
|
||||
{ // Particle part =========================================================
|
||||
WITH_CHRONO("doubles:particles",
|
||||
{ // Particle part ===========================================
|
||||
// TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0
|
||||
chrono["doubles:particles:1"].start();
|
||||
WITH_CHRONO("doubles:particles:1",
|
||||
DGEMM_PARTICLES(TAphh, VBCph)
|
||||
REORDER(i, j, k)
|
||||
chrono["doubles:particles:1"].stop();
|
||||
)
|
||||
// TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3
|
||||
chrono["doubles:particles:2"].start();
|
||||
WITH_CHRONO("doubles:particles:2",
|
||||
DGEMM_PARTICLES(TAphh, VCBph)
|
||||
REORDER(i, k, j)
|
||||
chrono["doubles:particles:2"].stop();
|
||||
)
|
||||
// TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5
|
||||
chrono["doubles:particles:3"].start();
|
||||
WITH_CHRONO("doubles:particles:3",
|
||||
DGEMM_PARTICLES(TCphh, VABph)
|
||||
REORDER(k, i, j)
|
||||
chrono["doubles:particles:3"].stop();
|
||||
)
|
||||
// TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2
|
||||
chrono["doubles:particles:4"].start();
|
||||
WITH_CHRONO("doubles:particles:4",
|
||||
DGEMM_PARTICLES(TCphh, VBAph)
|
||||
REORDER(k, j, i)
|
||||
chrono["doubles:particles:4"].stop();
|
||||
)
|
||||
// TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1
|
||||
chrono["doubles:particles:5"].start();
|
||||
WITH_CHRONO("doubles:particles:5",
|
||||
DGEMM_PARTICLES(TBphh, VACph)
|
||||
REORDER(j, i, k)
|
||||
chrono["doubles:particles:5"].stop();
|
||||
)
|
||||
// TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4
|
||||
chrono["doubles:particles:6"].start();
|
||||
WITH_CHRONO("doubles:particles:6",
|
||||
DGEMM_PARTICLES(TBphh, VCAph)
|
||||
REORDER(j, k, i)
|
||||
chrono["doubles:particles:6"].stop();
|
||||
)
|
||||
}
|
||||
chrono["doubles:particles"].stop();
|
||||
)
|
||||
|
||||
#undef REORDER
|
||||
#undef DGEMM_HOLES
|
||||
@ -2777,9 +2778,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
const int rank = Atrip::rank;
|
||||
MPI_Comm universe = in.ei->wrld->comm;
|
||||
|
||||
// Timings in seconds ================================================{{{1
|
||||
Timings chrono{};
|
||||
|
||||
const size_t No = in.ei->lens[0];
|
||||
const size_t Nv = in.ea->lens[0];
|
||||
LOG(0,"Atrip") << "No: " << No << "\n";
|
||||
@ -2819,20 +2817,20 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
}
|
||||
|
||||
|
||||
chrono["nv-slices"].start();
|
||||
// BUILD SLICES PARAMETRIZED BY NV ==================================={{{1
|
||||
WITH_CHRONO("nv-slices",
|
||||
LOG(0,"Atrip") << "BUILD NV-SLICES\n";
|
||||
TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
chrono["nv-slices"].stop();
|
||||
)
|
||||
|
||||
chrono["nv-nv-slices"].start();
|
||||
// BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1
|
||||
WITH_CHRONO("nv-nv-slices",
|
||||
LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n";
|
||||
ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
chrono["nv-nv-slices"].stop();
|
||||
)
|
||||
|
||||
// all tensors
|
||||
std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh};
|
||||
@ -2849,7 +2847,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
}
|
||||
|
||||
LOG(0,"Atrip") << "BUILDING TUPLE LIST\n";
|
||||
WITH_CHRONO(chrono["tuples:build"],
|
||||
WITH_CHRONO("tuples:build",
|
||||
auto const tuplesList = distribution->getTuples(Nv, universe);
|
||||
)
|
||||
size_t nIterations = tuplesList.size();
|
||||
@ -2872,26 +2870,24 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
auto communicateDatabase
|
||||
= [ &unions
|
||||
, np
|
||||
, &chrono
|
||||
] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database {
|
||||
|
||||
chrono["db:comm:type:do"].start();
|
||||
WITH_CHRONO("db:comm:type:do",
|
||||
auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement();
|
||||
chrono["db:comm:type:do"].stop();
|
||||
)
|
||||
|
||||
chrono["db:comm:ldb"].start();
|
||||
WITH_CHRONO("db:comm:ldb",
|
||||
Slice::LocalDatabase ldb;
|
||||
|
||||
for (auto const& tensor: unions) {
|
||||
auto const& tensorDb = tensor->buildLocalDatabase(abc);
|
||||
ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end());
|
||||
}
|
||||
chrono["db:comm:ldb"].stop();
|
||||
)
|
||||
|
||||
Slice::Database db(np * ldb.size(), ldb[0]);
|
||||
|
||||
chrono["oneshot-db:comm:allgather"].start();
|
||||
chrono["db:comm:allgather"].start();
|
||||
WITH_CHRONO("oneshot-db:comm:allgather",
|
||||
WITH_CHRONO("db:comm:allgather",
|
||||
MPI_Allgather( ldb.data()
|
||||
, ldb.size()
|
||||
, MPI_LDB_ELEMENT
|
||||
@ -2899,18 +2895,17 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
, ldb.size()
|
||||
, MPI_LDB_ELEMENT
|
||||
, c);
|
||||
chrono["db:comm:allgather"].stop();
|
||||
chrono["oneshot-db:comm:allgather"].stop();
|
||||
))
|
||||
|
||||
chrono["db:comm:type:free"].start();
|
||||
WITH_CHRONO("db:comm:type:free",
|
||||
MPI_Type_free(&MPI_LDB_ELEMENT);
|
||||
chrono["db:comm:type:free"].stop();
|
||||
)
|
||||
|
||||
return db;
|
||||
};
|
||||
|
||||
auto doIOPhase
|
||||
= [&unions, &rank, &np, &universe, &chrono] (Slice::Database const& db) {
|
||||
= [&unions, &rank, &np, &universe] (Slice::Database const& db) {
|
||||
|
||||
const size_t localDBLength = db.size() / np;
|
||||
|
||||
@ -2946,9 +2941,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
<< "\n"
|
||||
;
|
||||
|
||||
chrono["db:io:recv"].start();
|
||||
WITH_CHRONO("db:io:recv",
|
||||
u.receive(el.info, recvTag);
|
||||
chrono["db:io:recv"].stop();
|
||||
)
|
||||
|
||||
} // recv
|
||||
}
|
||||
@ -2982,9 +2977,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
<< "\n"
|
||||
;
|
||||
|
||||
chrono["db:io:send"].start();
|
||||
WITH_CHRONO("db:io:send",
|
||||
u.send(otherRank, el.info, sendTag);
|
||||
chrono["db:io:send"].stop();
|
||||
)
|
||||
|
||||
} // send phase
|
||||
|
||||
@ -3015,14 +3010,14 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
; i < tuplesList.size()
|
||||
; i++, iteration++
|
||||
) {
|
||||
chrono["iterations"].start();
|
||||
Atrip::chrono["iterations"].start();
|
||||
|
||||
// check overhead from chrono over all iterations
|
||||
chrono["start:stop"].start(); chrono["start:stop"].stop();
|
||||
WITH_CHRONO("start:stop", {})
|
||||
|
||||
// check overhead of doing a barrier at the beginning
|
||||
WITH_CHRONO(chrono["oneshot-mpi:barrier"],
|
||||
WITH_CHRONO(chrono["mpi:barrier"],
|
||||
WITH_CHRONO("oneshot-mpi:barrier",
|
||||
WITH_CHRONO("mpi:barrier",
|
||||
if (in.barrier) MPI_Barrier(universe);
|
||||
))
|
||||
|
||||
@ -3030,15 +3025,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
LOG(0,"Atrip")
|
||||
<< "iteration " << iteration
|
||||
<< " [" << 100 * iteration / nIterations << "%]"
|
||||
<< " (" << doublesFlops * iteration / chrono["doubles"].count()
|
||||
<< " (" << doublesFlops * iteration / Atrip::chrono["doubles"].count()
|
||||
<< "GF)"
|
||||
<< " (" << doublesFlops * iteration / chrono["iterations"].count()
|
||||
<< " (" << doublesFlops * iteration / Atrip::chrono["iterations"].count()
|
||||
<< "GF)"
|
||||
<< " ===========================\n";
|
||||
|
||||
// PRINT TIMINGS
|
||||
if (in.chrono)
|
||||
for (auto const& pair: chrono)
|
||||
for (auto const& pair: Atrip::chrono)
|
||||
LOG(1, " ") << pair.first << " :: "
|
||||
<< pair.second.count()
|
||||
<< std::endl;
|
||||
@ -3055,13 +3050,13 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
: &tuplesList[i + 1]
|
||||
;
|
||||
|
||||
chrono["with_rank"].start();
|
||||
WITH_CHRONO("with_rank",
|
||||
WITH_RANK << " :it " << iteration
|
||||
<< " :abc " << pretty_print(abc)
|
||||
<< " :abcN "
|
||||
<< (abcNext ? pretty_print(*abcNext) : "None")
|
||||
<< "\n";
|
||||
chrono["with_rank"].stop();
|
||||
)
|
||||
|
||||
|
||||
// COMM FIRST DATABASE ================================================{{{1
|
||||
@ -3074,19 +3069,19 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
WITH_RANK << "__first__:first database io phase DONE\n";
|
||||
WITH_RANK << "__first__::::Unwrapping all slices for first database\n";
|
||||
for (auto& u: unions) u->unwrapAll(abc);
|
||||
WITH_RANK << "__first__::::Unwrapping all slices for first database DONE\n";
|
||||
WITH_RANK << "__first__::::Unwrapping slices for first database DONE\n";
|
||||
MPI_Barrier(universe);
|
||||
}
|
||||
|
||||
// COMM NEXT DATABASE ================================================={{{1
|
||||
if (abcNext) {
|
||||
WITH_RANK << "__comm__:" << iteration << "th communicating database\n";
|
||||
chrono["db:comm"].start();
|
||||
WITH_CHRONO("db:comm",
|
||||
const auto db = communicateDatabase(*abcNext, universe);
|
||||
chrono["db:comm"].stop();
|
||||
chrono["db:io"].start();
|
||||
)
|
||||
WITH_CHRONO("db:io",
|
||||
doIOPhase(db);
|
||||
chrono["db:io"].stop();
|
||||
)
|
||||
WITH_RANK << "__comm__:" << iteration << "th database io phase DONE\n";
|
||||
}
|
||||
|
||||
@ -3094,15 +3089,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
OCD_Barrier(universe);
|
||||
if (!isFakeTuple(i)) {
|
||||
WITH_RANK << iteration << "-th doubles\n";
|
||||
WITH_CHRONO(chrono["oneshot-unwrap"],
|
||||
WITH_CHRONO(chrono["unwrap"],
|
||||
WITH_CHRONO(chrono["unwrap:doubles"],
|
||||
WITH_CHRONO("oneshot-unwrap",
|
||||
WITH_CHRONO("unwrap",
|
||||
WITH_CHRONO("unwrap:doubles",
|
||||
for (auto& u: decltype(unions){&abph, &hhha, &taphh, &tabhh}) {
|
||||
u->unwrapAll(abc);
|
||||
}
|
||||
)))
|
||||
chrono["oneshot-doubles"].start();
|
||||
chrono["doubles"].start();
|
||||
WITH_CHRONO("oneshot-doubles",
|
||||
WITH_CHRONO("doubles",
|
||||
doublesContribution( abc, (size_t)No, (size_t)Nv
|
||||
// -- VABCI
|
||||
, abph.unwrapSlice(Slice::AB, abc)
|
||||
@ -3125,32 +3120,30 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
, tabhh.unwrapSlice(Slice::BC, abc)
|
||||
// -- TIJK
|
||||
, Tijk.data()
|
||||
, chrono
|
||||
);
|
||||
WITH_RANK << iteration << "-th doubles done\n";
|
||||
chrono["doubles"].stop();
|
||||
chrono["oneshot-doubles"].stop();
|
||||
))
|
||||
}
|
||||
|
||||
// COMPUTE SINGLES =================================================== {{{1
|
||||
OCD_Barrier(universe);
|
||||
if (!isFakeTuple(i)) {
|
||||
WITH_CHRONO(chrono["oneshot-unwrap"],
|
||||
WITH_CHRONO(chrono["unwrap"],
|
||||
WITH_CHRONO(chrono["unwrap:singles"],
|
||||
WITH_CHRONO("oneshot-unwrap",
|
||||
WITH_CHRONO("unwrap",
|
||||
WITH_CHRONO("unwrap:singles",
|
||||
abhh.unwrapAll(abc);
|
||||
)))
|
||||
chrono["reorder"].start();
|
||||
WITH_CHRONO("reorder",
|
||||
for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
|
||||
chrono["reorder"].stop();
|
||||
chrono["singles"].start();
|
||||
)
|
||||
WITH_CHRONO("singles",
|
||||
singlesContribution( No, Nv, abc
|
||||
, Tai.data()
|
||||
, abhh.unwrapSlice(Slice::AB, abc)
|
||||
, abhh.unwrapSlice(Slice::AC, abc)
|
||||
, abhh.unwrapSlice(Slice::BC, abc)
|
||||
, Zijk.data());
|
||||
chrono["singles"].stop();
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@ -3163,12 +3156,12 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
if (abc[1] == abc[2]) distinct--;
|
||||
const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]);
|
||||
|
||||
chrono["energy"].start();
|
||||
WITH_CHRONO("energy",
|
||||
if ( distinct == 0)
|
||||
tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk);
|
||||
else
|
||||
tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk);
|
||||
chrono["energy"].stop();
|
||||
)
|
||||
|
||||
#if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES)
|
||||
tupleEnergies[abc] = tupleEnergy;
|
||||
@ -3199,7 +3192,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
// CLEANUP UNIONS ===================================================={{{1
|
||||
OCD_Barrier(universe);
|
||||
if (abcNext) {
|
||||
chrono["gc"].start();
|
||||
WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n";
|
||||
for (auto& u: unions) {
|
||||
|
||||
@ -3233,12 +3225,11 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
|
||||
|
||||
}
|
||||
chrono["gc"].stop();
|
||||
}
|
||||
|
||||
WITH_RANK << iteration << "-th cleaning up....... DONE\n";
|
||||
|
||||
chrono["iterations"].stop();
|
||||
Atrip::chrono["iterations"].stop();
|
||||
// ITERATION END ====================================================={{{1
|
||||
|
||||
}
|
||||
@ -3276,15 +3267,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
|
||||
// PRINT TIMINGS {{{1
|
||||
if (in.chrono)
|
||||
for (auto const& pair: chrono)
|
||||
for (auto const& pair: Atrip::chrono)
|
||||
LOG(0,"atrip:chrono") << pair.first << " "
|
||||
<< pair.second.count() << std::endl;
|
||||
|
||||
|
||||
LOG(0, "atrip:flops(doubles)")
|
||||
<< nIterations * doublesFlops / chrono["doubles"].count() << "\n";
|
||||
<< nIterations * doublesFlops / Atrip::chrono["doubles"].count() << "\n";
|
||||
LOG(0, "atrip:flops(iterations)")
|
||||
<< nIterations * doublesFlops / chrono["iterations"].count() << "\n";
|
||||
<< nIterations * doublesFlops / Atrip::chrono["iterations"].count() << "\n";
|
||||
|
||||
// TODO: change the sign in the getEnergy routines
|
||||
return { - globalEnergy };
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../atrip.org::*Include header][Include header:1]]
|
||||
// [[file:~/atrip/atrip.org::*Include%20header][Include header:1]]
|
||||
#pragma once
|
||||
|
||||
#include <atrip/Atrip.hpp>
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*Atrip][Atrip:1]]
|
||||
// [[file:~/atrip/atrip.org::*Atrip][Atrip:1]]
|
||||
#pragma once
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*Blas][Blas:1]]
|
||||
// [[file:~/atrip/atrip.org::*Blas][Blas:1]]
|
||||
#pragma once
|
||||
namespace atrip {
|
||||
extern "C" {
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*Debug][Debug:1]]
|
||||
// [[file:~/atrip/atrip.org::*Debug][Debug:1]]
|
||||
#pragma once
|
||||
#define ATRIP_BENCHMARK
|
||||
//#define ATRIP_DONT_SLICE
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*Equations][Equations:1]]
|
||||
// [[file:~/atrip/atrip.org::*Equations][Equations:1]]
|
||||
#pragma once
|
||||
|
||||
#include<atrip/Slice.hpp>
|
||||
@ -150,10 +150,8 @@ namespace atrip {
|
||||
, double const* TBChh
|
||||
// -- TIJK
|
||||
, double *Tijk
|
||||
, atrip::Timings& chrono
|
||||
) {
|
||||
|
||||
auto& t_reorder = chrono["doubles:reorder"];
|
||||
const size_t a = abc[0], b = abc[1], c = abc[2]
|
||||
, NoNo = No*No, NoNv = No*Nv
|
||||
;
|
||||
@ -161,43 +159,42 @@ namespace atrip {
|
||||
#if defined(ATRIP_USE_DGEMM)
|
||||
#define _IJK_(i, j, k) i + j*No + k*NoNo
|
||||
#define REORDER(__II, __JJ, __KK) \
|
||||
t_reorder.start(); \
|
||||
WITH_CHRONO("double:reorder", \
|
||||
for (size_t k = 0; k < No; k++) \
|
||||
for (size_t j = 0; j < No; j++) \
|
||||
for (size_t i = 0; i < No; i++) { \
|
||||
Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \
|
||||
Tijk[_IJK_(i, j, k)] \
|
||||
+= _t_buffer[_IJK_(__II, __JJ, __KK)]; \
|
||||
} \
|
||||
t_reorder.stop();
|
||||
)
|
||||
#define DGEMM_PARTICLES(__A, __B) \
|
||||
atrip::dgemm_( "T" \
|
||||
, "N" \
|
||||
, (int const*)&NoNo \
|
||||
, (int const*)&No \
|
||||
, (int const*)&Nv \
|
||||
, &one \
|
||||
, __A \
|
||||
, (int const*)&Nv \
|
||||
, __B \
|
||||
, (int const*)&Nv \
|
||||
, &zero \
|
||||
, _t_buffer.data() \
|
||||
, (int const*)&NoNo \
|
||||
);
|
||||
atrip::dgemm_("T", \
|
||||
"N", \
|
||||
(int const*)&NoNo, \
|
||||
(int const*)&No, \
|
||||
(int const*)&Nv, \
|
||||
&one, \
|
||||
__A, \
|
||||
(int const*)&Nv, \
|
||||
__B, \
|
||||
(int const*)&Nv, \
|
||||
&zero, \
|
||||
_t_buffer.data(), \
|
||||
(int const*)&NoNo);
|
||||
#define DGEMM_HOLES(__A, __B, __TRANSB) \
|
||||
atrip::dgemm_( "N" \
|
||||
, __TRANSB \
|
||||
, (int const*)&NoNo \
|
||||
, (int const*)&No \
|
||||
, (int const*)&No \
|
||||
, &m_one \
|
||||
, __A \
|
||||
, (int const*)&NoNo \
|
||||
, __B \
|
||||
, (int const*)&No \
|
||||
, &zero \
|
||||
, _t_buffer.data() \
|
||||
, (int const*)&NoNo \
|
||||
);
|
||||
atrip::dgemm_("N", \
|
||||
__TRANSB, \
|
||||
(int const*)&NoNo, \
|
||||
(int const*)&No, \
|
||||
(int const*)&No, \
|
||||
&m_one, \
|
||||
__A, \
|
||||
(int const*)&NoNo, \
|
||||
__B, \
|
||||
(int const*)&No, \
|
||||
&zero, \
|
||||
_t_buffer.data(), \
|
||||
(int const*)&NoNo);
|
||||
|
||||
using F = double;
|
||||
const size_t NoNoNo = No*NoNo;
|
||||
@ -205,82 +202,80 @@ namespace atrip {
|
||||
_t_buffer.reserve(NoNoNo);
|
||||
F one{1.0}, m_one{-1.0}, zero{0.0};
|
||||
|
||||
t_reorder.start();
|
||||
WITH_CHRONO("double:reorder",
|
||||
for (size_t k = 0; k < NoNoNo; k++) {
|
||||
// zero the Tijk
|
||||
Tijk[k] = 0.0;
|
||||
}
|
||||
t_reorder.stop();
|
||||
})
|
||||
|
||||
chrono["doubles:holes"].start();
|
||||
{ // Holes part ============================================================
|
||||
WITH_CHRONO("doubles:holes",
|
||||
{ // Holes part ================================================
|
||||
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
|
||||
chrono["doubles:holes:1"].start();
|
||||
WITH_CHRONO("doubles:holes:1",
|
||||
DGEMM_HOLES(VhhhC, TABhh, "N")
|
||||
REORDER(i, k, j)
|
||||
chrono["doubles:holes:1"].stop();
|
||||
)
|
||||
// VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0
|
||||
chrono["doubles:holes:2"].start();
|
||||
WITH_CHRONO("doubles:holes:2",
|
||||
DGEMM_HOLES(VhhhC, TABhh, "T")
|
||||
REORDER(j, k, i)
|
||||
chrono["doubles:holes:2"].stop();
|
||||
)
|
||||
// VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5
|
||||
chrono["doubles:holes:3"].start();
|
||||
WITH_CHRONO("doubles:holes:3",
|
||||
DGEMM_HOLES(VhhhB, TAChh, "N")
|
||||
REORDER(i, j, k)
|
||||
chrono["doubles:holes:3"].stop();
|
||||
)
|
||||
// VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3
|
||||
chrono["doubles:holes:4"].start();
|
||||
WITH_CHRONO("doubles:holes:4",
|
||||
DGEMM_HOLES(VhhhB, TAChh, "T")
|
||||
REORDER(k, j, i)
|
||||
chrono["doubles:holes:4"].stop();
|
||||
)
|
||||
// VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1
|
||||
chrono["doubles:holes:5"].start();
|
||||
WITH_CHRONO("doubles:holes:5",
|
||||
DGEMM_HOLES(VhhhA, TBChh, "N")
|
||||
REORDER(j, i, k)
|
||||
chrono["doubles:holes:5"].stop();
|
||||
)
|
||||
// VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4
|
||||
chrono["doubles:holes:6"].start();
|
||||
WITH_CHRONO("doubles:holes:6",
|
||||
DGEMM_HOLES(VhhhA, TBChh, "T")
|
||||
REORDER(k, i, j)
|
||||
chrono["doubles:holes:6"].stop();
|
||||
)
|
||||
}
|
||||
chrono["doubles:holes"].stop();
|
||||
)
|
||||
|
||||
chrono["doubles:particles"].start();
|
||||
{ // Particle part =========================================================
|
||||
WITH_CHRONO("doubles:particles",
|
||||
{ // Particle part ===========================================
|
||||
// TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0
|
||||
chrono["doubles:particles:1"].start();
|
||||
WITH_CHRONO("doubles:particles:1",
|
||||
DGEMM_PARTICLES(TAphh, VBCph)
|
||||
REORDER(i, j, k)
|
||||
chrono["doubles:particles:1"].stop();
|
||||
)
|
||||
// TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3
|
||||
chrono["doubles:particles:2"].start();
|
||||
WITH_CHRONO("doubles:particles:2",
|
||||
DGEMM_PARTICLES(TAphh, VCBph)
|
||||
REORDER(i, k, j)
|
||||
chrono["doubles:particles:2"].stop();
|
||||
)
|
||||
// TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5
|
||||
chrono["doubles:particles:3"].start();
|
||||
WITH_CHRONO("doubles:particles:3",
|
||||
DGEMM_PARTICLES(TCphh, VABph)
|
||||
REORDER(k, i, j)
|
||||
chrono["doubles:particles:3"].stop();
|
||||
)
|
||||
// TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2
|
||||
chrono["doubles:particles:4"].start();
|
||||
WITH_CHRONO("doubles:particles:4",
|
||||
DGEMM_PARTICLES(TCphh, VBAph)
|
||||
REORDER(k, j, i)
|
||||
chrono["doubles:particles:4"].stop();
|
||||
)
|
||||
// TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1
|
||||
chrono["doubles:particles:5"].start();
|
||||
WITH_CHRONO("doubles:particles:5",
|
||||
DGEMM_PARTICLES(TBphh, VACph)
|
||||
REORDER(j, i, k)
|
||||
chrono["doubles:particles:5"].stop();
|
||||
)
|
||||
// TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4
|
||||
chrono["doubles:particles:6"].start();
|
||||
WITH_CHRONO("doubles:particles:6",
|
||||
DGEMM_PARTICLES(TBphh, VCAph)
|
||||
REORDER(j, k, i)
|
||||
chrono["doubles:particles:6"].stop();
|
||||
)
|
||||
}
|
||||
chrono["doubles:particles"].stop();
|
||||
)
|
||||
|
||||
#undef REORDER
|
||||
#undef DGEMM_HOLES
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*The rank mapping][The rank mapping:1]]
|
||||
// [[file:~/atrip/atrip.org::*The%20rank%20mapping][The rank mapping:1]]
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*Prolog][Prolog:1]]
|
||||
// [[file:~/atrip/atrip.org::*Prolog][Prolog:1]]
|
||||
#pragma once
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
@ -16,11 +16,11 @@ struct Slice {
|
||||
using F = double;
|
||||
// Prolog:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Location][Location:1]]
|
||||
// [[file:~/atrip/atrip.org::*Location][Location:1]]
|
||||
struct Location { size_t rank; size_t source; };
|
||||
// Location:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Type][Type:1]]
|
||||
// [[file:~/atrip/atrip.org::*Type][Type:1]]
|
||||
enum Type
|
||||
{ A = 10
|
||||
, B
|
||||
@ -38,7 +38,7 @@ enum Type
|
||||
};
|
||||
// Type:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*State][State:1]]
|
||||
// [[file:~/atrip/atrip.org::*State][State:1]]
|
||||
enum State {
|
||||
Fetch = 0,
|
||||
Dispatched = 2,
|
||||
@ -49,7 +49,7 @@ enum State {
|
||||
};
|
||||
// State:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*The Info structure][The Info structure:1]]
|
||||
// [[file:~/atrip/atrip.org::*The%20Info%20structure][The Info structure:1]]
|
||||
struct Info {
|
||||
// which part of a,b,c the slice holds
|
||||
PartialTuple tuple;
|
||||
@ -73,7 +73,7 @@ struct Info {
|
||||
using Ty_x_Tu = std::pair< Type, PartialTuple >;
|
||||
// The Info structure:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Name][Name:1]]
|
||||
// [[file:~/atrip/atrip.org::*Name][Name:1]]
|
||||
enum Name
|
||||
{ TA = 100
|
||||
, VIJKA = 101
|
||||
@ -83,19 +83,19 @@ enum Name
|
||||
};
|
||||
// Name:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Database][Database:1]]
|
||||
// [[file:~/atrip/atrip.org::*Database][Database:1]]
|
||||
struct LocalDatabaseElement {
|
||||
Slice::Name name;
|
||||
Slice::Info info;
|
||||
};
|
||||
// Database:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Database][Database:2]]
|
||||
// [[file:~/atrip/atrip.org::*Database][Database:2]]
|
||||
using LocalDatabase = std::vector<LocalDatabaseElement>;
|
||||
using Database = LocalDatabase;
|
||||
// Database:2 ends here
|
||||
|
||||
// [[file:../../atrip.org::*MPI Types][MPI Types:1]]
|
||||
// [[file:~/atrip/atrip.org::*MPI%20Types][MPI Types:1]]
|
||||
struct mpi {
|
||||
|
||||
static MPI_Datatype vector(size_t n, MPI_Datatype const& DT) {
|
||||
@ -185,7 +185,7 @@ struct mpi {
|
||||
};
|
||||
// MPI Types:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Static utilities][Static utilities:1]]
|
||||
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:1]]
|
||||
static
|
||||
PartialTuple subtupleBySlice(ABCTuple abc, Type sliceType) {
|
||||
switch (sliceType) {
|
||||
@ -203,7 +203,7 @@ PartialTuple subtupleBySlice(ABCTuple abc, Type sliceType) {
|
||||
}
|
||||
// Static utilities:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Static utilities][Static utilities:2]]
|
||||
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:2]]
|
||||
static std::vector<Slice*> hasRecycledReferencingToIt
|
||||
( std::vector<Slice> &slices
|
||||
, Info const& info
|
||||
@ -220,7 +220,7 @@ static std::vector<Slice*> hasRecycledReferencingToIt
|
||||
}
|
||||
// Static utilities:2 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Static utilities][Static utilities:3]]
|
||||
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:3]]
|
||||
static Slice& findOneByType(std::vector<Slice> &slices, Slice::Type type) {
|
||||
const auto sliceIt
|
||||
= std::find_if(slices.begin(), slices.end(),
|
||||
@ -236,7 +236,7 @@ static Slice& findOneByType(std::vector<Slice> &slices, Slice::Type type) {
|
||||
}
|
||||
// Static utilities:3 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Static utilities][Static utilities:4]]
|
||||
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:4]]
|
||||
static Slice&
|
||||
findRecycledSource (std::vector<Slice> &slices, Slice::Info info) {
|
||||
const auto sliceIt
|
||||
@ -262,7 +262,7 @@ findRecycledSource (std::vector<Slice> &slices, Slice::Info info) {
|
||||
}
|
||||
// Static utilities:4 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Static utilities][Static utilities:5]]
|
||||
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:5]]
|
||||
static Slice& findByTypeAbc
|
||||
( std::vector<Slice> &slices
|
||||
, Slice::Type type
|
||||
@ -292,7 +292,7 @@ static Slice& findByTypeAbc
|
||||
}
|
||||
// Static utilities:5 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Static utilities][Static utilities:6]]
|
||||
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:6]]
|
||||
static Slice& findByInfo(std::vector<Slice> &slices,
|
||||
Slice::Info const& info) {
|
||||
const auto sliceIt
|
||||
@ -315,30 +315,30 @@ static Slice& findByInfo(std::vector<Slice> &slices,
|
||||
}
|
||||
// Static utilities:6 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Attributes][Attributes:1]]
|
||||
// [[file:~/atrip/atrip.org::*Attributes][Attributes:1]]
|
||||
Info info;
|
||||
// Attributes:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Attributes][Attributes:2]]
|
||||
// [[file:~/atrip/atrip.org::*Attributes][Attributes:2]]
|
||||
F *data;
|
||||
// Attributes:2 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Attributes][Attributes:3]]
|
||||
// [[file:~/atrip/atrip.org::*Attributes][Attributes:3]]
|
||||
MPI_Request request;
|
||||
// Attributes:3 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Attributes][Attributes:4]]
|
||||
// [[file:~/atrip/atrip.org::*Attributes][Attributes:4]]
|
||||
const size_t size;
|
||||
// Attributes:4 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Member functions][Member functions:1]]
|
||||
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:1]]
|
||||
// Flags this slice as usable: the state becomes Ready and the recycling
// marker is reset to Blank. Only `info` is modified; `data` and `request`
// are left untouched.
void markReady() noexcept {
|
||||
info.state = Ready;
|
||||
info.recycling = Blank;
|
||||
}
|
||||
// Member functions:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Member functions][Member functions:2]]
|
||||
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:2]]
|
||||
bool isUnwrapped() const noexcept {
|
||||
return info.state == Ready
|
||||
|| info.state == SelfSufficient
|
||||
@ -346,7 +346,7 @@ bool isUnwrapped() const noexcept {
|
||||
}
|
||||
// Member functions:2 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Member functions][Member functions:3]]
|
||||
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:3]]
|
||||
bool isUnwrappable() const noexcept {
|
||||
return isUnwrapped()
|
||||
|| info.state == Recycled
|
||||
@ -379,7 +379,7 @@ inline bool isFree() const noexcept {
|
||||
}
|
||||
// Member functions:3 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Member functions][Member functions:4]]
|
||||
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:4]]
|
||||
inline bool isRecyclable() const noexcept {
|
||||
return ( info.state == Dispatched
|
||||
|| info.state == Ready
|
||||
@ -390,7 +390,7 @@ inline bool isRecyclable() const noexcept {
|
||||
}
|
||||
// Member functions:4 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Member functions][Member functions:5]]
|
||||
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:5]]
|
||||
inline bool hasValidDataPointer() const noexcept {
|
||||
return data != nullptr
|
||||
&& info.state != Acceptor
|
||||
@ -399,7 +399,7 @@ inline bool hasValidDataPointer() const noexcept {
|
||||
}
|
||||
// Member functions:5 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Member functions][Member functions:6]]
|
||||
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:6]]
|
||||
void unwrapAndMarkReady() {
|
||||
if (info.state == Ready) return;
|
||||
if (info.state != Dispatched)
|
||||
@ -431,7 +431,7 @@ void unwrapAndMarkReady() {
|
||||
}
|
||||
// Member functions:6 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Epilog][Epilog:1]]
|
||||
// [[file:~/atrip/atrip.org::*Epilog][Epilog:1]]
|
||||
Slice(size_t size_)
|
||||
: info({})
|
||||
, data(nullptr)
|
||||
@ -442,7 +442,7 @@ Slice(size_t size_)
|
||||
}; // struct Slice
|
||||
// Epilog:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Debug][Debug:1]]
|
||||
// [[file:~/atrip/atrip.org::*Debug][Debug:1]]
|
||||
std::ostream& operator<<(std::ostream& out, Slice::Location const& v) {
|
||||
// TODO: remove me
|
||||
out << "{.r(" << v.rank << "), .s(" << v.source << ")};";
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*The slice union][The slice union:1]]
|
||||
// [[file:~/atrip/atrip.org::*The%20slice%20union][The slice union:1]]
|
||||
#pragma once
|
||||
#include <atrip/Debug.hpp>
|
||||
#include <atrip/Slice.hpp>
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*Prolog][Prolog:1]]
|
||||
// [[file:~/atrip/atrip.org::*Prolog][Prolog:1]]
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
@ -21,7 +21,7 @@
|
||||
namespace atrip {
|
||||
// Prolog:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Tuples types][Tuples types:1]]
|
||||
// [[file:~/atrip/atrip.org::*Tuples%20types][Tuples types:1]]
|
||||
// Triple of size_t indices identifying one (a, b, c) tuple.
using ABCTuple = std::array<size_t, 3>;
|
||||
// Pair of size_t indices (two-component counterpart of ABCTuple).
using PartialTuple = std::array<size_t, 2>;
|
||||
// Sequence of ABCTuple values, e.g. the list produced by getTuplesList.
using ABCTuples = std::vector<ABCTuple>;
|
||||
@ -29,14 +29,14 @@ using ABCTuples = std::vector<ABCTuple>;
|
||||
// Sentinel tuple {0, 0, 0}; TuplesDistribution::tupleIsFake compares against
// it, and tuple lists are filled/padded with it.
constexpr ABCTuple FAKE_TUPLE = {0, 0, 0};
|
||||
// Tuples types:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Distributing the tuples][Distributing the tuples:1]]
|
||||
// [[file:~/atrip/atrip.org::*Distributing%20the%20tuples][Distributing the tuples:1]]
|
||||
// Abstract interface for strategies that produce a list of ABC tuples.
// Implemented in this file by NaiveDistribution and
// group_and_sort::Distribution.
struct TuplesDistribution {
|
||||
// Produces the tuple list for the given Nv using the MPI communicator
// `universe`. Pure virtual: every strategy must provide it.
virtual ABCTuples getTuples(size_t Nv, MPI_Comm universe) = 0;
|
||||
// Returns true when `t` equals the FAKE_TUPLE sentinel. Virtual, so a
// strategy may override how fake tuples are recognised.
virtual bool tupleIsFake(ABCTuple const& t) { return t == FAKE_TUPLE; }
|
||||
};
|
||||
// Distributing the tuples:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Naive list][Naive list:1]]
|
||||
// [[file:~/atrip/atrip.org::*Naive%20list][Naive list:1]]
|
||||
ABCTuples getTuplesList(size_t Nv) {
|
||||
const size_t n = Nv * (Nv + 1) * (Nv + 2) / 6 - Nv;
|
||||
ABCTuples result(n);
|
||||
@ -54,7 +54,7 @@ ABCTuples getTuplesList(size_t Nv) {
|
||||
}
|
||||
// Naive list:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Naive list][Naive list:2]]
|
||||
// [[file:~/atrip/atrip.org::*Naive%20list][Naive list:2]]
|
||||
std::pair<size_t, size_t>
|
||||
getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) {
|
||||
|
||||
@ -91,7 +91,7 @@ getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) {
|
||||
}
|
||||
// Naive list:2 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Naive list][Naive list:3]]
|
||||
// [[file:~/atrip/atrip.org::*Naive%20list][Naive list:3]]
|
||||
struct NaiveDistribution : public TuplesDistribution {
|
||||
ABCTuples getTuples(size_t Nv, MPI_Comm universe) override {
|
||||
int rank, np;
|
||||
@ -113,23 +113,29 @@ struct NaiveDistribution : public TuplesDistribution {
|
||||
WITH_RANK << "range = "
|
||||
<< range.first << " -> " << range.second
|
||||
<< std::endl;
|
||||
std::vector<ABCTuple> result(range.second - range.first + 1, FAKE_TUPLE);
|
||||
WITH_RANK << "number of global tuples = " << all.size() << std::endl;
|
||||
WITH_RANK << "number of local tuples = " << result.size() << std::endl;
|
||||
|
||||
std::vector<ABCTuple> result(range.second - range.first, FAKE_TUPLE);
|
||||
std::copy(all.begin() + range.first,
|
||||
std::copy(range.first >= all.size()
|
||||
? all.end()
|
||||
: all.begin() + range.first,
|
||||
// --
|
||||
range.second >= all.size()
|
||||
? all.end()
|
||||
: all.begin() + range.first + range.second,
|
||||
// --
|
||||
result.begin());
|
||||
return result;
|
||||
}
|
||||
};
|
||||
// Naive list:3 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Prolog][Prolog:1]]
|
||||
// [[file:~/atrip/atrip.org::*Prolog][Prolog:1]]
|
||||
namespace group_and_sort {
|
||||
// Prolog:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Node information][Node information:1]]
|
||||
// [[file:~/atrip/atrip.org::*Node%20information][Node information:1]]
|
||||
std::vector<std::string> getNodeNames(MPI_Comm comm){
|
||||
int rank, np;
|
||||
MPI_Comm_rank(comm, &rank);
|
||||
@ -169,7 +175,7 @@ std::vector<std::string> getNodeNames(MPI_Comm comm){
|
||||
}
|
||||
// Node information:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Node information][Node information:2]]
|
||||
// [[file:~/atrip/atrip.org::*Node%20information][Node information:2]]
|
||||
struct RankInfo {
|
||||
const std::string name;
|
||||
const size_t nodeId;
|
||||
@ -208,7 +214,7 @@ getNodeInfos(std::vector<string> const& nodeNames) {
|
||||
}
|
||||
// Node information:2 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Utils][Utils:1]]
|
||||
// [[file:~/atrip/atrip.org::*Utils][Utils:1]]
|
||||
// Provides the node on which the slice-element is found
|
||||
// Right now we distribute the slices in a round robin fashion
|
||||
// over the different nodes (NOTE: not mpi ranks but nodes)
|
||||
@ -239,7 +245,7 @@ std::vector<size_t> getTupleNodes(ABCTuple t, size_t nNodes) {
|
||||
}
|
||||
// Utils:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Distribution][Distribution:1]]
|
||||
// [[file:~/atrip/atrip.org::*Distribution][Distribution:1]]
|
||||
std::vector<ABCTuple>
|
||||
specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
|
||||
|
||||
@ -380,7 +386,7 @@ std::vector<size_t> fetchElement(ABCTuple cur, ABCTuple suc){
|
||||
}
|
||||
// Distribution:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Main][Main:1]]
|
||||
// [[file:~/atrip/atrip.org::*Main][Main:1]]
|
||||
std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
|
||||
|
||||
int rank, np;
|
||||
@ -432,7 +438,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
|
||||
MPI_Comm_split(universe, color, key, &INTRA_COMM);
|
||||
// Main:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Main][Main:2]]
|
||||
// [[file:~/atrip/atrip.org::*Main][Main:2]]
|
||||
const size_t
|
||||
tuplesPerRankLocal
|
||||
= nodeTuples.size() / nodeInfos[rank].ranksPerNode
|
||||
@ -456,7 +462,7 @@ MPI_Bcast(&tuplesPerRankGlobal,
|
||||
universe);
|
||||
// Main:2 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Main][Main:3]]
|
||||
// [[file:~/atrip/atrip.org::*Main][Main:3]]
|
||||
size_t const totalTuplesLocal
|
||||
= tuplesPerRankLocal
|
||||
* nodeInfos[rank].ranksPerNode;
|
||||
@ -467,7 +473,7 @@ if (makeDistribution)
|
||||
FAKE_TUPLE);
|
||||
// Main:3 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Main][Main:4]]
|
||||
// [[file:~/atrip/atrip.org::*Main][Main:4]]
|
||||
{
|
||||
std::vector<int> const
|
||||
sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal);
|
||||
@ -503,7 +509,7 @@ if (makeDistribution)
|
||||
}
|
||||
// Main:4 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Main][Main:5]]
|
||||
// [[file:~/atrip/atrip.org::*Main][Main:5]]
|
||||
result.insert(result.end(),
|
||||
tuplesPerRankGlobal - result.size(),
|
||||
FAKE_TUPLE);
|
||||
@ -513,7 +519,7 @@ result.insert(result.end(),
|
||||
}
|
||||
// Main:5 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Interface][Interface:1]]
|
||||
// [[file:~/atrip/atrip.org::*Interface][Interface:1]]
|
||||
struct Distribution : public TuplesDistribution {
|
||||
ABCTuples getTuples(size_t Nv, MPI_Comm universe) override {
|
||||
return main(universe, Nv);
|
||||
@ -521,10 +527,10 @@ struct Distribution : public TuplesDistribution {
|
||||
};
|
||||
// Interface:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Epilog][Epilog:1]]
|
||||
// [[file:~/atrip/atrip.org::*Epilog][Epilog:1]]
|
||||
} // namespace group_and_sort
|
||||
// Epilog:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Epilog][Epilog:1]]
|
||||
// [[file:~/atrip/atrip.org::*Epilog][Epilog:1]]
|
||||
}
|
||||
// Epilog:1 ends here
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*Unions][Unions:1]]
|
||||
// [[file:~/atrip/atrip.org::*Unions][Unions:1]]
|
||||
#pragma once
|
||||
#include <atrip/SliceUnion.hpp>
|
||||
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*Prolog][Prolog:1]]
|
||||
// [[file:~/atrip/atrip.org::*Prolog][Prolog:1]]
|
||||
#pragma once
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
@ -11,7 +11,7 @@
|
||||
namespace atrip {
|
||||
// Prolog:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Pretty printing][Pretty printing:1]]
|
||||
// [[file:~/atrip/atrip.org::*Pretty%20printing][Pretty printing:1]]
|
||||
template <typename T>
|
||||
std::string pretty_print(T&& value) {
|
||||
std::stringstream stream;
|
||||
@ -22,11 +22,11 @@ template <typename T>
|
||||
}
|
||||
// Pretty printing:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Chrono][Chrono:1]]
|
||||
#define WITH_CHRONO(__chrono, ...) \
|
||||
__chrono.start(); \
|
||||
// [[file:~/atrip/atrip.org::*Chrono][Chrono:1]]
|
||||
#define WITH_CHRONO(__chrono_name, ...) \
|
||||
Atrip::chrono[__chrono_name].start(); \
|
||||
__VA_ARGS__ \
|
||||
__chrono.stop();
|
||||
Atrip::chrono[__chrono_name].stop();
|
||||
|
||||
struct Timer {
|
||||
using Clock = std::chrono::high_resolution_clock;
|
||||
@ -41,6 +41,6 @@ struct Timer {
|
||||
// Collection of named timers: maps a label (e.g. "doubles", "iterations")
// to its Timer. std::map keeps the entries ordered by label.
using Timings = std::map<std::string, Timer>;
|
||||
// Chrono:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Epilog][Epilog:1]]
|
||||
// [[file:~/atrip/atrip.org::*Epilog][Epilog:1]]
|
||||
}
|
||||
// Epilog:1 ends here
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*Main][Main:1]]
|
||||
// [[file:~/atrip/atrip.org::*Main][Main:1]]
|
||||
#include <iomanip>
|
||||
|
||||
#include <atrip/Atrip.hpp>
|
||||
@ -24,9 +24,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
const int rank = Atrip::rank;
|
||||
MPI_Comm universe = in.ei->wrld->comm;
|
||||
|
||||
// Timings in seconds ================================================{{{1
|
||||
Timings chrono{};
|
||||
|
||||
const size_t No = in.ei->lens[0];
|
||||
const size_t Nv = in.ea->lens[0];
|
||||
LOG(0,"Atrip") << "No: " << No << "\n";
|
||||
@ -66,20 +63,20 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
}
|
||||
|
||||
|
||||
chrono["nv-slices"].start();
|
||||
// BUILD SLICES PARAMETRIZED BY NV ==================================={{{1
|
||||
WITH_CHRONO("nv-slices",
|
||||
LOG(0,"Atrip") << "BUILD NV-SLICES\n";
|
||||
TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
chrono["nv-slices"].stop();
|
||||
)
|
||||
|
||||
chrono["nv-nv-slices"].start();
|
||||
// BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1
|
||||
WITH_CHRONO("nv-nv-slices",
|
||||
LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n";
|
||||
ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
chrono["nv-nv-slices"].stop();
|
||||
)
|
||||
|
||||
// all tensors
|
||||
std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh};
|
||||
@ -96,7 +93,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
}
|
||||
|
||||
LOG(0,"Atrip") << "BUILDING TUPLE LIST\n";
|
||||
WITH_CHRONO(chrono["tuples:build"],
|
||||
WITH_CHRONO("tuples:build",
|
||||
auto const tuplesList = distribution->getTuples(Nv, universe);
|
||||
)
|
||||
size_t nIterations = tuplesList.size();
|
||||
@ -119,26 +116,24 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
auto communicateDatabase
|
||||
= [ &unions
|
||||
, np
|
||||
, &chrono
|
||||
] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database {
|
||||
|
||||
chrono["db:comm:type:do"].start();
|
||||
WITH_CHRONO("db:comm:type:do",
|
||||
auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement();
|
||||
chrono["db:comm:type:do"].stop();
|
||||
)
|
||||
|
||||
chrono["db:comm:ldb"].start();
|
||||
WITH_CHRONO("db:comm:ldb",
|
||||
Slice::LocalDatabase ldb;
|
||||
|
||||
for (auto const& tensor: unions) {
|
||||
auto const& tensorDb = tensor->buildLocalDatabase(abc);
|
||||
ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end());
|
||||
}
|
||||
chrono["db:comm:ldb"].stop();
|
||||
)
|
||||
|
||||
Slice::Database db(np * ldb.size(), ldb[0]);
|
||||
|
||||
chrono["oneshot-db:comm:allgather"].start();
|
||||
chrono["db:comm:allgather"].start();
|
||||
WITH_CHRONO("oneshot-db:comm:allgather",
|
||||
WITH_CHRONO("db:comm:allgather",
|
||||
MPI_Allgather( ldb.data()
|
||||
, ldb.size()
|
||||
, MPI_LDB_ELEMENT
|
||||
@ -146,18 +141,17 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
, ldb.size()
|
||||
, MPI_LDB_ELEMENT
|
||||
, c);
|
||||
chrono["db:comm:allgather"].stop();
|
||||
chrono["oneshot-db:comm:allgather"].stop();
|
||||
))
|
||||
|
||||
chrono["db:comm:type:free"].start();
|
||||
WITH_CHRONO("db:comm:type:free",
|
||||
MPI_Type_free(&MPI_LDB_ELEMENT);
|
||||
chrono["db:comm:type:free"].stop();
|
||||
)
|
||||
|
||||
return db;
|
||||
};
|
||||
|
||||
auto doIOPhase
|
||||
= [&unions, &rank, &np, &universe, &chrono] (Slice::Database const& db) {
|
||||
= [&unions, &rank, &np, &universe] (Slice::Database const& db) {
|
||||
|
||||
const size_t localDBLength = db.size() / np;
|
||||
|
||||
@ -193,9 +187,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
<< "\n"
|
||||
;
|
||||
|
||||
chrono["db:io:recv"].start();
|
||||
WITH_CHRONO("db:io:recv",
|
||||
u.receive(el.info, recvTag);
|
||||
chrono["db:io:recv"].stop();
|
||||
)
|
||||
|
||||
} // recv
|
||||
}
|
||||
@ -229,9 +223,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
<< "\n"
|
||||
;
|
||||
|
||||
chrono["db:io:send"].start();
|
||||
WITH_CHRONO("db:io:send",
|
||||
u.send(otherRank, el.info, sendTag);
|
||||
chrono["db:io:send"].stop();
|
||||
)
|
||||
|
||||
} // send phase
|
||||
|
||||
@ -262,14 +256,14 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
; i < tuplesList.size()
|
||||
; i++, iteration++
|
||||
) {
|
||||
chrono["iterations"].start();
|
||||
Atrip::chrono["iterations"].start();
|
||||
|
||||
// check overhead from chrono over all iterations
|
||||
chrono["start:stop"].start(); chrono["start:stop"].stop();
|
||||
WITH_CHRONO("start:stop", {})
|
||||
|
||||
// check overhead of doing a barrier at the beginning
|
||||
WITH_CHRONO(chrono["oneshot-mpi:barrier"],
|
||||
WITH_CHRONO(chrono["mpi:barrier"],
|
||||
WITH_CHRONO("oneshot-mpi:barrier",
|
||||
WITH_CHRONO("mpi:barrier",
|
||||
if (in.barrier) MPI_Barrier(universe);
|
||||
))
|
||||
|
||||
@ -277,15 +271,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
LOG(0,"Atrip")
|
||||
<< "iteration " << iteration
|
||||
<< " [" << 100 * iteration / nIterations << "%]"
|
||||
<< " (" << doublesFlops * iteration / chrono["doubles"].count()
|
||||
<< " (" << doublesFlops * iteration / Atrip::chrono["doubles"].count()
|
||||
<< "GF)"
|
||||
<< " (" << doublesFlops * iteration / chrono["iterations"].count()
|
||||
<< " (" << doublesFlops * iteration / Atrip::chrono["iterations"].count()
|
||||
<< "GF)"
|
||||
<< " ===========================\n";
|
||||
|
||||
// PRINT TIMINGS
|
||||
if (in.chrono)
|
||||
for (auto const& pair: chrono)
|
||||
for (auto const& pair: Atrip::chrono)
|
||||
LOG(1, " ") << pair.first << " :: "
|
||||
<< pair.second.count()
|
||||
<< std::endl;
|
||||
@ -302,13 +296,13 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
: &tuplesList[i + 1]
|
||||
;
|
||||
|
||||
chrono["with_rank"].start();
|
||||
WITH_CHRONO("with_rank",
|
||||
WITH_RANK << " :it " << iteration
|
||||
<< " :abc " << pretty_print(abc)
|
||||
<< " :abcN "
|
||||
<< (abcNext ? pretty_print(*abcNext) : "None")
|
||||
<< "\n";
|
||||
chrono["with_rank"].stop();
|
||||
)
|
||||
|
||||
|
||||
// COMM FIRST DATABASE ================================================{{{1
|
||||
@ -321,19 +315,19 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
WITH_RANK << "__first__:first database io phase DONE\n";
|
||||
WITH_RANK << "__first__::::Unwrapping all slices for first database\n";
|
||||
for (auto& u: unions) u->unwrapAll(abc);
|
||||
WITH_RANK << "__first__::::Unwrapping all slices for first database DONE\n";
|
||||
WITH_RANK << "__first__::::Unwrapping slices for first database DONE\n";
|
||||
MPI_Barrier(universe);
|
||||
}
|
||||
|
||||
// COMM NEXT DATABASE ================================================={{{1
|
||||
if (abcNext) {
|
||||
WITH_RANK << "__comm__:" << iteration << "th communicating database\n";
|
||||
chrono["db:comm"].start();
|
||||
WITH_CHRONO("db:comm",
|
||||
const auto db = communicateDatabase(*abcNext, universe);
|
||||
chrono["db:comm"].stop();
|
||||
chrono["db:io"].start();
|
||||
)
|
||||
WITH_CHRONO("db:io",
|
||||
doIOPhase(db);
|
||||
chrono["db:io"].stop();
|
||||
)
|
||||
WITH_RANK << "__comm__:" << iteration << "th database io phase DONE\n";
|
||||
}
|
||||
|
||||
@ -341,15 +335,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
OCD_Barrier(universe);
|
||||
if (!isFakeTuple(i)) {
|
||||
WITH_RANK << iteration << "-th doubles\n";
|
||||
WITH_CHRONO(chrono["oneshot-unwrap"],
|
||||
WITH_CHRONO(chrono["unwrap"],
|
||||
WITH_CHRONO(chrono["unwrap:doubles"],
|
||||
WITH_CHRONO("oneshot-unwrap",
|
||||
WITH_CHRONO("unwrap",
|
||||
WITH_CHRONO("unwrap:doubles",
|
||||
for (auto& u: decltype(unions){&abph, &hhha, &taphh, &tabhh}) {
|
||||
u->unwrapAll(abc);
|
||||
}
|
||||
)))
|
||||
chrono["oneshot-doubles"].start();
|
||||
chrono["doubles"].start();
|
||||
WITH_CHRONO("oneshot-doubles",
|
||||
WITH_CHRONO("doubles",
|
||||
doublesContribution( abc, (size_t)No, (size_t)Nv
|
||||
// -- VABCI
|
||||
, abph.unwrapSlice(Slice::AB, abc)
|
||||
@ -372,32 +366,30 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
, tabhh.unwrapSlice(Slice::BC, abc)
|
||||
// -- TIJK
|
||||
, Tijk.data()
|
||||
, chrono
|
||||
);
|
||||
WITH_RANK << iteration << "-th doubles done\n";
|
||||
chrono["doubles"].stop();
|
||||
chrono["oneshot-doubles"].stop();
|
||||
))
|
||||
}
|
||||
|
||||
// COMPUTE SINGLES =================================================== {{{1
|
||||
OCD_Barrier(universe);
|
||||
if (!isFakeTuple(i)) {
|
||||
WITH_CHRONO(chrono["oneshot-unwrap"],
|
||||
WITH_CHRONO(chrono["unwrap"],
|
||||
WITH_CHRONO(chrono["unwrap:singles"],
|
||||
WITH_CHRONO("oneshot-unwrap",
|
||||
WITH_CHRONO("unwrap",
|
||||
WITH_CHRONO("unwrap:singles",
|
||||
abhh.unwrapAll(abc);
|
||||
)))
|
||||
chrono["reorder"].start();
|
||||
WITH_CHRONO("reorder",
|
||||
for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
|
||||
chrono["reorder"].stop();
|
||||
chrono["singles"].start();
|
||||
)
|
||||
WITH_CHRONO("singles",
|
||||
singlesContribution( No, Nv, abc
|
||||
, Tai.data()
|
||||
, abhh.unwrapSlice(Slice::AB, abc)
|
||||
, abhh.unwrapSlice(Slice::AC, abc)
|
||||
, abhh.unwrapSlice(Slice::BC, abc)
|
||||
, Zijk.data());
|
||||
chrono["singles"].stop();
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@ -410,12 +402,12 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
if (abc[1] == abc[2]) distinct--;
|
||||
const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]);
|
||||
|
||||
chrono["energy"].start();
|
||||
WITH_CHRONO("energy",
|
||||
if ( distinct == 0)
|
||||
tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk);
|
||||
else
|
||||
tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk);
|
||||
chrono["energy"].stop();
|
||||
)
|
||||
|
||||
#if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES)
|
||||
tupleEnergies[abc] = tupleEnergy;
|
||||
@ -446,7 +438,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
// CLEANUP UNIONS ===================================================={{{1
|
||||
OCD_Barrier(universe);
|
||||
if (abcNext) {
|
||||
chrono["gc"].start();
|
||||
WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n";
|
||||
for (auto& u: unions) {
|
||||
|
||||
@ -480,12 +471,11 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
|
||||
|
||||
}
|
||||
chrono["gc"].stop();
|
||||
}
|
||||
|
||||
WITH_RANK << iteration << "-th cleaning up....... DONE\n";
|
||||
|
||||
chrono["iterations"].stop();
|
||||
Atrip::chrono["iterations"].stop();
|
||||
// ITERATION END ====================================================={{{1
|
||||
|
||||
}
|
||||
@ -523,15 +513,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
|
||||
// PRINT TIMINGS {{{1
|
||||
if (in.chrono)
|
||||
for (auto const& pair: chrono)
|
||||
for (auto const& pair: Atrip::chrono)
|
||||
LOG(0,"atrip:chrono") << pair.first << " "
|
||||
<< pair.second.count() << std::endl;
|
||||
|
||||
|
||||
LOG(0, "atrip:flops(doubles)")
|
||||
<< nIterations * doublesFlops / chrono["doubles"].count() << "\n";
|
||||
<< nIterations * doublesFlops / Atrip::chrono["doubles"].count() << "\n";
|
||||
LOG(0, "atrip:flops(iterations)")
|
||||
<< nIterations * doublesFlops / chrono["iterations"].count() << "\n";
|
||||
<< nIterations * doublesFlops / Atrip::chrono["iterations"].count() << "\n";
|
||||
|
||||
// TODO: change the sign in the getEnergy routines
|
||||
return { - globalEnergy };
|
||||
|
||||
Loading…
Reference in New Issue
Block a user