From 79a3f99cb3a5ee4dd4c922861221f39e02954c47 Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Thu, 21 Oct 2021 15:25:01 +0200 Subject: [PATCH] Update all chronos to use the static chrono --- atrip.org | 503 +++++++++++++++++------------------ include/atrip.hpp | 2 +- include/atrip/Atrip.hpp | 2 +- include/atrip/Blas.hpp | 2 +- include/atrip/Debug.hpp | 2 +- include/atrip/Equations.hpp | 241 ++++++++--------- include/atrip/RankMap.hpp | 2 +- include/atrip/Slice.hpp | 54 ++-- include/atrip/SliceUnion.hpp | 2 +- include/atrip/Tuples.hpp | 52 ++-- include/atrip/Unions.hpp | 2 +- include/atrip/Utils.hpp | 14 +- src/atrip/Atrip.cxx | 246 ++++++++--------- 13 files changed, 553 insertions(+), 571 deletions(-) diff --git a/atrip.org b/atrip.org index 76818d5..ccd1699 100644 --- a/atrip.org +++ b/atrip.org @@ -748,10 +748,10 @@ The chrono is just a simple wrapper for a high resolution clock that can be found in the =std::chrono= namespace of the standard library. #+begin_src c++ :tangle (atrip-utils-h) -#define WITH_CHRONO(__chrono, ...) \ - __chrono.start(); \ +#define WITH_CHRONO(__chrono_name, ...) \ + Atrip::chrono[__chrono_name].start(); \ __VA_ARGS__ \ - __chrono.stop(); + Atrip::chrono[__chrono_name].stop(); struct Timer { using Clock = std::chrono::high_resolution_clock; @@ -1549,12 +1549,18 @@ struct NaiveDistribution : public TuplesDistribution { WITH_RANK << "range = " << range.first << " -> " << range.second << std::endl; + std::vector result(range.second - range.first + 1, FAKE_TUPLE); + WITH_RANK << "number of global tuples = " << all.size() << std::endl; + WITH_RANK << "number of local tuples = " << result.size() << std::endl; - std::vector result(range.second - range.first, FAKE_TUPLE); - std::copy(all.begin() + range.first, + std::copy(range.first >= all.size() + ? all.end() + : all.begin() + range.first, + // -- range.second >= all.size() - ? all.end() - : all.begin() + range.first + range.second, + ? all.end() + : all.begin() + range.first + range.second, + // -- result.begin()); return result; } @@ -2467,54 +2473,51 @@ namespace atrip { , double const* TBChh // -- TIJK , double *Tijk - , atrip::Timings& chrono ) { - auto& t_reorder = chrono["doubles:reorder"]; const size_t a = abc[0], b = abc[1], c = abc[2] , NoNo = No*No, NoNv = No*Nv ; - #if defined(ATRIP_USE_DGEMM) - #define _IJK_(i, j, k) i + j*No + k*NoNo - #define REORDER(__II, __JJ, __KK) \ - t_reorder.start(); \ - for (size_t k = 0; k < No; k++) \ - for (size_t j = 0; j < No; j++) \ - for (size_t i = 0; i < No; i++) { \ - Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \ - } \ - t_reorder.stop(); - #define DGEMM_PARTICLES(__A, __B) \ - atrip::dgemm_( "T" \ - , "N" \ - , (int const*)&NoNo \ - , (int const*)&No \ - , (int const*)&Nv \ - , &one \ - , __A \ - , (int const*)&Nv \ - , __B \ - , (int const*)&Nv \ - , &zero \ - , _t_buffer.data() \ - , (int const*)&NoNo \ - ); - #define DGEMM_HOLES(__A, __B, __TRANSB) \ - atrip::dgemm_( "N" \ - , __TRANSB \ - , (int const*)&NoNo \ - , (int const*)&No \ - , (int const*)&No \ - , &m_one \ - , __A \ - , (int const*)&NoNo \ - , __B \ - , (int const*)&No \ - , &zero \ - , _t_buffer.data() \ - , (int const*)&NoNo \ - ); +#if defined(ATRIP_USE_DGEMM) +#define _IJK_(i, j, k) i + j*No + k*NoNo +#define REORDER(__II, __JJ, __KK) \ + WITH_CHRONO("double:reorder", \ + for (size_t k = 0; k < No; k++) \ + for (size_t j = 0; j < No; j++) \ + for (size_t i = 0; i < No; i++) { \ + Tijk[_IJK_(i, j, k)] \ + += _t_buffer[_IJK_(__II, __JJ, __KK)]; \ + } \ + ) +#define DGEMM_PARTICLES(__A, __B) \ + atrip::dgemm_("T", \ + "N", \ + (int const*)&NoNo, \ + (int const*)&No, \ + (int const*)&Nv, \ + &one, \ + __A, \ + (int const*)&Nv, \ + __B, \ + (int const*)&Nv, \ + &zero, \ + _t_buffer.data(), \ + (int const*)&NoNo); +#define DGEMM_HOLES(__A, __B, __TRANSB) \ + atrip::dgemm_("N", \ + __TRANSB, \ + (int const*)&NoNo, \ + (int const*)&No, \ + (int const*)&No, \ + &m_one, \ + __A, \ + (int const*)&NoNo, \ + __B, \ + (int const*)&No, \ + &zero, \ + _t_buffer.data(), \ + (int const*)&NoNo); using F = double; const size_t NoNoNo = No*NoNo; @@ -2522,88 +2525,86 @@ namespace atrip { _t_buffer.reserve(NoNoNo); F one{1.0}, m_one{-1.0}, zero{0.0}; - t_reorder.start(); - for (size_t k = 0; k < NoNoNo; k++) { - // zero the Tijk - Tijk[k] = 0.0; - } - t_reorder.stop(); + WITH_CHRONO("double:reorder", + for (size_t k = 0; k < NoNoNo; k++) { + Tijk[k] = 0.0; + }) - chrono["doubles:holes"].start(); - { // Holes part ============================================================ - // VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1 - chrono["doubles:holes:1"].start(); - DGEMM_HOLES(VhhhC, TABhh, "N") - REORDER(i, k, j) - chrono["doubles:holes:1"].stop(); - // VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0 - chrono["doubles:holes:2"].start(); - DGEMM_HOLES(VhhhC, TABhh, "T") - REORDER(j, k, i) - chrono["doubles:holes:2"].stop(); - // VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5 - chrono["doubles:holes:3"].start(); - DGEMM_HOLES(VhhhB, TAChh, "N") - REORDER(i, j, k) - chrono["doubles:holes:3"].stop(); - // VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3 - chrono["doubles:holes:4"].start(); - DGEMM_HOLES(VhhhB, TAChh, "T") - REORDER(k, j, i) - chrono["doubles:holes:4"].stop(); - // VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1 - chrono["doubles:holes:5"].start(); - DGEMM_HOLES(VhhhA, TBChh, "N") - REORDER(j, i, k) - chrono["doubles:holes:5"].stop(); - // VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4 - chrono["doubles:holes:6"].start(); - DGEMM_HOLES(VhhhA, TBChh, "T") - REORDER(k, i, j) - chrono["doubles:holes:6"].stop(); - } - chrono["doubles:holes"].stop(); + WITH_CHRONO("doubles:holes", + { // Holes part ================================================ + // VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1 + WITH_CHRONO("doubles:holes:1", + DGEMM_HOLES(VhhhC, TABhh, "N") + REORDER(i, k, j) + ) + // VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0 + WITH_CHRONO("doubles:holes:2", + DGEMM_HOLES(VhhhC, TABhh, "T") + REORDER(j, k, i) + ) + // VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5 + WITH_CHRONO("doubles:holes:3", + DGEMM_HOLES(VhhhB, TAChh, "N") + REORDER(i, j, k) + ) + // VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3 + WITH_CHRONO("doubles:holes:4", + DGEMM_HOLES(VhhhB, TAChh, "T") + REORDER(k, j, i) + ) + // VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1 + WITH_CHRONO("doubles:holes:5", + DGEMM_HOLES(VhhhA, TBChh, "N") + REORDER(j, i, k) + ) + // VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4 + WITH_CHRONO("doubles:holes:6", + DGEMM_HOLES(VhhhA, TBChh, "T") + REORDER(k, i, j) + ) + } + ) - chrono["doubles:particles"].start(); - { // Particle part ========================================================= - // TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0 - chrono["doubles:particles:1"].start(); - DGEMM_PARTICLES(TAphh, VBCph) - REORDER(i, j, k) - chrono["doubles:particles:1"].stop(); - // TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3 - chrono["doubles:particles:2"].start(); - DGEMM_PARTICLES(TAphh, VCBph) - REORDER(i, k, j) - chrono["doubles:particles:2"].stop(); - // TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5 - chrono["doubles:particles:3"].start(); - DGEMM_PARTICLES(TCphh, VABph) - REORDER(k, i, j) - chrono["doubles:particles:3"].stop(); - // TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2 - chrono["doubles:particles:4"].start(); - DGEMM_PARTICLES(TCphh, VBAph) - REORDER(k, j, i) - chrono["doubles:particles:4"].stop(); - // TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1 - chrono["doubles:particles:5"].start(); - DGEMM_PARTICLES(TBphh, VACph) - REORDER(j, i, k) - chrono["doubles:particles:5"].stop(); - // TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4 - chrono["doubles:particles:6"].start(); - DGEMM_PARTICLES(TBphh, VCAph) - REORDER(j, k, i) - chrono["doubles:particles:6"].stop(); - } - chrono["doubles:particles"].stop(); + WITH_CHRONO("doubles:particles", + { // Particle part =========================================== + // TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0 + WITH_CHRONO("doubles:particles:1", + DGEMM_PARTICLES(TAphh, VBCph) + REORDER(i, j, k) + ) + // TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3 + WITH_CHRONO("doubles:particles:2", + DGEMM_PARTICLES(TAphh, VCBph) + REORDER(i, k, j) + ) + // TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5 + WITH_CHRONO("doubles:particles:3", + DGEMM_PARTICLES(TCphh, VABph) + REORDER(k, i, j) + ) + // TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2 + WITH_CHRONO("doubles:particles:4", + DGEMM_PARTICLES(TCphh, VBAph) + REORDER(k, j, i) + ) + // TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1 + WITH_CHRONO("doubles:particles:5", + DGEMM_PARTICLES(TBphh, VACph) + REORDER(j, i, k) + ) + // TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4 + WITH_CHRONO("doubles:particles:6", + DGEMM_PARTICLES(TBphh, VCAph) + REORDER(j, k, i) + ) + } + ) - #undef REORDER - #undef DGEMM_HOLES - #undef DGEMM_PARTICLES - #undef _IJK_ - #else +#undef REORDER +#undef DGEMM_HOLES +#undef DGEMM_PARTICLES +#undef _IJK_ +#else for (size_t k = 0; k < No; k++) for (size_t j = 0; j < No; j++) for (size_t i = 0; i < No; i++){ @@ -2647,7 +2648,7 @@ namespace atrip { } } - #endif +#endif } } @@ -2777,9 +2778,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { const int rank = Atrip::rank; MPI_Comm universe = in.ei->wrld->comm; - // Timings in seconds ================================================{{{1 - Timings chrono{}; - const size_t No = in.ei->lens[0]; const size_t Nv = in.ea->lens[0]; LOG(0,"Atrip") << "No: " << No << "\n"; @@ -2819,20 +2817,20 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { } - chrono["nv-slices"].start(); // BUILD SLICES PARAMETRIZED BY NV ==================================={{{1 - LOG(0,"Atrip") << "BUILD NV-SLICES\n"; - TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - chrono["nv-slices"].stop(); + WITH_CHRONO("nv-slices", + LOG(0,"Atrip") << "BUILD NV-SLICES\n"; + TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); + HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); + ) - chrono["nv-nv-slices"].start(); // BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1 - LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n"; - ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - chrono["nv-nv-slices"].stop(); + WITH_CHRONO("nv-nv-slices", + LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n"; + ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); + ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); + TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); + ) // all tensors std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh}; @@ -2849,7 +2847,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { } LOG(0,"Atrip") << "BUILDING TUPLE LIST\n"; - WITH_CHRONO(chrono["tuples:build"], + WITH_CHRONO("tuples:build", auto const tuplesList = distribution->getTuples(Nv, universe); ) size_t nIterations = tuplesList.size(); @@ -2872,45 +2870,42 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { auto communicateDatabase = [ &unions , np - , &chrono ] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database { - chrono["db:comm:type:do"].start(); - auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement(); - chrono["db:comm:type:do"].stop(); + WITH_CHRONO("db:comm:type:do", + auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement(); + ) - chrono["db:comm:ldb"].start(); - Slice::LocalDatabase ldb; - - for (auto const& tensor: unions) { - auto const& tensorDb = tensor->buildLocalDatabase(abc); - ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end()); - } - chrono["db:comm:ldb"].stop(); + WITH_CHRONO("db:comm:ldb", + Slice::LocalDatabase ldb; + for (auto const& tensor: unions) { + auto const& tensorDb = tensor->buildLocalDatabase(abc); + ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end()); + } + ) Slice::Database db(np * ldb.size(), ldb[0]); - chrono["oneshot-db:comm:allgather"].start(); - chrono["db:comm:allgather"].start(); - MPI_Allgather( ldb.data() - , ldb.size() - , MPI_LDB_ELEMENT - , db.data() - , ldb.size() - , MPI_LDB_ELEMENT - , c); - chrono["db:comm:allgather"].stop(); - chrono["oneshot-db:comm:allgather"].stop(); + WITH_CHRONO("oneshot-db:comm:allgather", + WITH_CHRONO("db:comm:allgather", + MPI_Allgather( ldb.data() + , ldb.size() + , MPI_LDB_ELEMENT + , db.data() + , ldb.size() + , MPI_LDB_ELEMENT + , c); + )) - chrono["db:comm:type:free"].start(); - MPI_Type_free(&MPI_LDB_ELEMENT); - chrono["db:comm:type:free"].stop(); + WITH_CHRONO("db:comm:type:free", + MPI_Type_free(&MPI_LDB_ELEMENT); + ) return db; }; auto doIOPhase - = [&unions, &rank, &np, &universe, &chrono] (Slice::Database const& db) { + = [&unions, &rank, &np, &universe] (Slice::Database const& db) { const size_t localDBLength = db.size() / np; @@ -2946,9 +2941,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { << "\n" ; - chrono["db:io:recv"].start(); - u.receive(el.info, recvTag); - chrono["db:io:recv"].stop(); + WITH_CHRONO("db:io:recv", + u.receive(el.info, recvTag); + ) } // recv } @@ -2982,9 +2977,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { << "\n" ; - chrono["db:io:send"].start(); - u.send(otherRank, el.info, sendTag); - chrono["db:io:send"].stop(); + WITH_CHRONO("db:io:send", + u.send(otherRank, el.info, sendTag); + ) } // send phase @@ -3015,14 +3010,14 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { ; i < tuplesList.size() ; i++, iteration++ ) { - chrono["iterations"].start(); + Atrip::chrono["iterations"].start(); // check overhead from chrono over all iterations - chrono["start:stop"].start(); chrono["start:stop"].stop(); + WITH_CHRONO("start:stop", {}) // check overhead of doing a barrier at the beginning - WITH_CHRONO(chrono["oneshot-mpi:barrier"], - WITH_CHRONO(chrono["mpi:barrier"], + WITH_CHRONO("oneshot-mpi:barrier", + WITH_CHRONO("mpi:barrier", if (in.barrier) MPI_Barrier(universe); )) @@ -3030,15 +3025,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { LOG(0,"Atrip") << "iteration " << iteration << " [" << 100 * iteration / nIterations << "%]" - << " (" << doublesFlops * iteration / chrono["doubles"].count() + << " (" << doublesFlops * iteration / Atrip::chrono["doubles"].count() << "GF)" - << " (" << doublesFlops * iteration / chrono["iterations"].count() + << " (" << doublesFlops * iteration / Atrip::chrono["iterations"].count() << "GF)" << " ===========================\n"; // PRINT TIMINGS if (in.chrono) - for (auto const& pair: chrono) + for (auto const& pair: Atrip::chrono) LOG(1, " ") << pair.first << " :: " << pair.second.count() << std::endl; @@ -3055,13 +3050,13 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { : &tuplesList[i + 1] ; - chrono["with_rank"].start(); - WITH_RANK << " :it " << iteration - << " :abc " << pretty_print(abc) - << " :abcN " - << (abcNext ? pretty_print(*abcNext) : "None") - << "\n"; - chrono["with_rank"].stop(); + WITH_CHRONO("with_rank", + WITH_RANK << " :it " << iteration + << " :abc " << pretty_print(abc) + << " :abcN " + << (abcNext ? pretty_print(*abcNext) : "None") + << "\n"; + ) // COMM FIRST DATABASE ================================================{{{1 @@ -3074,19 +3069,19 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { WITH_RANK << "__first__:first database io phase DONE\n"; WITH_RANK << "__first__::::Unwrapping all slices for first database\n"; for (auto& u: unions) u->unwrapAll(abc); - WITH_RANK << "__first__::::Unwrapping all slices for first database DONE\n"; + WITH_RANK << "__first__::::Unwrapping slices for first database DONE\n"; MPI_Barrier(universe); } // COMM NEXT DATABASE ================================================={{{1 if (abcNext) { WITH_RANK << "__comm__:" << iteration << "th communicating database\n"; - chrono["db:comm"].start(); - const auto db = communicateDatabase(*abcNext, universe); - chrono["db:comm"].stop(); - chrono["db:io"].start(); - doIOPhase(db); - chrono["db:io"].stop(); + WITH_CHRONO("db:comm", + const auto db = communicateDatabase(*abcNext, universe); + ) + WITH_CHRONO("db:io", + doIOPhase(db); + ) WITH_RANK << "__comm__:" << iteration << "th database io phase DONE\n"; } @@ -3094,63 +3089,61 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { OCD_Barrier(universe); if (!isFakeTuple(i)) { WITH_RANK << iteration << "-th doubles\n"; - WITH_CHRONO(chrono["oneshot-unwrap"], - WITH_CHRONO(chrono["unwrap"], - WITH_CHRONO(chrono["unwrap:doubles"], + WITH_CHRONO("oneshot-unwrap", + WITH_CHRONO("unwrap", + WITH_CHRONO("unwrap:doubles", for (auto& u: decltype(unions){&abph, &hhha, &taphh, &tabhh}) { u->unwrapAll(abc); } ))) - chrono["oneshot-doubles"].start(); - chrono["doubles"].start(); - doublesContribution( abc, (size_t)No, (size_t)Nv - // -- VABCI - , abph.unwrapSlice(Slice::AB, abc) - , abph.unwrapSlice(Slice::AC, abc) - , abph.unwrapSlice(Slice::BC, abc) - , abph.unwrapSlice(Slice::BA, abc) - , abph.unwrapSlice(Slice::CA, abc) - , abph.unwrapSlice(Slice::CB, abc) - // -- VHHHA - , hhha.unwrapSlice(Slice::A, abc) - , hhha.unwrapSlice(Slice::B, abc) - , hhha.unwrapSlice(Slice::C, abc) - // -- TA - , taphh.unwrapSlice(Slice::A, abc) - , taphh.unwrapSlice(Slice::B, abc) - , taphh.unwrapSlice(Slice::C, abc) - // -- TABIJ - , tabhh.unwrapSlice(Slice::AB, abc) - , tabhh.unwrapSlice(Slice::AC, abc) - , tabhh.unwrapSlice(Slice::BC, abc) - // -- TIJK - , Tijk.data() - , chrono - ); - WITH_RANK << iteration << "-th doubles done\n"; - chrono["doubles"].stop(); - chrono["oneshot-doubles"].stop(); + WITH_CHRONO("oneshot-doubles", + WITH_CHRONO("doubles", + doublesContribution( abc, (size_t)No, (size_t)Nv + // -- VABCI + , abph.unwrapSlice(Slice::AB, abc) + , abph.unwrapSlice(Slice::AC, abc) + , abph.unwrapSlice(Slice::BC, abc) + , abph.unwrapSlice(Slice::BA, abc) + , abph.unwrapSlice(Slice::CA, abc) + , abph.unwrapSlice(Slice::CB, abc) + // -- VHHHA + , hhha.unwrapSlice(Slice::A, abc) + , hhha.unwrapSlice(Slice::B, abc) + , hhha.unwrapSlice(Slice::C, abc) + // -- TA + , taphh.unwrapSlice(Slice::A, abc) + , taphh.unwrapSlice(Slice::B, abc) + , taphh.unwrapSlice(Slice::C, abc) + // -- TABIJ + , tabhh.unwrapSlice(Slice::AB, abc) + , tabhh.unwrapSlice(Slice::AC, abc) + , tabhh.unwrapSlice(Slice::BC, abc) + // -- TIJK + , Tijk.data() + ); + WITH_RANK << iteration << "-th doubles done\n"; + )) } // COMPUTE SINGLES =================================================== {{{1 OCD_Barrier(universe); if (!isFakeTuple(i)) { - WITH_CHRONO(chrono["oneshot-unwrap"], - WITH_CHRONO(chrono["unwrap"], - WITH_CHRONO(chrono["unwrap:singles"], + WITH_CHRONO("oneshot-unwrap", + WITH_CHRONO("unwrap", + WITH_CHRONO("unwrap:singles", abhh.unwrapAll(abc); ))) - chrono["reorder"].start(); - for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I]; - chrono["reorder"].stop(); - chrono["singles"].start(); - singlesContribution( No, Nv, abc - , Tai.data() - , abhh.unwrapSlice(Slice::AB, abc) - , abhh.unwrapSlice(Slice::AC, abc) - , abhh.unwrapSlice(Slice::BC, abc) - , Zijk.data()); - chrono["singles"].stop(); + WITH_CHRONO("reorder", + for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I]; + ) + WITH_CHRONO("singles", + singlesContribution( No, Nv, abc + , Tai.data() + , abhh.unwrapSlice(Slice::AB, abc) + , abhh.unwrapSlice(Slice::AC, abc) + , abhh.unwrapSlice(Slice::BC, abc) + , Zijk.data()); + ) } @@ -3163,12 +3156,12 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { if (abc[1] == abc[2]) distinct--; const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]); - chrono["energy"].start(); - if ( distinct == 0) - tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk); - else - tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk); - chrono["energy"].stop(); + WITH_CHRONO("energy", + if ( distinct == 0) + tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk); + else + tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk); + ) #if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES) tupleEnergies[abc] = tupleEnergy; @@ -3199,7 +3192,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { // CLEANUP UNIONS ===================================================={{{1 OCD_Barrier(universe); if (abcNext) { - chrono["gc"].start(); WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n"; for (auto& u: unions) { @@ -3233,12 +3225,11 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { } - chrono["gc"].stop(); } WITH_RANK << iteration << "-th cleaning up....... DONE\n"; - chrono["iterations"].stop(); + Atrip::chrono["iterations"].stop(); // ITERATION END ====================================================={{{1 } @@ -3276,15 +3267,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { // PRINT TIMINGS {{{1 if (in.chrono) - for (auto const& pair: chrono) + for (auto const& pair: Atrip::chrono) LOG(0,"atrip:chrono") << pair.first << " " << pair.second.count() << std::endl; LOG(0, "atrip:flops(doubles)") - << nIterations * doublesFlops / chrono["doubles"].count() << "\n"; + << nIterations * doublesFlops / Atrip::chrono["doubles"].count() << "\n"; LOG(0, "atrip:flops(iterations)") - << nIterations * doublesFlops / chrono["iterations"].count() << "\n"; + << nIterations * doublesFlops / Atrip::chrono["iterations"].count() << "\n"; // TODO: change the sign in the getEnergy routines return { - globalEnergy }; diff --git a/include/atrip.hpp b/include/atrip.hpp index b3ef823..aec4738 100644 --- a/include/atrip.hpp +++ b/include/atrip.hpp @@ -1,4 +1,4 @@ -// [[file:../atrip.org::*Include header][Include header:1]] +// [[file:~/atrip/atrip.org::*Include%20header][Include header:1]] #pragma once #include diff --git a/include/atrip/Atrip.hpp b/include/atrip/Atrip.hpp index bdcaec9..438d03e 100644 --- a/include/atrip/Atrip.hpp +++ b/include/atrip/Atrip.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*Atrip][Atrip:1]] +// [[file:~/atrip/atrip.org::*Atrip][Atrip:1]] #pragma once #include #include diff --git a/include/atrip/Blas.hpp b/include/atrip/Blas.hpp index fa63028..654d4ff 100644 --- a/include/atrip/Blas.hpp +++ b/include/atrip/Blas.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*Blas][Blas:1]] +// [[file:~/atrip/atrip.org::*Blas][Blas:1]] #pragma once namespace atrip { extern "C" { diff --git a/include/atrip/Debug.hpp b/include/atrip/Debug.hpp index ba04314..4f667a1 100644 --- a/include/atrip/Debug.hpp +++ b/include/atrip/Debug.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*Debug][Debug:1]] +// [[file:~/atrip/atrip.org::*Debug][Debug:1]] #pragma once #define ATRIP_BENCHMARK //#define ATRIP_DONT_SLICE diff --git a/include/atrip/Equations.hpp b/include/atrip/Equations.hpp index b8496f6..a4e298c 100644 --- a/include/atrip/Equations.hpp +++ b/include/atrip/Equations.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*Equations][Equations:1]] +// [[file:~/atrip/atrip.org::*Equations][Equations:1]] #pragma once #include @@ -150,54 +150,51 @@ namespace atrip { , double const* TBChh // -- TIJK , double *Tijk - , atrip::Timings& chrono ) { - auto& t_reorder = chrono["doubles:reorder"]; const size_t a = abc[0], b = abc[1], c = abc[2] , NoNo = No*No, NoNv = No*Nv ; - #if defined(ATRIP_USE_DGEMM) - #define _IJK_(i, j, k) i + j*No + k*NoNo - #define REORDER(__II, __JJ, __KK) \ - t_reorder.start(); \ - for (size_t k = 0; k < No; k++) \ - for (size_t j = 0; j < No; j++) \ - for (size_t i = 0; i < No; i++) { \ - Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \ - } \ - t_reorder.stop(); - #define DGEMM_PARTICLES(__A, __B) \ - atrip::dgemm_( "T" \ - , "N" \ - , (int const*)&NoNo \ - , (int const*)&No \ - , (int const*)&Nv \ - , &one \ - , __A \ - , (int const*)&Nv \ - , __B \ - , (int const*)&Nv \ - , &zero \ - , _t_buffer.data() \ - , (int const*)&NoNo \ - ); - #define DGEMM_HOLES(__A, __B, __TRANSB) \ - atrip::dgemm_( "N" \ - , __TRANSB \ - , (int const*)&NoNo \ - , (int const*)&No \ - , (int const*)&No \ - , &m_one \ - , __A \ - , (int const*)&NoNo \ - , __B \ - , (int const*)&No \ - , &zero \ - , _t_buffer.data() \ - , (int const*)&NoNo \ - ); +#if defined(ATRIP_USE_DGEMM) +#define _IJK_(i, j, k) i + j*No + k*NoNo +#define REORDER(__II, __JJ, __KK) \ + WITH_CHRONO("double:reorder", \ + for (size_t k = 0; k < No; k++) \ + for (size_t j = 0; j < No; j++) \ + for (size_t i = 0; i < No; i++) { \ + Tijk[_IJK_(i, j, k)] \ + += _t_buffer[_IJK_(__II, __JJ, __KK)]; \ + } \ + ) +#define DGEMM_PARTICLES(__A, __B) \ + atrip::dgemm_("T", \ + "N", \ + (int const*)&NoNo, \ + (int const*)&No, \ + (int const*)&Nv, \ + &one, \ + __A, \ + (int const*)&Nv, \ + __B, \ + (int const*)&Nv, \ + &zero, \ + _t_buffer.data(), \ + (int const*)&NoNo); +#define DGEMM_HOLES(__A, __B, __TRANSB) \ + atrip::dgemm_("N", \ + __TRANSB, \ + (int const*)&NoNo, \ + (int const*)&No, \ + (int const*)&No, \ + &m_one, \ + __A, \ + (int const*)&NoNo, \ + __B, \ + (int const*)&No, \ + &zero, \ + _t_buffer.data(), \ + (int const*)&NoNo); using F = double; const size_t NoNoNo = No*NoNo; @@ -205,88 +202,86 @@ namespace atrip { _t_buffer.reserve(NoNoNo); F one{1.0}, m_one{-1.0}, zero{0.0}; - t_reorder.start(); - for (size_t k = 0; k < NoNoNo; k++) { - // zero the Tijk - Tijk[k] = 0.0; - } - t_reorder.stop(); + WITH_CHRONO("double:reorder", + for (size_t k = 0; k < NoNoNo; k++) { + Tijk[k] = 0.0; + }) - chrono["doubles:holes"].start(); - { // Holes part ============================================================ - // VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1 - chrono["doubles:holes:1"].start(); - DGEMM_HOLES(VhhhC, TABhh, "N") - REORDER(i, k, j) - chrono["doubles:holes:1"].stop(); - // VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0 - chrono["doubles:holes:2"].start(); - DGEMM_HOLES(VhhhC, TABhh, "T") - REORDER(j, k, i) - chrono["doubles:holes:2"].stop(); - // VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5 - chrono["doubles:holes:3"].start(); - DGEMM_HOLES(VhhhB, TAChh, "N") - REORDER(i, j, k) - chrono["doubles:holes:3"].stop(); - // VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3 - chrono["doubles:holes:4"].start(); - DGEMM_HOLES(VhhhB, TAChh, "T") - REORDER(k, j, i) - chrono["doubles:holes:4"].stop(); - // VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1 - chrono["doubles:holes:5"].start(); - DGEMM_HOLES(VhhhA, TBChh, "N") - REORDER(j, i, k) - chrono["doubles:holes:5"].stop(); - // VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4 - chrono["doubles:holes:6"].start(); - DGEMM_HOLES(VhhhA, TBChh, "T") - REORDER(k, i, j) - chrono["doubles:holes:6"].stop(); - } - chrono["doubles:holes"].stop(); + WITH_CHRONO("doubles:holes", + { // Holes part ================================================ + // VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1 + WITH_CHRONO("doubles:holes:1", + DGEMM_HOLES(VhhhC, TABhh, "N") + REORDER(i, k, j) + ) + // VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0 + WITH_CHRONO("doubles:holes:2", + DGEMM_HOLES(VhhhC, TABhh, "T") + REORDER(j, k, i) + ) + // VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5 + WITH_CHRONO("doubles:holes:3", + DGEMM_HOLES(VhhhB, TAChh, "N") + REORDER(i, j, k) + ) + // VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3 + WITH_CHRONO("doubles:holes:4", + DGEMM_HOLES(VhhhB, TAChh, "T") + REORDER(k, j, i) + ) + // VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1 + WITH_CHRONO("doubles:holes:5", + DGEMM_HOLES(VhhhA, TBChh, "N") + REORDER(j, i, k) + ) + // VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4 + WITH_CHRONO("doubles:holes:6", + DGEMM_HOLES(VhhhA, TBChh, "T") + REORDER(k, i, j) + ) + } + ) - chrono["doubles:particles"].start(); - { // Particle part ========================================================= - // TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0 - chrono["doubles:particles:1"].start(); - DGEMM_PARTICLES(TAphh, VBCph) - REORDER(i, j, k) - chrono["doubles:particles:1"].stop(); - // TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3 - chrono["doubles:particles:2"].start(); - DGEMM_PARTICLES(TAphh, VCBph) - REORDER(i, k, j) - chrono["doubles:particles:2"].stop(); - // TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5 - chrono["doubles:particles:3"].start(); - DGEMM_PARTICLES(TCphh, VABph) - REORDER(k, i, j) - chrono["doubles:particles:3"].stop(); - // TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2 - chrono["doubles:particles:4"].start(); - DGEMM_PARTICLES(TCphh, VBAph) - REORDER(k, j, i) - chrono["doubles:particles:4"].stop(); - // TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1 - chrono["doubles:particles:5"].start(); - DGEMM_PARTICLES(TBphh, VACph) - REORDER(j, i, k) - chrono["doubles:particles:5"].stop(); - // TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4 - chrono["doubles:particles:6"].start(); - DGEMM_PARTICLES(TBphh, VCAph) - REORDER(j, k, i) - chrono["doubles:particles:6"].stop(); - } - chrono["doubles:particles"].stop(); + WITH_CHRONO("doubles:particles", + { // Particle part =========================================== + // TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0 + WITH_CHRONO("doubles:particles:1", + DGEMM_PARTICLES(TAphh, VBCph) + REORDER(i, j, k) + ) + // TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3 + WITH_CHRONO("doubles:particles:2", + DGEMM_PARTICLES(TAphh, VCBph) + REORDER(i, k, j) + ) + // TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5 + WITH_CHRONO("doubles:particles:3", + DGEMM_PARTICLES(TCphh, VABph) + REORDER(k, i, j) + ) + // TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2 + WITH_CHRONO("doubles:particles:4", + DGEMM_PARTICLES(TCphh, VBAph) + REORDER(k, j, i) + ) + // TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1 + WITH_CHRONO("doubles:particles:5", + DGEMM_PARTICLES(TBphh, VACph) + REORDER(j, i, k) + ) + // TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4 + WITH_CHRONO("doubles:particles:6", + DGEMM_PARTICLES(TBphh, VCAph) + REORDER(j, k, i) + ) + } + ) - #undef REORDER - #undef DGEMM_HOLES - #undef DGEMM_PARTICLES - #undef _IJK_ - #else +#undef REORDER +#undef DGEMM_HOLES +#undef DGEMM_PARTICLES +#undef _IJK_ +#else for (size_t k = 0; k < No; k++) for (size_t j = 0; j < No; j++) for (size_t i = 0; i < No; i++){ @@ -330,7 +325,7 @@ namespace atrip { } } - #endif +#endif } } diff --git a/include/atrip/RankMap.hpp b/include/atrip/RankMap.hpp index 82bb674..cc613ae 100644 --- a/include/atrip/RankMap.hpp +++ b/include/atrip/RankMap.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*The rank mapping][The rank mapping:1]] +// [[file:~/atrip/atrip.org::*The%20rank%20mapping][The rank mapping:1]] #pragma once #include diff --git a/include/atrip/Slice.hpp b/include/atrip/Slice.hpp index caafefc..6f16c06 100644 --- a/include/atrip/Slice.hpp +++ b/include/atrip/Slice.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*Prolog][Prolog:1]] +// [[file:~/atrip/atrip.org::*Prolog][Prolog:1]] #pragma once #include #include @@ -16,11 +16,11 @@ struct Slice { using F = double; // Prolog:1 ends here -// [[file:../../atrip.org::*Location][Location:1]] +// [[file:~/atrip/atrip.org::*Location][Location:1]] struct Location { size_t rank; size_t source; }; // Location:1 ends here -// [[file:../../atrip.org::*Type][Type:1]] +// [[file:~/atrip/atrip.org::*Type][Type:1]] enum Type { A = 10 , B @@ -38,7 +38,7 @@ enum Type }; // Type:1 ends here -// [[file:../../atrip.org::*State][State:1]] +// [[file:~/atrip/atrip.org::*State][State:1]] enum State { Fetch = 0, Dispatched = 2, @@ -49,7 +49,7 @@ enum State { }; // State:1 ends here -// [[file:../../atrip.org::*The Info structure][The Info structure:1]] +// [[file:~/atrip/atrip.org::*The%20Info%20structure][The Info structure:1]] struct Info { // which part of a,b,c the slice holds PartialTuple tuple; @@ -73,7 +73,7 @@ struct Info { using Ty_x_Tu = std::pair< Type, PartialTuple >; // The Info structure:1 ends here -// [[file:../../atrip.org::*Name][Name:1]] +// [[file:~/atrip/atrip.org::*Name][Name:1]] enum Name { TA = 100 , VIJKA = 101 @@ -83,19 +83,19 @@ enum Name }; // Name:1 ends here -// [[file:../../atrip.org::*Database][Database:1]] +// [[file:~/atrip/atrip.org::*Database][Database:1]] struct LocalDatabaseElement { Slice::Name name; Slice::Info info; }; // Database:1 ends here -// [[file:../../atrip.org::*Database][Database:2]] +// [[file:~/atrip/atrip.org::*Database][Database:2]] using LocalDatabase = std::vector; using Database = LocalDatabase; // Database:2 ends here -// [[file:../../atrip.org::*MPI Types][MPI Types:1]] +// [[file:~/atrip/atrip.org::*MPI%20Types][MPI Types:1]] struct mpi { static MPI_Datatype vector(size_t n, MPI_Datatype const& DT) { @@ -185,7 +185,7 @@ struct mpi { }; // MPI Types:1 ends here -// [[file:../../atrip.org::*Static utilities][Static utilities:1]] +// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:1]] static PartialTuple subtupleBySlice(ABCTuple abc, Type sliceType) { switch (sliceType) { @@ -203,7 +203,7 @@ PartialTuple subtupleBySlice(ABCTuple abc, Type sliceType) { } // Static utilities:1 ends here -// [[file:../../atrip.org::*Static utilities][Static utilities:2]] +// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:2]] static std::vector hasRecycledReferencingToIt ( std::vector &slices , Info const& info @@ -220,7 +220,7 @@ static std::vector hasRecycledReferencingToIt } // Static utilities:2 ends here -// [[file:../../atrip.org::*Static utilities][Static utilities:3]] +// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:3]] static Slice& findOneByType(std::vector &slices, Slice::Type type) { const auto sliceIt = std::find_if(slices.begin(), slices.end(), @@ -236,7 +236,7 @@ static Slice& findOneByType(std::vector &slices, Slice::Type type) { } // Static utilities:3 ends here -// [[file:../../atrip.org::*Static utilities][Static utilities:4]] +// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:4]] static Slice& findRecycledSource (std::vector &slices, Slice::Info info) { const auto sliceIt @@ -262,7 +262,7 @@ findRecycledSource (std::vector &slices, Slice::Info info) { } // Static utilities:4 ends here -// [[file:../../atrip.org::*Static utilities][Static utilities:5]] +// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:5]] static Slice& findByTypeAbc ( std::vector &slices , Slice::Type type @@ -292,7 +292,7 @@ static Slice& findByTypeAbc } // Static utilities:5 ends here -// [[file:../../atrip.org::*Static utilities][Static utilities:6]] +// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:6]] static Slice& findByInfo(std::vector &slices, Slice::Info const& info) { const auto sliceIt @@ -315,30 +315,30 @@ static Slice& findByInfo(std::vector &slices, } // Static utilities:6 ends here -// [[file:../../atrip.org::*Attributes][Attributes:1]] +// [[file:~/atrip/atrip.org::*Attributes][Attributes:1]] Info info; // Attributes:1 ends here -// [[file:../../atrip.org::*Attributes][Attributes:2]] +// [[file:~/atrip/atrip.org::*Attributes][Attributes:2]] F *data; // Attributes:2 ends here -// [[file:../../atrip.org::*Attributes][Attributes:3]] +// [[file:~/atrip/atrip.org::*Attributes][Attributes:3]] MPI_Request request; // Attributes:3 ends here -// [[file:../../atrip.org::*Attributes][Attributes:4]] +// [[file:~/atrip/atrip.org::*Attributes][Attributes:4]] const size_t size; // Attributes:4 ends here -// [[file:../../atrip.org::*Member functions][Member functions:1]] +// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:1]] void markReady() noexcept { info.state = Ready; info.recycling = Blank; } // Member functions:1 ends here -// [[file:../../atrip.org::*Member functions][Member functions:2]] +// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:2]] bool isUnwrapped() const noexcept { return info.state == Ready || info.state == SelfSufficient @@ -346,7 +346,7 @@ bool isUnwrapped() const noexcept { } // Member functions:2 ends here -// [[file:../../atrip.org::*Member functions][Member functions:3]] +// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:3]] bool isUnwrappable() const noexcept { return isUnwrapped() || info.state == Recycled @@ -379,7 +379,7 @@ inline bool isFree() const noexcept { } // Member functions:3 ends here -// [[file:../../atrip.org::*Member functions][Member functions:4]] +// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:4]] inline bool isRecyclable() const noexcept { return ( info.state == Dispatched || info.state == Ready @@ -390,7 +390,7 @@ inline bool isRecyclable() const noexcept { } // Member functions:4 ends here -// [[file:../../atrip.org::*Member functions][Member functions:5]] +// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:5]] inline bool hasValidDataPointer() const noexcept { return data != nullptr && info.state != Acceptor @@ -399,7 +399,7 @@ inline bool hasValidDataPointer() const noexcept { } // Member functions:5 ends here -// [[file:../../atrip.org::*Member functions][Member functions:6]] +// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:6]] void unwrapAndMarkReady() { if (info.state == Ready) return; if (info.state != Dispatched) @@ -431,7 +431,7 @@ void unwrapAndMarkReady() { } // Member functions:6 ends here -// [[file:../../atrip.org::*Epilog][Epilog:1]] +// [[file:~/atrip/atrip.org::*Epilog][Epilog:1]] Slice(size_t size_) : info({}) , data(nullptr) @@ -442,7 +442,7 @@ Slice(size_t size_) }; // struct Slice // Epilog:1 ends here -// [[file:../../atrip.org::*Debug][Debug:1]] +// [[file:~/atrip/atrip.org::*Debug][Debug:1]] std::ostream& operator<<(std::ostream& out, Slice::Location const& v) { // TODO: remove me out << "{.r(" << v.rank << "), .s(" << v.source << ")};"; diff --git a/include/atrip/SliceUnion.hpp b/include/atrip/SliceUnion.hpp index 060dcc2..0150745 100644 --- a/include/atrip/SliceUnion.hpp +++ b/include/atrip/SliceUnion.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*The slice union][The slice union:1]] +// [[file:~/atrip/atrip.org::*The%20slice%20union][The slice union:1]] #pragma once #include #include diff --git a/include/atrip/Tuples.hpp b/include/atrip/Tuples.hpp index f15d1c6..ea54dbe 100644 --- a/include/atrip/Tuples.hpp +++ b/include/atrip/Tuples.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*Prolog][Prolog:1]] +// [[file:~/atrip/atrip.org::*Prolog][Prolog:1]] #pragma once #include @@ -21,7 +21,7 @@ namespace atrip { // Prolog:1 ends here -// [[file:../../atrip.org::*Tuples types][Tuples types:1]] +// [[file:~/atrip/atrip.org::*Tuples%20types][Tuples types:1]] using ABCTuple = std::array; using PartialTuple = std::array; using ABCTuples = std::vector; @@ -29,14 +29,14 @@ using ABCTuples = std::vector; constexpr ABCTuple FAKE_TUPLE = {0, 0, 0}; // Tuples types:1 ends here -// [[file:../../atrip.org::*Distributing the tuples][Distributing the tuples:1]] +// [[file:~/atrip/atrip.org::*Distributing%20the%20tuples][Distributing the tuples:1]] struct TuplesDistribution { virtual ABCTuples getTuples(size_t Nv, MPI_Comm universe) = 0; virtual bool tupleIsFake(ABCTuple const& t) { return t == FAKE_TUPLE; } }; // Distributing the tuples:1 ends here -// [[file:../../atrip.org::*Naive list][Naive list:1]] +// [[file:~/atrip/atrip.org::*Naive%20list][Naive list:1]] ABCTuples getTuplesList(size_t Nv) { const size_t n = Nv * (Nv + 1) * (Nv + 2) / 6 - Nv; ABCTuples result(n); @@ -54,7 +54,7 @@ ABCTuples getTuplesList(size_t Nv) { } // Naive list:1 ends here -// [[file:../../atrip.org::*Naive list][Naive list:2]] +// [[file:~/atrip/atrip.org::*Naive%20list][Naive list:2]] std::pair getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) { @@ -91,7 +91,7 @@ getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) { } // Naive list:2 ends here -// [[file:../../atrip.org::*Naive list][Naive list:3]] +// [[file:~/atrip/atrip.org::*Naive%20list][Naive list:3]] struct NaiveDistribution : public TuplesDistribution { ABCTuples getTuples(size_t Nv, MPI_Comm universe) override { int rank, np; @@ -113,23 +113,29 @@ struct NaiveDistribution : public TuplesDistribution { WITH_RANK << "range = " << range.first << " -> " << range.second << std::endl; + std::vector result(range.second - range.first + 1, FAKE_TUPLE); + WITH_RANK << "number of global tuples = " << all.size() << std::endl; + WITH_RANK << "number of local tuples = " << result.size() << std::endl; - std::vector result(range.second - range.first, FAKE_TUPLE); - std::copy(all.begin() + range.first, + std::copy(range.first >= all.size() + ? all.end() + : all.begin() + range.first, + // -- range.second >= all.size() - ? all.end() - : all.begin() + range.first + range.second, + ? all.end() + : all.begin() + range.first + range.second, + // -- result.begin()); return result; } }; // Naive list:3 ends here -// [[file:../../atrip.org::*Prolog][Prolog:1]] +// [[file:~/atrip/atrip.org::*Prolog][Prolog:1]] namespace group_and_sort { // Prolog:1 ends here -// [[file:../../atrip.org::*Node information][Node information:1]] +// [[file:~/atrip/atrip.org::*Node%20information][Node information:1]] std::vector getNodeNames(MPI_Comm comm){ int rank, np; MPI_Comm_rank(comm, &rank); @@ -169,7 +175,7 @@ std::vector getNodeNames(MPI_Comm comm){ } // Node information:1 ends here -// [[file:../../atrip.org::*Node information][Node information:2]] +// [[file:~/atrip/atrip.org::*Node%20information][Node information:2]] struct RankInfo { const std::string name; const size_t nodeId; @@ -208,7 +214,7 @@ getNodeInfos(std::vector const& nodeNames) { } // Node information:2 ends here -// [[file:../../atrip.org::*Utils][Utils:1]] +// [[file:~/atrip/atrip.org::*Utils][Utils:1]] // Provides the node on which the slice-element is found // Right now we distribute the slices in a round robin fashion // over the different nodes (NOTE: not mpi ranks but nodes) @@ -239,7 +245,7 @@ std::vector getTupleNodes(ABCTuple t, size_t nNodes) { } // Utils:1 ends here -// [[file:../../atrip.org::*Distribution][Distribution:1]] +// [[file:~/atrip/atrip.org::*Distribution][Distribution:1]] std::vector specialDistribution(Info info, std::vector const& allTuples) { @@ -380,7 +386,7 @@ std::vector fetchElement(ABCTuple cur, ABCTuple suc){ } // Distribution:1 ends here -// [[file:../../atrip.org::*Main][Main:1]] +// [[file:~/atrip/atrip.org::*Main][Main:1]] std::vector main(MPI_Comm universe, size_t Nv) { int rank, np; @@ -432,7 +438,7 @@ std::vector main(MPI_Comm universe, size_t Nv) { MPI_Comm_split(universe, color, key, &INTRA_COMM); // Main:1 ends here -// [[file:../../atrip.org::*Main][Main:2]] +// [[file:~/atrip/atrip.org::*Main][Main:2]] const size_t tuplesPerRankLocal = nodeTuples.size() / nodeInfos[rank].ranksPerNode @@ -456,7 +462,7 @@ MPI_Bcast(&tuplesPerRankGlobal, universe); // Main:2 ends here -// [[file:../../atrip.org::*Main][Main:3]] +// [[file:~/atrip/atrip.org::*Main][Main:3]] size_t const totalTuplesLocal = tuplesPerRankLocal * nodeInfos[rank].ranksPerNode; @@ -467,7 +473,7 @@ if (makeDistribution) FAKE_TUPLE); // Main:3 ends here -// [[file:../../atrip.org::*Main][Main:4]] +// [[file:~/atrip/atrip.org::*Main][Main:4]] { std::vector const sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal); @@ -503,7 +509,7 @@ if (makeDistribution) } // Main:4 ends here -// [[file:../../atrip.org::*Main][Main:5]] +// [[file:~/atrip/atrip.org::*Main][Main:5]] result.insert(result.end(), tuplesPerRankGlobal - result.size(), FAKE_TUPLE); @@ -513,7 +519,7 @@ result.insert(result.end(), } // Main:5 ends here -// [[file:../../atrip.org::*Interface][Interface:1]] +// [[file:~/atrip/atrip.org::*Interface][Interface:1]] struct Distribution : public TuplesDistribution { ABCTuples getTuples(size_t Nv, MPI_Comm universe) override { return main(universe, Nv); @@ -521,10 +527,10 @@ struct Distribution : public TuplesDistribution { }; // Interface:1 ends here -// [[file:../../atrip.org::*Epilog][Epilog:1]] +// [[file:~/atrip/atrip.org::*Epilog][Epilog:1]] } // namespace group_and_sort // Epilog:1 ends here -// [[file:../../atrip.org::*Epilog][Epilog:1]] +// [[file:~/atrip/atrip.org::*Epilog][Epilog:1]] } // Epilog:1 ends here diff --git a/include/atrip/Unions.hpp b/include/atrip/Unions.hpp index de924ee..290c584 100644 --- a/include/atrip/Unions.hpp +++ b/include/atrip/Unions.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*Unions][Unions:1]] +// [[file:~/atrip/atrip.org::*Unions][Unions:1]] #pragma once #include diff --git a/include/atrip/Utils.hpp b/include/atrip/Utils.hpp index 85eec8f..b21e5fa 100644 --- a/include/atrip/Utils.hpp +++ b/include/atrip/Utils.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*Prolog][Prolog:1]] +// [[file:~/atrip/atrip.org::*Prolog][Prolog:1]] #pragma once #include #include @@ -11,7 +11,7 @@ namespace atrip { // Prolog:1 ends here -// [[file:../../atrip.org::*Pretty printing][Pretty printing:1]] +// [[file:~/atrip/atrip.org::*Pretty%20printing][Pretty printing:1]] template std::string pretty_print(T&& value) { std::stringstream stream; @@ -22,11 +22,11 @@ template } // Pretty printing:1 ends here -// [[file:../../atrip.org::*Chrono][Chrono:1]] -#define WITH_CHRONO(__chrono, ...) \ - __chrono.start(); \ +// [[file:~/atrip/atrip.org::*Chrono][Chrono:1]] +#define WITH_CHRONO(__chrono_name, ...) \ + Atrip::chrono[__chrono_name].start(); \ __VA_ARGS__ \ - __chrono.stop(); + Atrip::chrono[__chrono_name].stop(); struct Timer { using Clock = std::chrono::high_resolution_clock; @@ -41,6 +41,6 @@ struct Timer { using Timings = std::map; // Chrono:1 ends here -// [[file:../../atrip.org::*Epilog][Epilog:1]] +// [[file:~/atrip/atrip.org::*Epilog][Epilog:1]] } // Epilog:1 ends here diff --git a/src/atrip/Atrip.cxx b/src/atrip/Atrip.cxx index 80a35c7..eac7c61 100644 --- a/src/atrip/Atrip.cxx +++ b/src/atrip/Atrip.cxx @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*Main][Main:1]] +// [[file:~/atrip/atrip.org::*Main][Main:1]] #include #include @@ -24,9 +24,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { const int rank = Atrip::rank; MPI_Comm universe = in.ei->wrld->comm; - // Timings in seconds ================================================{{{1 - Timings chrono{}; - const size_t No = in.ei->lens[0]; const size_t Nv = in.ea->lens[0]; LOG(0,"Atrip") << "No: " << No << "\n"; @@ -66,20 +63,20 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { } - chrono["nv-slices"].start(); // BUILD SLICES PARAMETRIZED BY NV ==================================={{{1 - LOG(0,"Atrip") << "BUILD NV-SLICES\n"; - TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - chrono["nv-slices"].stop(); + WITH_CHRONO("nv-slices", + LOG(0,"Atrip") << "BUILD NV-SLICES\n"; + TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); + HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); + ) - chrono["nv-nv-slices"].start(); // BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1 - LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n"; - ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - chrono["nv-nv-slices"].stop(); + WITH_CHRONO("nv-nv-slices", + LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n"; + ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); + ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); + TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); + ) // all tensors std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh}; @@ -96,7 +93,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { } LOG(0,"Atrip") << "BUILDING TUPLE LIST\n"; - WITH_CHRONO(chrono["tuples:build"], + WITH_CHRONO("tuples:build", auto const tuplesList = distribution->getTuples(Nv, universe); ) size_t nIterations = tuplesList.size(); @@ -119,45 +116,42 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { auto communicateDatabase = [ &unions , np - , &chrono ] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database { - chrono["db:comm:type:do"].start(); - auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement(); - chrono["db:comm:type:do"].stop(); + WITH_CHRONO("db:comm:type:do", + auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement(); + ) - chrono["db:comm:ldb"].start(); - Slice::LocalDatabase ldb; - - for (auto const& tensor: unions) { - auto const& tensorDb = tensor->buildLocalDatabase(abc); - ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end()); - } - chrono["db:comm:ldb"].stop(); + WITH_CHRONO("db:comm:ldb", + Slice::LocalDatabase ldb; + for (auto const& tensor: unions) { + auto const& tensorDb = tensor->buildLocalDatabase(abc); + ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end()); + } + ) Slice::Database db(np * ldb.size(), ldb[0]); - chrono["oneshot-db:comm:allgather"].start(); - chrono["db:comm:allgather"].start(); - MPI_Allgather( ldb.data() - , ldb.size() - , MPI_LDB_ELEMENT - , db.data() - , ldb.size() - , MPI_LDB_ELEMENT - , c); - chrono["db:comm:allgather"].stop(); - chrono["oneshot-db:comm:allgather"].stop(); + WITH_CHRONO("oneshot-db:comm:allgather", + WITH_CHRONO("db:comm:allgather", + MPI_Allgather( ldb.data() + , ldb.size() + , MPI_LDB_ELEMENT + , db.data() + , ldb.size() + , MPI_LDB_ELEMENT + , c); + )) - chrono["db:comm:type:free"].start(); - MPI_Type_free(&MPI_LDB_ELEMENT); - chrono["db:comm:type:free"].stop(); + WITH_CHRONO("db:comm:type:free", + MPI_Type_free(&MPI_LDB_ELEMENT); + ) return db; }; auto doIOPhase - = [&unions, &rank, &np, &universe, &chrono] (Slice::Database const& db) { + = [&unions, &rank, &np, &universe] (Slice::Database const& db) { const size_t localDBLength = db.size() / np; @@ -193,9 +187,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { << "\n" ; - chrono["db:io:recv"].start(); - u.receive(el.info, recvTag); - chrono["db:io:recv"].stop(); + WITH_CHRONO("db:io:recv", + u.receive(el.info, recvTag); + ) } // recv } @@ -229,9 +223,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { << "\n" ; - chrono["db:io:send"].start(); - u.send(otherRank, el.info, sendTag); - chrono["db:io:send"].stop(); + WITH_CHRONO("db:io:send", + u.send(otherRank, el.info, sendTag); + ) } // send phase @@ -262,14 +256,14 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { ; i < tuplesList.size() ; i++, iteration++ ) { - chrono["iterations"].start(); + Atrip::chrono["iterations"].start(); // check overhead from chrono over all iterations - chrono["start:stop"].start(); chrono["start:stop"].stop(); + WITH_CHRONO("start:stop", {}) // check overhead of doing a barrier at the beginning - WITH_CHRONO(chrono["oneshot-mpi:barrier"], - WITH_CHRONO(chrono["mpi:barrier"], + WITH_CHRONO("oneshot-mpi:barrier", + WITH_CHRONO("mpi:barrier", if (in.barrier) MPI_Barrier(universe); )) @@ -277,15 +271,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { LOG(0,"Atrip") << "iteration " << iteration << " [" << 100 * iteration / nIterations << "%]" - << " (" << doublesFlops * iteration / chrono["doubles"].count() + << " (" << doublesFlops * iteration / Atrip::chrono["doubles"].count() << "GF)" - << " (" << doublesFlops * iteration / chrono["iterations"].count() + << " (" << doublesFlops * iteration / Atrip::chrono["iterations"].count() << "GF)" << " ===========================\n"; // PRINT TIMINGS if (in.chrono) - for (auto const& pair: chrono) + for (auto const& pair: Atrip::chrono) LOG(1, " ") << pair.first << " :: " << pair.second.count() << std::endl; @@ -302,13 +296,13 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { : &tuplesList[i + 1] ; - chrono["with_rank"].start(); - WITH_RANK << " :it " << iteration - << " :abc " << pretty_print(abc) - << " :abcN " - << (abcNext ? pretty_print(*abcNext) : "None") - << "\n"; - chrono["with_rank"].stop(); + WITH_CHRONO("with_rank", + WITH_RANK << " :it " << iteration + << " :abc " << pretty_print(abc) + << " :abcN " + << (abcNext ? pretty_print(*abcNext) : "None") + << "\n"; + ) // COMM FIRST DATABASE ================================================{{{1 @@ -321,19 +315,19 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { WITH_RANK << "__first__:first database io phase DONE\n"; WITH_RANK << "__first__::::Unwrapping all slices for first database\n"; for (auto& u: unions) u->unwrapAll(abc); - WITH_RANK << "__first__::::Unwrapping all slices for first database DONE\n"; + WITH_RANK << "__first__::::Unwrapping slices for first database DONE\n"; MPI_Barrier(universe); } // COMM NEXT DATABASE ================================================={{{1 if (abcNext) { WITH_RANK << "__comm__:" << iteration << "th communicating database\n"; - chrono["db:comm"].start(); - const auto db = communicateDatabase(*abcNext, universe); - chrono["db:comm"].stop(); - chrono["db:io"].start(); - doIOPhase(db); - chrono["db:io"].stop(); + WITH_CHRONO("db:comm", + const auto db = communicateDatabase(*abcNext, universe); + ) + WITH_CHRONO("db:io", + doIOPhase(db); + ) WITH_RANK << "__comm__:" << iteration << "th database io phase DONE\n"; } @@ -341,63 +335,61 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { OCD_Barrier(universe); if (!isFakeTuple(i)) { WITH_RANK << iteration << "-th doubles\n"; - WITH_CHRONO(chrono["oneshot-unwrap"], - WITH_CHRONO(chrono["unwrap"], - WITH_CHRONO(chrono["unwrap:doubles"], + WITH_CHRONO("oneshot-unwrap", + WITH_CHRONO("unwrap", + WITH_CHRONO("unwrap:doubles", for (auto& u: decltype(unions){&abph, &hhha, &taphh, &tabhh}) { u->unwrapAll(abc); } ))) - chrono["oneshot-doubles"].start(); - chrono["doubles"].start(); - doublesContribution( abc, (size_t)No, (size_t)Nv - // -- VABCI - , abph.unwrapSlice(Slice::AB, abc) - , abph.unwrapSlice(Slice::AC, abc) - , abph.unwrapSlice(Slice::BC, abc) - , abph.unwrapSlice(Slice::BA, abc) - , abph.unwrapSlice(Slice::CA, abc) - , abph.unwrapSlice(Slice::CB, abc) - // -- VHHHA - , hhha.unwrapSlice(Slice::A, abc) - , hhha.unwrapSlice(Slice::B, abc) - , hhha.unwrapSlice(Slice::C, abc) - // -- TA - , taphh.unwrapSlice(Slice::A, abc) - , taphh.unwrapSlice(Slice::B, abc) - , taphh.unwrapSlice(Slice::C, abc) - // -- TABIJ - , tabhh.unwrapSlice(Slice::AB, abc) - , tabhh.unwrapSlice(Slice::AC, abc) - , tabhh.unwrapSlice(Slice::BC, abc) - // -- TIJK - , Tijk.data() - , chrono - ); - WITH_RANK << iteration << "-th doubles done\n"; - chrono["doubles"].stop(); - chrono["oneshot-doubles"].stop(); + WITH_CHRONO("oneshot-doubles", + WITH_CHRONO("doubles", + doublesContribution( abc, (size_t)No, (size_t)Nv + // -- VABCI + , abph.unwrapSlice(Slice::AB, abc) + , abph.unwrapSlice(Slice::AC, abc) + , abph.unwrapSlice(Slice::BC, abc) + , abph.unwrapSlice(Slice::BA, abc) + , abph.unwrapSlice(Slice::CA, abc) + , abph.unwrapSlice(Slice::CB, abc) + // -- VHHHA + , hhha.unwrapSlice(Slice::A, abc) + , hhha.unwrapSlice(Slice::B, abc) + , hhha.unwrapSlice(Slice::C, abc) + // -- TA + , taphh.unwrapSlice(Slice::A, abc) + , taphh.unwrapSlice(Slice::B, abc) + , taphh.unwrapSlice(Slice::C, abc) + // -- TABIJ + , tabhh.unwrapSlice(Slice::AB, abc) + , tabhh.unwrapSlice(Slice::AC, abc) + , tabhh.unwrapSlice(Slice::BC, abc) + // -- TIJK + , Tijk.data() + ); + WITH_RANK << iteration << "-th doubles done\n"; + )) } // COMPUTE SINGLES =================================================== {{{1 OCD_Barrier(universe); if (!isFakeTuple(i)) { - WITH_CHRONO(chrono["oneshot-unwrap"], - WITH_CHRONO(chrono["unwrap"], - WITH_CHRONO(chrono["unwrap:singles"], + WITH_CHRONO("oneshot-unwrap", + WITH_CHRONO("unwrap", + WITH_CHRONO("unwrap:singles", abhh.unwrapAll(abc); ))) - chrono["reorder"].start(); - for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I]; - chrono["reorder"].stop(); - chrono["singles"].start(); - singlesContribution( No, Nv, abc - , Tai.data() - , abhh.unwrapSlice(Slice::AB, abc) - , abhh.unwrapSlice(Slice::AC, abc) - , abhh.unwrapSlice(Slice::BC, abc) - , Zijk.data()); - chrono["singles"].stop(); + WITH_CHRONO("reorder", + for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I]; + ) + WITH_CHRONO("singles", + singlesContribution( No, Nv, abc + , Tai.data() + , abhh.unwrapSlice(Slice::AB, abc) + , abhh.unwrapSlice(Slice::AC, abc) + , abhh.unwrapSlice(Slice::BC, abc) + , Zijk.data()); + ) } @@ -410,12 +402,12 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { if (abc[1] == abc[2]) distinct--; const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]); - chrono["energy"].start(); - if ( distinct == 0) - tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk); - else - tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk); - chrono["energy"].stop(); + WITH_CHRONO("energy", + if ( distinct == 0) + tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk); + else + tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk); + ) #if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES) tupleEnergies[abc] = tupleEnergy; @@ -446,7 +438,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { // CLEANUP UNIONS ===================================================={{{1 OCD_Barrier(universe); if (abcNext) { - chrono["gc"].start(); WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n"; for (auto& u: unions) { @@ -480,12 +471,11 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { } - chrono["gc"].stop(); } WITH_RANK << iteration << "-th cleaning up....... DONE\n"; - chrono["iterations"].stop(); + Atrip::chrono["iterations"].stop(); // ITERATION END ====================================================={{{1 } @@ -523,15 +513,15 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { // PRINT TIMINGS {{{1 if (in.chrono) - for (auto const& pair: chrono) + for (auto const& pair: Atrip::chrono) LOG(0,"atrip:chrono") << pair.first << " " << pair.second.count() << std::endl; LOG(0, "atrip:flops(doubles)") - << nIterations * doublesFlops / chrono["doubles"].count() << "\n"; + << nIterations * doublesFlops / Atrip::chrono["doubles"].count() << "\n"; LOG(0, "atrip:flops(iterations)") - << nIterations * doublesFlops / chrono["iterations"].count() << "\n"; + << nIterations * doublesFlops / Atrip::chrono["iterations"].count() << "\n"; // TODO: change the sign in the getEnergy routines return { - globalEnergy };