From 0063518e5841edd8b1819d56058bb8fd98317d45 Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Fri, 3 Sep 2021 15:54:06 +0200 Subject: [PATCH] Update the readme --- README.org | 665 ++++------------------------------------------------- 1 file changed, 45 insertions(+), 620 deletions(-) diff --git a/README.org b/README.org index 54f53f9..551eeeb 100644 --- a/README.org +++ b/README.org @@ -594,6 +594,7 @@ namespace atrip { ** The slice union #+begin_src c++ :tangle (atrip-slice-union-h) #pragma once +#include #include namespace atrip { @@ -763,7 +764,7 @@ namespace atrip { blank.info.from = from; // Handle self sufficiency - blank.info.state = cc4s::Cc4s::world->rank == from.rank + blank.info.state = Atrip::rank == from.rank ? Slice::SelfSufficient : Slice::Fetch ; @@ -983,7 +984,7 @@ namespace atrip { void init(Tensor const& sourceTensor) { CTF::World w(world); - const int rank = cc4s::Cc4s::world->rank + const int rank = Atrip::rank , order = sliceLength.size() ; std::vector const syms(order, NS); @@ -1040,7 +1041,7 @@ namespace atrip { void receive(Slice::Info const& info, size_t tag) noexcept { auto& slice = Slice::findByInfo(slices, info); - if (cc4s::Cc4s::world->rank == info.from.rank) return; + if (Atrip::rank == info.from.rank) return; if (slice.info.state == Slice::Fetch) { // TODO: do it through the slice class @@ -1143,6 +1144,7 @@ namespace atrip { #include #include +#include namespace atrip { @@ -1278,7 +1280,7 @@ namespace atrip { void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override { - const int rank = cc4s::Cc4s::world->rank + const int rank = Atrip::rank , Nv = sliceLength[0] , No = sliceLength[1] , a = rankMap.find({rank, it}); @@ -1317,7 +1319,7 @@ namespace atrip { void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override { - const int rank = cc4s::Cc4s::world->rank + const int rank = Atrip::rank , No = sliceLength[0] , a = rankMap.find({rank, it}) ; @@ -1355,7 +1357,7 @@ namespace atrip { const int Nv = sliceLength[0] , No = sliceLength[1] - , rank = cc4s::Cc4s::world->rank + , rank = Atrip::rank , el = rankMap.find({rank, it}) , a = el % Nv , b = el / Nv @@ -1394,7 +1396,7 @@ namespace atrip { const int Nv = from.lens[0] , No = sliceLength[1] - , rank = cc4s::Cc4s::world->rank + , rank = Atrip::rank , el = rankMap.find({rank, it}) , a = el % Nv , b = el / Nv @@ -1435,7 +1437,7 @@ namespace atrip { const int Nv = from.lens[0] , No = sliceLength[1] - , rank = cc4s::Cc4s::world->rank + , rank = Atrip::rank , el = rankMap.find({rank, it}) , a = el % Nv , b = el / Nv @@ -1561,7 +1563,9 @@ namespace atrip { } void singlesContribution - ( const ABCTuple &abc + ( size_t No + , size_t Nv + , const ABCTuple &abc , double const* Tph , double const* VABij , double const* VACij @@ -1835,11 +1839,7 @@ namespace atrip { static int rank; static int np; - - static void init() { - MPI_Comm_rank(MPI_COMM_WORLD, &Atrip::rank); - MPI_Comm_size(MPI_COMM_WORLD, &Atrip::np); - } + static void init(); struct Input { CTF::Tensor *ei = nullptr @@ -1850,6 +1850,8 @@ namespace atrip { , *Vhhhp = nullptr , *Vppph = nullptr ; + int maxIterations = 0, iterationMod = -1; + bool barrier = true; Input& with_epsilon_i(CTF::Tensor * t) { ei = t; return *this; } Input& with_epsilon_a(CTF::Tensor * t) { ea = t; return *this; } Input& with_Tai(CTF::Tensor * t) { Tph = t; return *this; } @@ -1864,20 +1866,30 @@ namespace atrip { }; static Output run(Input const& in); }; - int Atrip::rank; - int Atrip::np; } #+end_src #+begin_src c++ :tangle (atrip-atrip-cxx) +#include + #include #include +#include +#include +#include using namespace atrip; +int Atrip::rank; +int Atrip::np; -Output Atrip::run(Input const& in){ +void Atrip::init() { + MPI_Comm_rank(MPI_COMM_WORLD, &Atrip::rank); + MPI_Comm_size(MPI_COMM_WORLD, &Atrip::np); +} + +Atrip::Output Atrip::run(Atrip::Input const& in){ const int np = Atrip::np; const int rank = Atrip::rank; @@ -1886,8 +1898,8 @@ Output Atrip::run(Input const& in){ // Timings in seconds ================================================{{{1 Timings chrono{}; - No = in.ei->lens[0]; - Nv = in.ea->lens[0]; + const size_t No = in.ei->lens[0]; + const size_t Nv = in.ea->lens[0]; LOG(0,"NEW_TRIPLES") << "No: " << No << "\n"; LOG(0,"NEW_TRIPLES") << "Nv: " << Nv << "\n"; @@ -1954,7 +1966,7 @@ Output Atrip::run(Input const& in){ size_t nIterations = abcIndex.second - abcIndex.first; #ifdef TRIPLES_BENCHMARK - { const size_t maxIterations = getIntegerArgument("maxIterations", 0); + { const size_t maxIterations = in.maxIterations; if (maxIterations != 0) { abcIndex.second = abcIndex.first + maxIterations % (nIterations + 1); nIterations = maxIterations % (nIterations + 1); @@ -2132,12 +2144,12 @@ Output Atrip::run(Input const& in){ chrono["oneshot-mpi:barrier"].start(); chrono["mpi:barrier"].start(); // TODO: REMOVE - if (getIntegerArgument("barrier", 1) == 1) + if (in.barrier == 1) MPI_Barrier(universe); chrono["mpi:barrier"].stop(); chrono["oneshot-mpi:barrier"].stop(); - if (iteration % getIntegerArgument("iterationMod", 100) == 0) { + if (iteration % in.iterationMod == 0) { LOG(0,"NEW_TRIPLES") << "iteration " << iteration << " [" << 100 * iteration / nIterations << "%]" @@ -2214,8 +2226,6 @@ Output Atrip::run(Input const& in){ ))) chrono["oneshot-doubles"].start(); chrono["doubles"].start(); - // TODO: REMOVE - for (size_t __i=0; __i < getIntegerArgument("doublesLoops", 1); __i++) doublesContribution( abc, (size_t)No, (size_t)Nv // -- VABCI , abph.unwrapSlice(Slice::AB, abc) @@ -2257,7 +2267,7 @@ Output Atrip::run(Input const& in){ for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I]; chrono["reorder"].stop(); chrono["singles"].start(); - singlesContribution( abc + singlesContribution( No, Nv, abc , Tai.data() , abhh.unwrapSlice(Slice::AB, abc) , abhh.unwrapSlice(Slice::AC, abc) @@ -2423,17 +2433,17 @@ Output Atrip::run(Input const& in){ #+end_src -** Include header - -#+begin_src c++ :tangle (atrip-main-h) +** Debug +#+begin_src c++ :tangle (atrip-debug-h) #pragma once - #define TRIPLES_BENCHMARK #define TRIPLES_DEBUG 1 //#define TRIPLES_WORKLOAD_DUMP #define TRIPLES_USE_DGEMM //#define TRIPLES_PRINT_TUPLES +#define LOG(name, level) if (Atrip::rank == 0) std::cout << name << ": " + #if TRIPLES_DEBUG == 4 # pragma message("WARNING: You have OCD debugging ABC triples "\ "expect GB of output and consult your therapist") @@ -2482,598 +2492,13 @@ Output Atrip::run(Input const& in){ #else # error("TRIPLES_DEBUG is not defined!") #endif +#+end_src + +** Include header + +#+begin_src c++ :tangle (atrip-main-h) +#pragma once -#include -#include #include - #+end_src - - -** Todo :noexport: - #+begin_src c++ :tangle todo.hpp -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace cc4s; - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#+end_src - -#+begin_src c++ :tangle todo.hpp - - - - -// MAIN ALGORITHM ======================================================{{{1 -void cc4s::PerturbativeTriplesAbcijk::run(){ - - const int np = cc4s::Cc4s::world->np; - const int rank = cc4s::Cc4s::world->rank; - MPI_Comm universe = cc4s::Cc4s::world->comm; - - // Timings in seconds ================================================{{{1 - atrip::Timings chrono{}; - - // Get the distributed ctf tensor data - CTF::Tensor<> *ei(getTensorArgument("HoleEigenEnergies")) - , *ea(getTensorArgument("ParticleEigenEnergies")) - , *Tph(getTensorArgument("CcsdSinglesAmplitudes")) - , *Tpphh(getTensorArgument("CcsdDoublesAmplitudes")) - , *Vpphh(getTensorArgument("PPHHCoulombIntegrals")) - , *Vhhhp(getTensorArgument("HHHPCoulombIntegrals")) - , *Vppph(getTensorArgument("PPPHCoulombIntegrals")) - ; - - No = ei->lens[0]; - Nv = ea->lens[0]; - LOG(0,"NEW_TRIPLES") << "No: " << No << "\n"; - LOG(0,"NEW_TRIPLES") << "Nv: " << Nv << "\n"; - - // allocate the three scratches, see piecuch - std::vector Tijk(No*No*No) // doubles only (see piecuch) - , Zijk(No*No*No) // singles + doubles (see piecuch) - // we need local copies of the following tensors on every - // rank - , epsi(No) - , epsa(Nv) - , Tai(No * Nv) - ; - - ei->read_all(epsi.data()); - ea->read_all(epsa.data()); - Tph->read_all(Tai.data()); - - // COMMUNICATOR CONSTRUCTION ========================================={{{1 - // - // Construct a new communicator living only on a single rank - int child_size = 1 - , child_rank - ; - const - int color = rank / child_size - , crank = rank % child_size - ; - MPI_Comm child_comm; - if (np == 1) { - child_comm = universe; - } else { - MPI_Comm_split(cc4s::Cc4s::world->comm, color, crank, &child_comm); - MPI_Comm_rank(child_comm, &child_rank); - MPI_Comm_size(child_comm, &child_size); - //CTF::World child_world(child_comm); - } - - - chrono["nv-slices"].start(); - // BUILD SLICES PARAMETRIZED BY NV ==================================={{{1 - LOG(0,"NEW_TRIPLES") << "BUILD NV-SLICES\n"; - TAPHH taphh(*Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - HHHA hhha(*Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - chrono["nv-slices"].stop(); - - chrono["nv-nv-slices"].start(); - // BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1 - LOG(0,"NEW_TRIPLES") << "BUILD NV x NV-SLICES\n"; - ABPH abph(*Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - ABHH abhh(*Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - TABHH tabhh(*Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - chrono["nv-nv-slices"].stop(); - - // all tensors - std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh}; - - //CONSTRUCT TUPLE LIST ==============================================={{{1 - LOG(0,"NEW_TRIPLES") << "BUILD TUPLE LIST\n"; - const auto tuplesList = std::move(getTuplesList(Nv)); - WITH_RANK << "tupList.size() = " << tuplesList.size() << "\n"; - - // GET ABC INDEX RANGE FOR RANK ======================================{{{1 - auto abcIndex = getABCRange(np, rank, tuplesList); - size_t nIterations = abcIndex.second - abcIndex.first; - -#ifdef TRIPLES_BENCHMARK - { const size_t maxIterations = getIntegerArgument("maxIterations", 0); - if (maxIterations != 0) { - abcIndex.second = abcIndex.first + maxIterations % (nIterations + 1); - nIterations = maxIterations % (nIterations + 1); - } - } -#endif - - WITH_RANK << "abcIndex = " << pretty_print(abcIndex) << "\n"; - LOG(0,"NEW_TRIPLES") << "#iterations: " - << nIterations << "\n"; - - // first abc - const ABCTuple firstAbc = tuplesList[abcIndex.first]; - - - double energy(0.); - - - auto const isFakeTuple - = [&tuplesList](size_t const i) { return i >= tuplesList.size(); }; - - - auto communicateDatabase - = [ &unions - , np - , &chrono - ] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database { - - chrono["db:comm:type:do"].start(); - auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement(); - chrono["db:comm:type:do"].stop(); - - chrono["db:comm:ldb"].start(); - Slice::LocalDatabase ldb; - - for (auto const& tensor: unions) { - auto const& tensorDb = tensor->buildLocalDatabase(abc); - ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end()); - } - chrono["db:comm:ldb"].stop(); - - Slice::Database db(np * ldb.size(), ldb[0]); - - chrono["oneshot-db:comm:allgather"].start(); - chrono["db:comm:allgather"].start(); - MPI_Allgather( ldb.data() - , ldb.size() - , MPI_LDB_ELEMENT - , db.data() - , ldb.size() - , MPI_LDB_ELEMENT - , c); - chrono["db:comm:allgather"].stop(); - chrono["oneshot-db:comm:allgather"].stop(); - - chrono["db:comm:type:free"].start(); - MPI_Type_free(&MPI_LDB_ELEMENT); - chrono["db:comm:type:free"].stop(); - - return db; - }; - - auto doIOPhase - = [&unions, &rank, &np, &universe, &chrono] (Slice::Database const& db) { - - const size_t localDBLength = db.size() / np; - - size_t sendTag = 0 - , recvTag = rank * localDBLength - ; - - // RECIEVE PHASE ====================================================== - { - // At this point, we have already send to everyone that fits - auto const& begin = &db[rank * localDBLength] - , end = begin + localDBLength - ; - for (auto it = begin; it != end; ++it) { - recvTag++; - auto const& el = *it; - auto& u = unionByName(unions, el.name); - - WITH_DBG std::cout - << rank << ":r" - << "♯" << recvTag << " =>" - << " «n" << el.name - << ", t" << el.info.type - << ", s" << el.info.state - << "»" - << " ⊙ {" << rank << "⇐" << el.info.from.rank - << ", " - << el.info.from.source << "}" - << " ∴ {" << el.info.tuple[0] - << ", " - << el.info.tuple[1] - << "}" - << "\n" - ; - - chrono["db:io:recv"].start(); - u.receive(el.info, recvTag); - chrono["db:io:recv"].stop(); - - } // recv - } - - // SEND PHASE ========================================================= - for (size_t otherRank = 0; otherRank" - << " «n" << el.name - << ", t" << el.info.type - << ", s" << el.info.state - << "»" - << " ⊙ {" << el.info.from.rank << "⇒" << otherRank - << ", " - << el.info.from.source << "}" - << " ∴ {" << el.info.tuple[0] - << ", " - << el.info.tuple[1] - << "}" - << "\n" - ; - - chrono["db:io:send"].start(); - u.send(otherRank, el.info, sendTag); - chrono["db:io:send"].stop(); - - } // send phase - - } // otherRank - - - }; - -#if defined(HAVE_OCD) || defined(TRIPLES_PRINT_TUPLES) - std::map tupleEnergies; -#endif - - const double doublesFlops - = double(No) - ,* double(No) - ,* double(No) - ,* (double(No) + double(Nv)) - ,* 2 - ,* 6 - / 1e9 - ; - - // START MAIN LOOP ======================================================{{{1 - - Slice::Database db; - - for ( size_t i = abcIndex.first, iteration = 1 - ; i < abcIndex.second - ; i++, iteration++ - ) { - chrono["iterations"].start(); - - // check overhead from chrono over all iterations - chrono["start:stop"].start(); chrono["start:stop"].stop(); - - // check overhead of doing a barrier at the beginning - chrono["oneshot-mpi:barrier"].start(); - chrono["mpi:barrier"].start(); - // TODO: REMOVE - if (getIntegerArgument("barrier", 1) == 1) - MPI_Barrier(universe); - chrono["mpi:barrier"].stop(); - chrono["oneshot-mpi:barrier"].stop(); - - if (iteration % getIntegerArgument("iterationMod", 100) == 0) { - LOG(0,"NEW_TRIPLES") - << "iteration " << iteration - << " [" << 100 * iteration / nIterations << "%]" - << " (" << doublesFlops * iteration / chrono["doubles"].count() - << "GF)" - << " (" << doublesFlops * iteration / chrono["iterations"].count() - << "GF)" - << " ===========================\n"; - - // PRINT TIMINGS - for (auto const& pair: chrono) - LOG(1, " ") << pair.first << " :: " - << pair.second.count() - << std::endl; - - } - - const ABCTuple abc = isFakeTuple(i) - ? tuplesList[tuplesList.size() - 1] - : tuplesList[i] - , *abcNext = i == (abcIndex.second - 1) - ? nullptr - : isFakeTuple(i + 1) - ? &tuplesList[tuplesList.size() - 1] - : &tuplesList[i + 1] - ; - - chrono["with_rank"].start(); - WITH_RANK << " :it " << iteration - << " :abc " << pretty_print(abc) - << " :abcN " - << (abcNext ? pretty_print(*abcNext) : "None") - << "\n"; - chrono["with_rank"].stop(); - - - // COMM FIRST DATABASE ================================================{{{1 - if (i == abcIndex.first) { - WITH_RANK << "__first__:first database ............ \n"; - const auto __db = communicateDatabase(abc, universe); - WITH_RANK << "__first__:first database communicated \n"; - WITH_RANK << "__first__:first database io phase \n"; - doIOPhase(__db); - WITH_RANK << "__first__:first database io phase DONE\n"; - WITH_RANK << "__first__::::Unwrapping all slices for first database\n"; - for (auto& u: unions) u->unwrapAll(abc); - WITH_RANK << "__first__::::Unwrapping all slices for first database DONE\n"; - MPI_Barrier(universe); - } - - // COMM NEXT DATABASE ================================================={{{1 - if (abcNext) { - WITH_RANK << "__comm__:" << iteration << "th communicating database\n"; - chrono["db:comm"].start(); - //const auto db = communicateDatabase(*abcNext, universe); - db = communicateDatabase(*abcNext, universe); - chrono["db:comm"].stop(); - chrono["db:io"].start(); - doIOPhase(db); - chrono["db:io"].stop(); - WITH_RANK << "__comm__:" << iteration << "th database io phase DONE\n"; - } - - // COMPUTE DOUBLES ===================================================={{{1 - OCD_Barrier(universe); - if (!isFakeTuple(i)) { - WITH_RANK << iteration << "-th doubles\n"; - WITH_CHRONO(chrono["oneshot-unwrap"], - WITH_CHRONO(chrono["unwrap"], - WITH_CHRONO(chrono["unwrap:doubles"], - for (auto& u: decltype(unions){&abph, &hhha, &taphh, &tabhh}) { - u->unwrapAll(abc); - } - ))) - chrono["oneshot-doubles"].start(); - chrono["doubles"].start(); - // TODO: REMOVE - for (size_t __i=0; __i < getIntegerArgument("doublesLoops", 1); __i++) - doublesContribution( abc, (size_t)No, (size_t)Nv - // -- VABCI - , abph.unwrapSlice(Slice::AB, abc) - , abph.unwrapSlice(Slice::AC, abc) - , abph.unwrapSlice(Slice::BC, abc) - , abph.unwrapSlice(Slice::BA, abc) - , abph.unwrapSlice(Slice::CA, abc) - , abph.unwrapSlice(Slice::CB, abc) - // -- VHHHA - , hhha.unwrapSlice(Slice::A, abc) - , hhha.unwrapSlice(Slice::B, abc) - , hhha.unwrapSlice(Slice::C, abc) - // -- TA - , taphh.unwrapSlice(Slice::A, abc) - , taphh.unwrapSlice(Slice::B, abc) - , taphh.unwrapSlice(Slice::C, abc) - // -- TABIJ - , tabhh.unwrapSlice(Slice::AB, abc) - , tabhh.unwrapSlice(Slice::AC, abc) - , tabhh.unwrapSlice(Slice::BC, abc) - // -- TIJK - , Tijk.data() - , chrono - ); - WITH_RANK << iteration << "-th doubles done\n"; - chrono["doubles"].stop(); - chrono["oneshot-doubles"].stop(); - } - - // COMPUTE SINGLES =================================================== {{{1 - OCD_Barrier(universe); - if (!isFakeTuple(i)) { - WITH_CHRONO(chrono["oneshot-unwrap"], - WITH_CHRONO(chrono["unwrap"], - WITH_CHRONO(chrono["unwrap:singles"], - abhh.unwrapAll(abc); - ))) - chrono["reorder"].start(); - for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I]; - chrono["reorder"].stop(); - chrono["singles"].start(); - singlesContribution( abc - , Tai.data() - , abhh.unwrapSlice(Slice::AB, abc) - , abhh.unwrapSlice(Slice::AC, abc) - , abhh.unwrapSlice(Slice::BC, abc) - , Zijk.data()); - chrono["singles"].stop(); - } - - - // COMPUTE ENERGY ==================================================== {{{1 - if (!isFakeTuple(i)) { - double tupleEnergy(0.); - - int distinct(0); - if (abc[0] == abc[1]) distinct++; - if (abc[1] == abc[2]) distinct--; - const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]); - - chrono["energy"].start(); - if ( distinct == 0) - tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk); - else - tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk); - chrono["energy"].stop(); - -#if defined(HAVE_OCD) || defined(TRIPLES_PRINT_TUPLES) - tupleEnergies[abc] = tupleEnergy; -#endif - - energy += tupleEnergy; - -#ifdef HAVE_OCD - auto const print_slices - = [](ABCTuple const& abc, ABCTuple const& want, SliceUnion& u) { - if (abc != want) return; - - for (auto type: u.sliceTypes) { - auto const& ptr = u.unwrapSlice(type, abc); - auto const& slice = Slice::findByTypeAbc(u.slices, type, abc); - WITH_RANK << "__print_slice__:n" << u.name << " " - << pretty_print(abc) << " " - << pretty_print(slice.info) - ; - for (size_t i = 0; i < 20; i++) std::cout << ptr[i] << ", "; - std::cout << std::endl; - } - }; -#endif - - if (isFakeTuple(i)) { - // fake iterations should also unwrap whatever they got - WITH_RANK << iteration - << "th unwrapping because of fake in " - << i << "\n"; - for (auto& u: unions) u->unwrapAll(abc); - } - -#ifdef HAVE_OCD - for (auto const& u: unions) { - WITH_RANK << "__dups__:" - << iteration - << "-th n" << u->name << " checking duplicates\n"; - u->checkForDuplicates(); - } -#endif - - - // CLEANUP UNIONS ===================================================={{{1 - OCD_Barrier(universe); - if (abcNext) { - chrono["gc"].start(); - WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n"; - for (auto& u: unions) { - - u->unwrapAll(abc); - WITH_RANK << "__gc__:n" << u->name << " :it " << iteration - << " :abc " << pretty_print(abc) - << " :abcN " << pretty_print(*abcNext) - << "\n"; - for (auto const& slice: u->slices) - WITH_RANK << "__gc__:guts:" << slice.info << "\n"; - u->clearUnusedSlicesForNext(*abcNext); - - WITH_RANK << "__gc__: checking validity\n"; - -#ifdef HAVE_OCD - // check for validity of the slices - for (auto type: u->sliceTypes) { - auto tuple = Slice::subtupleBySlice(abc, type); - for (auto& slice: u->slices) { - if ( slice.info.type == type - && slice.info.tuple == tuple - && slice.isDirectlyFetchable() - ) { - if (slice.info.state == Slice::Dispatched) - throw std::domain_error( "This slice should not be undispatched! " - + pretty_print(slice.info)); - } - } - } -#endif - - - } - chrono["gc"].stop(); - } - - WITH_RANK << iteration << "-th cleaning up....... DONE\n"; - } - - // CLEAN CHRONO ======================================================{{{1 - chrono["iterations"].stop(); - { // TODO: REMOVEME - chrono["oneshot-doubles"].clear(); - chrono["oneshot-mpi:barrier"].clear(); - chrono["oneshot-db:comm:allgather"].clear(); - chrono["oneshot-unwrap"].clear(); - } - - // ITERATION END ====================================================={{{1 - } // END OF MAIN LOOP - - MPI_Barrier(universe); - - // PRINT TUPLES ========================================================={{{1 -#if defined(HAVE_OCD) || defined(TRIPLES_PRINT_TUPLES) - LOG(0,"NEW_TRIPLES") << "tuple energies" << "\n"; - for (size_t i = 0; i < np; i++) { - MPI_Barrier(universe); - for (auto const& pair: tupleEnergies) { - if (i == rank) - std::cout << pair.first[0] - << " " << pair.first[1] - << " " << pair.first[2] - << std::setprecision(15) << std::setw(23) - << " tupleEnergy: " << pair.second - << "\n" - ; - } - } -#endif - - // COMMUNICATE THE ENERGIES ============================================={{{1 - LOG(0,"NEW_TRIPLES") << "COMMUNICATING ENERGIES \n"; - double globalEnergy = 0; - MPI_Reduce(&energy, &globalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, universe); - - WITH_RANK << "local energy " << energy << "\n"; - LOG(0,"LOOP FINISHED, energy") - << std::setprecision(15) << std::setw(23) - << globalEnergy << std::endl; - - // PRINT TIMINGS {{{1 - for (auto const& pair: chrono) - LOG(0,"atrip:chrono") << pair.first << " " - << pair.second.count() << std::endl; - - - LOG(0, "atrip:flops") - << nIterations * doublesFlops / chrono["doubles"].count() << "\n"; - -} - #+end_src