Update the readme

2021-09-03 15:54:06 +02:00
parent 087bb57c85
commit 0063518e58
1 changed files with 45 additions and 620 deletions
--- a/README.org
+++ b/README.org
@@ -594,6 +594,7 @@ namespace atrip {
 ** The slice union
 #+begin_src c++ :tangle (atrip-slice-union-h)
 #pragma once
+#include <atrip/Debug.hpp>
 #include <atrip/Slice.hpp>

 namespace atrip {
@@ -763,7 +764,7 @@ namespace atrip {
          blank.info.from = from;

          // Handle self sufficiency
-          blank.info.state = cc4s::Cc4s::world->rank == from.rank
+          blank.info.state = Atrip::rank == from.rank
                           ? Slice::SelfSufficient
                           : Slice::Fetch
                           ;
@@ -983,7 +984,7 @@ namespace atrip {
    void init(Tensor const& sourceTensor) {

      CTF::World w(world);
-      const int rank = cc4s::Cc4s::world->rank
+      const int rank = Atrip::rank
              , order = sliceLength.size()
              ;
      std::vector<int> const syms(order, NS);
@@ -1040,7 +1041,7 @@ namespace atrip {
    void receive(Slice::Info const& info, size_t tag) noexcept {
      auto& slice = Slice::findByInfo(slices, info);

-      if (cc4s::Cc4s::world->rank == info.from.rank) return;
+      if (Atrip::rank == info.from.rank) return;

      if (slice.info.state == Slice::Fetch) {
        // TODO: do it through the slice class
@@ -1143,6 +1144,7 @@ namespace atrip {
 #include <array>

 #include <atrip/Utils.hpp>
+#include <atrip/Debug.hpp>

 namespace atrip {

@@ -1278,7 +1280,7 @@ namespace atrip {

    void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override
    {
-      const int rank = cc4s::Cc4s::world->rank
+      const int rank = Atrip::rank
              , Nv = sliceLength[0]
              , No = sliceLength[1]
              , a = rankMap.find({rank, it});
@@ -1317,7 +1319,7 @@ namespace atrip {
    void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override
    {

-      const int rank = cc4s::Cc4s::world->rank
+      const int rank = Atrip::rank
              , No = sliceLength[0]
              , a = rankMap.find({rank, it})
              ;
@@ -1355,7 +1357,7 @@ namespace atrip {

      const int Nv = sliceLength[0]
              , No = sliceLength[1]
-              , rank = cc4s::Cc4s::world->rank
+              , rank = Atrip::rank
              , el = rankMap.find({rank, it})
              , a = el % Nv
              , b = el / Nv
@@ -1394,7 +1396,7 @@ namespace atrip {

      const int Nv = from.lens[0]
              , No = sliceLength[1]
-              , rank = cc4s::Cc4s::world->rank
+              , rank = Atrip::rank
              , el = rankMap.find({rank, it})
              , a = el % Nv
              , b = el / Nv
@@ -1435,7 +1437,7 @@ namespace atrip {

      const int Nv = from.lens[0]
              , No = sliceLength[1]
-              , rank = cc4s::Cc4s::world->rank
+              , rank = Atrip::rank
              , el = rankMap.find({rank, it})
              , a = el % Nv
              , b = el / Nv
@@ -1561,7 +1563,9 @@ namespace atrip {
  }

  void singlesContribution
-    ( const ABCTuple &abc
+    ( size_t No
+    , size_t Nv
+    , const ABCTuple &abc
    , double const* Tph
    , double const* VABij
    , double const* VACij
@@ -1835,11 +1839,7 @@ namespace atrip {

    static int rank;
    static int np;
-
-    static void init() {
-      MPI_Comm_rank(MPI_COMM_WORLD, &Atrip::rank);
-      MPI_Comm_size(MPI_COMM_WORLD, &Atrip::np);
-    }
+    static void init();

    struct Input {
      CTF::Tensor<double> *ei = nullptr
@@ -1850,6 +1850,8 @@ namespace atrip {
                        , *Vhhhp = nullptr
                        , *Vppph = nullptr
                        ;
+      int maxIterations = 0, iterationMod = -1;
+      bool barrier = true;
      Input& with_epsilon_i(CTF::Tensor<double> * t) { ei = t; return *this; }
      Input& with_epsilon_a(CTF::Tensor<double> * t) { ea = t; return *this; }
      Input& with_Tai(CTF::Tensor<double> * t) { Tph = t; return *this; }
@@ -1864,20 +1866,30 @@ namespace atrip {
    };
    static Output run(Input const& in);
  };
-  int Atrip::rank;
-  int Atrip::np;

 }
 #+end_src

 #+begin_src c++ :tangle (atrip-atrip-cxx)
+#include <iomanip>
+
 #include <atrip/Atrip.hpp>
 #include <atrip/Utils.hpp>
+#include <atrip/Equations.hpp>
+#include <atrip/SliceUnion.hpp>
+#include <atrip/Unions.hpp>

 using namespace atrip;

+int Atrip::rank;
+int Atrip::np;

-Output Atrip::run(Input const& in){
+void Atrip::init()  { // store this process's MPI_COMM_WORLD rank and size in Atrip::rank / Atrip::np
+  MPI_Comm_rank(MPI_COMM_WORLD, &Atrip::rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &Atrip::np);
+}
+
+Atrip::Output Atrip::run(Atrip::Input const& in){

  const int np = Atrip::np;
  const int rank = Atrip::rank;
@@ -1886,8 +1898,8 @@ Output Atrip::run(Input const& in){
  // Timings in seconds ================================================{{{1
  Timings chrono{};

-  No = in.ei->lens[0];
-  Nv = in.ea->lens[0];
+  const size_t No = in.ei->lens[0];
+  const size_t Nv = in.ea->lens[0];
  LOG(0,"NEW_TRIPLES") << "No: " << No << "\n";
  LOG(0,"NEW_TRIPLES") << "Nv: " << Nv << "\n";

@@ -1954,7 +1966,7 @@ Output Atrip::run(Input const& in){
  size_t nIterations = abcIndex.second - abcIndex.first;

 #ifdef TRIPLES_BENCHMARK
-  { const size_t maxIterations = getIntegerArgument("maxIterations", 0);
+  { const size_t maxIterations = in.maxIterations;
    if (maxIterations != 0) {
      abcIndex.second = abcIndex.first + maxIterations % (nIterations + 1);
      nIterations = maxIterations % (nIterations + 1);
@@ -2132,12 +2144,12 @@ Output Atrip::run(Input const& in){
    chrono["oneshot-mpi:barrier"].start();
    chrono["mpi:barrier"].start();
    // TODO: REMOVE
-    if (getIntegerArgument("barrier", 1) == 1)
+    if (in.barrier == 1)
    MPI_Barrier(universe);
    chrono["mpi:barrier"].stop();
    chrono["oneshot-mpi:barrier"].stop();

-    if (iteration % getIntegerArgument("iterationMod", 100) == 0) {
+    if (in.iterationMod > 0 && iteration % in.iterationMod == 0) { // iterationMod <= 0 disables progress logs; also avoids % 0
      LOG(0,"NEW_TRIPLES")
        << "iteration " << iteration
        << " [" << 100 * iteration / nIterations << "%]"
@@ -2214,8 +2226,6 @@ Output Atrip::run(Input const& in){
      )))
      chrono["oneshot-doubles"].start();
      chrono["doubles"].start();
-      // TODO: REMOVE
-      for (size_t __i=0; __i < getIntegerArgument("doublesLoops", 1); __i++)
      doublesContribution( abc, (size_t)No, (size_t)Nv
                         // -- VABCI
                         , abph.unwrapSlice(Slice::AB, abc)
@@ -2257,7 +2267,7 @@ Output Atrip::run(Input const& in){
      for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
      chrono["reorder"].stop();
      chrono["singles"].start();
-      singlesContribution( abc
+      singlesContribution( No, Nv, abc
                         , Tai.data()
                         , abhh.unwrapSlice(Slice::AB, abc)
                         , abhh.unwrapSlice(Slice::AC, abc)
@@ -2423,17 +2433,17 @@ Output Atrip::run(Input const& in){
 #+end_src


-** Include header
-
-#+begin_src c++ :tangle (atrip-main-h)
+** Debug
+#+begin_src c++ :tangle (atrip-debug-h)
 #pragma once
-
 #define TRIPLES_BENCHMARK
 #define TRIPLES_DEBUG 1
 //#define TRIPLES_WORKLOAD_DUMP
 #define TRIPLES_USE_DGEMM
 //#define TRIPLES_PRINT_TUPLES

+#define LOG(name, level) if (Atrip::rank == 0) std::cout << name << ": "
+
 #if TRIPLES_DEBUG == 4
 #  pragma message("WARNING: You have OCD debugging ABC triples "\
                  "expect GB of output and consult your therapist")
@@ -2482,598 +2492,13 @@ Output Atrip::run(Input const& in){
 #else
 #  error("TRIPLES_DEBUG is not defined!")
 #endif
+#+end_src
+
+** Include header
+
+#+begin_src c++ :tangle (atrip-main-h)
+#pragma once

-#include <atrip/Utils.hpp>
-#include <atrip/Slice.hpp>
 #include <atrip/Atrip.hpp>

-
 #+end_src
-
-
-** Todo                                                            :noexport:
- #+begin_src c++  :tangle todo.hpp
-#include <algorithms/PerturbativeTriplesAbcijk.hpp>
-#include <math/MathFunctions.hpp>
-#include <tcc/DryTensor.hpp>
-#include <util/Log.hpp>
-#include <util/Exception.hpp>
-#include <Cc4s.hpp>
-#include <ctf.hpp>
-#include <util/MpiCommunicator.hpp>
-
-using namespace cc4s;
-
-#include <sstream>
-#include <algorithm>
-#include <numeric>
-#include <array>
-#include <vector>
-#include <map>
-#include <memory>
-#include <set>
-#include <chrono>
-
-
-#+end_src
-
-#+begin_src c++ :tangle todo.hpp
-
-
-
-
-// MAIN ALGORITHM ======================================================{{{1
-void cc4s::PerturbativeTriplesAbcijk::run(){
-
-  const int np = cc4s::Cc4s::world->np;
-  const int rank = cc4s::Cc4s::world->rank;
-  MPI_Comm universe = cc4s::Cc4s::world->comm;
-
-  // Timings in seconds ================================================{{{1
-  atrip::Timings chrono{};
-
-  // Get the distributed ctf tensor data
-  CTF::Tensor<> *ei(getTensorArgument("HoleEigenEnergies"))
-              , *ea(getTensorArgument("ParticleEigenEnergies"))
-              , *Tph(getTensorArgument("CcsdSinglesAmplitudes"))
-              , *Tpphh(getTensorArgument("CcsdDoublesAmplitudes"))
-              , *Vpphh(getTensorArgument("PPHHCoulombIntegrals"))
-              , *Vhhhp(getTensorArgument("HHHPCoulombIntegrals"))
-              , *Vppph(getTensorArgument("PPPHCoulombIntegrals"))
-              ;
-
-  No = ei->lens[0];
-  Nv = ea->lens[0];
-  LOG(0,"NEW_TRIPLES") << "No: " << No << "\n";
-  LOG(0,"NEW_TRIPLES") << "Nv: " << Nv << "\n";
-
-  // allocate the three scratches, see piecuch
-  std::vector<double> Tijk(No*No*No) // doubles only (see piecuch)
-                    , Zijk(No*No*No) // singles + doubles (see piecuch)
-                    // we need local copies of the following tensors on every
-                    // rank
-                    , epsi(No)
-                    , epsa(Nv)
-                    , Tai(No * Nv)
-                    ;
-
-  ei->read_all(epsi.data());
-  ea->read_all(epsa.data());
-  Tph->read_all(Tai.data());
-
-  // COMMUNICATOR CONSTRUCTION ========================================={{{1
-  //
-  // Construct a new communicator living only on a single rank
-  int child_size = 1
-    , child_rank
-    ;
-  const
-  int color = rank / child_size
-    , crank = rank % child_size
-    ;
-  MPI_Comm child_comm;
-  if (np == 1) {
-    child_comm = universe;
-  } else {
-    MPI_Comm_split(cc4s::Cc4s::world->comm, color, crank, &child_comm);
-    MPI_Comm_rank(child_comm, &child_rank);
-    MPI_Comm_size(child_comm, &child_size);
-    //CTF::World child_world(child_comm);
-  }
-
-
-  chrono["nv-slices"].start();
-  // BUILD SLICES PARAMETRIZED BY NV ==================================={{{1
-  LOG(0,"NEW_TRIPLES") << "BUILD NV-SLICES\n";
-  TAPHH taphh(*Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
-  HHHA  hhha(*Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
-  chrono["nv-slices"].stop();
-
-  chrono["nv-nv-slices"].start();
-  // BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1
-  LOG(0,"NEW_TRIPLES") << "BUILD NV x NV-SLICES\n";
-  ABPH abph(*Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
-  ABHH abhh(*Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
-  TABHH tabhh(*Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
-  chrono["nv-nv-slices"].stop();
-
-  // all tensors
-  std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh};
-
-  //CONSTRUCT TUPLE LIST ==============================================={{{1
-  LOG(0,"NEW_TRIPLES") << "BUILD TUPLE LIST\n";
-  const auto tuplesList = std::move(getTuplesList(Nv));
-  WITH_RANK << "tupList.size() = " << tuplesList.size() << "\n";
-
-  // GET ABC INDEX RANGE FOR RANK ======================================{{{1
-  auto abcIndex = getABCRange(np, rank, tuplesList);
-  size_t nIterations = abcIndex.second - abcIndex.first;
-
-#ifdef TRIPLES_BENCHMARK
-  { const size_t maxIterations = getIntegerArgument("maxIterations", 0);
-    if (maxIterations != 0) {
-      abcIndex.second = abcIndex.first + maxIterations % (nIterations + 1);
-      nIterations = maxIterations % (nIterations + 1);
-    }
-  }
-#endif
-
-  WITH_RANK << "abcIndex = " << pretty_print(abcIndex) << "\n";
-  LOG(0,"NEW_TRIPLES") << "#iterations: "
-                       << nIterations << "\n";
-
-  // first abc
-  const ABCTuple firstAbc = tuplesList[abcIndex.first];
-
-
-  double energy(0.);
-
-
-  auto const isFakeTuple
-    = [&tuplesList](size_t const i) { return i >= tuplesList.size(); };
-
-
-  auto communicateDatabase
-    = [ &unions
-      , np
-      , &chrono
-      ] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database {
-
-        chrono["db:comm:type:do"].start();
-        auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement();
-        chrono["db:comm:type:do"].stop();
-
-        chrono["db:comm:ldb"].start();
-        Slice::LocalDatabase ldb;
-
-        for (auto const& tensor: unions) {
-          auto const& tensorDb = tensor->buildLocalDatabase(abc);
-          ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end());
-        }
-        chrono["db:comm:ldb"].stop();
-
-        Slice::Database db(np * ldb.size(), ldb[0]);
-
-        chrono["oneshot-db:comm:allgather"].start();
-        chrono["db:comm:allgather"].start();
-        MPI_Allgather( ldb.data()
-                     , ldb.size()
-                     , MPI_LDB_ELEMENT
-                     , db.data()
-                     , ldb.size()
-                     , MPI_LDB_ELEMENT
-                     , c);
-        chrono["db:comm:allgather"].stop();
-        chrono["oneshot-db:comm:allgather"].stop();
-
-        chrono["db:comm:type:free"].start();
-        MPI_Type_free(&MPI_LDB_ELEMENT);
-        chrono["db:comm:type:free"].stop();
-
-        return db;
-      };
-
-  auto doIOPhase
-    = [&unions, &rank, &np, &universe, &chrono] (Slice::Database const& db) {
-
-    const size_t localDBLength = db.size() / np;
-
-    size_t sendTag = 0
-         , recvTag = rank * localDBLength
-         ;
-
-    // RECIEVE PHASE ======================================================
-    {
-      // At this point, we have already send to everyone that fits
-      auto const& begin = &db[rank * localDBLength]
-                , end   = begin + localDBLength
-                ;
-      for (auto it = begin; it != end; ++it) {
-        recvTag++;
-        auto const& el = *it;
-        auto& u = unionByName(unions, el.name);
-
-        WITH_DBG std::cout
-          << rank << ":r"
-          << "♯" << recvTag << " =>"
-          << " «n" << el.name
-          << ", t" << el.info.type
-          << ", s" << el.info.state
-          << "»"
-          << " ⊙ {" << rank << "⇐" << el.info.from.rank
-                    << ", "
-                    << el.info.from.source << "}"
-          << " ∴ {" << el.info.tuple[0]
-                    << ", "
-                    << el.info.tuple[1]
-                    << "}"
-          << "\n"
-          ;
-
-        chrono["db:io:recv"].start();
-        u.receive(el.info, recvTag);
-        chrono["db:io:recv"].stop();
-
-      } // recv
-    }
-
-    // SEND PHASE =========================================================
-    for (size_t otherRank = 0; otherRank<np; otherRank++) {
-      auto const& begin = &db[otherRank * localDBLength]
-                , end = begin + localDBLength
-                ;
-      for (auto it = begin; it != end; ++it) {
-        sendTag++;
-        Slice::LocalDatabaseElement const& el = *it;
-
-        if (el.info.from.rank != rank) continue;
-
-        auto& u = unionByName(unions, el.name);
-        WITH_DBG std::cout
-          << rank << ":s"
-          << "♯" << sendTag << " =>"
-          << " «n" << el.name
-          << ", t" << el.info.type
-          << ", s" << el.info.state
-          << "»"
-          << " ⊙ {" << el.info.from.rank << "⇒" << otherRank
-                    << ", "
-                    << el.info.from.source << "}"
-          << " ∴ {" << el.info.tuple[0]
-                    << ", "
-                    << el.info.tuple[1]
-                    << "}"
-          << "\n"
-          ;
-
-        chrono["db:io:send"].start();
-        u.send(otherRank, el.info, sendTag);
-        chrono["db:io:send"].stop();
-
-      } // send phase
-
-    } // otherRank
-
-
-  };
-
-#if defined(HAVE_OCD) || defined(TRIPLES_PRINT_TUPLES)
-  std::map<ABCTuple, double> tupleEnergies;
-#endif
-
-  const double doublesFlops
-    = double(No)
-    ,* double(No)
-    ,* double(No)
-    ,* (double(No) + double(Nv))
-    ,* 2
-    ,* 6
-    / 1e9
-    ;
-
-  // START MAIN LOOP ======================================================{{{1
-
-  Slice::Database db;
-
-  for ( size_t i = abcIndex.first, iteration = 1
-      ; i < abcIndex.second
-      ; i++, iteration++
-      ) {
-    chrono["iterations"].start();
-
-    // check overhead from chrono over all iterations
-    chrono["start:stop"].start(); chrono["start:stop"].stop();
-
-    // check overhead of doing a barrier at the beginning
-    chrono["oneshot-mpi:barrier"].start();
-    chrono["mpi:barrier"].start();
-    // TODO: REMOVE
-    if (getIntegerArgument("barrier", 1) == 1)
-    MPI_Barrier(universe);
-    chrono["mpi:barrier"].stop();
-    chrono["oneshot-mpi:barrier"].stop();
-
-    if (iteration % getIntegerArgument("iterationMod", 100) == 0) {
-      LOG(0,"NEW_TRIPLES")
-        << "iteration " << iteration
-        << " [" << 100 * iteration / nIterations << "%]"
-        << " (" << doublesFlops * iteration / chrono["doubles"].count()
-        << "GF)"
-        << " (" << doublesFlops * iteration / chrono["iterations"].count()
-        << "GF)"
-        << " ===========================\n";
-
-      // PRINT TIMINGS
-      for (auto const& pair: chrono)
-        LOG(1, " ") << pair.first << " :: "
-                    << pair.second.count()
-                    << std::endl;
-
-    }
-
-    const ABCTuple abc = isFakeTuple(i)
-                       ? tuplesList[tuplesList.size() - 1]
-                       : tuplesList[i]
-                 , *abcNext = i == (abcIndex.second - 1)
-                            ? nullptr
-                            : isFakeTuple(i + 1)
-                            ? &tuplesList[tuplesList.size() - 1]
-                            : &tuplesList[i + 1]
-                 ;
-
-    chrono["with_rank"].start();
-    WITH_RANK << " :it " << iteration
-              << " :abc " << pretty_print(abc)
-              << " :abcN "
-              << (abcNext ? pretty_print(*abcNext) : "None")
-              << "\n";
-    chrono["with_rank"].stop();
-
-
-    // COMM FIRST DATABASE ================================================{{{1
-    if (i == abcIndex.first) {
-      WITH_RANK << "__first__:first database ............ \n";
-      const auto __db = communicateDatabase(abc, universe);
-      WITH_RANK << "__first__:first database communicated \n";
-      WITH_RANK << "__first__:first database io phase \n";
-      doIOPhase(__db);
-      WITH_RANK << "__first__:first database io phase DONE\n";
-      WITH_RANK << "__first__::::Unwrapping all slices for first database\n";
-      for (auto& u: unions) u->unwrapAll(abc);
-      WITH_RANK << "__first__::::Unwrapping all slices for first database DONE\n";
-      MPI_Barrier(universe);
-    }
-
-    // COMM NEXT DATABASE ================================================={{{1
-    if (abcNext) {
-      WITH_RANK << "__comm__:" << iteration << "th communicating database\n";
-      chrono["db:comm"].start();
-      //const auto db = communicateDatabase(*abcNext, universe);
-      db = communicateDatabase(*abcNext, universe);
-      chrono["db:comm"].stop();
-      chrono["db:io"].start();
-      doIOPhase(db);
-      chrono["db:io"].stop();
-      WITH_RANK << "__comm__:" <<  iteration << "th database io phase DONE\n";
-    }
-
-    // COMPUTE DOUBLES ===================================================={{{1
-    OCD_Barrier(universe);
-    if (!isFakeTuple(i)) {
-      WITH_RANK << iteration << "-th doubles\n";
-      WITH_CHRONO(chrono["oneshot-unwrap"],
-      WITH_CHRONO(chrono["unwrap"],
-      WITH_CHRONO(chrono["unwrap:doubles"],
-        for (auto& u: decltype(unions){&abph, &hhha, &taphh, &tabhh}) {
-          u->unwrapAll(abc);
-        }
-      )))
-      chrono["oneshot-doubles"].start();
-      chrono["doubles"].start();
-      // TODO: REMOVE
-      for (size_t __i=0; __i < getIntegerArgument("doublesLoops", 1); __i++)
-      doublesContribution( abc, (size_t)No, (size_t)Nv
-                         // -- VABCI
-                         , abph.unwrapSlice(Slice::AB, abc)
-                         , abph.unwrapSlice(Slice::AC, abc)
-                         , abph.unwrapSlice(Slice::BC, abc)
-                         , abph.unwrapSlice(Slice::BA, abc)
-                         , abph.unwrapSlice(Slice::CA, abc)
-                         , abph.unwrapSlice(Slice::CB, abc)
-                         // -- VHHHA
-                         , hhha.unwrapSlice(Slice::A, abc)
-                         , hhha.unwrapSlice(Slice::B, abc)
-                         , hhha.unwrapSlice(Slice::C, abc)
-                         // -- TA
-                         , taphh.unwrapSlice(Slice::A, abc)
-                         , taphh.unwrapSlice(Slice::B, abc)
-                         , taphh.unwrapSlice(Slice::C, abc)
-                         // -- TABIJ
-                         , tabhh.unwrapSlice(Slice::AB, abc)
-                         , tabhh.unwrapSlice(Slice::AC, abc)
-                         , tabhh.unwrapSlice(Slice::BC, abc)
-                         // -- TIJK
-                         , Tijk.data()
-                         , chrono
-                         );
-      WITH_RANK << iteration << "-th doubles done\n";
-      chrono["doubles"].stop();
-      chrono["oneshot-doubles"].stop();
-    }
-
-    // COMPUTE SINGLES =================================================== {{{1
-    OCD_Barrier(universe);
-    if (!isFakeTuple(i)) {
-      WITH_CHRONO(chrono["oneshot-unwrap"],
-      WITH_CHRONO(chrono["unwrap"],
-      WITH_CHRONO(chrono["unwrap:singles"],
-        abhh.unwrapAll(abc);
-      )))
-      chrono["reorder"].start();
-      for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
-      chrono["reorder"].stop();
-      chrono["singles"].start();
-      singlesContribution( abc
-                         , Tai.data()
-                         , abhh.unwrapSlice(Slice::AB, abc)
-                         , abhh.unwrapSlice(Slice::AC, abc)
-                         , abhh.unwrapSlice(Slice::BC, abc)
-                         , Zijk.data());
-      chrono["singles"].stop();
-    }
-
-
-    // COMPUTE ENERGY ==================================================== {{{1
-    if (!isFakeTuple(i)) {
-      double tupleEnergy(0.);
-
-      int distinct(0);
-      if (abc[0] == abc[1]) distinct++;
-      if (abc[1] == abc[2]) distinct--;
-      const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]);
-
-      chrono["energy"].start();
-      if ( distinct == 0)
-        tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk);
-      else
-        tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk);
-      chrono["energy"].stop();
-
-#if defined(HAVE_OCD) || defined(TRIPLES_PRINT_TUPLES)
-      tupleEnergies[abc] = tupleEnergy;
-#endif
-
-      energy += tupleEnergy;
-
-#ifdef HAVE_OCD
-      auto const print_slices
-        = [](ABCTuple const& abc, ABCTuple const& want, SliceUnion& u) {
-          if (abc != want) return;
-
-          for (auto type: u.sliceTypes) {
-            auto const& ptr = u.unwrapSlice(type, abc);
-            auto const& slice = Slice::findByTypeAbc(u.slices, type, abc);
-            WITH_RANK << "__print_slice__:n" << u.name << " "
-                      << pretty_print(abc) << " "
-                      << pretty_print(slice.info)
-                      ;
-            for (size_t i = 0; i < 20; i++) std::cout << ptr[i] << ", ";
-            std::cout << std::endl;
-          }
-        };
-#endif
-
-    if (isFakeTuple(i)) {
-      // fake iterations should also unwrap whatever they got
-      WITH_RANK << iteration
-                << "th unwrapping because of fake in "
-                << i << "\n";
-      for (auto& u: unions) u->unwrapAll(abc);
-    }
-
-#ifdef HAVE_OCD
-    for (auto const& u: unions) {
-      WITH_RANK << "__dups__:"
-                << iteration
-                << "-th n" << u->name << " checking duplicates\n";
-      u->checkForDuplicates();
-    }
-#endif
-
-
-    // CLEANUP UNIONS ===================================================={{{1
-    OCD_Barrier(universe);
-    if (abcNext) {
-      chrono["gc"].start();
-      WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n";
-      for (auto& u: unions) {
-
-        u->unwrapAll(abc);
-        WITH_RANK << "__gc__:n" << u->name  << " :it " << iteration
-                  << " :abc " << pretty_print(abc)
-                  << " :abcN " << pretty_print(*abcNext)
-                  << "\n";
-        for (auto const& slice: u->slices)
-          WITH_RANK << "__gc__:guts:" << slice.info << "\n";
-        u->clearUnusedSlicesForNext(*abcNext);
-
-        WITH_RANK << "__gc__: checking validity\n";
-
-#ifdef HAVE_OCD
-        // check for validity of the slices
-        for (auto type: u->sliceTypes) {
-          auto tuple = Slice::subtupleBySlice(abc, type);
-        for (auto& slice: u->slices) {
-          if ( slice.info.type == type
-             && slice.info.tuple == tuple
-             && slice.isDirectlyFetchable()
-             ) {
-            if (slice.info.state == Slice::Dispatched)
-              throw std::domain_error( "This slice should not be undispatched! "
-                                     + pretty_print(slice.info));
-          }
-        }
-        }
-#endif
-
-
-      }
-      chrono["gc"].stop();
-    }
-
-      WITH_RANK << iteration << "-th cleaning up....... DONE\n";
-    }
-
-    // CLEAN CHRONO ======================================================{{{1
-    chrono["iterations"].stop();
-    { // TODO: REMOVEME
-      chrono["oneshot-doubles"].clear();
-      chrono["oneshot-mpi:barrier"].clear();
-      chrono["oneshot-db:comm:allgather"].clear();
-      chrono["oneshot-unwrap"].clear();
-    }
-
-    // ITERATION END ====================================================={{{1
-  }  // END OF MAIN LOOP
-
-  MPI_Barrier(universe);
-
-  // PRINT TUPLES ========================================================={{{1
-#if defined(HAVE_OCD) || defined(TRIPLES_PRINT_TUPLES)
-  LOG(0,"NEW_TRIPLES") << "tuple energies" << "\n";
-  for (size_t i = 0; i < np; i++) {
-    MPI_Barrier(universe);
-    for (auto const& pair: tupleEnergies) {
-      if (i == rank)
-        std::cout << pair.first[0]
-                  << " " << pair.first[1]
-                  << " " << pair.first[2]
-                  << std::setprecision(15) << std::setw(23)
-                  << " tupleEnergy: " << pair.second
-                  << "\n"
-                  ;
-    }
-  }
-#endif
-
-  // COMMUNICATE THE ENERGIES ============================================={{{1
-  LOG(0,"NEW_TRIPLES") << "COMMUNICATING ENERGIES \n";
-  double globalEnergy = 0;
-  MPI_Reduce(&energy, &globalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, universe);
-
-  WITH_RANK << "local energy " << energy << "\n";
-  LOG(0,"LOOP FINISHED, energy")
-    << std::setprecision(15) << std::setw(23)
-    << globalEnergy << std::endl;
-
-  // PRINT TIMINGS {{{1
-  for (auto const& pair: chrono)
-    LOG(0,"atrip:chrono") << pair.first << " "
-                          << pair.second.count() << std::endl;
-
-
-  LOG(0, "atrip:flops")
-    << nIterations * doublesFlops / chrono["doubles"].count() << "\n";
-
-}
- #+end_src