Update the readme
This commit is contained in:
parent
087bb57c85
commit
0063518e58
665
README.org
665
README.org
@ -594,6 +594,7 @@ namespace atrip {
|
|||||||
** The slice union
|
** The slice union
|
||||||
#+begin_src c++ :tangle (atrip-slice-union-h)
|
#+begin_src c++ :tangle (atrip-slice-union-h)
|
||||||
#pragma once
|
#pragma once
|
||||||
|
#include <atrip/Debug.hpp>
|
||||||
#include <atrip/Slice.hpp>
|
#include <atrip/Slice.hpp>
|
||||||
|
|
||||||
namespace atrip {
|
namespace atrip {
|
||||||
@ -763,7 +764,7 @@ namespace atrip {
|
|||||||
blank.info.from = from;
|
blank.info.from = from;
|
||||||
|
|
||||||
// Handle self sufficiency
|
// Handle self sufficiency
|
||||||
blank.info.state = cc4s::Cc4s::world->rank == from.rank
|
blank.info.state = Atrip::rank == from.rank
|
||||||
? Slice::SelfSufficient
|
? Slice::SelfSufficient
|
||||||
: Slice::Fetch
|
: Slice::Fetch
|
||||||
;
|
;
|
||||||
@ -983,7 +984,7 @@ namespace atrip {
|
|||||||
void init(Tensor const& sourceTensor) {
|
void init(Tensor const& sourceTensor) {
|
||||||
|
|
||||||
CTF::World w(world);
|
CTF::World w(world);
|
||||||
const int rank = cc4s::Cc4s::world->rank
|
const int rank = Atrip::rank
|
||||||
, order = sliceLength.size()
|
, order = sliceLength.size()
|
||||||
;
|
;
|
||||||
std::vector<int> const syms(order, NS);
|
std::vector<int> const syms(order, NS);
|
||||||
@ -1040,7 +1041,7 @@ namespace atrip {
|
|||||||
void receive(Slice::Info const& info, size_t tag) noexcept {
|
void receive(Slice::Info const& info, size_t tag) noexcept {
|
||||||
auto& slice = Slice::findByInfo(slices, info);
|
auto& slice = Slice::findByInfo(slices, info);
|
||||||
|
|
||||||
if (cc4s::Cc4s::world->rank == info.from.rank) return;
|
if (Atrip::rank == info.from.rank) return;
|
||||||
|
|
||||||
if (slice.info.state == Slice::Fetch) {
|
if (slice.info.state == Slice::Fetch) {
|
||||||
// TODO: do it through the slice class
|
// TODO: do it through the slice class
|
||||||
@ -1143,6 +1144,7 @@ namespace atrip {
|
|||||||
#include <array>
|
#include <array>
|
||||||
|
|
||||||
#include <atrip/Utils.hpp>
|
#include <atrip/Utils.hpp>
|
||||||
|
#include <atrip/Debug.hpp>
|
||||||
|
|
||||||
namespace atrip {
|
namespace atrip {
|
||||||
|
|
||||||
@ -1278,7 +1280,7 @@ namespace atrip {
|
|||||||
|
|
||||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override
|
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override
|
||||||
{
|
{
|
||||||
const int rank = cc4s::Cc4s::world->rank
|
const int rank = Atrip::rank
|
||||||
, Nv = sliceLength[0]
|
, Nv = sliceLength[0]
|
||||||
, No = sliceLength[1]
|
, No = sliceLength[1]
|
||||||
, a = rankMap.find({rank, it});
|
, a = rankMap.find({rank, it});
|
||||||
@ -1317,7 +1319,7 @@ namespace atrip {
|
|||||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override
|
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override
|
||||||
{
|
{
|
||||||
|
|
||||||
const int rank = cc4s::Cc4s::world->rank
|
const int rank = Atrip::rank
|
||||||
, No = sliceLength[0]
|
, No = sliceLength[0]
|
||||||
, a = rankMap.find({rank, it})
|
, a = rankMap.find({rank, it})
|
||||||
;
|
;
|
||||||
@ -1355,7 +1357,7 @@ namespace atrip {
|
|||||||
|
|
||||||
const int Nv = sliceLength[0]
|
const int Nv = sliceLength[0]
|
||||||
, No = sliceLength[1]
|
, No = sliceLength[1]
|
||||||
, rank = cc4s::Cc4s::world->rank
|
, rank = Atrip::rank
|
||||||
, el = rankMap.find({rank, it})
|
, el = rankMap.find({rank, it})
|
||||||
, a = el % Nv
|
, a = el % Nv
|
||||||
, b = el / Nv
|
, b = el / Nv
|
||||||
@ -1394,7 +1396,7 @@ namespace atrip {
|
|||||||
|
|
||||||
const int Nv = from.lens[0]
|
const int Nv = from.lens[0]
|
||||||
, No = sliceLength[1]
|
, No = sliceLength[1]
|
||||||
, rank = cc4s::Cc4s::world->rank
|
, rank = Atrip::rank
|
||||||
, el = rankMap.find({rank, it})
|
, el = rankMap.find({rank, it})
|
||||||
, a = el % Nv
|
, a = el % Nv
|
||||||
, b = el / Nv
|
, b = el / Nv
|
||||||
@ -1435,7 +1437,7 @@ namespace atrip {
|
|||||||
|
|
||||||
const int Nv = from.lens[0]
|
const int Nv = from.lens[0]
|
||||||
, No = sliceLength[1]
|
, No = sliceLength[1]
|
||||||
, rank = cc4s::Cc4s::world->rank
|
, rank = Atrip::rank
|
||||||
, el = rankMap.find({rank, it})
|
, el = rankMap.find({rank, it})
|
||||||
, a = el % Nv
|
, a = el % Nv
|
||||||
, b = el / Nv
|
, b = el / Nv
|
||||||
@ -1561,7 +1563,9 @@ namespace atrip {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void singlesContribution
|
void singlesContribution
|
||||||
( const ABCTuple &abc
|
( size_t No
|
||||||
|
, size_t Nv
|
||||||
|
, const ABCTuple &abc
|
||||||
, double const* Tph
|
, double const* Tph
|
||||||
, double const* VABij
|
, double const* VABij
|
||||||
, double const* VACij
|
, double const* VACij
|
||||||
@ -1835,11 +1839,7 @@ namespace atrip {
|
|||||||
|
|
||||||
static int rank;
|
static int rank;
|
||||||
static int np;
|
static int np;
|
||||||
|
static void init();
|
||||||
static void init() {
|
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &Atrip::rank);
|
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &Atrip::np);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct Input {
|
struct Input {
|
||||||
CTF::Tensor<double> *ei = nullptr
|
CTF::Tensor<double> *ei = nullptr
|
||||||
@ -1850,6 +1850,8 @@ namespace atrip {
|
|||||||
, *Vhhhp = nullptr
|
, *Vhhhp = nullptr
|
||||||
, *Vppph = nullptr
|
, *Vppph = nullptr
|
||||||
;
|
;
|
||||||
|
int maxIterations = 0, iterationMod = -1;
|
||||||
|
bool barrier = true;
|
||||||
Input& with_epsilon_i(CTF::Tensor<double> * t) { ei = t; return *this; }
|
Input& with_epsilon_i(CTF::Tensor<double> * t) { ei = t; return *this; }
|
||||||
Input& with_epsilon_a(CTF::Tensor<double> * t) { ea = t; return *this; }
|
Input& with_epsilon_a(CTF::Tensor<double> * t) { ea = t; return *this; }
|
||||||
Input& with_Tai(CTF::Tensor<double> * t) { Tph = t; return *this; }
|
Input& with_Tai(CTF::Tensor<double> * t) { Tph = t; return *this; }
|
||||||
@ -1864,20 +1866,30 @@ namespace atrip {
|
|||||||
};
|
};
|
||||||
static Output run(Input const& in);
|
static Output run(Input const& in);
|
||||||
};
|
};
|
||||||
int Atrip::rank;
|
|
||||||
int Atrip::np;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
#+begin_src c++ :tangle (atrip-atrip-cxx)
|
#+begin_src c++ :tangle (atrip-atrip-cxx)
|
||||||
|
#include <iomanip>
|
||||||
|
|
||||||
#include <atrip/Atrip.hpp>
|
#include <atrip/Atrip.hpp>
|
||||||
#include <atrip/Utils.hpp>
|
#include <atrip/Utils.hpp>
|
||||||
|
#include <atrip/Equations.hpp>
|
||||||
|
#include <atrip/SliceUnion.hpp>
|
||||||
|
#include <atrip/Unions.hpp>
|
||||||
|
|
||||||
using namespace atrip;
|
using namespace atrip;
|
||||||
|
|
||||||
|
int Atrip::rank;
|
||||||
|
int Atrip::np;
|
||||||
|
|
||||||
Output Atrip::run(Input const& in){
|
void Atrip::init() {
|
||||||
|
MPI_Comm_rank(MPI_COMM_WORLD, &Atrip::rank);
|
||||||
|
MPI_Comm_size(MPI_COMM_WORLD, &Atrip::np);
|
||||||
|
}
|
||||||
|
|
||||||
|
Atrip::Output Atrip::run(Atrip::Input const& in){
|
||||||
|
|
||||||
const int np = Atrip::np;
|
const int np = Atrip::np;
|
||||||
const int rank = Atrip::rank;
|
const int rank = Atrip::rank;
|
||||||
@ -1886,8 +1898,8 @@ Output Atrip::run(Input const& in){
|
|||||||
// Timings in seconds ================================================{{{1
|
// Timings in seconds ================================================{{{1
|
||||||
Timings chrono{};
|
Timings chrono{};
|
||||||
|
|
||||||
No = in.ei->lens[0];
|
const size_t No = in.ei->lens[0];
|
||||||
Nv = in.ea->lens[0];
|
const size_t Nv = in.ea->lens[0];
|
||||||
LOG(0,"NEW_TRIPLES") << "No: " << No << "\n";
|
LOG(0,"NEW_TRIPLES") << "No: " << No << "\n";
|
||||||
LOG(0,"NEW_TRIPLES") << "Nv: " << Nv << "\n";
|
LOG(0,"NEW_TRIPLES") << "Nv: " << Nv << "\n";
|
||||||
|
|
||||||
@ -1954,7 +1966,7 @@ Output Atrip::run(Input const& in){
|
|||||||
size_t nIterations = abcIndex.second - abcIndex.first;
|
size_t nIterations = abcIndex.second - abcIndex.first;
|
||||||
|
|
||||||
#ifdef TRIPLES_BENCHMARK
|
#ifdef TRIPLES_BENCHMARK
|
||||||
{ const size_t maxIterations = getIntegerArgument("maxIterations", 0);
|
{ const size_t maxIterations = in.maxIterations;
|
||||||
if (maxIterations != 0) {
|
if (maxIterations != 0) {
|
||||||
abcIndex.second = abcIndex.first + maxIterations % (nIterations + 1);
|
abcIndex.second = abcIndex.first + maxIterations % (nIterations + 1);
|
||||||
nIterations = maxIterations % (nIterations + 1);
|
nIterations = maxIterations % (nIterations + 1);
|
||||||
@ -2132,12 +2144,12 @@ Output Atrip::run(Input const& in){
|
|||||||
chrono["oneshot-mpi:barrier"].start();
|
chrono["oneshot-mpi:barrier"].start();
|
||||||
chrono["mpi:barrier"].start();
|
chrono["mpi:barrier"].start();
|
||||||
// TODO: REMOVE
|
// TODO: REMOVE
|
||||||
if (getIntegerArgument("barrier", 1) == 1)
|
if (in.barrier == 1)
|
||||||
MPI_Barrier(universe);
|
MPI_Barrier(universe);
|
||||||
chrono["mpi:barrier"].stop();
|
chrono["mpi:barrier"].stop();
|
||||||
chrono["oneshot-mpi:barrier"].stop();
|
chrono["oneshot-mpi:barrier"].stop();
|
||||||
|
|
||||||
if (iteration % getIntegerArgument("iterationMod", 100) == 0) {
|
if (iteration % in.iterationMod == 0) {
|
||||||
LOG(0,"NEW_TRIPLES")
|
LOG(0,"NEW_TRIPLES")
|
||||||
<< "iteration " << iteration
|
<< "iteration " << iteration
|
||||||
<< " [" << 100 * iteration / nIterations << "%]"
|
<< " [" << 100 * iteration / nIterations << "%]"
|
||||||
@ -2214,8 +2226,6 @@ Output Atrip::run(Input const& in){
|
|||||||
)))
|
)))
|
||||||
chrono["oneshot-doubles"].start();
|
chrono["oneshot-doubles"].start();
|
||||||
chrono["doubles"].start();
|
chrono["doubles"].start();
|
||||||
// TODO: REMOVE
|
|
||||||
for (size_t __i=0; __i < getIntegerArgument("doublesLoops", 1); __i++)
|
|
||||||
doublesContribution( abc, (size_t)No, (size_t)Nv
|
doublesContribution( abc, (size_t)No, (size_t)Nv
|
||||||
// -- VABCI
|
// -- VABCI
|
||||||
, abph.unwrapSlice(Slice::AB, abc)
|
, abph.unwrapSlice(Slice::AB, abc)
|
||||||
@ -2257,7 +2267,7 @@ Output Atrip::run(Input const& in){
|
|||||||
for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
|
for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
|
||||||
chrono["reorder"].stop();
|
chrono["reorder"].stop();
|
||||||
chrono["singles"].start();
|
chrono["singles"].start();
|
||||||
singlesContribution( abc
|
singlesContribution( No, Nv, abc
|
||||||
, Tai.data()
|
, Tai.data()
|
||||||
, abhh.unwrapSlice(Slice::AB, abc)
|
, abhh.unwrapSlice(Slice::AB, abc)
|
||||||
, abhh.unwrapSlice(Slice::AC, abc)
|
, abhh.unwrapSlice(Slice::AC, abc)
|
||||||
@ -2423,17 +2433,17 @@ Output Atrip::run(Input const& in){
|
|||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
|
|
||||||
** Include header
|
** Debug
|
||||||
|
#+begin_src c++ :tangle (atrip-debug-h)
|
||||||
#+begin_src c++ :tangle (atrip-main-h)
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#define TRIPLES_BENCHMARK
|
#define TRIPLES_BENCHMARK
|
||||||
#define TRIPLES_DEBUG 1
|
#define TRIPLES_DEBUG 1
|
||||||
//#define TRIPLES_WORKLOAD_DUMP
|
//#define TRIPLES_WORKLOAD_DUMP
|
||||||
#define TRIPLES_USE_DGEMM
|
#define TRIPLES_USE_DGEMM
|
||||||
//#define TRIPLES_PRINT_TUPLES
|
//#define TRIPLES_PRINT_TUPLES
|
||||||
|
|
||||||
|
#define LOG(name, level) if (Atrip::rank == 0) std::cout << name << ": "
|
||||||
|
|
||||||
#if TRIPLES_DEBUG == 4
|
#if TRIPLES_DEBUG == 4
|
||||||
# pragma message("WARNING: You have OCD debugging ABC triples "\
|
# pragma message("WARNING: You have OCD debugging ABC triples "\
|
||||||
"expect GB of output and consult your therapist")
|
"expect GB of output and consult your therapist")
|
||||||
@ -2482,598 +2492,13 @@ Output Atrip::run(Input const& in){
|
|||||||
#else
|
#else
|
||||||
# error("TRIPLES_DEBUG is not defined!")
|
# error("TRIPLES_DEBUG is not defined!")
|
||||||
#endif
|
#endif
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
** Include header
|
||||||
|
|
||||||
|
#+begin_src c++ :tangle (atrip-main-h)
|
||||||
|
#pragma once
|
||||||
|
|
||||||
#include <atrip/Utils.hpp>
|
|
||||||
#include <atrip/Slice.hpp>
|
|
||||||
#include <atrip/Atrip.hpp>
|
#include <atrip/Atrip.hpp>
|
||||||
|
|
||||||
|
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
|
|
||||||
** Todo :noexport:
|
|
||||||
#+begin_src c++ :tangle todo.hpp
|
|
||||||
#include <algorithms/PerturbativeTriplesAbcijk.hpp>
|
|
||||||
#include <math/MathFunctions.hpp>
|
|
||||||
#include <tcc/DryTensor.hpp>
|
|
||||||
#include <util/Log.hpp>
|
|
||||||
#include <util/Exception.hpp>
|
|
||||||
#include <Cc4s.hpp>
|
|
||||||
#include <ctf.hpp>
|
|
||||||
#include <util/MpiCommunicator.hpp>
|
|
||||||
|
|
||||||
using namespace cc4s;
|
|
||||||
|
|
||||||
#include <sstream>
|
|
||||||
#include <algorithm>
|
|
||||||
#include <numeric>
|
|
||||||
#include <array>
|
|
||||||
#include <vector>
|
|
||||||
#include <map>
|
|
||||||
#include <memory>
|
|
||||||
#include <set>
|
|
||||||
#include <chrono>
|
|
||||||
|
|
||||||
|
|
||||||
#+end_src
|
|
||||||
|
|
||||||
#+begin_src c++ :tangle todo.hpp
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// MAIN ALGORITHM ======================================================{{{1
|
|
||||||
void cc4s::PerturbativeTriplesAbcijk::run(){
|
|
||||||
|
|
||||||
const int np = cc4s::Cc4s::world->np;
|
|
||||||
const int rank = cc4s::Cc4s::world->rank;
|
|
||||||
MPI_Comm universe = cc4s::Cc4s::world->comm;
|
|
||||||
|
|
||||||
// Timings in seconds ================================================{{{1
|
|
||||||
atrip::Timings chrono{};
|
|
||||||
|
|
||||||
// Get the distributed ctf tensor data
|
|
||||||
CTF::Tensor<> *ei(getTensorArgument("HoleEigenEnergies"))
|
|
||||||
, *ea(getTensorArgument("ParticleEigenEnergies"))
|
|
||||||
, *Tph(getTensorArgument("CcsdSinglesAmplitudes"))
|
|
||||||
, *Tpphh(getTensorArgument("CcsdDoublesAmplitudes"))
|
|
||||||
, *Vpphh(getTensorArgument("PPHHCoulombIntegrals"))
|
|
||||||
, *Vhhhp(getTensorArgument("HHHPCoulombIntegrals"))
|
|
||||||
, *Vppph(getTensorArgument("PPPHCoulombIntegrals"))
|
|
||||||
;
|
|
||||||
|
|
||||||
No = ei->lens[0];
|
|
||||||
Nv = ea->lens[0];
|
|
||||||
LOG(0,"NEW_TRIPLES") << "No: " << No << "\n";
|
|
||||||
LOG(0,"NEW_TRIPLES") << "Nv: " << Nv << "\n";
|
|
||||||
|
|
||||||
// allocate the three scratches, see piecuch
|
|
||||||
std::vector<double> Tijk(No*No*No) // doubles only (see piecuch)
|
|
||||||
, Zijk(No*No*No) // singles + doubles (see piecuch)
|
|
||||||
// we need local copies of the following tensors on every
|
|
||||||
// rank
|
|
||||||
, epsi(No)
|
|
||||||
, epsa(Nv)
|
|
||||||
, Tai(No * Nv)
|
|
||||||
;
|
|
||||||
|
|
||||||
ei->read_all(epsi.data());
|
|
||||||
ea->read_all(epsa.data());
|
|
||||||
Tph->read_all(Tai.data());
|
|
||||||
|
|
||||||
// COMMUNICATOR CONSTRUCTION ========================================={{{1
|
|
||||||
//
|
|
||||||
// Construct a new communicator living only on a single rank
|
|
||||||
int child_size = 1
|
|
||||||
, child_rank
|
|
||||||
;
|
|
||||||
const
|
|
||||||
int color = rank / child_size
|
|
||||||
, crank = rank % child_size
|
|
||||||
;
|
|
||||||
MPI_Comm child_comm;
|
|
||||||
if (np == 1) {
|
|
||||||
child_comm = universe;
|
|
||||||
} else {
|
|
||||||
MPI_Comm_split(cc4s::Cc4s::world->comm, color, crank, &child_comm);
|
|
||||||
MPI_Comm_rank(child_comm, &child_rank);
|
|
||||||
MPI_Comm_size(child_comm, &child_size);
|
|
||||||
//CTF::World child_world(child_comm);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
chrono["nv-slices"].start();
|
|
||||||
// BUILD SLICES PARAMETRIZED BY NV ==================================={{{1
|
|
||||||
LOG(0,"NEW_TRIPLES") << "BUILD NV-SLICES\n";
|
|
||||||
TAPHH taphh(*Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
|
||||||
HHHA hhha(*Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
|
||||||
chrono["nv-slices"].stop();
|
|
||||||
|
|
||||||
chrono["nv-nv-slices"].start();
|
|
||||||
// BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1
|
|
||||||
LOG(0,"NEW_TRIPLES") << "BUILD NV x NV-SLICES\n";
|
|
||||||
ABPH abph(*Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
|
||||||
ABHH abhh(*Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
|
||||||
TABHH tabhh(*Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
|
||||||
chrono["nv-nv-slices"].stop();
|
|
||||||
|
|
||||||
// all tensors
|
|
||||||
std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh};
|
|
||||||
|
|
||||||
//CONSTRUCT TUPLE LIST ==============================================={{{1
|
|
||||||
LOG(0,"NEW_TRIPLES") << "BUILD TUPLE LIST\n";
|
|
||||||
const auto tuplesList = std::move(getTuplesList(Nv));
|
|
||||||
WITH_RANK << "tupList.size() = " << tuplesList.size() << "\n";
|
|
||||||
|
|
||||||
// GET ABC INDEX RANGE FOR RANK ======================================{{{1
|
|
||||||
auto abcIndex = getABCRange(np, rank, tuplesList);
|
|
||||||
size_t nIterations = abcIndex.second - abcIndex.first;
|
|
||||||
|
|
||||||
#ifdef TRIPLES_BENCHMARK
|
|
||||||
{ const size_t maxIterations = getIntegerArgument("maxIterations", 0);
|
|
||||||
if (maxIterations != 0) {
|
|
||||||
abcIndex.second = abcIndex.first + maxIterations % (nIterations + 1);
|
|
||||||
nIterations = maxIterations % (nIterations + 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
WITH_RANK << "abcIndex = " << pretty_print(abcIndex) << "\n";
|
|
||||||
LOG(0,"NEW_TRIPLES") << "#iterations: "
|
|
||||||
<< nIterations << "\n";
|
|
||||||
|
|
||||||
// first abc
|
|
||||||
const ABCTuple firstAbc = tuplesList[abcIndex.first];
|
|
||||||
|
|
||||||
|
|
||||||
double energy(0.);
|
|
||||||
|
|
||||||
|
|
||||||
auto const isFakeTuple
|
|
||||||
= [&tuplesList](size_t const i) { return i >= tuplesList.size(); };
|
|
||||||
|
|
||||||
|
|
||||||
auto communicateDatabase
|
|
||||||
= [ &unions
|
|
||||||
, np
|
|
||||||
, &chrono
|
|
||||||
] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database {
|
|
||||||
|
|
||||||
chrono["db:comm:type:do"].start();
|
|
||||||
auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement();
|
|
||||||
chrono["db:comm:type:do"].stop();
|
|
||||||
|
|
||||||
chrono["db:comm:ldb"].start();
|
|
||||||
Slice::LocalDatabase ldb;
|
|
||||||
|
|
||||||
for (auto const& tensor: unions) {
|
|
||||||
auto const& tensorDb = tensor->buildLocalDatabase(abc);
|
|
||||||
ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end());
|
|
||||||
}
|
|
||||||
chrono["db:comm:ldb"].stop();
|
|
||||||
|
|
||||||
Slice::Database db(np * ldb.size(), ldb[0]);
|
|
||||||
|
|
||||||
chrono["oneshot-db:comm:allgather"].start();
|
|
||||||
chrono["db:comm:allgather"].start();
|
|
||||||
MPI_Allgather( ldb.data()
|
|
||||||
, ldb.size()
|
|
||||||
, MPI_LDB_ELEMENT
|
|
||||||
, db.data()
|
|
||||||
, ldb.size()
|
|
||||||
, MPI_LDB_ELEMENT
|
|
||||||
, c);
|
|
||||||
chrono["db:comm:allgather"].stop();
|
|
||||||
chrono["oneshot-db:comm:allgather"].stop();
|
|
||||||
|
|
||||||
chrono["db:comm:type:free"].start();
|
|
||||||
MPI_Type_free(&MPI_LDB_ELEMENT);
|
|
||||||
chrono["db:comm:type:free"].stop();
|
|
||||||
|
|
||||||
return db;
|
|
||||||
};
|
|
||||||
|
|
||||||
auto doIOPhase
|
|
||||||
= [&unions, &rank, &np, &universe, &chrono] (Slice::Database const& db) {
|
|
||||||
|
|
||||||
const size_t localDBLength = db.size() / np;
|
|
||||||
|
|
||||||
size_t sendTag = 0
|
|
||||||
, recvTag = rank * localDBLength
|
|
||||||
;
|
|
||||||
|
|
||||||
// RECIEVE PHASE ======================================================
|
|
||||||
{
|
|
||||||
// At this point, we have already send to everyone that fits
|
|
||||||
auto const& begin = &db[rank * localDBLength]
|
|
||||||
, end = begin + localDBLength
|
|
||||||
;
|
|
||||||
for (auto it = begin; it != end; ++it) {
|
|
||||||
recvTag++;
|
|
||||||
auto const& el = *it;
|
|
||||||
auto& u = unionByName(unions, el.name);
|
|
||||||
|
|
||||||
WITH_DBG std::cout
|
|
||||||
<< rank << ":r"
|
|
||||||
<< "♯" << recvTag << " =>"
|
|
||||||
<< " «n" << el.name
|
|
||||||
<< ", t" << el.info.type
|
|
||||||
<< ", s" << el.info.state
|
|
||||||
<< "»"
|
|
||||||
<< " ⊙ {" << rank << "⇐" << el.info.from.rank
|
|
||||||
<< ", "
|
|
||||||
<< el.info.from.source << "}"
|
|
||||||
<< " ∴ {" << el.info.tuple[0]
|
|
||||||
<< ", "
|
|
||||||
<< el.info.tuple[1]
|
|
||||||
<< "}"
|
|
||||||
<< "\n"
|
|
||||||
;
|
|
||||||
|
|
||||||
chrono["db:io:recv"].start();
|
|
||||||
u.receive(el.info, recvTag);
|
|
||||||
chrono["db:io:recv"].stop();
|
|
||||||
|
|
||||||
} // recv
|
|
||||||
}
|
|
||||||
|
|
||||||
// SEND PHASE =========================================================
|
|
||||||
for (size_t otherRank = 0; otherRank<np; otherRank++) {
|
|
||||||
auto const& begin = &db[otherRank * localDBLength]
|
|
||||||
, end = begin + localDBLength
|
|
||||||
;
|
|
||||||
for (auto it = begin; it != end; ++it) {
|
|
||||||
sendTag++;
|
|
||||||
Slice::LocalDatabaseElement const& el = *it;
|
|
||||||
|
|
||||||
if (el.info.from.rank != rank) continue;
|
|
||||||
|
|
||||||
auto& u = unionByName(unions, el.name);
|
|
||||||
WITH_DBG std::cout
|
|
||||||
<< rank << ":s"
|
|
||||||
<< "♯" << sendTag << " =>"
|
|
||||||
<< " «n" << el.name
|
|
||||||
<< ", t" << el.info.type
|
|
||||||
<< ", s" << el.info.state
|
|
||||||
<< "»"
|
|
||||||
<< " ⊙ {" << el.info.from.rank << "⇒" << otherRank
|
|
||||||
<< ", "
|
|
||||||
<< el.info.from.source << "}"
|
|
||||||
<< " ∴ {" << el.info.tuple[0]
|
|
||||||
<< ", "
|
|
||||||
<< el.info.tuple[1]
|
|
||||||
<< "}"
|
|
||||||
<< "\n"
|
|
||||||
;
|
|
||||||
|
|
||||||
chrono["db:io:send"].start();
|
|
||||||
u.send(otherRank, el.info, sendTag);
|
|
||||||
chrono["db:io:send"].stop();
|
|
||||||
|
|
||||||
} // send phase
|
|
||||||
|
|
||||||
} // otherRank
|
|
||||||
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
#if defined(HAVE_OCD) || defined(TRIPLES_PRINT_TUPLES)
|
|
||||||
std::map<ABCTuple, double> tupleEnergies;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
const double doublesFlops
|
|
||||||
= double(No)
|
|
||||||
,* double(No)
|
|
||||||
,* double(No)
|
|
||||||
,* (double(No) + double(Nv))
|
|
||||||
,* 2
|
|
||||||
,* 6
|
|
||||||
/ 1e9
|
|
||||||
;
|
|
||||||
|
|
||||||
// START MAIN LOOP ======================================================{{{1
|
|
||||||
|
|
||||||
Slice::Database db;
|
|
||||||
|
|
||||||
for ( size_t i = abcIndex.first, iteration = 1
|
|
||||||
; i < abcIndex.second
|
|
||||||
; i++, iteration++
|
|
||||||
) {
|
|
||||||
chrono["iterations"].start();
|
|
||||||
|
|
||||||
// check overhead from chrono over all iterations
|
|
||||||
chrono["start:stop"].start(); chrono["start:stop"].stop();
|
|
||||||
|
|
||||||
// check overhead of doing a barrier at the beginning
|
|
||||||
chrono["oneshot-mpi:barrier"].start();
|
|
||||||
chrono["mpi:barrier"].start();
|
|
||||||
// TODO: REMOVE
|
|
||||||
if (getIntegerArgument("barrier", 1) == 1)
|
|
||||||
MPI_Barrier(universe);
|
|
||||||
chrono["mpi:barrier"].stop();
|
|
||||||
chrono["oneshot-mpi:barrier"].stop();
|
|
||||||
|
|
||||||
if (iteration % getIntegerArgument("iterationMod", 100) == 0) {
|
|
||||||
LOG(0,"NEW_TRIPLES")
|
|
||||||
<< "iteration " << iteration
|
|
||||||
<< " [" << 100 * iteration / nIterations << "%]"
|
|
||||||
<< " (" << doublesFlops * iteration / chrono["doubles"].count()
|
|
||||||
<< "GF)"
|
|
||||||
<< " (" << doublesFlops * iteration / chrono["iterations"].count()
|
|
||||||
<< "GF)"
|
|
||||||
<< " ===========================\n";
|
|
||||||
|
|
||||||
// PRINT TIMINGS
|
|
||||||
for (auto const& pair: chrono)
|
|
||||||
LOG(1, " ") << pair.first << " :: "
|
|
||||||
<< pair.second.count()
|
|
||||||
<< std::endl;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
const ABCTuple abc = isFakeTuple(i)
|
|
||||||
? tuplesList[tuplesList.size() - 1]
|
|
||||||
: tuplesList[i]
|
|
||||||
, *abcNext = i == (abcIndex.second - 1)
|
|
||||||
? nullptr
|
|
||||||
: isFakeTuple(i + 1)
|
|
||||||
? &tuplesList[tuplesList.size() - 1]
|
|
||||||
: &tuplesList[i + 1]
|
|
||||||
;
|
|
||||||
|
|
||||||
chrono["with_rank"].start();
|
|
||||||
WITH_RANK << " :it " << iteration
|
|
||||||
<< " :abc " << pretty_print(abc)
|
|
||||||
<< " :abcN "
|
|
||||||
<< (abcNext ? pretty_print(*abcNext) : "None")
|
|
||||||
<< "\n";
|
|
||||||
chrono["with_rank"].stop();
|
|
||||||
|
|
||||||
|
|
||||||
// COMM FIRST DATABASE ================================================{{{1
|
|
||||||
if (i == abcIndex.first) {
|
|
||||||
WITH_RANK << "__first__:first database ............ \n";
|
|
||||||
const auto __db = communicateDatabase(abc, universe);
|
|
||||||
WITH_RANK << "__first__:first database communicated \n";
|
|
||||||
WITH_RANK << "__first__:first database io phase \n";
|
|
||||||
doIOPhase(__db);
|
|
||||||
WITH_RANK << "__first__:first database io phase DONE\n";
|
|
||||||
WITH_RANK << "__first__::::Unwrapping all slices for first database\n";
|
|
||||||
for (auto& u: unions) u->unwrapAll(abc);
|
|
||||||
WITH_RANK << "__first__::::Unwrapping all slices for first database DONE\n";
|
|
||||||
MPI_Barrier(universe);
|
|
||||||
}
|
|
||||||
|
|
||||||
// COMM NEXT DATABASE ================================================={{{1
|
|
||||||
if (abcNext) {
|
|
||||||
WITH_RANK << "__comm__:" << iteration << "th communicating database\n";
|
|
||||||
chrono["db:comm"].start();
|
|
||||||
//const auto db = communicateDatabase(*abcNext, universe);
|
|
||||||
db = communicateDatabase(*abcNext, universe);
|
|
||||||
chrono["db:comm"].stop();
|
|
||||||
chrono["db:io"].start();
|
|
||||||
doIOPhase(db);
|
|
||||||
chrono["db:io"].stop();
|
|
||||||
WITH_RANK << "__comm__:" << iteration << "th database io phase DONE\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
// COMPUTE DOUBLES ===================================================={{{1
|
|
||||||
OCD_Barrier(universe);
|
|
||||||
if (!isFakeTuple(i)) {
|
|
||||||
WITH_RANK << iteration << "-th doubles\n";
|
|
||||||
WITH_CHRONO(chrono["oneshot-unwrap"],
|
|
||||||
WITH_CHRONO(chrono["unwrap"],
|
|
||||||
WITH_CHRONO(chrono["unwrap:doubles"],
|
|
||||||
for (auto& u: decltype(unions){&abph, &hhha, &taphh, &tabhh}) {
|
|
||||||
u->unwrapAll(abc);
|
|
||||||
}
|
|
||||||
)))
|
|
||||||
chrono["oneshot-doubles"].start();
|
|
||||||
chrono["doubles"].start();
|
|
||||||
// TODO: REMOVE
|
|
||||||
for (size_t __i=0; __i < getIntegerArgument("doublesLoops", 1); __i++)
|
|
||||||
doublesContribution( abc, (size_t)No, (size_t)Nv
|
|
||||||
// -- VABCI
|
|
||||||
, abph.unwrapSlice(Slice::AB, abc)
|
|
||||||
, abph.unwrapSlice(Slice::AC, abc)
|
|
||||||
, abph.unwrapSlice(Slice::BC, abc)
|
|
||||||
, abph.unwrapSlice(Slice::BA, abc)
|
|
||||||
, abph.unwrapSlice(Slice::CA, abc)
|
|
||||||
, abph.unwrapSlice(Slice::CB, abc)
|
|
||||||
// -- VHHHA
|
|
||||||
, hhha.unwrapSlice(Slice::A, abc)
|
|
||||||
, hhha.unwrapSlice(Slice::B, abc)
|
|
||||||
, hhha.unwrapSlice(Slice::C, abc)
|
|
||||||
// -- TA
|
|
||||||
, taphh.unwrapSlice(Slice::A, abc)
|
|
||||||
, taphh.unwrapSlice(Slice::B, abc)
|
|
||||||
, taphh.unwrapSlice(Slice::C, abc)
|
|
||||||
// -- TABIJ
|
|
||||||
, tabhh.unwrapSlice(Slice::AB, abc)
|
|
||||||
, tabhh.unwrapSlice(Slice::AC, abc)
|
|
||||||
, tabhh.unwrapSlice(Slice::BC, abc)
|
|
||||||
// -- TIJK
|
|
||||||
, Tijk.data()
|
|
||||||
, chrono
|
|
||||||
);
|
|
||||||
WITH_RANK << iteration << "-th doubles done\n";
|
|
||||||
chrono["doubles"].stop();
|
|
||||||
chrono["oneshot-doubles"].stop();
|
|
||||||
}
|
|
||||||
|
|
||||||
// COMPUTE SINGLES =================================================== {{{1
|
|
||||||
OCD_Barrier(universe);
|
|
||||||
if (!isFakeTuple(i)) {
|
|
||||||
WITH_CHRONO(chrono["oneshot-unwrap"],
|
|
||||||
WITH_CHRONO(chrono["unwrap"],
|
|
||||||
WITH_CHRONO(chrono["unwrap:singles"],
|
|
||||||
abhh.unwrapAll(abc);
|
|
||||||
)))
|
|
||||||
chrono["reorder"].start();
|
|
||||||
for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
|
|
||||||
chrono["reorder"].stop();
|
|
||||||
chrono["singles"].start();
|
|
||||||
singlesContribution( abc
|
|
||||||
, Tai.data()
|
|
||||||
, abhh.unwrapSlice(Slice::AB, abc)
|
|
||||||
, abhh.unwrapSlice(Slice::AC, abc)
|
|
||||||
, abhh.unwrapSlice(Slice::BC, abc)
|
|
||||||
, Zijk.data());
|
|
||||||
chrono["singles"].stop();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// COMPUTE ENERGY ==================================================== {{{1
|
|
||||||
if (!isFakeTuple(i)) {
|
|
||||||
double tupleEnergy(0.);
|
|
||||||
|
|
||||||
int distinct(0);
|
|
||||||
if (abc[0] == abc[1]) distinct++;
|
|
||||||
if (abc[1] == abc[2]) distinct--;
|
|
||||||
const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]);
|
|
||||||
|
|
||||||
chrono["energy"].start();
|
|
||||||
if ( distinct == 0)
|
|
||||||
tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk);
|
|
||||||
else
|
|
||||||
tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk);
|
|
||||||
chrono["energy"].stop();
|
|
||||||
|
|
||||||
#if defined(HAVE_OCD) || defined(TRIPLES_PRINT_TUPLES)
|
|
||||||
tupleEnergies[abc] = tupleEnergy;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
energy += tupleEnergy;
|
|
||||||
|
|
||||||
#ifdef HAVE_OCD
|
|
||||||
auto const print_slices
|
|
||||||
= [](ABCTuple const& abc, ABCTuple const& want, SliceUnion& u) {
|
|
||||||
if (abc != want) return;
|
|
||||||
|
|
||||||
for (auto type: u.sliceTypes) {
|
|
||||||
auto const& ptr = u.unwrapSlice(type, abc);
|
|
||||||
auto const& slice = Slice::findByTypeAbc(u.slices, type, abc);
|
|
||||||
WITH_RANK << "__print_slice__:n" << u.name << " "
|
|
||||||
<< pretty_print(abc) << " "
|
|
||||||
<< pretty_print(slice.info)
|
|
||||||
;
|
|
||||||
for (size_t i = 0; i < 20; i++) std::cout << ptr[i] << ", ";
|
|
||||||
std::cout << std::endl;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (isFakeTuple(i)) {
|
|
||||||
// fake iterations should also unwrap whatever they got
|
|
||||||
WITH_RANK << iteration
|
|
||||||
<< "th unwrapping because of fake in "
|
|
||||||
<< i << "\n";
|
|
||||||
for (auto& u: unions) u->unwrapAll(abc);
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef HAVE_OCD
|
|
||||||
for (auto const& u: unions) {
|
|
||||||
WITH_RANK << "__dups__:"
|
|
||||||
<< iteration
|
|
||||||
<< "-th n" << u->name << " checking duplicates\n";
|
|
||||||
u->checkForDuplicates();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
// CLEANUP UNIONS ===================================================={{{1
|
|
||||||
OCD_Barrier(universe);
|
|
||||||
if (abcNext) {
|
|
||||||
chrono["gc"].start();
|
|
||||||
WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n";
|
|
||||||
for (auto& u: unions) {
|
|
||||||
|
|
||||||
u->unwrapAll(abc);
|
|
||||||
WITH_RANK << "__gc__:n" << u->name << " :it " << iteration
|
|
||||||
<< " :abc " << pretty_print(abc)
|
|
||||||
<< " :abcN " << pretty_print(*abcNext)
|
|
||||||
<< "\n";
|
|
||||||
for (auto const& slice: u->slices)
|
|
||||||
WITH_RANK << "__gc__:guts:" << slice.info << "\n";
|
|
||||||
u->clearUnusedSlicesForNext(*abcNext);
|
|
||||||
|
|
||||||
WITH_RANK << "__gc__: checking validity\n";
|
|
||||||
|
|
||||||
#ifdef HAVE_OCD
|
|
||||||
// check for validity of the slices
|
|
||||||
for (auto type: u->sliceTypes) {
|
|
||||||
auto tuple = Slice::subtupleBySlice(abc, type);
|
|
||||||
for (auto& slice: u->slices) {
|
|
||||||
if ( slice.info.type == type
|
|
||||||
&& slice.info.tuple == tuple
|
|
||||||
&& slice.isDirectlyFetchable()
|
|
||||||
) {
|
|
||||||
if (slice.info.state == Slice::Dispatched)
|
|
||||||
throw std::domain_error( "This slice should not be undispatched! "
|
|
||||||
+ pretty_print(slice.info));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
chrono["gc"].stop();
|
|
||||||
}
|
|
||||||
|
|
||||||
WITH_RANK << iteration << "-th cleaning up....... DONE\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
// CLEAN CHRONO ======================================================{{{1
|
|
||||||
chrono["iterations"].stop();
|
|
||||||
{ // TODO: REMOVEME
|
|
||||||
chrono["oneshot-doubles"].clear();
|
|
||||||
chrono["oneshot-mpi:barrier"].clear();
|
|
||||||
chrono["oneshot-db:comm:allgather"].clear();
|
|
||||||
chrono["oneshot-unwrap"].clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
// ITERATION END ====================================================={{{1
|
|
||||||
} // END OF MAIN LOOP
|
|
||||||
|
|
||||||
MPI_Barrier(universe);
|
|
||||||
|
|
||||||
// PRINT TUPLES ========================================================={{{1
|
|
||||||
#if defined(HAVE_OCD) || defined(TRIPLES_PRINT_TUPLES)
|
|
||||||
LOG(0,"NEW_TRIPLES") << "tuple energies" << "\n";
|
|
||||||
for (size_t i = 0; i < np; i++) {
|
|
||||||
MPI_Barrier(universe);
|
|
||||||
for (auto const& pair: tupleEnergies) {
|
|
||||||
if (i == rank)
|
|
||||||
std::cout << pair.first[0]
|
|
||||||
<< " " << pair.first[1]
|
|
||||||
<< " " << pair.first[2]
|
|
||||||
<< std::setprecision(15) << std::setw(23)
|
|
||||||
<< " tupleEnergy: " << pair.second
|
|
||||||
<< "\n"
|
|
||||||
;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// COMMUNICATE THE ENERGIES ============================================={{{1
|
|
||||||
LOG(0,"NEW_TRIPLES") << "COMMUNICATING ENERGIES \n";
|
|
||||||
double globalEnergy = 0;
|
|
||||||
MPI_Reduce(&energy, &globalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, universe);
|
|
||||||
|
|
||||||
WITH_RANK << "local energy " << energy << "\n";
|
|
||||||
LOG(0,"LOOP FINISHED, energy")
|
|
||||||
<< std::setprecision(15) << std::setw(23)
|
|
||||||
<< globalEnergy << std::endl;
|
|
||||||
|
|
||||||
// PRINT TIMINGS {{{1
|
|
||||||
for (auto const& pair: chrono)
|
|
||||||
LOG(0,"atrip:chrono") << pair.first << " "
|
|
||||||
<< pair.second.count() << std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
LOG(0, "atrip:flops")
|
|
||||||
<< nIterations * doublesFlops / chrono["doubles"].count() << "\n";
|
|
||||||
|
|
||||||
}
|
|
||||||
#+end_src
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user