Add sources explicitly
This commit is contained in:
parent
3d0f8a3a61
commit
1cc41f1dd1
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,2 +1,4 @@
|
||||
.emacs
|
||||
doc/doxygen/
|
||||
*.o
|
||||
*.d
|
||||
|
||||
1
Sources.mk
Normal file
1
Sources.mk
Normal file
@ -0,0 +1 @@
|
||||
ATRIP_SOURCES = include/atrip.hpp src/atrip/Atrip.cxx include/atrip/Atrip.hpp include/atrip/Debug.hpp include/atrip/Equations.hpp include/atrip/Tuples.hpp include/atrip/Unions.hpp include/atrip/RankMap.hpp include/atrip/Blas.hpp include/atrip/Utils.hpp include/atrip/SliceUnion.hpp include/atrip/Slice.hpp
|
||||
5
include/atrip.hpp
Normal file
5
include/atrip.hpp
Normal file
@ -0,0 +1,5 @@
|
||||
// [[file:../atrip.org::*Include header][Include header:1]]
|
||||
#pragma once
|
||||
|
||||
#include <atrip/Atrip.hpp>
|
||||
// Include header:1 ends here
|
||||
48
include/atrip/Atrip.hpp
Normal file
48
include/atrip/Atrip.hpp
Normal file
@ -0,0 +1,48 @@
|
||||
// [[file:../../atrip.org::*Atrip][Atrip:1]]
|
||||
#pragma once
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <chrono>
|
||||
|
||||
#include <ctf.hpp>
|
||||
|
||||
namespace atrip {
|
||||
|
||||
struct Atrip {
|
||||
|
||||
static int rank;
|
||||
static int np;
|
||||
static void init();
|
||||
|
||||
struct Input {
|
||||
CTF::Tensor<double> *ei = nullptr
|
||||
, *ea = nullptr
|
||||
, *Tph = nullptr
|
||||
, *Tpphh = nullptr
|
||||
, *Vpphh = nullptr
|
||||
, *Vhhhp = nullptr
|
||||
, *Vppph = nullptr
|
||||
;
|
||||
int maxIterations = 0, iterationMod = -1;
|
||||
bool barrier = true;
|
||||
Input& with_epsilon_i(CTF::Tensor<double> * t) { ei = t; return *this; }
|
||||
Input& with_epsilon_a(CTF::Tensor<double> * t) { ea = t; return *this; }
|
||||
Input& with_Tai(CTF::Tensor<double> * t) { Tph = t; return *this; }
|
||||
Input& with_Tabij(CTF::Tensor<double> * t) { Tpphh = t; return *this; }
|
||||
Input& with_Vabij(CTF::Tensor<double> * t) { Vpphh = t; return *this; }
|
||||
Input& with_Vijka(CTF::Tensor<double> * t) { Vhhhp = t; return *this; }
|
||||
Input& with_Vabci(CTF::Tensor<double> * t) { Vppph = t; return *this; }
|
||||
Input& with_maxIterations(int i) { maxIterations = i; return *this; }
|
||||
Input& with_iterationMod(int i) { iterationMod = i; return *this; }
|
||||
Input& with_barrier(bool i) { barrier = i; return *this; }
|
||||
};
|
||||
|
||||
struct Output {
|
||||
double energy;
|
||||
};
|
||||
static Output run(Input const& in);
|
||||
};
|
||||
|
||||
}
|
||||
// Atrip:1 ends here
|
||||
22
include/atrip/Blas.hpp
Normal file
22
include/atrip/Blas.hpp
Normal file
@ -0,0 +1,22 @@
|
||||
// [[file:../../atrip.org::*Blas][Blas:1]]
|
||||
#pragma once
|
||||
namespace atrip {
|
||||
extern "C" {
|
||||
void dgemm_(
|
||||
const char *transa,
|
||||
const char *transb,
|
||||
const int *m,
|
||||
const int *n,
|
||||
const int *k,
|
||||
double *alpha,
|
||||
const double *A,
|
||||
const int *lda,
|
||||
const double *B,
|
||||
const int *ldb,
|
||||
double *beta,
|
||||
double *C,
|
||||
const int *ldc
|
||||
);
|
||||
}
|
||||
}
|
||||
// Blas:1 ends here
|
||||
60
include/atrip/Debug.hpp
Normal file
60
include/atrip/Debug.hpp
Normal file
@ -0,0 +1,60 @@
|
||||
// [[file:../../atrip.org::*Debug][Debug:1]]
|
||||
#pragma once
|
||||
#define ATRIP_BENCHMARK
|
||||
#define ATRIP_DONT_SLICE
|
||||
#define ATRIP_DEBUG 1
|
||||
//#define ATRIP_WORKLOAD_DUMP
|
||||
#define ATRIP_USE_DGEMM
|
||||
//#define ATRIP_PRINT_TUPLES
|
||||
|
||||
#define LOG(level, name) if (Atrip::rank == 0) std::cout << name << ": "
|
||||
|
||||
#if ATRIP_DEBUG == 4
|
||||
# pragma message("WARNING: You have OCD debugging ABC triples "\
|
||||
"expect GB of output and consult your therapist")
|
||||
# include <dbg.h>
|
||||
# define HAVE_OCD
|
||||
# define OCD_Barrier(com) MPI_Barrier(com)
|
||||
# define WITH_OCD
|
||||
# define WITH_ROOT if (atrip::Atrip::rank == 0)
|
||||
# define WITH_SPECIAL(r) if (atrip::Atrip::rank == r)
|
||||
# define WITH_RANK std::cout << atrip::Atrip::rank << ": "
|
||||
# define WITH_CRAZY_DEBUG
|
||||
# define WITH_DBG
|
||||
# define DBG(...) dbg(__VA_ARGS__)
|
||||
#elif ATRIP_DEBUG == 3
|
||||
# pragma message("WARNING: You have crazy debugging ABC triples,"\
|
||||
" expect GB of output")
|
||||
# include <dbg.h>
|
||||
# define OCD_Barrier(com)
|
||||
# define WITH_OCD if (false)
|
||||
# define WITH_ROOT if (atrip::Atrip::rank == 0)
|
||||
# define WITH_SPECIAL(r) if (atrip::Atrip::rank == r)
|
||||
# define WITH_RANK std::cout << atrip::Atrip::rank << ": "
|
||||
# define WITH_CRAZY_DEBUG
|
||||
# define WITH_DBG
|
||||
# define DBG(...) dbg(__VA_ARGS__)
|
||||
#elif ATRIP_DEBUG == 2
|
||||
# pragma message("WARNING: You have some debugging info for ABC triples")
|
||||
# include <dbg.h>
|
||||
# define OCD_Barrier(com)
|
||||
# define WITH_OCD if (false)
|
||||
# define WITH_ROOT if (atrip::Atrip::rank == 0)
|
||||
# define WITH_SPECIAL(r) if (atrip::Atrip::rank == r)
|
||||
# define WITH_RANK std::cout << atrip::Atrip::rank << ": "
|
||||
# define WITH_CRAZY_DEBUG if (false)
|
||||
# define WITH_DBG
|
||||
# define DBG(...) dbg(__VA_ARGS__)
|
||||
#elif ATRIP_DEBUG == 1
|
||||
# define OCD_Barrier(com)
|
||||
# define WITH_OCD if (false)
|
||||
# define WITH_ROOT if (false)
|
||||
# define WITH_SPECIAL(r) if (false)
|
||||
# define WITH_RANK if (false) std::cout << atrip::Atrip::rank << ": "
|
||||
# define WITH_DBG if (false)
|
||||
# define WITH_CRAZY_DEBUG if (false)
|
||||
# define DBG(...)
|
||||
#else
|
||||
# error("ATRIP_DEBUG is not defined!")
|
||||
#endif
|
||||
// Debug:1 ends here
|
||||
337
include/atrip/Equations.hpp
Normal file
337
include/atrip/Equations.hpp
Normal file
@ -0,0 +1,337 @@
|
||||
// [[file:../../atrip.org::*Equations][Equations:1]]
|
||||
#pragma once
|
||||
|
||||
#include<atrip/Slice.hpp>
|
||||
#include<atrip/Blas.hpp>
|
||||
|
||||
namespace atrip {
|
||||
|
||||
double getEnergyDistinct
|
||||
( const double epsabc
|
||||
, std::vector<double> const& epsi
|
||||
, std::vector<double> const& Tijk_
|
||||
, std::vector<double> const& Zijk_
|
||||
) {
|
||||
constexpr size_t blockSize=16;
|
||||
double energy(0.);
|
||||
const size_t No = epsi.size();
|
||||
for (size_t kk=0; kk<No; kk+=blockSize){
|
||||
const size_t kend( std::min(No, kk+blockSize) );
|
||||
for (size_t jj(kk); jj<No; jj+=blockSize){
|
||||
const size_t jend( std::min( No, jj+blockSize) );
|
||||
for (size_t ii(jj); ii<No; ii+=blockSize){
|
||||
const size_t iend( std::min( No, ii+blockSize) );
|
||||
for (size_t k(kk); k < kend; k++){
|
||||
const double ek(epsi[k]);
|
||||
const size_t jstart = jj > k ? kk : k;
|
||||
for (size_t j(jstart); j < jend; j++){
|
||||
const double ej(epsi[j]);
|
||||
double facjk( j == k ? 0.5 : 1.0);
|
||||
size_t istart = ii > j ? ii : j;
|
||||
for (size_t i(istart); i < iend; i++){
|
||||
const double ei(epsi[i]);
|
||||
double facij ( i==j ? 0.5 : 1.0);
|
||||
double denominator(epsabc - ei - ej - ek);
|
||||
double U(Zijk_[i + No*j + No*No*k]);
|
||||
double V(Zijk_[i + No*k + No*No*j]);
|
||||
double W(Zijk_[j + No*i + No*No*k]);
|
||||
double X(Zijk_[j + No*k + No*No*i]);
|
||||
double Y(Zijk_[k + No*i + No*No*j]);
|
||||
double Z(Zijk_[k + No*j + No*No*i]);
|
||||
|
||||
double A(Tijk_[i + No*j + No*No*k]);
|
||||
double B(Tijk_[i + No*k + No*No*j]);
|
||||
double C(Tijk_[j + No*i + No*No*k]);
|
||||
double D(Tijk_[j + No*k + No*No*i]);
|
||||
double E(Tijk_[k + No*i + No*No*j]);
|
||||
double F(Tijk_[k + No*j + No*No*i]);
|
||||
double value(3.0*(A*U+B*V+C*W+D*X+E*Y+F*Z)
|
||||
+((U+X+Y)-2.0*(V+W+Z))*(A+D+E)
|
||||
+((V+W+Z)-2.0*(U+X+Y))*(B+C+F));
|
||||
energy += 2.0*value / denominator * facjk * facij;
|
||||
} // i
|
||||
} // j
|
||||
} // k
|
||||
} // ii
|
||||
} // jj
|
||||
} // kk
|
||||
return energy;
|
||||
}
|
||||
|
||||
|
||||
double getEnergySame
|
||||
( const double epsabc
|
||||
, std::vector<double> const& epsi
|
||||
, std::vector<double> const& Tijk_
|
||||
, std::vector<double> const& Zijk_
|
||||
) {
|
||||
constexpr size_t blockSize = 16;
|
||||
const size_t No = epsi.size();
|
||||
double energy(0.);
|
||||
for (size_t kk=0; kk<No; kk+=blockSize){
|
||||
const size_t kend( std::min( kk+blockSize, No) );
|
||||
for (size_t jj(kk); jj<No; jj+=blockSize){
|
||||
const size_t jend( std::min( jj+blockSize, No) );
|
||||
for (size_t ii(jj); ii<No; ii+=blockSize){
|
||||
const size_t iend( std::min( ii+blockSize, No) );
|
||||
for (size_t k(kk); k < kend; k++){
|
||||
const double ek(epsi[k]);
|
||||
const size_t jstart = jj > k ? jj : k;
|
||||
for(size_t j(jstart); j < jend; j++){
|
||||
const double facjk( j == k ? 0.5 : 1.0);
|
||||
const double ej(epsi[j]);
|
||||
const size_t istart = ii > j ? ii : j;
|
||||
for(size_t i(istart); i < iend; i++){
|
||||
double ei(epsi[i]);
|
||||
double facij ( i==j ? 0.5 : 1.0);
|
||||
double denominator(epsabc - ei - ej - ek);
|
||||
double U(Zijk_[i + No*j + No*No*k]);
|
||||
double V(Zijk_[j + No*k + No*No*i]);
|
||||
double W(Zijk_[k + No*i + No*No*j]);
|
||||
double A(Tijk_[i + No*j + No*No*k]);
|
||||
double B(Tijk_[j + No*k + No*No*i]);
|
||||
double C(Tijk_[k + No*i + No*No*j]);
|
||||
double value(3.0*( A*U + B*V + C*W) - (A+B+C)*(U+V+W));
|
||||
energy += 2.0*value / denominator * facjk * facij;
|
||||
} // i
|
||||
} // j
|
||||
} // k
|
||||
} // ii
|
||||
} // jj
|
||||
} // kk
|
||||
return energy;
|
||||
}
|
||||
|
||||
void singlesContribution
|
||||
( size_t No
|
||||
, size_t Nv
|
||||
, const ABCTuple &abc
|
||||
, double const* Tph
|
||||
, double const* VABij
|
||||
, double const* VACij
|
||||
, double const* VBCij
|
||||
, double *Zijk
|
||||
) {
|
||||
const size_t a(abc[0]), b(abc[1]), c(abc[2]);
|
||||
for (size_t k=0; k < No; k++)
|
||||
for (size_t i=0; i < No; i++)
|
||||
for (size_t j=0; j < No; j++) {
|
||||
const size_t ijk = i + j*No + k*No*No
|
||||
, jk = j + No * k
|
||||
;
|
||||
Zijk[ijk] += Tph[ a + i * Nv ] * VBCij[ j + k * No ];
|
||||
Zijk[ijk] += Tph[ b + j * Nv ] * VACij[ i + k * No ];
|
||||
Zijk[ijk] += Tph[ c + k * Nv ] * VABij[ i + j * No ];
|
||||
}
|
||||
}
|
||||
|
||||
void doublesContribution
|
||||
( const ABCTuple &abc
|
||||
, size_t const No
|
||||
, size_t const Nv
|
||||
// -- VABCI
|
||||
, double const* VABph
|
||||
, double const* VACph
|
||||
, double const* VBCph
|
||||
, double const* VBAph
|
||||
, double const* VCAph
|
||||
, double const* VCBph
|
||||
// -- VHHHA
|
||||
, double const* VhhhA
|
||||
, double const* VhhhB
|
||||
, double const* VhhhC
|
||||
// -- TA
|
||||
, double const* TAphh
|
||||
, double const* TBphh
|
||||
, double const* TCphh
|
||||
// -- TABIJ
|
||||
, double const* TABhh
|
||||
, double const* TAChh
|
||||
, double const* TBChh
|
||||
// -- TIJK
|
||||
, double *Tijk
|
||||
, atrip::Timings& chrono
|
||||
) {
|
||||
|
||||
auto& t_reorder = chrono["doubles:reorder"];
|
||||
const size_t a = abc[0], b = abc[1], c = abc[2]
|
||||
, NoNo = No*No, NoNv = No*Nv
|
||||
;
|
||||
|
||||
#if defined(ATRIP_USE_DGEMM)
|
||||
#define _IJK_(i, j, k) i + j*No + k*NoNo
|
||||
#define REORDER(__II, __JJ, __KK) \
|
||||
t_reorder.start(); \
|
||||
for (size_t k = 0; k < No; k++) \
|
||||
for (size_t j = 0; j < No; j++) \
|
||||
for (size_t i = 0; i < No; i++) { \
|
||||
Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \
|
||||
} \
|
||||
t_reorder.stop();
|
||||
#define DGEMM_PARTICLES(__A, __B) \
|
||||
atrip::dgemm_( "T" \
|
||||
, "N" \
|
||||
, (int const*)&NoNo \
|
||||
, (int const*)&No \
|
||||
, (int const*)&Nv \
|
||||
, &one \
|
||||
, __A \
|
||||
, (int const*)&Nv \
|
||||
, __B \
|
||||
, (int const*)&Nv \
|
||||
, &zero \
|
||||
, _t_buffer.data() \
|
||||
, (int const*)&NoNo \
|
||||
);
|
||||
#define DGEMM_HOLES(__A, __B, __TRANSB) \
|
||||
atrip::dgemm_( "N" \
|
||||
, __TRANSB \
|
||||
, (int const*)&NoNo \
|
||||
, (int const*)&No \
|
||||
, (int const*)&No \
|
||||
, &m_one \
|
||||
, __A \
|
||||
, (int const*)&NoNo \
|
||||
, __B \
|
||||
, (int const*)&No \
|
||||
, &zero \
|
||||
, _t_buffer.data() \
|
||||
, (int const*)&NoNo \
|
||||
);
|
||||
|
||||
using F = double;
|
||||
const size_t NoNoNo = No*NoNo;
|
||||
std::vector<double> _t_buffer;
|
||||
_t_buffer.reserve(NoNoNo);
|
||||
F one{1.0}, m_one{-1.0}, zero{0.0};
|
||||
|
||||
t_reorder.start();
|
||||
for (size_t k = 0; k < NoNoNo; k++) {
|
||||
// zero the Tijk
|
||||
Tijk[k] = 0.0;
|
||||
}
|
||||
t_reorder.stop();
|
||||
|
||||
chrono["doubles:holes"].start();
|
||||
{ // Holes part ============================================================
|
||||
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
|
||||
chrono["doubles:holes:1"].start();
|
||||
DGEMM_HOLES(VhhhC, TABhh, "N")
|
||||
REORDER(i, k, j)
|
||||
chrono["doubles:holes:1"].stop();
|
||||
// VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0
|
||||
chrono["doubles:holes:2"].start();
|
||||
DGEMM_HOLES(VhhhC, TABhh, "T")
|
||||
REORDER(j, k, i)
|
||||
chrono["doubles:holes:2"].stop();
|
||||
// VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5
|
||||
chrono["doubles:holes:3"].start();
|
||||
DGEMM_HOLES(VhhhB, TAChh, "N")
|
||||
REORDER(i, j, k)
|
||||
chrono["doubles:holes:3"].stop();
|
||||
// VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3
|
||||
chrono["doubles:holes:4"].start();
|
||||
DGEMM_HOLES(VhhhB, TAChh, "T")
|
||||
REORDER(k, j, i)
|
||||
chrono["doubles:holes:4"].stop();
|
||||
// VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1
|
||||
chrono["doubles:holes:5"].start();
|
||||
DGEMM_HOLES(VhhhA, TBChh, "N")
|
||||
REORDER(j, i, k)
|
||||
chrono["doubles:holes:5"].stop();
|
||||
// VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4
|
||||
chrono["doubles:holes:6"].start();
|
||||
DGEMM_HOLES(VhhhA, TBChh, "T")
|
||||
REORDER(k, i, j)
|
||||
chrono["doubles:holes:6"].stop();
|
||||
}
|
||||
chrono["doubles:holes"].stop();
|
||||
|
||||
chrono["doubles:particles"].start();
|
||||
{ // Particle part =========================================================
|
||||
// TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0
|
||||
chrono["doubles:particles:1"].start();
|
||||
DGEMM_PARTICLES(TAphh, VBCph)
|
||||
REORDER(i, j, k)
|
||||
chrono["doubles:particles:1"].stop();
|
||||
// TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3
|
||||
chrono["doubles:particles:2"].start();
|
||||
DGEMM_PARTICLES(TAphh, VCBph)
|
||||
REORDER(i, k, j)
|
||||
chrono["doubles:particles:2"].stop();
|
||||
// TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5
|
||||
chrono["doubles:particles:3"].start();
|
||||
DGEMM_PARTICLES(TCphh, VABph)
|
||||
REORDER(k, i, j)
|
||||
chrono["doubles:particles:3"].stop();
|
||||
// TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2
|
||||
chrono["doubles:particles:4"].start();
|
||||
DGEMM_PARTICLES(TCphh, VBAph)
|
||||
REORDER(k, j, i)
|
||||
chrono["doubles:particles:4"].stop();
|
||||
// TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1
|
||||
chrono["doubles:particles:5"].start();
|
||||
DGEMM_PARTICLES(TBphh, VACph)
|
||||
REORDER(j, i, k)
|
||||
chrono["doubles:particles:5"].stop();
|
||||
// TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4
|
||||
chrono["doubles:particles:6"].start();
|
||||
DGEMM_PARTICLES(TBphh, VCAph)
|
||||
REORDER(j, k, i)
|
||||
chrono["doubles:particles:6"].stop();
|
||||
}
|
||||
chrono["doubles:particles"].stop();
|
||||
|
||||
#undef REORDER
|
||||
#undef DGEMM_HOLES
|
||||
#undef DGEMM_PARTICLES
|
||||
#undef _IJK_
|
||||
#else
|
||||
for (size_t k = 0; k < No; k++)
|
||||
for (size_t j = 0; j < No; j++)
|
||||
for (size_t i = 0; i < No; i++){
|
||||
const size_t ijk = i + j*No + k*NoNo
|
||||
, jk = j + k*No
|
||||
;
|
||||
Tijk[ijk] = 0.0; // :important
|
||||
// HOLE DIAGRAMS: TABHH and VHHHA
|
||||
for (size_t L = 0; L < No; L++){
|
||||
// t[abLj] * V[Lcik] H1
|
||||
// t[baLi] * V[Lcjk] H0 TODO: conjugate T for complex
|
||||
Tijk[ijk] -= TABhh[L + j*No] * VhhhC[i + k*No + L*NoNo];
|
||||
Tijk[ijk] -= TABhh[i + L*No] * VhhhC[j + k*No + L*NoNo];
|
||||
|
||||
// t[acLk] * V[Lbij] H5
|
||||
// t[caLi] * V[Lbkj] H3
|
||||
Tijk[ijk] -= TAChh[L + k*No] * VhhhB[i + j*No + L*NoNo];
|
||||
Tijk[ijk] -= TAChh[i + L*No] * VhhhB[k + j*No + L*NoNo];
|
||||
|
||||
// t[bcLk] * V[Laji] H2
|
||||
// t[cbLj] * V[Laki] H4
|
||||
Tijk[ijk] -= TBChh[L + k*No] * VhhhA[j + i*No + L*NoNo];
|
||||
Tijk[ijk] -= TBChh[j + L*No] * VhhhA[k + i*No + L*NoNo];
|
||||
}
|
||||
// PARTILCE DIAGRAMS: TAPHH and VABPH
|
||||
for (size_t E = 0; E < Nv; E++) {
|
||||
// t[aEij] * V[bcEk] P0
|
||||
// t[aEik] * V[cbEj] P3 // TODO: CHECK THIS ONE, I DONT KNOW
|
||||
Tijk[ijk] += TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv];
|
||||
Tijk[ijk] += TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv];
|
||||
|
||||
// t[cEki] * V[abEj] P5
|
||||
// t[cEkj] * V[baEi] P2
|
||||
Tijk[ijk] += TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv];
|
||||
Tijk[ijk] += TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv];
|
||||
|
||||
// t[bEji] * V[acEk] P1
|
||||
// t[bEjk] * V[caEi] P4
|
||||
Tijk[ijk] += TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv];
|
||||
Tijk[ijk] += TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv];
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
// Equations:1 ends here
|
||||
467
include/atrip/Slice.hpp
Normal file
467
include/atrip/Slice.hpp
Normal file
@ -0,0 +1,467 @@
|
||||
// [[file:../../atrip.org::*The slice][The slice:1]]
|
||||
#pragma once
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <mpi.h>
|
||||
|
||||
#include <atrip/Tuples.hpp>
|
||||
#include <atrip/Utils.hpp>
|
||||
|
||||
namespace atrip {
|
||||
|
||||
|
||||
struct Slice {
|
||||
|
||||
using F = double;
|
||||
// The slice:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*The slice][The slice:2]]
|
||||
// ASSOCIATED TYPES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
struct Location { size_t rank; size_t source; };
|
||||
|
||||
enum Type
|
||||
{ A = 10
|
||||
, B
|
||||
, C
|
||||
// Two-parameter slices
|
||||
, AB = 20
|
||||
, BC
|
||||
, AC
|
||||
// for abci and the doubles
|
||||
, CB
|
||||
, BA
|
||||
, CA
|
||||
// The non-typed slice
|
||||
, Blank = 404
|
||||
};
|
||||
|
||||
enum State {
|
||||
// Fetch represents the state where a slice is to be fetched
|
||||
// and has a valid data pointer that can be written to
|
||||
Fetch = 0,
|
||||
// Dispatches represents the state that an MPI call has been
|
||||
// dispatched in order to get the data, but the data has not been
|
||||
// yet unwrapped, the data might be there or we might have to wait.
|
||||
Dispatched = 2,
|
||||
// Ready means that the data pointer can be read from
|
||||
Ready = 1,
|
||||
// Self sufficient is a slice when its contents are located
|
||||
// in the same rank that it lives, so that it does not have to
|
||||
// fetch from no one else.
|
||||
SelfSufficient = 911,
|
||||
// Recycled means that this slice gets its data pointer from another
|
||||
// slice, so it should not be written to
|
||||
Recycled = 123,
|
||||
// Acceptor means that the Slice can accept a new Slice, it is
|
||||
// the counterpart of the Blank type, but for states
|
||||
Acceptor = 405
|
||||
};
|
||||
|
||||
struct Info {
|
||||
// which part of a,b,c the slice holds
|
||||
PartialTuple tuple;
|
||||
// The type of slice for the user to retrieve the correct one
|
||||
Type type;
|
||||
// What is the state of the slice
|
||||
State state;
|
||||
// Where the slice is to be retrieved
|
||||
// NOTE: this can actually be computed from tuple
|
||||
Location from;
|
||||
// If the data are actually to be found in this other slice
|
||||
Type recycling;
|
||||
|
||||
Info() : tuple{0,0}
|
||||
, type{Blank}
|
||||
, state{Acceptor}
|
||||
, from{0,0}
|
||||
, recycling{Blank}
|
||||
{}
|
||||
};
|
||||
|
||||
using Ty_x_Tu = std::pair< Type, PartialTuple >;
|
||||
|
||||
// Names of the integrals that are considered in CCSD(T)
|
||||
enum Name
|
||||
{ TA = 100
|
||||
, VIJKA = 101
|
||||
, VABCI = 200
|
||||
, TABIJ = 201
|
||||
, VABIJ = 202
|
||||
};
|
||||
|
||||
// DATABASE ==========================================================={{{1
|
||||
struct LocalDatabaseElement {
|
||||
Slice::Name name;
|
||||
Slice::Info info;
|
||||
};
|
||||
using LocalDatabase = std::vector<LocalDatabaseElement>;
|
||||
using Database = LocalDatabase;
|
||||
|
||||
|
||||
// STATIC METHODS ===========================================================
|
||||
//
|
||||
// They are useful to organize the structure of slices
|
||||
|
||||
struct mpi {
|
||||
|
||||
static MPI_Datatype vector(size_t n, MPI_Datatype const& DT) {
|
||||
MPI_Datatype dt;
|
||||
MPI_Type_vector(n, 1, 1, DT, &dt);
|
||||
MPI_Type_commit(&dt);
|
||||
return dt;
|
||||
}
|
||||
|
||||
static MPI_Datatype sliceLocation () {
|
||||
constexpr int n = 2;
|
||||
// create a sliceLocation to measure in the current architecture
|
||||
// the packing of the struct
|
||||
Slice::Location measure;
|
||||
MPI_Datatype dt;
|
||||
const std::vector<int> lengths(n, 1);
|
||||
const MPI_Datatype types[n] = {usizeDt(), usizeDt()};
|
||||
|
||||
// measure the displacements in the struct
|
||||
size_t j = 0;
|
||||
MPI_Aint displacements[n];
|
||||
MPI_Get_address(&measure.rank, &displacements[j++]);
|
||||
MPI_Get_address(&measure.source, &displacements[j++]);
|
||||
for (size_t i = 1; i < n; i++) displacements[i] -= displacements[0];
|
||||
displacements[0] = 0;
|
||||
|
||||
MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt);
|
||||
MPI_Type_commit(&dt);
|
||||
return dt;
|
||||
}
|
||||
|
||||
static MPI_Datatype enumDt() { return MPI_INT; }
|
||||
static MPI_Datatype usizeDt() { return MPI_UINT64_T; }
|
||||
|
||||
static MPI_Datatype sliceInfo () {
|
||||
constexpr int n = 5;
|
||||
MPI_Datatype dt;
|
||||
Slice::Info measure;
|
||||
const std::vector<int> lengths(n, 1);
|
||||
const MPI_Datatype types[n]
|
||||
= { vector(2, usizeDt())
|
||||
, enumDt()
|
||||
, enumDt()
|
||||
, sliceLocation()
|
||||
, enumDt()
|
||||
};
|
||||
|
||||
// create the displacements from the info measurement struct
|
||||
size_t j = 0;
|
||||
MPI_Aint displacements[n];
|
||||
MPI_Get_address(measure.tuple.data(), &displacements[j++]);
|
||||
MPI_Get_address(&measure.type, &displacements[j++]);
|
||||
MPI_Get_address(&measure.state, &displacements[j++]);
|
||||
MPI_Get_address(&measure.from, &displacements[j++]);
|
||||
MPI_Get_address(&measure.recycling, &displacements[j++]);
|
||||
for (size_t i = 1; i < n; i++) displacements[i] -= displacements[0];
|
||||
displacements[0] = 0;
|
||||
|
||||
MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt);
|
||||
MPI_Type_commit(&dt);
|
||||
return dt;
|
||||
}
|
||||
|
||||
static MPI_Datatype localDatabaseElement () {
|
||||
constexpr int n = 2;
|
||||
MPI_Datatype dt;
|
||||
LocalDatabaseElement measure;
|
||||
const std::vector<int> lengths(n, 1);
|
||||
const MPI_Datatype types[n]
|
||||
= { enumDt()
|
||||
, sliceInfo()
|
||||
};
|
||||
|
||||
// measure the displacements in the struct
|
||||
size_t j = 0;
|
||||
MPI_Aint displacements[n];
|
||||
MPI_Get_address(&measure.name, &displacements[j++]);
|
||||
MPI_Get_address(&measure.info, &displacements[j++]);
|
||||
for (size_t i = 1; i < n; i++) displacements[i] -= displacements[0];
|
||||
displacements[0] = 0;
|
||||
|
||||
MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt);
|
||||
MPI_Type_commit(&dt);
|
||||
return dt;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
static
|
||||
PartialTuple subtupleBySlice(ABCTuple abc, Type sliceType) {
|
||||
switch (sliceType) {
|
||||
case AB: return {abc[0], abc[1]};
|
||||
case BC: return {abc[1], abc[2]};
|
||||
case AC: return {abc[0], abc[2]};
|
||||
case CB: return {abc[2], abc[1]};
|
||||
case BA: return {abc[1], abc[0]};
|
||||
case CA: return {abc[2], abc[0]};
|
||||
case A: return {abc[0], 0};
|
||||
case B: return {abc[1], 0};
|
||||
case C: return {abc[2], 0};
|
||||
default: throw "Switch statement not exhaustive!";
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* It is important here to return a reference to a Slice
|
||||
* not to accidentally copy the associated buffer of the slice.
|
||||
*/
|
||||
static Slice& findOneByType(std::vector<Slice> &slices, Slice::Type type) {
|
||||
const auto sliceIt
|
||||
= std::find_if(slices.begin(), slices.end(),
|
||||
[&type](Slice const& s) {
|
||||
return type == s.info.type;
|
||||
});
|
||||
WITH_CRAZY_DEBUG
|
||||
WITH_RANK
|
||||
<< "\t__ looking for " << type << "\n";
|
||||
if (sliceIt == slices.end())
|
||||
throw std::domain_error("Slice by type not found!");
|
||||
return *sliceIt;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if an info has
|
||||
*
|
||||
*/
|
||||
static std::vector<Slice*> hasRecycledReferencingToIt
|
||||
( std::vector<Slice> &slices
|
||||
, Info const& info
|
||||
) {
|
||||
std::vector<Slice*> result;
|
||||
|
||||
for (auto& s: slices)
|
||||
if ( s.info.recycling == info.type
|
||||
&& s.info.tuple == info.tuple
|
||||
&& s.info.state == Recycled
|
||||
) result.push_back(&s);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static Slice&
|
||||
findRecycledSource (std::vector<Slice> &slices, Slice::Info info) {
|
||||
const auto sliceIt
|
||||
= std::find_if(slices.begin(), slices.end(),
|
||||
[&info](Slice const& s) {
|
||||
return info.recycling == s.info.type
|
||||
&& info.tuple == s.info.tuple
|
||||
&& State::Recycled != s.info.state
|
||||
;
|
||||
});
|
||||
|
||||
WITH_CRAZY_DEBUG
|
||||
WITH_RANK << "__slice__:find: recycling source of "
|
||||
<< pretty_print(info) << "\n";
|
||||
if (sliceIt == slices.end())
|
||||
throw std::domain_error( "Slice not found: "
|
||||
+ pretty_print(info)
|
||||
+ " rank: "
|
||||
+ pretty_print(Atrip::rank)
|
||||
);
|
||||
WITH_RANK << "__slice__:find: " << pretty_print(sliceIt->info) << "\n";
|
||||
return *sliceIt;
|
||||
}
|
||||
|
||||
static Slice& findByTypeAbc
|
||||
( std::vector<Slice> &slices
|
||||
, Slice::Type type
|
||||
, ABCTuple const& abc
|
||||
) {
|
||||
const auto tuple = Slice::subtupleBySlice(abc, type);
|
||||
const auto sliceIt
|
||||
= std::find_if(slices.begin(), slices.end(),
|
||||
[&type, &tuple](Slice const& s) {
|
||||
return type == s.info.type
|
||||
&& tuple == s.info.tuple
|
||||
;
|
||||
});
|
||||
WITH_CRAZY_DEBUG
|
||||
WITH_RANK << "__slice__:find:" << type << " and tuple "
|
||||
<< pretty_print(tuple)
|
||||
<< "\n";
|
||||
if (sliceIt == slices.end())
|
||||
throw std::domain_error( "Slice not found: "
|
||||
+ pretty_print(tuple)
|
||||
+ ", "
|
||||
+ pretty_print(type)
|
||||
+ " rank: "
|
||||
+ pretty_print(Atrip::rank)
|
||||
);
|
||||
return *sliceIt;
|
||||
}
|
||||
|
||||
static Slice& findByInfo(std::vector<Slice> &slices,
|
||||
Slice::Info const& info) {
|
||||
const auto sliceIt
|
||||
= std::find_if(slices.begin(), slices.end(),
|
||||
[&info](Slice const& s) {
|
||||
// TODO: maybe implement comparison in Info struct
|
||||
return info.type == s.info.type
|
||||
&& info.state == s.info.state
|
||||
&& info.tuple == s.info.tuple
|
||||
&& info.from.rank == s.info.from.rank
|
||||
&& info.from.source == s.info.from.source
|
||||
;
|
||||
});
|
||||
WITH_CRAZY_DEBUG
|
||||
WITH_RANK << "__slice__:find:looking for " << pretty_print(info) << "\n";
|
||||
if (sliceIt == slices.end())
|
||||
throw std::domain_error( "Slice by info not found: "
|
||||
+ pretty_print(info));
|
||||
return *sliceIt;
|
||||
}
|
||||
|
||||
// SLICE DEFINITION =================================================={{{1
|
||||
|
||||
// ATTRIBUTES ============================================================
|
||||
Info info;
|
||||
F *data;
|
||||
MPI_Request request;
|
||||
const size_t size;
|
||||
|
||||
void markReady() noexcept {
|
||||
info.state = Ready;
|
||||
info.recycling = Blank;
|
||||
}
|
||||
|
||||
/*
|
||||
* This means that the data is there
|
||||
*/
|
||||
bool isUnwrapped() const noexcept {
|
||||
return info.state == Ready
|
||||
|| info.state == SelfSufficient
|
||||
;
|
||||
}
|
||||
|
||||
bool isUnwrappable() const noexcept {
|
||||
return isUnwrapped()
|
||||
|| info.state == Recycled
|
||||
|| info.state == Dispatched
|
||||
;
|
||||
}
|
||||
|
||||
inline bool isDirectlyFetchable() const noexcept {
|
||||
return info.state == Ready || info.state == Dispatched;
|
||||
}
|
||||
|
||||
void free() noexcept {
|
||||
info.tuple = {0, 0};
|
||||
info.type = Blank;
|
||||
info.state = Acceptor;
|
||||
info.from = {0, 0};
|
||||
info.recycling = Blank;
|
||||
data = nullptr;
|
||||
}
|
||||
|
||||
inline bool isFree() const noexcept {
|
||||
return info.tuple == PartialTuple{0, 0}
|
||||
&& info.type == Blank
|
||||
&& info.state == Acceptor
|
||||
&& info.from.rank == 0
|
||||
&& info.from.source == 0
|
||||
&& info.recycling == Blank
|
||||
&& data == nullptr
|
||||
;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* This function answers the question, which slices can be recycled.
|
||||
*
|
||||
* A slice can only be recycled if it is Fetch or Ready and has
|
||||
* a valid datapointer.
|
||||
*
|
||||
* In particular, SelfSufficient are not recyclable, since it is easier
|
||||
* just to create a SelfSufficient slice than deal with data dependencies.
|
||||
*
|
||||
* Furthermore, a recycled slice is not recyclable, if this is the case
|
||||
* then it is either bad design or a bug.
|
||||
*/
|
||||
inline bool isRecyclable() const noexcept {
|
||||
return ( info.state == Dispatched
|
||||
|| info.state == Ready
|
||||
|| info.state == Fetch
|
||||
)
|
||||
&& hasValidDataPointer()
|
||||
;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function describes if a slice has a valid data pointer.
|
||||
*
|
||||
* This is important to know if the slice has some data to it, also
|
||||
* some structural checks are done, so that it should not be Acceptor
|
||||
* or Blank, if this is the case then it is a bug.
|
||||
*/
|
||||
inline bool hasValidDataPointer() const noexcept {
|
||||
return data != nullptr
|
||||
&& info.state != Acceptor
|
||||
&& info.type != Blank
|
||||
;
|
||||
}
|
||||
|
||||
void unwrapAndMarkReady() {
|
||||
if (info.state == Ready) return;
|
||||
if (info.state != Dispatched)
|
||||
throw
|
||||
std::domain_error("Can't unwrap a non-ready, non-dispatched slice!");
|
||||
markReady();
|
||||
MPI_Status status;
|
||||
#ifdef HAVE_OCD
|
||||
WITH_RANK << "__slice__:mpi: waiting " << "\n";
|
||||
#endif
|
||||
const int errorCode = MPI_Wait(&request, &status);
|
||||
if (errorCode != MPI_SUCCESS)
|
||||
throw "MPI ERROR HAPPENED....";
|
||||
|
||||
#ifdef HAVE_OCD
|
||||
char errorString[MPI_MAX_ERROR_STRING];
|
||||
int errorSize;
|
||||
MPI_Error_string(errorCode, errorString, &errorSize);
|
||||
|
||||
WITH_RANK << "__slice__:mpi: status "
|
||||
<< "{ .source=" << status.MPI_SOURCE
|
||||
<< ", .tag=" << status.MPI_TAG
|
||||
<< ", .error=" << status.MPI_ERROR
|
||||
<< ", .errCode=" << errorCode
|
||||
<< ", .err=" << errorString
|
||||
<< " }"
|
||||
<< "\n";
|
||||
#endif
|
||||
}
|
||||
|
||||
Slice(size_t size_)
|
||||
: info({})
|
||||
, data(nullptr)
|
||||
, size(size_)
|
||||
{}
|
||||
|
||||
|
||||
}; // struct Slice
|
||||
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, Slice::Location const& v) {
|
||||
// TODO: remove me
|
||||
out << "{.r(" << v.rank << "), .s(" << v.source << ")};";
|
||||
return out;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, Slice::Info const& i) {
|
||||
out << "«t" << i.type << ", s" << i.state << "»"
|
||||
<< " ⊙ {" << i.from.rank << ", " << i.from.source << "}"
|
||||
<< " ∴ {" << i.tuple[0] << ", " << i.tuple[1] << "}"
|
||||
<< " ♲t" << i.recycling
|
||||
;
|
||||
return out;
|
||||
}
|
||||
|
||||
} // namespace atrip
|
||||
// The slice:2 ends here
|
||||
543
include/atrip/SliceUnion.hpp
Normal file
543
include/atrip/SliceUnion.hpp
Normal file
@ -0,0 +1,543 @@
|
||||
// [[file:../../atrip.org::*The slice union][The slice union:1]]
|
||||
#pragma once
|
||||
#include <atrip/Debug.hpp>
|
||||
#include <atrip/Slice.hpp>
|
||||
|
||||
namespace atrip {
|
||||
|
||||
struct SliceUnion {
|
||||
using F = double;
|
||||
using Tensor = CTF::Tensor<F>;
|
||||
|
||||
virtual void
|
||||
sliceIntoBuffer(size_t iteration, Tensor &to, Tensor const& from) = 0;
|
||||
|
||||
/*
|
||||
* This function should enforce an important property of a SliceUnion.
|
||||
* Namely, there can be no two Slices of the same nature.
|
||||
*
|
||||
* This means that there can be at most one slice with a given Ty_x_Tu.
|
||||
*/
|
||||
void checkForDuplicates() const {
|
||||
std::vector<Slice::Ty_x_Tu> tytus;
|
||||
for (auto const& s: slices) {
|
||||
if (s.isFree()) continue;
|
||||
tytus.push_back({s.info.type, s.info.tuple});
|
||||
}
|
||||
|
||||
for (auto const& tytu: tytus) {
|
||||
if (std::count(tytus.begin(), tytus.end(), tytu) > 1)
|
||||
throw "Invariance violated, more than one slice with same Ty_x_Tu";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::vector<Slice::Ty_x_Tu> neededSlices(ABCTuple const& abc) {
|
||||
std::vector<Slice::Ty_x_Tu> needed(sliceTypes.size());
|
||||
// build the needed vector
|
||||
std::transform(sliceTypes.begin(), sliceTypes.end(),
|
||||
needed.begin(),
|
||||
[&abc](Slice::Type const type) {
|
||||
auto tuple = Slice::subtupleBySlice(abc, type);
|
||||
return std::make_pair(type, tuple);
|
||||
});
|
||||
return needed;
|
||||
}
|
||||
|
||||
/* buildLocalDatabase
|
||||
*
|
||||
* It should build a database of slices so that we know what is needed
|
||||
* to fetch in the next iteration represented by the tuple 'abc'.
|
||||
*
|
||||
* 1. The algorithm works as follows, we build a database of the all
|
||||
* the slice types that we need together with their tuple.
|
||||
*
|
||||
* 2. Look in the SliceUnion if we already have this tuple,
|
||||
* if we already have it mark it (TODO)
|
||||
*
|
||||
* 3. If we don't have the tuple, look for a (state=acceptor, type=blank)
|
||||
* slice and mark this slice as type=Fetch with the corresponding type
|
||||
* and tuple.
|
||||
*
|
||||
* NOTE: The algorithm should certify that we always have enough blank
|
||||
* slices.
|
||||
*
|
||||
*/
|
||||
Slice::LocalDatabase buildLocalDatabase(ABCTuple const& abc) {
|
||||
Slice::LocalDatabase result;
|
||||
|
||||
auto const needed = neededSlices(abc);
|
||||
|
||||
WITH_RANK << "__db__:needed:" << pretty_print(needed) << "\n";
|
||||
// BUILD THE DATABASE
|
||||
// we need to loop over all sliceTypes that this TensorUnion
|
||||
// is representing and find out how we will get the corresponding
|
||||
// slice for the abc we are considering right now.
|
||||
for (auto const& pair: needed) {
|
||||
auto const type = pair.first;
|
||||
auto const tuple = pair.second;
|
||||
auto const from = rankMap.find(abc, type);
|
||||
|
||||
#ifdef HAVE_OCD
|
||||
WITH_RANK << "__db__:want:" << pretty_print(pair) << "\n";
|
||||
for (auto const& s: slices)
|
||||
WITH_RANK << "__db__:guts:ocd "
|
||||
<< s.info << " pt " << s.data
|
||||
<< "\n";
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_OCD
|
||||
WITH_RANK << "__db__: checking if exact match" << "\n";
|
||||
#endif
|
||||
{
|
||||
// FIRST: look up if there is already a *Ready* slice matching what we
|
||||
// need
|
||||
auto const& it
|
||||
= std::find_if(slices.begin(), slices.end(),
|
||||
[&tuple, &type](Slice const& other) {
|
||||
return other.info.tuple == tuple
|
||||
&& other.info.type == type
|
||||
// we only want another slice when it
|
||||
// has already ready-to-use data
|
||||
&& other.isUnwrappable()
|
||||
;
|
||||
});
|
||||
if (it != slices.end()) {
|
||||
// if we find this slice, it means that we don't have to do anything
|
||||
WITH_RANK << "__db__: EXACT: found EXACT in name=" << name
|
||||
<< " for tuple " << tuple[0] << ", " << tuple[1]
|
||||
<< " ptr " << it->data
|
||||
<< "\n";
|
||||
result.push_back({name, it->info});
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef HAVE_OCD
|
||||
WITH_RANK << "__db__: checking if recycle" << "\n";
|
||||
#endif
|
||||
// Try to find a recyling possibility ie. find a slice with the same
|
||||
// tuple and that has a valid data pointer.
|
||||
auto const& recycleIt
|
||||
= std::find_if(slices.begin(), slices.end(),
|
||||
[&tuple, &type](Slice const& other) {
|
||||
return other.info.tuple == tuple
|
||||
&& other.info.type != type
|
||||
&& other.isRecyclable()
|
||||
;
|
||||
});
|
||||
|
||||
// if we find this recylce, then we find a Blank slice
|
||||
// (which should exist by construction :THINK)
|
||||
//
|
||||
if (recycleIt != slices.end()) {
|
||||
auto& blank = Slice::findOneByType(slices, Slice::Blank);
|
||||
// TODO: formalize this through a method to copy information
|
||||
// from another slice
|
||||
blank.data = recycleIt->data;
|
||||
blank.info.type = type;
|
||||
blank.info.tuple = tuple;
|
||||
blank.info.state = Slice::Recycled;
|
||||
blank.info.from = from;
|
||||
blank.info.recycling = recycleIt->info.type;
|
||||
result.push_back({name, blank.info});
|
||||
WITH_RANK << "__db__: RECYCLING: n" << name
|
||||
<< " " << pretty_print(abc)
|
||||
<< " get " << pretty_print(blank.info)
|
||||
<< " from " << pretty_print(recycleIt->info)
|
||||
<< " ptr " << recycleIt->data
|
||||
<< "\n"
|
||||
;
|
||||
continue;
|
||||
}
|
||||
|
||||
// in this case we have to create a new slice
|
||||
// this means that we should have a blank slice at our disposal
|
||||
// and also the freePointers should have some elements inside,
|
||||
// so we pop a data pointer from the freePointers container
|
||||
#ifdef HAVE_OCD
|
||||
WITH_RANK << "__db__: none work, doing new" << "\n";
|
||||
#endif
|
||||
{
|
||||
WITH_RANK << "__db__: NEW: finding blank in " << name
|
||||
<< " for type " << type
|
||||
<< " for tuple " << tuple[0] << ", " << tuple[1]
|
||||
<< "\n"
|
||||
;
|
||||
auto& blank = Slice::findOneByType(slices, Slice::Blank);
|
||||
blank.info.type = type;
|
||||
blank.info.tuple = tuple;
|
||||
blank.info.from = from;
|
||||
|
||||
// Handle self sufficiency
|
||||
blank.info.state = Atrip::rank == from.rank
|
||||
? Slice::SelfSufficient
|
||||
: Slice::Fetch
|
||||
;
|
||||
if (blank.info.state == Slice::SelfSufficient) {
|
||||
blank.data = sources[from.source].data();
|
||||
} else {
|
||||
if (freePointers.size() == 0)
|
||||
throw std::domain_error("No more free pointers!");
|
||||
auto dataPointer = freePointers.begin();
|
||||
freePointers.erase(dataPointer);
|
||||
blank.data = *dataPointer;
|
||||
}
|
||||
|
||||
result.push_back({name, blank.info});
|
||||
continue;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#ifdef HAVE_OCD
|
||||
for (auto const& s: slices)
|
||||
WITH_RANK << "__db__:guts:ocd:__end__ " << s.info << "\n";
|
||||
#endif
|
||||
|
||||
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Garbage collect slices not needed for the next iteration.
|
||||
*
|
||||
* It will throw if it tries to gc a slice that has not been
|
||||
* previously unwrapped, as a safety mechanism.
|
||||
*/
|
||||
void clearUnusedSlicesForNext(ABCTuple const& abc) {
|
||||
auto const needed = neededSlices(abc);
|
||||
|
||||
// CLEAN UP SLICES, FREE THE ONES THAT ARE NOT NEEDED ANYMORE
|
||||
for (auto& slice: slices) {
|
||||
// if the slice is free, then it was not used anyways
|
||||
if (slice.isFree()) continue;
|
||||
|
||||
|
||||
// try to find the slice in the needed slices list
|
||||
auto const found
|
||||
= std::find_if(needed.begin(), needed.end(),
|
||||
[&slice] (Slice::Ty_x_Tu const& tytu) {
|
||||
return slice.info.tuple == tytu.second
|
||||
&& slice.info.type == tytu.first
|
||||
;
|
||||
});
|
||||
|
||||
// if we did not find slice in needed, then erase it
|
||||
if (found == needed.end()) {
|
||||
|
||||
// We have to be careful about the data pointer,
|
||||
// for SelfSufficient, the data pointer is a source pointer
|
||||
// of the slice, so we should just wipe it.
|
||||
//
|
||||
// For Ready slices, we have to be careful if there are some
|
||||
// recycled slices depending on it.
|
||||
bool freeSlicePointer = true;
|
||||
|
||||
// allow to gc unwrapped and recycled, never Fetch,
|
||||
// if we have a Fetch slice then something has gone very wrong.
|
||||
if (!slice.isUnwrapped() && slice.info.state != Slice::Recycled)
|
||||
throw
|
||||
std::domain_error("Trying to garbage collect "
|
||||
" a non-unwrapped slice! "
|
||||
+ pretty_print(&slice)
|
||||
+ pretty_print(slice.info));
|
||||
|
||||
// it can be that our slice is ready, but it has some hanging
|
||||
// references lying around in the form of a recycled slice.
|
||||
// Of course if we need the recycled slice the next iteration
|
||||
// this would be fatal, because we would then free the pointer
|
||||
// of the slice and at some point in the future we would
|
||||
// overwrite it. Therefore, we must check if slice has some
|
||||
// references in slices and if so then
|
||||
//
|
||||
// - we should mark those references as the original (since the data
|
||||
// pointer should be the same)
|
||||
//
|
||||
// - we should make sure that the data pointer of slice
|
||||
// does not get freed.
|
||||
//
|
||||
if (slice.info.state == Slice::Ready) {
|
||||
WITH_OCD WITH_RANK
|
||||
<< "__gc__:" << "checking for data recycled dependencies\n";
|
||||
auto recycled
|
||||
= Slice::hasRecycledReferencingToIt(slices, slice.info);
|
||||
if (recycled.size()) {
|
||||
Slice* newReady = recycled[0];
|
||||
WITH_OCD WITH_RANK
|
||||
<< "__gc__:" << "swaping recycled "
|
||||
<< pretty_print(newReady->info)
|
||||
<< " and "
|
||||
<< pretty_print(slice.info)
|
||||
<< "\n";
|
||||
newReady->markReady();
|
||||
assert(newReady->data == slice.data);
|
||||
freeSlicePointer = false;
|
||||
|
||||
for (size_t i = 1; i < recycled.size(); i++) {
|
||||
auto newRecyled = recycled[i];
|
||||
newRecyled->info.recycling = newReady->info.type;
|
||||
WITH_OCD WITH_RANK
|
||||
<< "__gc__:" << "updating recycled "
|
||||
<< pretty_print(newRecyled->info)
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// if the slice is self sufficient, do not dare touching the
|
||||
// pointer, since it is a pointer to our sources in our rank.
|
||||
if ( slice.info.state == Slice::SelfSufficient
|
||||
|| slice.info.state == Slice::Recycled
|
||||
) {
|
||||
freeSlicePointer = false;
|
||||
}
|
||||
|
||||
// make sure we get its data pointer to be used later
|
||||
// only for non-recycled, since it can be that we need
|
||||
// for next iteration the data of the slice that the recycled points
|
||||
// to
|
||||
if (freeSlicePointer) {
|
||||
freePointers.insert(slice.data);
|
||||
WITH_RANK << "~~~:cl(" << name << ")"
|
||||
<< " added to freePointer "
|
||||
<< pretty_print(freePointers)
|
||||
<< "\n";
|
||||
} else {
|
||||
WITH_OCD WITH_RANK << "__gc__:not touching the free Pointer\n";
|
||||
}
|
||||
|
||||
// at this point, let us blank the slice
|
||||
WITH_RANK << "~~~:cl(" << name << ")"
|
||||
<< " freeing up slice "
|
||||
<< " info " << slice.info
|
||||
<< "\n";
|
||||
slice.free();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// CONSTRUCTOR
|
||||
SliceUnion( Tensor const& sourceTensor
|
||||
, std::vector<Slice::Type> sliceTypes_
|
||||
, std::vector<size_t> sliceLength_
|
||||
, std::vector<size_t> paramLength
|
||||
, size_t np
|
||||
, MPI_Comm child_world
|
||||
, MPI_Comm global_world
|
||||
, Slice::Name name_
|
||||
, size_t nSliceBuffers = 4
|
||||
)
|
||||
: rankMap(paramLength, np)
|
||||
, world(child_world)
|
||||
, universe(global_world)
|
||||
, sliceLength(sliceLength_)
|
||||
, sources(rankMap.nSources(),
|
||||
std::vector<F>
|
||||
(std::accumulate(sliceLength.begin(),
|
||||
sliceLength.end(),
|
||||
1, std::multiplies<size_t>())))
|
||||
, name(name_)
|
||||
, sliceTypes(sliceTypes_)
|
||||
, sliceBuffers(nSliceBuffers, sources[0])
|
||||
//, slices(2 * sliceTypes.size(), Slice{ sources[0].size() })
|
||||
{ // constructor begin
|
||||
|
||||
LOG(0,"Atrip") << "INIT SliceUnion: " << name << "\n";
|
||||
|
||||
slices
|
||||
= std::vector<Slice>(2 * sliceTypes.size(), { sources[0].size() });
|
||||
// TODO: think exactly ^------------------- about this number
|
||||
|
||||
// initialize the freePointers with the pointers to the buffers
|
||||
std::transform(sliceBuffers.begin(), sliceBuffers.end(),
|
||||
std::inserter(freePointers, freePointers.begin()),
|
||||
[](std::vector<F> &vec) { return vec.data(); });
|
||||
|
||||
|
||||
|
||||
LOG(1,"Atrip") << "rankMap.nSources "
|
||||
<< rankMap.nSources() << "\n";
|
||||
LOG(1,"Atrip") << "#slices "
|
||||
<< slices.size() << "\n";
|
||||
LOG(1,"Atrip") << "#slices[0] "
|
||||
<< slices[0].size << "\n";
|
||||
LOG(1,"Atrip") << "#sources "
|
||||
<< sources.size() << "\n";
|
||||
LOG(1,"Atrip") << "#sources[0] "
|
||||
<< sources[0].size() << "\n";
|
||||
LOG(1,"Atrip") << "#freePointers "
|
||||
<< freePointers.size() << "\n";
|
||||
LOG(1,"Atrip") << "#sliceBuffers "
|
||||
<< sliceBuffers.size() << "\n";
|
||||
LOG(1,"Atrip") << "#sliceBuffers[0] "
|
||||
<< sliceBuffers[0].size() << "\n";
|
||||
LOG(1,"Atrip") << "#sliceLength "
|
||||
<< sliceLength.size() << "\n";
|
||||
LOG(1,"Atrip") << "#paramLength "
|
||||
<< paramLength.size() << "\n";
|
||||
LOG(1,"Atrip") << "GB*" << np << " "
|
||||
<< double(sources.size() + sliceBuffers.size())
|
||||
* sources[0].size()
|
||||
* 8 * np
|
||||
/ 1073741824.0
|
||||
<< "\n";
|
||||
} // constructor ends
|
||||
|
||||
void init(Tensor const& sourceTensor) {
|
||||
|
||||
CTF::World w(world);
|
||||
const int rank = Atrip::rank
|
||||
, order = sliceLength.size()
|
||||
;
|
||||
std::vector<int> const syms(order, NS);
|
||||
std::vector<int> __sliceLength(sliceLength.begin(), sliceLength.end());
|
||||
Tensor toSliceInto(order,
|
||||
__sliceLength.data(),
|
||||
syms.data(),
|
||||
w);
|
||||
LOG(1,"Atrip") << "slicing... \n";
|
||||
|
||||
// setUp sources
|
||||
for (size_t it(0); it < rankMap.nSources(); ++it) {
|
||||
const size_t
|
||||
source = rankMap.isSourcePadding(rank, source) ? 0 : it;
|
||||
WITH_OCD
|
||||
WITH_RANK
|
||||
<< "Init:toSliceInto it-" << it
|
||||
<< " :: source " << source << "\n";
|
||||
sliceIntoBuffer(source, toSliceInto, sourceTensor);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Send asynchronously only if the state is Fetch
|
||||
*/
|
||||
void send( size_t otherRank
|
||||
, Slice::Info const& info
|
||||
, size_t tag) const noexcept {
|
||||
MPI_Request request;
|
||||
bool sendData_p = false;
|
||||
|
||||
if (info.state == Slice::Fetch) sendData_p = true;
|
||||
// TODO: remove this because I have SelfSufficient
|
||||
if (otherRank == info.from.rank) sendData_p = false;
|
||||
if (!sendData_p) return;
|
||||
|
||||
MPI_Isend( sources[info.from.source].data()
|
||||
, sources[info.from.source].size()
|
||||
, MPI_DOUBLE /* TODO: adapt this with traits */
|
||||
, otherRank
|
||||
, tag
|
||||
, universe
|
||||
, &request
|
||||
);
|
||||
WITH_CRAZY_DEBUG
|
||||
WITH_RANK << "sent to " << otherRank << "\n";
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Receive asynchronously only if the state is Fetch
|
||||
*/
|
||||
void receive(Slice::Info const& info, size_t tag) noexcept {
|
||||
auto& slice = Slice::findByInfo(slices, info);
|
||||
|
||||
if (Atrip::rank == info.from.rank) return;
|
||||
|
||||
if (slice.info.state == Slice::Fetch) {
|
||||
// TODO: do it through the slice class
|
||||
slice.info.state = Slice::Dispatched;
|
||||
MPI_Request request = nullptr;
|
||||
slice.request = request;
|
||||
MPI_Irecv( slice.data
|
||||
, slice.size
|
||||
, MPI_DOUBLE // TODO: Adapt this with traits
|
||||
, info.from.rank
|
||||
, tag
|
||||
, universe
|
||||
, &slice.request
|
||||
//, MPI_STATUS_IGNORE
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
void unwrapAll(ABCTuple const& abc) {
|
||||
for (auto type: sliceTypes) unwrapSlice(type, abc);
|
||||
}
|
||||
|
||||
F* unwrapSlice(Slice::Type type, ABCTuple const& abc) {
|
||||
WITH_CRAZY_DEBUG
|
||||
WITH_RANK << "__unwrap__:slice " << type << " w n "
|
||||
<< name
|
||||
<< " abc" << pretty_print(abc)
|
||||
<< "\n";
|
||||
auto& slice = Slice::findByTypeAbc(slices, type, abc);
|
||||
WITH_RANK << "__unwrap__:info " << slice.info << "\n";
|
||||
switch (slice.info.state) {
|
||||
case Slice::Dispatched:
|
||||
WITH_RANK << "__unwrap__:Fetch: " << &slice
|
||||
<< " info " << pretty_print(slice.info)
|
||||
<< "\n";
|
||||
slice.unwrapAndMarkReady();
|
||||
return slice.data;
|
||||
break;
|
||||
case Slice::SelfSufficient:
|
||||
WITH_RANK << "__unwrap__:SelfSufficient: " << &slice
|
||||
<< " info " << pretty_print(slice.info)
|
||||
<< "\n";
|
||||
return slice.data;
|
||||
break;
|
||||
case Slice::Ready:
|
||||
WITH_RANK << "__unwrap__:READY: UNWRAPPED ALREADY" << &slice
|
||||
<< " info " << pretty_print(slice.info)
|
||||
<< "\n";
|
||||
return slice.data;
|
||||
break;
|
||||
case Slice::Recycled:
|
||||
WITH_RANK << "__unwrap__:RECYCLED " << &slice
|
||||
<< " info " << pretty_print(slice.info)
|
||||
<< "\n";
|
||||
return unwrapSlice(slice.info.recycling, abc);
|
||||
break;
|
||||
case Slice::Fetch:
|
||||
case Slice::Acceptor:
|
||||
throw std::domain_error("Can't unwrap an acceptor or fetch slice!");
|
||||
break;
|
||||
default:
|
||||
throw std::domain_error("Unknown error unwrapping slice!");
|
||||
}
|
||||
return slice.data;
|
||||
}
|
||||
|
||||
const RankMap rankMap;
|
||||
const MPI_Comm world;
|
||||
const MPI_Comm universe;
|
||||
const std::vector<size_t> sliceLength;
|
||||
std::vector< std::vector<F> > sources;
|
||||
std::vector< Slice > slices;
|
||||
Slice::Name name;
|
||||
const std::vector<Slice::Type> sliceTypes;
|
||||
std::vector< std::vector<F> > sliceBuffers;
|
||||
std::set<F*> freePointers;
|
||||
|
||||
};
|
||||
|
||||
SliceUnion&
|
||||
unionByName(std::vector<SliceUnion*> const& unions, Slice::Name name) {
|
||||
const auto sliceUnionIt
|
||||
= std::find_if(unions.begin(), unions.end(),
|
||||
[&name](SliceUnion const* s) {
|
||||
return name == s->name;
|
||||
});
|
||||
if (sliceUnionIt == unions.end())
|
||||
throw std::domain_error("SliceUnion not found!");
|
||||
return **sliceUnionIt;
|
||||
}
|
||||
|
||||
}
|
||||
// The slice union:1 ends here
|
||||
73
include/atrip/Tuples.hpp
Normal file
73
include/atrip/Tuples.hpp
Normal file
@ -0,0 +1,73 @@
|
||||
// [[file:../../atrip.org::*Tuples][Tuples:1]]
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <array>
|
||||
|
||||
#include <atrip/Utils.hpp>
|
||||
#include <atrip/Debug.hpp>
|
||||
|
||||
namespace atrip {
|
||||
|
||||
using ABCTuple = std::array<size_t, 3>;
|
||||
using PartialTuple = std::array<size_t, 2>;
|
||||
using ABCTuples = std::vector<ABCTuple>;
|
||||
|
||||
ABCTuples getTuplesList(size_t Nv) {
|
||||
const size_t n = Nv * (Nv + 1) * (Nv + 2) / 6 - Nv;
|
||||
ABCTuples result(n);
|
||||
size_t u(0);
|
||||
|
||||
for (size_t a(0); a < Nv; a++)
|
||||
for (size_t b(a); b < Nv; b++)
|
||||
for (size_t c(b); c < Nv; c++){
|
||||
if ( a == b && b == c ) continue;
|
||||
result[u++] = {a, b, c};
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
|
||||
std::pair<size_t, size_t>
|
||||
getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) {
|
||||
|
||||
std::vector<size_t> n_tuples_per_rank(np, tuplesList.size()/np);
|
||||
const size_t
|
||||
// how many valid tuples should we still verteilen to nodes
|
||||
// since the number of tuples is not divisible by the number of nodes
|
||||
nRoundRobin = tuplesList.size() % np
|
||||
// every node must have the sanme amount of tuples in order for the
|
||||
// other nodes to receive and send somewhere, therefore
|
||||
// some nodes will get extra tuples but that are dummy tuples
|
||||
, nExtraInvalid = (np - nRoundRobin) % np
|
||||
;
|
||||
|
||||
if (nRoundRobin) for (int i = 0; i < np; i++) n_tuples_per_rank[i]++;
|
||||
|
||||
#if defined(TODO)
|
||||
assert( tuplesList.size()
|
||||
==
|
||||
( std::accumulate(n_tuples_per_rank.begin(),
|
||||
n_tuples_per_rank.end(),
|
||||
0)
|
||||
+ nExtraInvalid
|
||||
));
|
||||
#endif
|
||||
|
||||
WITH_RANK << "nRoundRobin = " << nRoundRobin << "\n";
|
||||
WITH_RANK << "nExtraInvalid = " << nExtraInvalid << "\n";
|
||||
WITH_RANK << "ntuples = " << n_tuples_per_rank[rank] << "\n";
|
||||
|
||||
auto const& it = n_tuples_per_rank.begin();
|
||||
|
||||
return
|
||||
{ std::accumulate(it, it + rank , 0)
|
||||
, std::accumulate(it, it + rank + 1, 0)
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
// Tuples:1 ends here
|
||||
240
include/atrip/Unions.hpp
Normal file
240
include/atrip/Unions.hpp
Normal file
@ -0,0 +1,240 @@
|
||||
// [[file:../../atrip.org::*Unions][Unions:1]]
|
||||
#pragma once
|
||||
#include <atrip/SliceUnion.hpp>
|
||||
|
||||
namespace atrip {
|
||||
|
||||
void sliceIntoVector
|
||||
( std::vector<double> &v
|
||||
, CTF::Tensor<double> &toSlice
|
||||
, std::vector<int64_t> const low
|
||||
, std::vector<int64_t> const up
|
||||
, CTF::Tensor<double> const& origin
|
||||
, std::vector<int64_t> const originLow
|
||||
, std::vector<int64_t> const originUp
|
||||
) {
|
||||
// Thank you CTF for forcing me to do this
|
||||
struct { std::vector<int> up, low; }
|
||||
toSlice_ = { {up.begin(), up.end()}
|
||||
, {low.begin(), low.end()} }
|
||||
, origin_ = { {originUp.begin(), originUp.end()}
|
||||
, {originLow.begin(), originLow.end()} }
|
||||
;
|
||||
|
||||
WITH_OCD
|
||||
WITH_RANK << "slicing into " << pretty_print(toSlice_.up)
|
||||
<< "," << pretty_print(toSlice_.low)
|
||||
<< " from " << pretty_print(origin_.up)
|
||||
<< "," << pretty_print(origin_.low)
|
||||
<< "\n";
|
||||
|
||||
#ifndef ATRIP_DONT_SLICE
|
||||
toSlice.slice( toSlice_.low.data()
|
||||
, toSlice_.up.data()
|
||||
, 0.0
|
||||
, origin
|
||||
, origin_.low.data()
|
||||
, origin_.up.data()
|
||||
, 1.0);
|
||||
memcpy(v.data(), toSlice.data, sizeof(double) * v.size());
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
struct TAPHH : public SliceUnion {
|
||||
TAPHH( Tensor const& sourceTensor
|
||||
, size_t No
|
||||
, size_t Nv
|
||||
, size_t np
|
||||
, MPI_Comm child_world
|
||||
, MPI_Comm global_world
|
||||
) : SliceUnion( sourceTensor
|
||||
, {Slice::A, Slice::B, Slice::C}
|
||||
, {Nv, No, No} // size of the slices
|
||||
, {Nv}
|
||||
, np
|
||||
, child_world
|
||||
, global_world
|
||||
, Slice::TA
|
||||
, 4) {
|
||||
init(sourceTensor);
|
||||
}
|
||||
|
||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override
|
||||
{
|
||||
const int rank = Atrip::rank
|
||||
, Nv = sliceLength[0]
|
||||
, No = sliceLength[1]
|
||||
, a = rankMap.find({static_cast<size_t>(rank), it});
|
||||
;
|
||||
|
||||
|
||||
sliceIntoVector( sources[it]
|
||||
, to, {0, 0, 0}, {Nv, No, No}
|
||||
, from, {a, 0, 0, 0}, {a+1, Nv, No, No}
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
struct HHHA : public SliceUnion {
|
||||
HHHA( Tensor const& sourceTensor
|
||||
, size_t No
|
||||
, size_t Nv
|
||||
, size_t np
|
||||
, MPI_Comm child_world
|
||||
, MPI_Comm global_world
|
||||
) : SliceUnion( sourceTensor
|
||||
, {Slice::A, Slice::B, Slice::C}
|
||||
, {No, No, No} // size of the slices
|
||||
, {Nv} // size of the parametrization
|
||||
, np
|
||||
, child_world
|
||||
, global_world
|
||||
, Slice::VIJKA
|
||||
, 4) {
|
||||
init(sourceTensor);
|
||||
}
|
||||
|
||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override
|
||||
{
|
||||
|
||||
const int rank = Atrip::rank
|
||||
, No = sliceLength[0]
|
||||
, a = rankMap.find({static_cast<size_t>(rank), it})
|
||||
;
|
||||
|
||||
sliceIntoVector( sources[it]
|
||||
, to, {0, 0, 0}, {No, No, No}
|
||||
, from, {0, 0, 0, a}, {No, No, No, a+1}
|
||||
);
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
struct ABPH : public SliceUnion {
|
||||
ABPH( Tensor const& sourceTensor
|
||||
, size_t No
|
||||
, size_t Nv
|
||||
, size_t np
|
||||
, MPI_Comm child_world
|
||||
, MPI_Comm global_world
|
||||
) : SliceUnion( sourceTensor
|
||||
, { Slice::AB, Slice::BC, Slice::AC
|
||||
, Slice::BA, Slice::CB, Slice::CA
|
||||
}
|
||||
, {Nv, No} // size of the slices
|
||||
, {Nv, Nv} // size of the parametrization
|
||||
, np
|
||||
, child_world
|
||||
, global_world
|
||||
, Slice::VABCI
|
||||
, 2*6) {
|
||||
init(sourceTensor);
|
||||
}
|
||||
|
||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override {
|
||||
|
||||
const int Nv = sliceLength[0]
|
||||
, No = sliceLength[1]
|
||||
, rank = Atrip::rank
|
||||
, el = rankMap.find({static_cast<size_t>(rank), it})
|
||||
, a = el % Nv
|
||||
, b = el / Nv
|
||||
;
|
||||
|
||||
|
||||
sliceIntoVector( sources[it]
|
||||
, to, {0, 0}, {Nv, No}
|
||||
, from, {a, b, 0, 0}, {a+1, b+1, Nv, No}
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
struct ABHH : public SliceUnion {
|
||||
ABHH( Tensor const& sourceTensor
|
||||
, size_t No
|
||||
, size_t Nv
|
||||
, size_t np
|
||||
, MPI_Comm child_world
|
||||
, MPI_Comm global_world
|
||||
) : SliceUnion( sourceTensor
|
||||
, {Slice::AB, Slice::BC, Slice::AC}
|
||||
, {No, No} // size of the slices
|
||||
, {Nv, Nv} // size of the parametrization
|
||||
, np
|
||||
, child_world
|
||||
, global_world
|
||||
, Slice::VABIJ
|
||||
, 6) {
|
||||
init(sourceTensor);
|
||||
}
|
||||
|
||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override {
|
||||
|
||||
const int Nv = from.lens[0]
|
||||
, No = sliceLength[1]
|
||||
, rank = Atrip::rank
|
||||
, el = rankMap.find({static_cast<size_t>(rank), it})
|
||||
, a = el % Nv
|
||||
, b = el / Nv
|
||||
;
|
||||
|
||||
sliceIntoVector( sources[it]
|
||||
, to, {0, 0}, {No, No}
|
||||
, from, {a, b, 0, 0}, {a+1, b+1, No, No}
|
||||
);
|
||||
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
struct TABHH : public SliceUnion {
|
||||
TABHH( Tensor const& sourceTensor
|
||||
, size_t No
|
||||
, size_t Nv
|
||||
, size_t np
|
||||
, MPI_Comm child_world
|
||||
, MPI_Comm global_world
|
||||
) : SliceUnion( sourceTensor
|
||||
, {Slice::AB, Slice::BC, Slice::AC}
|
||||
, {No, No} // size of the slices
|
||||
, {Nv, Nv} // size of the parametrization
|
||||
, np
|
||||
, child_world
|
||||
, global_world
|
||||
, Slice::TABIJ
|
||||
, 6) {
|
||||
init(sourceTensor);
|
||||
}
|
||||
|
||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override {
|
||||
// TODO: maybe generalize this with ABHH
|
||||
|
||||
const int Nv = from.lens[0]
|
||||
, No = sliceLength[1]
|
||||
, rank = Atrip::rank
|
||||
, el = rankMap.find({static_cast<size_t>(rank), it})
|
||||
, a = el % Nv
|
||||
, b = el / Nv
|
||||
;
|
||||
|
||||
sliceIntoVector( sources[it]
|
||||
, to, {0, 0}, {No, No}
|
||||
, from, {a, b, 0, 0}, {a+1, b+1, No, No}
|
||||
);
|
||||
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
// Unions:1 ends here
|
||||
99
include/atrip/Utils.hpp
Normal file
99
include/atrip/Utils.hpp
Normal file
@ -0,0 +1,99 @@
|
||||
// [[file:../../atrip.org::*Utils][Utils:1]]
|
||||
#pragma once
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <chrono>
|
||||
|
||||
#include <ctf.hpp>
|
||||
|
||||
namespace atrip {
|
||||
|
||||
|
||||
template <typename T>
|
||||
std::string pretty_print(T&& value) {
|
||||
std::stringstream stream;
|
||||
#if ATRIP_DEBUG > 1
|
||||
dbg::pretty_print(stream, std::forward<T>(value));
|
||||
#endif
|
||||
return stream.str();
|
||||
}
|
||||
|
||||
#define WITH_CHRONO(__chrono, ...) \
|
||||
__chrono.start(); __VA_ARGS__ __chrono.stop();
|
||||
|
||||
struct Timer {
|
||||
using Clock = std::chrono::high_resolution_clock;
|
||||
using Event = std::chrono::time_point<Clock>;
|
||||
std::chrono::duration<double> duration;
|
||||
Event _start;
|
||||
inline void start() noexcept { _start = Clock::now(); }
|
||||
inline void stop() noexcept { duration += Clock::now() - _start; }
|
||||
inline void clear() noexcept { duration *= 0; }
|
||||
inline double count() const noexcept { return duration.count(); }
|
||||
};
|
||||
using Timings = std::map<std::string, Timer>;
|
||||
}
|
||||
// Utils:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*The rank mapping][The rank mapping:1]]
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
#include <atrip/Slice.hpp>
|
||||
|
||||
namespace atrip {
|
||||
struct RankMap {
|
||||
|
||||
std::vector<size_t> const lengths;
|
||||
size_t const np, size;
|
||||
|
||||
RankMap(std::vector<size_t> lens, size_t np_)
|
||||
: lengths(lens)
|
||||
, np(np_)
|
||||
, size(std::accumulate(lengths.begin(), lengths.end(),
|
||||
1, std::multiplies<size_t>()))
|
||||
{ assert(lengths.size() <= 2); }
|
||||
|
||||
size_t find(Slice::Location const& p) const noexcept {
|
||||
return p.source * np + p.rank;
|
||||
}
|
||||
|
||||
size_t nSources() const noexcept {
|
||||
return size / np + size_t(size % np != 0);
|
||||
}
|
||||
|
||||
|
||||
bool isPaddingRank(size_t rank) const noexcept {
|
||||
return size % np == 0
|
||||
? false
|
||||
: rank > (size % np - 1)
|
||||
;
|
||||
}
|
||||
|
||||
bool isSourcePadding(size_t rank, size_t source) const noexcept {
|
||||
return source == nSources() && isPaddingRank(rank);
|
||||
}
|
||||
|
||||
Slice::Location
|
||||
find(ABCTuple const& abc, Slice::Type sliceType) const noexcept {
|
||||
// tuple = {11, 8} when abc = {11, 8, 9} and sliceType = AB
|
||||
const auto tuple = Slice::subtupleBySlice(abc, sliceType);
|
||||
|
||||
const size_t index
|
||||
= tuple[0]
|
||||
+ tuple[1] * (lengths.size() > 1 ? lengths[0] : 0)
|
||||
;
|
||||
|
||||
return
|
||||
{ index % np
|
||||
, index / np
|
||||
};
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
// The rank mapping:1 ends here
|
||||
563
src/atrip/Atrip.cxx
Normal file
563
src/atrip/Atrip.cxx
Normal file
@ -0,0 +1,563 @@
|
||||
// [[file:../../atrip.org::*Atrip][Atrip:2]]
|
||||
#include <iomanip>
|
||||
|
||||
#include <atrip/Atrip.hpp>
|
||||
#include <atrip/Utils.hpp>
|
||||
#include <atrip/Equations.hpp>
|
||||
#include <atrip/SliceUnion.hpp>
|
||||
#include <atrip/Unions.hpp>
|
||||
|
||||
using namespace atrip;
|
||||
|
||||
int Atrip::rank;
|
||||
int Atrip::np;
|
||||
|
||||
void Atrip::init() {
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &Atrip::rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &Atrip::np);
|
||||
}
|
||||
|
||||
Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
|
||||
const int np = Atrip::np;
|
||||
const int rank = Atrip::rank;
|
||||
MPI_Comm universe = in.ei->wrld->comm;
|
||||
|
||||
// Timings in seconds ================================================{{{1
|
||||
Timings chrono{};
|
||||
|
||||
const size_t No = in.ei->lens[0];
|
||||
const size_t Nv = in.ea->lens[0];
|
||||
LOG(0,"Atrip") << "No: " << No << "\n";
|
||||
LOG(0,"Atrip") << "Nv: " << Nv << "\n";
|
||||
|
||||
// allocate the three scratches, see piecuch
|
||||
std::vector<double> Tijk(No*No*No) // doubles only (see piecuch)
|
||||
, Zijk(No*No*No) // singles + doubles (see piecuch)
|
||||
// we need local copies of the following tensors on every
|
||||
// rank
|
||||
, epsi(No)
|
||||
, epsa(Nv)
|
||||
, Tai(No * Nv)
|
||||
;
|
||||
|
||||
in.ei->read_all(epsi.data());
|
||||
in.ea->read_all(epsa.data());
|
||||
in.Tph->read_all(Tai.data());
|
||||
|
||||
// COMMUNICATOR CONSTRUCTION ========================================={{{1
|
||||
//
|
||||
// Construct a new communicator living only on a single rank
|
||||
int child_size = 1
|
||||
, child_rank
|
||||
;
|
||||
const
|
||||
int color = rank / child_size
|
||||
, crank = rank % child_size
|
||||
;
|
||||
MPI_Comm child_comm;
|
||||
if (np == 1) {
|
||||
child_comm = universe;
|
||||
} else {
|
||||
MPI_Comm_split(universe, color, crank, &child_comm);
|
||||
MPI_Comm_rank(child_comm, &child_rank);
|
||||
MPI_Comm_size(child_comm, &child_size);
|
||||
//CTF::World child_world(child_comm);
|
||||
}
|
||||
|
||||
|
||||
chrono["nv-slices"].start();
|
||||
// BUILD SLICES PARAMETRIZED BY NV ==================================={{{1
|
||||
LOG(0,"Atrip") << "BUILD NV-SLICES\n";
|
||||
TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
chrono["nv-slices"].stop();
|
||||
|
||||
chrono["nv-nv-slices"].start();
|
||||
// BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1
|
||||
LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n";
|
||||
ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
chrono["nv-nv-slices"].stop();
|
||||
|
||||
// all tensors
|
||||
std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh};
|
||||
|
||||
//CONSTRUCT TUPLE LIST ==============================================={{{1
|
||||
LOG(0,"Atrip") << "BUILD TUPLE LIST\n";
|
||||
const auto tuplesList = std::move(getTuplesList(Nv));
|
||||
WITH_RANK << "tupList.size() = " << tuplesList.size() << "\n";
|
||||
|
||||
// GET ABC INDEX RANGE FOR RANK ======================================{{{1
|
||||
auto abcIndex = getABCRange(np, rank, tuplesList);
|
||||
size_t nIterations = abcIndex.second - abcIndex.first;
|
||||
|
||||
#ifdef ATRIP_BENCHMARK
|
||||
{ const size_t maxIterations = in.maxIterations;
|
||||
if (maxIterations != 0) {
|
||||
abcIndex.second = abcIndex.first + maxIterations % (nIterations + 1);
|
||||
nIterations = maxIterations % (nIterations + 1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
WITH_RANK << "abcIndex = " << pretty_print(abcIndex) << "\n";
|
||||
LOG(0,"Atrip") << "#iterations: "
|
||||
<< nIterations << "\n";
|
||||
|
||||
// first abc
|
||||
const ABCTuple firstAbc = tuplesList[abcIndex.first];
|
||||
|
||||
|
||||
double energy(0.);
|
||||
|
||||
|
||||
auto const isFakeTuple
|
||||
= [&tuplesList](size_t const i) { return i >= tuplesList.size(); };
|
||||
|
||||
|
||||
auto communicateDatabase
|
||||
= [ &unions
|
||||
, np
|
||||
, &chrono
|
||||
] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database {
|
||||
|
||||
chrono["db:comm:type:do"].start();
|
||||
auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement();
|
||||
chrono["db:comm:type:do"].stop();
|
||||
|
||||
chrono["db:comm:ldb"].start();
|
||||
Slice::LocalDatabase ldb;
|
||||
|
||||
for (auto const& tensor: unions) {
|
||||
auto const& tensorDb = tensor->buildLocalDatabase(abc);
|
||||
ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end());
|
||||
}
|
||||
chrono["db:comm:ldb"].stop();
|
||||
|
||||
Slice::Database db(np * ldb.size(), ldb[0]);
|
||||
|
||||
chrono["oneshot-db:comm:allgather"].start();
|
||||
chrono["db:comm:allgather"].start();
|
||||
MPI_Allgather( ldb.data()
|
||||
, ldb.size()
|
||||
, MPI_LDB_ELEMENT
|
||||
, db.data()
|
||||
, ldb.size()
|
||||
, MPI_LDB_ELEMENT
|
||||
, c);
|
||||
chrono["db:comm:allgather"].stop();
|
||||
chrono["oneshot-db:comm:allgather"].stop();
|
||||
|
||||
chrono["db:comm:type:free"].start();
|
||||
MPI_Type_free(&MPI_LDB_ELEMENT);
|
||||
chrono["db:comm:type:free"].stop();
|
||||
|
||||
return db;
|
||||
};
|
||||
|
||||
auto doIOPhase
|
||||
= [&unions, &rank, &np, &universe, &chrono] (Slice::Database const& db) {
|
||||
|
||||
const size_t localDBLength = db.size() / np;
|
||||
|
||||
size_t sendTag = 0
|
||||
, recvTag = rank * localDBLength
|
||||
;
|
||||
|
||||
// RECIEVE PHASE ======================================================
|
||||
{
|
||||
// At this point, we have already send to everyone that fits
|
||||
auto const& begin = &db[rank * localDBLength]
|
||||
, end = begin + localDBLength
|
||||
;
|
||||
for (auto it = begin; it != end; ++it) {
|
||||
recvTag++;
|
||||
auto const& el = *it;
|
||||
auto& u = unionByName(unions, el.name);
|
||||
|
||||
WITH_DBG std::cout
|
||||
<< rank << ":r"
|
||||
<< "♯" << recvTag << " =>"
|
||||
<< " «n" << el.name
|
||||
<< ", t" << el.info.type
|
||||
<< ", s" << el.info.state
|
||||
<< "»"
|
||||
<< " ⊙ {" << rank << "⇐" << el.info.from.rank
|
||||
<< ", "
|
||||
<< el.info.from.source << "}"
|
||||
<< " ∴ {" << el.info.tuple[0]
|
||||
<< ", "
|
||||
<< el.info.tuple[1]
|
||||
<< "}"
|
||||
<< "\n"
|
||||
;
|
||||
|
||||
chrono["db:io:recv"].start();
|
||||
u.receive(el.info, recvTag);
|
||||
chrono["db:io:recv"].stop();
|
||||
|
||||
} // recv
|
||||
}
|
||||
|
||||
// SEND PHASE =========================================================
|
||||
for (size_t otherRank = 0; otherRank<np; otherRank++) {
|
||||
auto const& begin = &db[otherRank * localDBLength]
|
||||
, end = begin + localDBLength
|
||||
;
|
||||
for (auto it = begin; it != end; ++it) {
|
||||
sendTag++;
|
||||
Slice::LocalDatabaseElement const& el = *it;
|
||||
|
||||
if (el.info.from.rank != rank) continue;
|
||||
|
||||
auto& u = unionByName(unions, el.name);
|
||||
WITH_DBG std::cout
|
||||
<< rank << ":s"
|
||||
<< "♯" << sendTag << " =>"
|
||||
<< " «n" << el.name
|
||||
<< ", t" << el.info.type
|
||||
<< ", s" << el.info.state
|
||||
<< "»"
|
||||
<< " ⊙ {" << el.info.from.rank << "⇒" << otherRank
|
||||
<< ", "
|
||||
<< el.info.from.source << "}"
|
||||
<< " ∴ {" << el.info.tuple[0]
|
||||
<< ", "
|
||||
<< el.info.tuple[1]
|
||||
<< "}"
|
||||
<< "\n"
|
||||
;
|
||||
|
||||
chrono["db:io:send"].start();
|
||||
u.send(otherRank, el.info, sendTag);
|
||||
chrono["db:io:send"].stop();
|
||||
|
||||
} // send phase
|
||||
|
||||
} // otherRank
|
||||
|
||||
|
||||
};
|
||||
|
||||
#if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES)
|
||||
std::map<ABCTuple, double> tupleEnergies;
|
||||
#endif
|
||||
|
||||
const double doublesFlops
|
||||
= double(No)
|
||||
* double(No)
|
||||
* double(No)
|
||||
* (double(No) + double(Nv))
|
||||
* 2
|
||||
* 6
|
||||
/ 1e9
|
||||
;
|
||||
|
||||
// START MAIN LOOP ======================================================{{{1
|
||||
|
||||
Slice::Database db;
|
||||
|
||||
for ( size_t i = abcIndex.first, iteration = 1
|
||||
; i < abcIndex.second
|
||||
; i++, iteration++
|
||||
) {
|
||||
chrono["iterations"].start();
|
||||
|
||||
// check overhead from chrono over all iterations
|
||||
chrono["start:stop"].start(); chrono["start:stop"].stop();
|
||||
|
||||
// check overhead of doing a barrier at the beginning
|
||||
chrono["oneshot-mpi:barrier"].start();
|
||||
chrono["mpi:barrier"].start();
|
||||
// TODO: REMOVE
|
||||
if (in.barrier == 1)
|
||||
MPI_Barrier(universe);
|
||||
chrono["mpi:barrier"].stop();
|
||||
chrono["oneshot-mpi:barrier"].stop();
|
||||
|
||||
if (iteration % in.iterationMod == 0) {
|
||||
LOG(0,"Atrip")
|
||||
<< "iteration " << iteration
|
||||
<< " [" << 100 * iteration / nIterations << "%]"
|
||||
<< " (" << doublesFlops * iteration / chrono["doubles"].count()
|
||||
<< "GF)"
|
||||
<< " (" << doublesFlops * iteration / chrono["iterations"].count()
|
||||
<< "GF)"
|
||||
<< " ===========================\n";
|
||||
|
||||
// PRINT TIMINGS
|
||||
for (auto const& pair: chrono)
|
||||
LOG(1, " ") << pair.first << " :: "
|
||||
<< pair.second.count()
|
||||
<< std::endl;
|
||||
|
||||
}
|
||||
|
||||
const ABCTuple abc = isFakeTuple(i)
|
||||
? tuplesList[tuplesList.size() - 1]
|
||||
: tuplesList[i]
|
||||
, *abcNext = i == (abcIndex.second - 1)
|
||||
? nullptr
|
||||
: isFakeTuple(i + 1)
|
||||
? &tuplesList[tuplesList.size() - 1]
|
||||
: &tuplesList[i + 1]
|
||||
;
|
||||
|
||||
chrono["with_rank"].start();
|
||||
WITH_RANK << " :it " << iteration
|
||||
<< " :abc " << pretty_print(abc)
|
||||
<< " :abcN "
|
||||
<< (abcNext ? pretty_print(*abcNext) : "None")
|
||||
<< "\n";
|
||||
chrono["with_rank"].stop();
|
||||
|
||||
|
||||
// COMM FIRST DATABASE ================================================{{{1
|
||||
if (i == abcIndex.first) {
|
||||
WITH_RANK << "__first__:first database ............ \n";
|
||||
const auto __db = communicateDatabase(abc, universe);
|
||||
WITH_RANK << "__first__:first database communicated \n";
|
||||
WITH_RANK << "__first__:first database io phase \n";
|
||||
doIOPhase(__db);
|
||||
WITH_RANK << "__first__:first database io phase DONE\n";
|
||||
WITH_RANK << "__first__::::Unwrapping all slices for first database\n";
|
||||
for (auto& u: unions) u->unwrapAll(abc);
|
||||
WITH_RANK << "__first__::::Unwrapping all slices for first database DONE\n";
|
||||
MPI_Barrier(universe);
|
||||
}
|
||||
|
||||
// COMM NEXT DATABASE ================================================={{{1
|
||||
if (abcNext) {
|
||||
WITH_RANK << "__comm__:" << iteration << "th communicating database\n";
|
||||
chrono["db:comm"].start();
|
||||
//const auto db = communicateDatabase(*abcNext, universe);
|
||||
db = communicateDatabase(*abcNext, universe);
|
||||
chrono["db:comm"].stop();
|
||||
chrono["db:io"].start();
|
||||
doIOPhase(db);
|
||||
chrono["db:io"].stop();
|
||||
WITH_RANK << "__comm__:" << iteration << "th database io phase DONE\n";
|
||||
}
|
||||
|
||||
// COMPUTE DOUBLES ===================================================={{{1
|
||||
OCD_Barrier(universe);
|
||||
if (!isFakeTuple(i)) {
|
||||
WITH_RANK << iteration << "-th doubles\n";
|
||||
WITH_CHRONO(chrono["oneshot-unwrap"],
|
||||
WITH_CHRONO(chrono["unwrap"],
|
||||
WITH_CHRONO(chrono["unwrap:doubles"],
|
||||
for (auto& u: decltype(unions){&abph, &hhha, &taphh, &tabhh}) {
|
||||
u->unwrapAll(abc);
|
||||
}
|
||||
)))
|
||||
chrono["oneshot-doubles"].start();
|
||||
chrono["doubles"].start();
|
||||
doublesContribution( abc, (size_t)No, (size_t)Nv
|
||||
// -- VABCI
|
||||
, abph.unwrapSlice(Slice::AB, abc)
|
||||
, abph.unwrapSlice(Slice::AC, abc)
|
||||
, abph.unwrapSlice(Slice::BC, abc)
|
||||
, abph.unwrapSlice(Slice::BA, abc)
|
||||
, abph.unwrapSlice(Slice::CA, abc)
|
||||
, abph.unwrapSlice(Slice::CB, abc)
|
||||
// -- VHHHA
|
||||
, hhha.unwrapSlice(Slice::A, abc)
|
||||
, hhha.unwrapSlice(Slice::B, abc)
|
||||
, hhha.unwrapSlice(Slice::C, abc)
|
||||
// -- TA
|
||||
, taphh.unwrapSlice(Slice::A, abc)
|
||||
, taphh.unwrapSlice(Slice::B, abc)
|
||||
, taphh.unwrapSlice(Slice::C, abc)
|
||||
// -- TABIJ
|
||||
, tabhh.unwrapSlice(Slice::AB, abc)
|
||||
, tabhh.unwrapSlice(Slice::AC, abc)
|
||||
, tabhh.unwrapSlice(Slice::BC, abc)
|
||||
// -- TIJK
|
||||
, Tijk.data()
|
||||
, chrono
|
||||
);
|
||||
WITH_RANK << iteration << "-th doubles done\n";
|
||||
chrono["doubles"].stop();
|
||||
chrono["oneshot-doubles"].stop();
|
||||
}
|
||||
|
||||
// COMPUTE SINGLES =================================================== {{{1
|
||||
OCD_Barrier(universe);
|
||||
if (!isFakeTuple(i)) {
|
||||
WITH_CHRONO(chrono["oneshot-unwrap"],
|
||||
WITH_CHRONO(chrono["unwrap"],
|
||||
WITH_CHRONO(chrono["unwrap:singles"],
|
||||
abhh.unwrapAll(abc);
|
||||
)))
|
||||
chrono["reorder"].start();
|
||||
for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
|
||||
chrono["reorder"].stop();
|
||||
chrono["singles"].start();
|
||||
singlesContribution( No, Nv, abc
|
||||
, Tai.data()
|
||||
, abhh.unwrapSlice(Slice::AB, abc)
|
||||
, abhh.unwrapSlice(Slice::AC, abc)
|
||||
, abhh.unwrapSlice(Slice::BC, abc)
|
||||
, Zijk.data());
|
||||
chrono["singles"].stop();
|
||||
}
|
||||
|
||||
|
||||
// COMPUTE ENERGY ==================================================== {{{1
|
||||
if (!isFakeTuple(i)) {
|
||||
double tupleEnergy(0.);
|
||||
|
||||
int distinct(0);
|
||||
if (abc[0] == abc[1]) distinct++;
|
||||
if (abc[1] == abc[2]) distinct--;
|
||||
const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]);
|
||||
|
||||
chrono["energy"].start();
|
||||
if ( distinct == 0)
|
||||
tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk);
|
||||
else
|
||||
tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk);
|
||||
chrono["energy"].stop();
|
||||
|
||||
#if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES)
|
||||
tupleEnergies[abc] = tupleEnergy;
|
||||
#endif
|
||||
|
||||
energy += tupleEnergy;
|
||||
|
||||
#ifdef HAVE_OCD
|
||||
auto const print_slices
|
||||
= [](ABCTuple const& abc, ABCTuple const& want, SliceUnion& u) {
|
||||
if (abc != want) return;
|
||||
|
||||
for (auto type: u.sliceTypes) {
|
||||
auto const& ptr = u.unwrapSlice(type, abc);
|
||||
auto const& slice = Slice::findByTypeAbc(u.slices, type, abc);
|
||||
WITH_RANK << "__print_slice__:n" << u.name << " "
|
||||
<< pretty_print(abc) << " "
|
||||
<< pretty_print(slice.info)
|
||||
;
|
||||
for (size_t i = 0; i < 20; i++) std::cout << ptr[i] << ", ";
|
||||
std::cout << std::endl;
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
if (isFakeTuple(i)) {
|
||||
// fake iterations should also unwrap whatever they got
|
||||
WITH_RANK << iteration
|
||||
<< "th unwrapping because of fake in "
|
||||
<< i << "\n";
|
||||
for (auto& u: unions) u->unwrapAll(abc);
|
||||
}
|
||||
|
||||
#ifdef HAVE_OCD
|
||||
for (auto const& u: unions) {
|
||||
WITH_RANK << "__dups__:"
|
||||
<< iteration
|
||||
<< "-th n" << u->name << " checking duplicates\n";
|
||||
u->checkForDuplicates();
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// CLEANUP UNIONS ===================================================={{{1
|
||||
OCD_Barrier(universe);
|
||||
if (abcNext) {
|
||||
chrono["gc"].start();
|
||||
WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n";
|
||||
for (auto& u: unions) {
|
||||
|
||||
u->unwrapAll(abc);
|
||||
WITH_RANK << "__gc__:n" << u->name << " :it " << iteration
|
||||
<< " :abc " << pretty_print(abc)
|
||||
<< " :abcN " << pretty_print(*abcNext)
|
||||
<< "\n";
|
||||
for (auto const& slice: u->slices)
|
||||
WITH_RANK << "__gc__:guts:" << slice.info << "\n";
|
||||
u->clearUnusedSlicesForNext(*abcNext);
|
||||
|
||||
WITH_RANK << "__gc__: checking validity\n";
|
||||
|
||||
#ifdef HAVE_OCD
|
||||
// check for validity of the slices
|
||||
for (auto type: u->sliceTypes) {
|
||||
auto tuple = Slice::subtupleBySlice(abc, type);
|
||||
for (auto& slice: u->slices) {
|
||||
if ( slice.info.type == type
|
||||
&& slice.info.tuple == tuple
|
||||
&& slice.isDirectlyFetchable()
|
||||
) {
|
||||
if (slice.info.state == Slice::Dispatched)
|
||||
throw std::domain_error( "This slice should not be undispatched! "
|
||||
+ pretty_print(slice.info));
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
chrono["gc"].stop();
|
||||
}
|
||||
|
||||
WITH_RANK << iteration << "-th cleaning up....... DONE\n";
|
||||
}
|
||||
|
||||
// CLEAN CHRONO ======================================================{{{1
|
||||
chrono["iterations"].stop();
|
||||
{ // TODO: REMOVEME
|
||||
chrono["oneshot-doubles"].clear();
|
||||
chrono["oneshot-mpi:barrier"].clear();
|
||||
chrono["oneshot-db:comm:allgather"].clear();
|
||||
chrono["oneshot-unwrap"].clear();
|
||||
}
|
||||
|
||||
// ITERATION END ====================================================={{{1
|
||||
} // END OF MAIN LOOP
|
||||
|
||||
MPI_Barrier(universe);
|
||||
|
||||
// PRINT TUPLES ========================================================={{{1
|
||||
#if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES)
|
||||
LOG(0,"Atrip") << "tuple energies" << "\n";
|
||||
for (size_t i = 0; i < np; i++) {
|
||||
MPI_Barrier(universe);
|
||||
for (auto const& pair: tupleEnergies) {
|
||||
if (i == rank)
|
||||
std::cout << pair.first[0]
|
||||
<< " " << pair.first[1]
|
||||
<< " " << pair.first[2]
|
||||
<< std::setprecision(15) << std::setw(23)
|
||||
<< " tupleEnergy: " << pair.second
|
||||
<< "\n"
|
||||
;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// COMMUNICATE THE ENERGIES ============================================={{{1
|
||||
LOG(0,"Atrip") << "COMMUNICATING ENERGIES \n";
|
||||
double globalEnergy = 0;
|
||||
MPI_Reduce(&energy, &globalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, universe);
|
||||
|
||||
WITH_RANK << "local energy " << energy << "\n";
|
||||
LOG(0,"LOOP FINISHED, energy")
|
||||
<< std::setprecision(15) << std::setw(23)
|
||||
<< globalEnergy << std::endl;
|
||||
|
||||
// PRINT TIMINGS {{{1
|
||||
for (auto const& pair: chrono)
|
||||
LOG(0,"atrip:chrono") << pair.first << " "
|
||||
<< pair.second.count() << std::endl;
|
||||
|
||||
|
||||
LOG(0, "atrip:flops")
|
||||
<< nIterations * doublesFlops / chrono["doubles"].count() << "\n";
|
||||
|
||||
return { energy };
|
||||
|
||||
}
|
||||
// Atrip:2 ends here
|
||||
Loading…
Reference in New Issue
Block a user