Tanlge source files for complex
This commit is contained in:
parent
7f455d54fd
commit
c2b1c78c67
@ -1,4 +1,4 @@
|
||||
// [[file:../atrip.org::*Include header][Include header:1]]
|
||||
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Include%20header][Include header:1]]
|
||||
#pragma once
|
||||
|
||||
#include <atrip/Atrip.hpp>
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*Atrip][Atrip:1]]
|
||||
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Header][Header:1]]
|
||||
#pragma once
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
@ -15,8 +15,9 @@ namespace atrip {
|
||||
static int np;
|
||||
static void init();
|
||||
|
||||
template <typename F=double>
|
||||
struct Input {
|
||||
CTF::Tensor<double> *ei = nullptr
|
||||
CTF::Tensor<F> *ei = nullptr
|
||||
, *ea = nullptr
|
||||
, *Tph = nullptr
|
||||
, *Tpphh = nullptr
|
||||
@ -27,13 +28,13 @@ namespace atrip {
|
||||
int maxIterations = 0, iterationMod = -1, percentageMod = -1;
|
||||
bool barrier = false;
|
||||
bool chrono = false;
|
||||
Input& with_epsilon_i(CTF::Tensor<double> * t) { ei = t; return *this; }
|
||||
Input& with_epsilon_a(CTF::Tensor<double> * t) { ea = t; return *this; }
|
||||
Input& with_Tai(CTF::Tensor<double> * t) { Tph = t; return *this; }
|
||||
Input& with_Tabij(CTF::Tensor<double> * t) { Tpphh = t; return *this; }
|
||||
Input& with_Vabij(CTF::Tensor<double> * t) { Vpphh = t; return *this; }
|
||||
Input& with_Vijka(CTF::Tensor<double> * t) { Vhhhp = t; return *this; }
|
||||
Input& with_Vabci(CTF::Tensor<double> * t) { Vppph = t; return *this; }
|
||||
Input& with_epsilon_i(CTF::Tensor<F> * t) { ei = t; return *this; }
|
||||
Input& with_epsilon_a(CTF::Tensor<F> * t) { ea = t; return *this; }
|
||||
Input& with_Tai(CTF::Tensor<F> * t) { Tph = t; return *this; }
|
||||
Input& with_Tabij(CTF::Tensor<F> * t) { Tpphh = t; return *this; }
|
||||
Input& with_Vabij(CTF::Tensor<F> * t) { Vpphh = t; return *this; }
|
||||
Input& with_Vijka(CTF::Tensor<F> * t) { Vhhhp = t; return *this; }
|
||||
Input& with_Vabci(CTF::Tensor<F> * t) { Vppph = t; return *this; }
|
||||
Input& with_maxIterations(int i) { maxIterations = i; return *this; }
|
||||
Input& with_iterationMod(int i) { iterationMod = i; return *this; }
|
||||
Input& with_percentageMod(int i) { percentageMod = i; return *this; }
|
||||
@ -44,8 +45,9 @@ namespace atrip {
|
||||
struct Output {
|
||||
double energy;
|
||||
};
|
||||
static Output run(Input const& in);
|
||||
template <typename F=double>
|
||||
static Output run(Input<F> const& in);
|
||||
};
|
||||
|
||||
}
|
||||
// Atrip:1 ends here
|
||||
// Header:1 ends here
|
||||
|
||||
@ -1,6 +1,9 @@
|
||||
// [[file:../../atrip.org::*Blas][Blas:1]]
|
||||
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Blas][Blas:1]]
|
||||
#pragma once
|
||||
namespace atrip {
|
||||
|
||||
using Complex = std::complex<double>;
|
||||
|
||||
extern "C" {
|
||||
void dgemm_(
|
||||
const char *transa,
|
||||
@ -9,14 +12,73 @@ namespace atrip {
|
||||
const int *n,
|
||||
const int *k,
|
||||
double *alpha,
|
||||
const double *A,
|
||||
const double *a,
|
||||
const int *lda,
|
||||
const double *B,
|
||||
const double *b,
|
||||
const int *ldb,
|
||||
double *beta,
|
||||
double *C,
|
||||
double *c,
|
||||
const int *ldc
|
||||
);
|
||||
|
||||
void zgemm_(
|
||||
const char *transa,
|
||||
const char *transb,
|
||||
const int *m,
|
||||
const int *n,
|
||||
const int *k,
|
||||
Complex *alpha,
|
||||
const Complex *A,
|
||||
const int *lda,
|
||||
const Complex *B,
|
||||
const int *ldb,
|
||||
Complex *beta,
|
||||
Complex *C,
|
||||
const int *ldc
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
template <typename F=double>
|
||||
void xgemm(const char *transa,
|
||||
const char *transb,
|
||||
const int *m,
|
||||
const int *n,
|
||||
const int *k,
|
||||
F *alpha,
|
||||
const F *A,
|
||||
const int *lda,
|
||||
const F *B,
|
||||
const int *ldb,
|
||||
F *beta,
|
||||
F *C,
|
||||
const int *ldc) {
|
||||
dgemm_(transa, transb,
|
||||
m, n, k,
|
||||
alpha, A, lda,
|
||||
B, ldb, beta,
|
||||
C, ldc);
|
||||
}
|
||||
|
||||
template <>
|
||||
void xgemm(const char *transa,
|
||||
const char *transb,
|
||||
const int *m,
|
||||
const int *n,
|
||||
const int *k,
|
||||
Complex *alpha,
|
||||
const Complex *A,
|
||||
const int *lda,
|
||||
const Complex *B,
|
||||
const int *ldb,
|
||||
Complex *beta,
|
||||
Complex *C,
|
||||
const int *ldc) {
|
||||
zgemm_(transa, transb,
|
||||
m, n, k,
|
||||
alpha, A, lda,
|
||||
B, ldb, beta,
|
||||
C, ldc);
|
||||
}
|
||||
}
|
||||
// Blas:1 ends here
|
||||
|
||||
@ -1,9 +1,11 @@
|
||||
// [[file:../../atrip.org::*Macros][Macros:1]]
|
||||
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Macros][Macros:1]]
|
||||
#pragma once
|
||||
#include <functional>
|
||||
#define ATRIP_BENCHMARK
|
||||
//#define ATRIP_DONT_SLICE
|
||||
#define ATRIP_DEBUG 1
|
||||
#ifndef ATRIP_DEBUG
|
||||
# define ATRIP_DEBUG 1
|
||||
#endif
|
||||
//#define ATRIP_WORKLOAD_DUMP
|
||||
#define ATRIP_USE_DGEMM
|
||||
//#define ATRIP_PRINT_TUPLES
|
||||
@ -60,20 +62,20 @@
|
||||
#endif
|
||||
// Macros:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Macros][Macros:2]]
|
||||
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Macros][Macros:2]]
|
||||
#ifndef LOG
|
||||
#define LOG(level, name) if (Atrip::rank == 0) std::cout << name << ": "
|
||||
#endif
|
||||
// Macros:2 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Macros][Macros:3]]
|
||||
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Macros][Macros:3]]
|
||||
#ifdef ATRIP_NO_OUTPUT
|
||||
# undef LOG
|
||||
# define LOG(level, name) if (false) std::cout << name << ": "
|
||||
#endif
|
||||
// Macros:3 ends here
|
||||
|
||||
// [[file:../../atrip.org::IterationDescriptor][IterationDescriptor]]
|
||||
// [[file:~/cc4s/src/atrip/complex/atrip.org::IterationDescriptor][IterationDescriptor]]
|
||||
namespace atrip {
|
||||
|
||||
struct IterationDescription;
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*Equations][Equations:1]]
|
||||
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Equations][Equations:1]]
|
||||
#pragma once
|
||||
|
||||
#include<atrip/Slice.hpp>
|
||||
@ -6,14 +6,15 @@
|
||||
|
||||
namespace atrip {
|
||||
|
||||
template <typename F=double>
|
||||
double getEnergyDistinct
|
||||
( const double epsabc
|
||||
, std::vector<double> const& epsi
|
||||
, std::vector<double> const& Tijk_
|
||||
, std::vector<double> const& Zijk_
|
||||
( const F epsabc
|
||||
, std::vector<F> const& epsi
|
||||
, std::vector<F> const& Tijk_
|
||||
, std::vector<F> const& Zijk_
|
||||
) {
|
||||
constexpr size_t blockSize=16;
|
||||
double energy(0.);
|
||||
F energy(0.);
|
||||
const size_t No = epsi.size();
|
||||
for (size_t kk=0; kk<No; kk+=blockSize){
|
||||
const size_t kend( std::min(No, kk+blockSize) );
|
||||
@ -22,52 +23,64 @@ namespace atrip {
|
||||
for (size_t ii(jj); ii<No; ii+=blockSize){
|
||||
const size_t iend( std::min( No, ii+blockSize) );
|
||||
for (size_t k(kk); k < kend; k++){
|
||||
const double ek(epsi[k]);
|
||||
const F ek(epsi[k]);
|
||||
const size_t jstart = jj > k ? jj : k;
|
||||
for (size_t j(jstart); j < jend; j++){
|
||||
const double ej(epsi[j]);
|
||||
double facjk( j == k ? 0.5 : 1.0);
|
||||
F const ej(epsi[j]);
|
||||
F const facjk = j == k ? F(0.5) : F(1.0);
|
||||
size_t istart = ii > j ? ii : j;
|
||||
for (size_t i(istart); i < iend; i++){
|
||||
const double ei(epsi[i]);
|
||||
double facij ( i==j ? 0.5 : 1.0);
|
||||
double denominator(epsabc - ei - ej - ek);
|
||||
double U(Zijk_[i + No*j + No*No*k]);
|
||||
double V(Zijk_[i + No*k + No*No*j]);
|
||||
double W(Zijk_[j + No*i + No*No*k]);
|
||||
double X(Zijk_[j + No*k + No*No*i]);
|
||||
double Y(Zijk_[k + No*i + No*No*j]);
|
||||
double Z(Zijk_[k + No*j + No*No*i]);
|
||||
|
||||
double A(Tijk_[i + No*j + No*No*k]);
|
||||
double B(Tijk_[i + No*k + No*No*j]);
|
||||
double C(Tijk_[j + No*i + No*No*k]);
|
||||
double D(Tijk_[j + No*k + No*No*i]);
|
||||
double E(Tijk_[k + No*i + No*No*j]);
|
||||
double F(Tijk_[k + No*j + No*No*i]);
|
||||
double value(3.0*(A*U+B*V+C*W+D*X+E*Y+F*Z)
|
||||
+((U+X+Y)-2.0*(V+W+Z))*(A+D+E)
|
||||
+((V+W+Z)-2.0*(U+X+Y))*(B+C+F));
|
||||
energy += 2.0*value / denominator * facjk * facij;
|
||||
const F
|
||||
ei(epsi[i])
|
||||
, facij = i == j ? F(0.5) : F(1.0)
|
||||
, denominator(epsabc - ei - ej - ek)
|
||||
, U(Zijk_[i + No*j + No*No*k])
|
||||
, V(Zijk_[i + No*k + No*No*j])
|
||||
, W(Zijk_[j + No*i + No*No*k])
|
||||
, X(Zijk_[j + No*k + No*No*i])
|
||||
, Y(Zijk_[k + No*i + No*No*j])
|
||||
, Z(Zijk_[k + No*j + No*No*i])
|
||||
, A(std::conj(Tijk_[i + No*j + No*No*k]))
|
||||
, B(std::conj(Tijk_[i + No*k + No*No*j]))
|
||||
, C(std::conj(Tijk_[j + No*i + No*No*k]))
|
||||
, D(std::conj(Tijk_[j + No*k + No*No*i]))
|
||||
, E(std::conj(Tijk_[k + No*i + No*No*j]))
|
||||
, F(std::conj(Tijk_[k + No*j + No*No*i]))
|
||||
, value
|
||||
= 3.0 * ( A * U
|
||||
+ B * V
|
||||
+ C * W
|
||||
+ D * X
|
||||
+ E * Y
|
||||
+ F * Z )
|
||||
+ ( ( U + X + Y )
|
||||
- 2.0 * ( V + W + Z )
|
||||
) * ( A + D + E )
|
||||
+ ( ( V + W + Z )
|
||||
- 2.0 * ( U + X + Y )
|
||||
) * ( B + C + F )
|
||||
;
|
||||
energy += 2.0 * value / denominator * facjk * facij;
|
||||
} // i
|
||||
} // j
|
||||
} // k
|
||||
} // ii
|
||||
} // jj
|
||||
} // kk
|
||||
return energy;
|
||||
return std::real(energy);
|
||||
}
|
||||
|
||||
|
||||
template <typename F=double>
|
||||
double getEnergySame
|
||||
( const double epsabc
|
||||
, std::vector<double> const& epsi
|
||||
, std::vector<double> const& Tijk_
|
||||
, std::vector<double> const& Zijk_
|
||||
( const F epsabc
|
||||
, std::vector<F> const& epsi
|
||||
, std::vector<F> const& Tijk_
|
||||
, std::vector<F> const& Zijk_
|
||||
) {
|
||||
constexpr size_t blockSize = 16;
|
||||
const size_t No = epsi.size();
|
||||
double energy(0.);
|
||||
F energy = F(0.);
|
||||
for (size_t kk=0; kk<No; kk+=blockSize){
|
||||
const size_t kend( std::min( kk+blockSize, No) );
|
||||
for (size_t jj(kk); jj<No; jj+=blockSize){
|
||||
@ -75,42 +88,50 @@ namespace atrip {
|
||||
for (size_t ii(jj); ii<No; ii+=blockSize){
|
||||
const size_t iend( std::min( ii+blockSize, No) );
|
||||
for (size_t k(kk); k < kend; k++){
|
||||
const double ek(epsi[k]);
|
||||
const F ek(epsi[k]);
|
||||
const size_t jstart = jj > k ? jj : k;
|
||||
for(size_t j(jstart); j < jend; j++){
|
||||
const double facjk( j == k ? 0.5 : 1.0);
|
||||
const double ej(epsi[j]);
|
||||
const F facjk( j == k ? F(0.5) : F(1.0));
|
||||
const F ej(epsi[j]);
|
||||
const size_t istart = ii > j ? ii : j;
|
||||
for(size_t i(istart); i < iend; i++){
|
||||
double ei(epsi[i]);
|
||||
double facij ( i==j ? 0.5 : 1.0);
|
||||
double denominator(epsabc - ei - ej - ek);
|
||||
double U(Zijk_[i + No*j + No*No*k]);
|
||||
double V(Zijk_[j + No*k + No*No*i]);
|
||||
double W(Zijk_[k + No*i + No*No*j]);
|
||||
double A(Tijk_[i + No*j + No*No*k]);
|
||||
double B(Tijk_[j + No*k + No*No*i]);
|
||||
double C(Tijk_[k + No*i + No*No*j]);
|
||||
double value(3.0*( A*U + B*V + C*W) - (A+B+C)*(U+V+W));
|
||||
energy += 2.0*value / denominator * facjk * facij;
|
||||
const F
|
||||
ei(epsi[i])
|
||||
, facij ( i==j ? F(0.5) : F(1.0))
|
||||
, denominator(epsabc - ei - ej - ek)
|
||||
, U(Zijk_[i + No*j + No*No*k])
|
||||
, V(Zijk_[j + No*k + No*No*i])
|
||||
, W(Zijk_[k + No*i + No*No*j])
|
||||
, A(std::conj(Tijk_[i + No*j + No*No*k]))
|
||||
, B(std::conj(Tijk_[j + No*k + No*No*i]))
|
||||
, C(std::conj(Tijk_[k + No*i + No*No*j]))
|
||||
, value
|
||||
= F(3.0) * ( A * U
|
||||
+ B * V
|
||||
+ C * W
|
||||
)
|
||||
- ( A + B + C ) * ( U + V + W )
|
||||
;
|
||||
energy += F(2.0) * value / denominator * facjk * facij;
|
||||
} // i
|
||||
} // j
|
||||
} // k
|
||||
} // ii
|
||||
} // jj
|
||||
} // kk
|
||||
return energy;
|
||||
return std::real(energy);
|
||||
}
|
||||
|
||||
template <typename F=double>
|
||||
void singlesContribution
|
||||
( size_t No
|
||||
, size_t Nv
|
||||
, const ABCTuple &abc
|
||||
, double const* Tph
|
||||
, double const* VABij
|
||||
, double const* VACij
|
||||
, double const* VBCij
|
||||
, double *Zijk
|
||||
, F const* Tph
|
||||
, F const* VABij
|
||||
, F const* VACij
|
||||
, F const* VBCij
|
||||
, F *Zijk
|
||||
) {
|
||||
const size_t a(abc[0]), b(abc[1]), c(abc[2]);
|
||||
for (size_t k=0; k < No; k++)
|
||||
@ -125,31 +146,32 @@ namespace atrip {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename F=double>
|
||||
void doublesContribution
|
||||
( const ABCTuple &abc
|
||||
, size_t const No
|
||||
, size_t const Nv
|
||||
// -- VABCI
|
||||
, double const* VABph
|
||||
, double const* VACph
|
||||
, double const* VBCph
|
||||
, double const* VBAph
|
||||
, double const* VCAph
|
||||
, double const* VCBph
|
||||
, F const* VABph
|
||||
, F const* VACph
|
||||
, F const* VBCph
|
||||
, F const* VBAph
|
||||
, F const* VCAph
|
||||
, F const* VCBph
|
||||
// -- VHHHA
|
||||
, double const* VhhhA
|
||||
, double const* VhhhB
|
||||
, double const* VhhhC
|
||||
, F const* VhhhA
|
||||
, F const* VhhhB
|
||||
, F const* VhhhC
|
||||
// -- TA
|
||||
, double const* TAphh
|
||||
, double const* TBphh
|
||||
, double const* TCphh
|
||||
, F const* TAphh
|
||||
, F const* TBphh
|
||||
, F const* TCphh
|
||||
// -- TABIJ
|
||||
, double const* TABhh
|
||||
, double const* TAChh
|
||||
, double const* TBChh
|
||||
, F const* TABhh
|
||||
, F const* TAChh
|
||||
, F const* TBChh
|
||||
// -- TIJK
|
||||
, double *Tijk
|
||||
, F *Tijk
|
||||
, atrip::Timings& chrono
|
||||
) {
|
||||
|
||||
@ -168,40 +190,47 @@ namespace atrip {
|
||||
Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \
|
||||
} \
|
||||
t_reorder.stop();
|
||||
#define DGEMM_PARTICLES(__A, __B) \
|
||||
atrip::dgemm_( "T" \
|
||||
, "N" \
|
||||
, (int const*)&NoNo \
|
||||
, (int const*)&No \
|
||||
, (int const*)&Nv \
|
||||
, &one \
|
||||
, __A \
|
||||
, (int const*)&Nv \
|
||||
, __B \
|
||||
, (int const*)&Nv \
|
||||
, &zero \
|
||||
, _t_buffer.data() \
|
||||
, (int const*)&NoNo \
|
||||
);
|
||||
#define DGEMM_HOLES(__A, __B, __TRANSB) \
|
||||
atrip::dgemm_( "N" \
|
||||
, __TRANSB \
|
||||
, (int const*)&NoNo \
|
||||
, (int const*)&No \
|
||||
, (int const*)&No \
|
||||
, &m_one \
|
||||
, __A \
|
||||
, (int const*)&NoNo \
|
||||
, __B \
|
||||
, (int const*)&No \
|
||||
, &zero \
|
||||
, _t_buffer.data() \
|
||||
, (int const*)&NoNo \
|
||||
);
|
||||
#define DGEMM_PARTICLES(__A, __B) \
|
||||
atrip::xgemm<F>( "T" \
|
||||
, "N" \
|
||||
, (int const*)&NoNo \
|
||||
, (int const*)&No \
|
||||
, (int const*)&Nv \
|
||||
, &one \
|
||||
, __A \
|
||||
, (int const*)&Nv \
|
||||
, __B \
|
||||
, (int const*)&Nv \
|
||||
, &zero \
|
||||
, _t_buffer.data() \
|
||||
, (int const*)&NoNo \
|
||||
);
|
||||
#define DGEMM_HOLES(__A, __B, __TRANSB) \
|
||||
atrip::xgemm<F>( "N" \
|
||||
, __TRANSB \
|
||||
, (int const*)&NoNo \
|
||||
, (int const*)&No \
|
||||
, (int const*)&No \
|
||||
, &m_one \
|
||||
, __A \
|
||||
, (int const*)&NoNo \
|
||||
, __B \
|
||||
, (int const*)&No \
|
||||
, &zero \
|
||||
, _t_buffer.data() \
|
||||
, (int const*)&NoNo \
|
||||
);
|
||||
#define MAYBE_CONJ(_conj, _buffer) \
|
||||
if (traits::isComplex<F>()) { \
|
||||
for (size_t __i = 0; __i < NoNoNo; ++__i) \
|
||||
_conj[__i] = std::conj(_buffer[__i]); \
|
||||
} else { \
|
||||
for (size_t __i = 0; __i < NoNoNo; ++__i) \
|
||||
_conj[__i] = _buffer[__i]; \
|
||||
}
|
||||
|
||||
using F = double;
|
||||
const size_t NoNoNo = No*NoNo;
|
||||
std::vector<double> _t_buffer;
|
||||
std::vector<F> _t_buffer;
|
||||
_t_buffer.reserve(NoNoNo);
|
||||
F one{1.0}, m_one{-1.0}, zero{0.0};
|
||||
|
||||
@ -214,38 +243,48 @@ namespace atrip {
|
||||
|
||||
chrono["doubles:holes"].start();
|
||||
{ // Holes part ============================================================
|
||||
|
||||
std::vector<F> _vhhh(NoNoNo);
|
||||
|
||||
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
|
||||
MAYBE_CONJ(_vhhh, VhhhC)
|
||||
chrono["doubles:holes:1"].start();
|
||||
DGEMM_HOLES(VhhhC, TABhh, "N")
|
||||
DGEMM_HOLES(_vhhh.data(), TABhh, "N")
|
||||
REORDER(i, k, j)
|
||||
chrono["doubles:holes:1"].stop();
|
||||
// VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0
|
||||
chrono["doubles:holes:2"].start();
|
||||
DGEMM_HOLES(VhhhC, TABhh, "T")
|
||||
DGEMM_HOLES(_vhhh.data(), TABhh, "T")
|
||||
REORDER(j, k, i)
|
||||
chrono["doubles:holes:2"].stop();
|
||||
|
||||
// VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5
|
||||
MAYBE_CONJ(_vhhh, VhhhB)
|
||||
chrono["doubles:holes:3"].start();
|
||||
DGEMM_HOLES(VhhhB, TAChh, "N")
|
||||
DGEMM_HOLES(_vhhh.data(), TAChh, "N")
|
||||
REORDER(i, j, k)
|
||||
chrono["doubles:holes:3"].stop();
|
||||
// VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3
|
||||
chrono["doubles:holes:4"].start();
|
||||
DGEMM_HOLES(VhhhB, TAChh, "T")
|
||||
DGEMM_HOLES(_vhhh.data(), TAChh, "T")
|
||||
REORDER(k, j, i)
|
||||
chrono["doubles:holes:4"].stop();
|
||||
|
||||
// VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1
|
||||
MAYBE_CONJ(_vhhh, VhhhA)
|
||||
chrono["doubles:holes:5"].start();
|
||||
DGEMM_HOLES(VhhhA, TBChh, "N")
|
||||
DGEMM_HOLES(_vhhh.data(), TBChh, "N")
|
||||
REORDER(j, i, k)
|
||||
chrono["doubles:holes:5"].stop();
|
||||
// VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4
|
||||
chrono["doubles:holes:6"].start();
|
||||
DGEMM_HOLES(VhhhA, TBChh, "T")
|
||||
DGEMM_HOLES(_vhhh.data(), TBChh, "T")
|
||||
REORDER(k, i, j)
|
||||
chrono["doubles:holes:6"].stop();
|
||||
|
||||
}
|
||||
chrono["doubles:holes"].stop();
|
||||
#undef MAYBE_CONJ
|
||||
|
||||
chrono["doubles:particles"].start();
|
||||
{ // Particle part =========================================================
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*The rank mapping][The rank mapping:1]]
|
||||
// [[file:~/cc4s/src/atrip/complex/atrip.org::*The%20rank%20mapping][The rank mapping:1]]
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
@ -7,6 +7,8 @@
|
||||
#include <atrip/Slice.hpp>
|
||||
|
||||
namespace atrip {
|
||||
|
||||
template <typename F=double>
|
||||
struct RankMap {
|
||||
|
||||
std::vector<size_t> const lengths;
|
||||
@ -19,7 +21,7 @@ namespace atrip {
|
||||
1UL, std::multiplies<size_t>()))
|
||||
{ assert(lengths.size() <= 2); }
|
||||
|
||||
size_t find(Slice::Location const& p) const noexcept {
|
||||
size_t find(typename Slice<F>::Location const& p) const noexcept {
|
||||
return p.source * np + p.rank;
|
||||
}
|
||||
|
||||
@ -39,10 +41,10 @@ namespace atrip {
|
||||
return source == nSources() && isPaddingRank(rank);
|
||||
}
|
||||
|
||||
Slice::Location
|
||||
find(ABCTuple const& abc, Slice::Type sliceType) const noexcept {
|
||||
typename Slice<F>::Location
|
||||
find(ABCTuple const& abc, typename Slice<F>::Type sliceType) const noexcept {
|
||||
// tuple = {11, 8} when abc = {11, 8, 9} and sliceType = AB
|
||||
const auto tuple = Slice::subtupleBySlice(abc, sliceType);
|
||||
const auto tuple = Slice<F>::subtupleBySlice(abc, sliceType);
|
||||
|
||||
const size_t index
|
||||
= tuple[0]
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*The slice][The slice:1]]
|
||||
// [[file:~/cc4s/src/atrip/complex/atrip.org::*The%20slice][The slice:1]]
|
||||
#pragma once
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
@ -7,16 +7,26 @@
|
||||
|
||||
#include <atrip/Tuples.hpp>
|
||||
#include <atrip/Utils.hpp>
|
||||
#include <atrip/Blas.hpp>
|
||||
|
||||
namespace atrip {
|
||||
|
||||
namespace traits {
|
||||
template <typename FF> bool isComplex() { return false; };
|
||||
template <> bool isComplex<Complex>() { return true; };
|
||||
namespace mpi {
|
||||
template <typename FF> MPI_Datatype datatypeOf(void);
|
||||
template <> MPI_Datatype datatypeOf<double>() { return MPI_DOUBLE; }
|
||||
template <> MPI_Datatype datatypeOf<Complex>() { return MPI_DOUBLE_COMPLEX; }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename F=double>
|
||||
struct Slice {
|
||||
|
||||
using F = double;
|
||||
// The slice:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*The slice][The slice:2]]
|
||||
// [[file:~/cc4s/src/atrip/complex/atrip.org::*The%20slice][The slice:2]]
|
||||
// ASSOCIATED TYPES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
struct Location { size_t rank; size_t source; };
|
||||
@ -93,8 +103,8 @@ struct Slice {
|
||||
|
||||
// DATABASE ==========================================================={{{1
|
||||
struct LocalDatabaseElement {
|
||||
Slice::Name name;
|
||||
Slice::Info info;
|
||||
Slice<F>::Name name;
|
||||
Slice<F>::Info info;
|
||||
};
|
||||
using LocalDatabase = std::vector<LocalDatabaseElement>;
|
||||
using Database = LocalDatabase;
|
||||
@ -117,7 +127,7 @@ struct Slice {
|
||||
constexpr int n = 2;
|
||||
// create a sliceLocation to measure in the current architecture
|
||||
// the packing of the struct
|
||||
Slice::Location measure;
|
||||
Slice<F>::Location measure;
|
||||
MPI_Datatype dt;
|
||||
const std::vector<int> lengths(n, 1);
|
||||
const MPI_Datatype types[n] = {usizeDt(), usizeDt()};
|
||||
@ -141,7 +151,7 @@ struct Slice {
|
||||
static MPI_Datatype sliceInfo () {
|
||||
constexpr int n = 5;
|
||||
MPI_Datatype dt;
|
||||
Slice::Info measure;
|
||||
Slice<F>::Info measure;
|
||||
const std::vector<int> lengths(n, 1);
|
||||
const MPI_Datatype types[n]
|
||||
= { vector(2, usizeDt())
|
||||
@ -213,10 +223,10 @@ struct Slice {
|
||||
* It is important here to return a reference to a Slice
|
||||
* not to accidentally copy the associated buffer of the slice.
|
||||
*/
|
||||
static Slice& findOneByType(std::vector<Slice> &slices, Slice::Type type) {
|
||||
static Slice<F>& findOneByType(std::vector<Slice<F>> &slices, Slice<F>::Type type) {
|
||||
const auto sliceIt
|
||||
= std::find_if(slices.begin(), slices.end(),
|
||||
[&type](Slice const& s) {
|
||||
[&type](Slice<F> const& s) {
|
||||
return type == s.info.type;
|
||||
});
|
||||
WITH_CRAZY_DEBUG
|
||||
@ -231,11 +241,11 @@ struct Slice {
|
||||
* Check if an info has
|
||||
*
|
||||
*/
|
||||
static std::vector<Slice*> hasRecycledReferencingToIt
|
||||
( std::vector<Slice> &slices
|
||||
static std::vector<Slice<F>*> hasRecycledReferencingToIt
|
||||
( std::vector<Slice<F>> &slices
|
||||
, Info const& info
|
||||
) {
|
||||
std::vector<Slice*> result;
|
||||
std::vector<Slice<F>*> result;
|
||||
|
||||
for (auto& s: slices)
|
||||
if ( s.info.recycling == info.type
|
||||
@ -246,11 +256,11 @@ struct Slice {
|
||||
return result;
|
||||
}
|
||||
|
||||
static Slice&
|
||||
findRecycledSource (std::vector<Slice> &slices, Slice::Info info) {
|
||||
static Slice<F>&
|
||||
findRecycledSource (std::vector<Slice<F>> &slices, Slice<F>::Info info) {
|
||||
const auto sliceIt
|
||||
= std::find_if(slices.begin(), slices.end(),
|
||||
[&info](Slice const& s) {
|
||||
[&info](Slice<F> const& s) {
|
||||
return info.recycling == s.info.type
|
||||
&& info.tuple == s.info.tuple
|
||||
&& State::Recycled != s.info.state
|
||||
@ -270,15 +280,15 @@ struct Slice {
|
||||
return *sliceIt;
|
||||
}
|
||||
|
||||
static Slice& findByTypeAbc
|
||||
( std::vector<Slice> &slices
|
||||
, Slice::Type type
|
||||
static Slice<F>& findByTypeAbc
|
||||
( std::vector<Slice<F>> &slices
|
||||
, Slice<F>::Type type
|
||||
, ABCTuple const& abc
|
||||
) {
|
||||
const auto tuple = Slice::subtupleBySlice(abc, type);
|
||||
const auto tuple = Slice<F>::subtupleBySlice(abc, type);
|
||||
const auto sliceIt
|
||||
= std::find_if(slices.begin(), slices.end(),
|
||||
[&type, &tuple](Slice const& s) {
|
||||
[&type, &tuple](Slice<F> const& s) {
|
||||
return type == s.info.type
|
||||
&& tuple == s.info.tuple
|
||||
;
|
||||
@ -298,11 +308,11 @@ struct Slice {
|
||||
return *sliceIt;
|
||||
}
|
||||
|
||||
static Slice& findByInfo(std::vector<Slice> &slices,
|
||||
Slice::Info const& info) {
|
||||
static Slice<F>& findByInfo(std::vector<Slice<F>> &slices,
|
||||
Slice<F>::Info const& info) {
|
||||
const auto sliceIt
|
||||
= std::find_if(slices.begin(), slices.end(),
|
||||
[&info](Slice const& s) {
|
||||
[&info](Slice<F> const& s) {
|
||||
// TODO: maybe implement comparison in Info struct
|
||||
return info.type == s.info.type
|
||||
&& info.state == s.info.state
|
||||
@ -448,13 +458,15 @@ struct Slice {
|
||||
}; // struct Slice
|
||||
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, Slice::Location const& v) {
|
||||
template <typename F=double>
|
||||
std::ostream& operator<<(std::ostream& out, typename Slice<F>::Location const& v) {
|
||||
// TODO: remove me
|
||||
out << "{.r(" << v.rank << "), .s(" << v.source << ")};";
|
||||
return out;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, Slice::Info const& i) {
|
||||
template <typename F=double>
|
||||
std::ostream& operator<<(std::ostream& out, typename Slice<F>::Info const& i) {
|
||||
out << "«t" << i.type << ", s" << i.state << "»"
|
||||
<< " ⊙ {" << i.from.rank << ", " << i.from.source << "}"
|
||||
<< " ∴ {" << i.tuple[0] << ", " << i.tuple[1] << "}"
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*The slice union][The slice union:1]]
|
||||
// [[file:~/cc4s/src/atrip/complex/atrip.org::*The%20slice%20union][The slice union:1]]
|
||||
#pragma once
|
||||
#include <atrip/Debug.hpp>
|
||||
#include <atrip/Slice.hpp>
|
||||
@ -6,8 +6,8 @@
|
||||
|
||||
namespace atrip {
|
||||
|
||||
template <typename F=double>
|
||||
struct SliceUnion {
|
||||
using F = double;
|
||||
using Tensor = CTF::Tensor<F>;
|
||||
|
||||
virtual void
|
||||
@ -20,7 +20,7 @@ namespace atrip {
|
||||
* This means that there can be at most one slice with a given Ty_x_Tu.
|
||||
*/
|
||||
void checkForDuplicates() const {
|
||||
std::vector<Slice::Ty_x_Tu> tytus;
|
||||
std::vector<typename Slice<F>::Ty_x_Tu> tytus;
|
||||
for (auto const& s: slices) {
|
||||
if (s.isFree()) continue;
|
||||
tytus.push_back({s.info.type, s.info.tuple});
|
||||
@ -33,13 +33,13 @@ namespace atrip {
|
||||
|
||||
}
|
||||
|
||||
std::vector<Slice::Ty_x_Tu> neededSlices(ABCTuple const& abc) {
|
||||
std::vector<Slice::Ty_x_Tu> needed(sliceTypes.size());
|
||||
std::vector<typename Slice<F>::Ty_x_Tu> neededSlices(ABCTuple const& abc) {
|
||||
std::vector<typename Slice<F>::Ty_x_Tu> needed(sliceTypes.size());
|
||||
// build the needed vector
|
||||
std::transform(sliceTypes.begin(), sliceTypes.end(),
|
||||
needed.begin(),
|
||||
[&abc](Slice::Type const type) {
|
||||
auto tuple = Slice::subtupleBySlice(abc, type);
|
||||
[&abc](typename Slice<F>::Type const type) {
|
||||
auto tuple = Slice<F>::subtupleBySlice(abc, type);
|
||||
return std::make_pair(type, tuple);
|
||||
});
|
||||
return needed;
|
||||
@ -64,8 +64,9 @@ namespace atrip {
|
||||
* slices.
|
||||
*
|
||||
*/
|
||||
Slice::LocalDatabase buildLocalDatabase(ABCTuple const& abc) {
|
||||
Slice::LocalDatabase result;
|
||||
typename
|
||||
Slice<F>::LocalDatabase buildLocalDatabase(ABCTuple const& abc) {
|
||||
typename Slice<F>::LocalDatabase result;
|
||||
|
||||
auto const needed = neededSlices(abc);
|
||||
|
||||
@ -95,7 +96,7 @@ namespace atrip {
|
||||
// need
|
||||
auto const& it
|
||||
= std::find_if(slices.begin(), slices.end(),
|
||||
[&tuple, &type](Slice const& other) {
|
||||
[&tuple, &type](Slice<F> const& other) {
|
||||
return other.info.tuple == tuple
|
||||
&& other.info.type == type
|
||||
// we only want another slice when it
|
||||
@ -121,7 +122,7 @@ namespace atrip {
|
||||
// tuple and that has a valid data pointer.
|
||||
auto const& recycleIt
|
||||
= std::find_if(slices.begin(), slices.end(),
|
||||
[&tuple, &type](Slice const& other) {
|
||||
[&tuple, &type](Slice<F> const& other) {
|
||||
return other.info.tuple == tuple
|
||||
&& other.info.type != type
|
||||
&& other.isRecyclable()
|
||||
@ -132,13 +133,13 @@ namespace atrip {
|
||||
// (which should exist by construction :THINK)
|
||||
//
|
||||
if (recycleIt != slices.end()) {
|
||||
auto& blank = Slice::findOneByType(slices, Slice::Blank);
|
||||
auto& blank = Slice<F>::findOneByType(slices, Slice<F>::Blank);
|
||||
// TODO: formalize this through a method to copy information
|
||||
// from another slice
|
||||
blank.data = recycleIt->data;
|
||||
blank.info.type = type;
|
||||
blank.info.tuple = tuple;
|
||||
blank.info.state = Slice::Recycled;
|
||||
blank.info.state = Slice<F>::Recycled;
|
||||
blank.info.from = from;
|
||||
blank.info.recycling = recycleIt->info.type;
|
||||
result.push_back({name, blank.info});
|
||||
@ -165,17 +166,17 @@ namespace atrip {
|
||||
<< " for tuple " << tuple[0] << ", " << tuple[1]
|
||||
<< "\n"
|
||||
;
|
||||
auto& blank = Slice::findOneByType(slices, Slice::Blank);
|
||||
auto& blank = Slice<F>::findOneByType(slices, Slice<F>::Blank);
|
||||
blank.info.type = type;
|
||||
blank.info.tuple = tuple;
|
||||
blank.info.from = from;
|
||||
|
||||
// Handle self sufficiency
|
||||
blank.info.state = Atrip::rank == from.rank
|
||||
? Slice::SelfSufficient
|
||||
: Slice::Fetch
|
||||
? Slice<F>::SelfSufficient
|
||||
: Slice<F>::Fetch
|
||||
;
|
||||
if (blank.info.state == Slice::SelfSufficient) {
|
||||
if (blank.info.state == Slice<F>::SelfSufficient) {
|
||||
blank.data = sources[from.source].data();
|
||||
} else {
|
||||
if (freePointers.size() == 0)
|
||||
@ -219,7 +220,7 @@ namespace atrip {
|
||||
// try to find the slice in the needed slices list
|
||||
auto const found
|
||||
= std::find_if(needed.begin(), needed.end(),
|
||||
[&slice] (Slice::Ty_x_Tu const& tytu) {
|
||||
[&slice] (typename Slice<F>::Ty_x_Tu const& tytu) {
|
||||
return slice.info.tuple == tytu.second
|
||||
&& slice.info.type == tytu.first
|
||||
;
|
||||
@ -238,7 +239,7 @@ namespace atrip {
|
||||
|
||||
// allow to gc unwrapped and recycled, never Fetch,
|
||||
// if we have a Fetch slice then something has gone very wrong.
|
||||
if (!slice.isUnwrapped() && slice.info.state != Slice::Recycled)
|
||||
if (!slice.isUnwrapped() && slice.info.state != Slice<F>::Recycled)
|
||||
throw
|
||||
std::domain_error("Trying to garbage collect "
|
||||
" a non-unwrapped slice! "
|
||||
@ -259,13 +260,13 @@ namespace atrip {
|
||||
// - we should make sure that the data pointer of slice
|
||||
// does not get freed.
|
||||
//
|
||||
if (slice.info.state == Slice::Ready) {
|
||||
if (slice.info.state == Slice<F>::Ready) {
|
||||
WITH_OCD WITH_RANK
|
||||
<< "__gc__:" << "checking for data recycled dependencies\n";
|
||||
auto recycled
|
||||
= Slice::hasRecycledReferencingToIt(slices, slice.info);
|
||||
= Slice<F>::hasRecycledReferencingToIt(slices, slice.info);
|
||||
if (recycled.size()) {
|
||||
Slice* newReady = recycled[0];
|
||||
Slice<F>* newReady = recycled[0];
|
||||
WITH_OCD WITH_RANK
|
||||
<< "__gc__:" << "swaping recycled "
|
||||
<< pretty_print(newReady->info)
|
||||
@ -290,8 +291,8 @@ namespace atrip {
|
||||
|
||||
// if the slice is self sufficient, do not dare touching the
|
||||
// pointer, since it is a pointer to our sources in our rank.
|
||||
if ( slice.info.state == Slice::SelfSufficient
|
||||
|| slice.info.state == Slice::Recycled
|
||||
if ( slice.info.state == Slice<F>::SelfSufficient
|
||||
|| slice.info.state == Slice<F>::Recycled
|
||||
) {
|
||||
freeSlicePointer = false;
|
||||
}
|
||||
@ -313,7 +314,8 @@ namespace atrip {
|
||||
// at this point, let us blank the slice
|
||||
WITH_RANK << "~~~:cl(" << name << ")"
|
||||
<< " freeing up slice "
|
||||
<< " info " << slice.info
|
||||
// TODO: make this possible
|
||||
// << " info " << slice.info
|
||||
<< "\n";
|
||||
slice.free();
|
||||
}
|
||||
@ -323,13 +325,13 @@ namespace atrip {
|
||||
|
||||
// CONSTRUCTOR
|
||||
SliceUnion( Tensor const& sourceTensor
|
||||
, std::vector<Slice::Type> sliceTypes_
|
||||
, std::vector<typename Slice<F>::Type> sliceTypes_
|
||||
, std::vector<size_t> sliceLength_
|
||||
, std::vector<size_t> paramLength
|
||||
, size_t np
|
||||
, MPI_Comm child_world
|
||||
, MPI_Comm global_world
|
||||
, Slice::Name name_
|
||||
, typename Slice<F>::Name name_
|
||||
, size_t nSliceBuffers = 4
|
||||
)
|
||||
: rankMap(paramLength, np)
|
||||
@ -344,13 +346,13 @@ namespace atrip {
|
||||
, name(name_)
|
||||
, sliceTypes(sliceTypes_)
|
||||
, sliceBuffers(nSliceBuffers, sources[0])
|
||||
//, slices(2 * sliceTypes.size(), Slice{ sources[0].size() })
|
||||
//, slices(2 * sliceTypes.size(), Slice<F>{ sources[0].size() })
|
||||
{ // constructor begin
|
||||
|
||||
LOG(0,"Atrip") << "INIT SliceUnion: " << name << "\n";
|
||||
|
||||
slices
|
||||
= std::vector<Slice>(2 * sliceTypes.size(), { sources[0].size() });
|
||||
= std::vector<Slice<F>>(2 * sliceTypes.size(), { sources[0].size() });
|
||||
// TODO: think exactly ^------------------- about this number
|
||||
|
||||
// initialize the freePointers with the pointers to the buffers
|
||||
@ -419,19 +421,19 @@ namespace atrip {
|
||||
* \brief Send asynchronously only if the state is Fetch
|
||||
*/
|
||||
void send( size_t otherRank
|
||||
, Slice::Info const& info
|
||||
, typename Slice<F>::Info const& info
|
||||
, size_t tag) const noexcept {
|
||||
MPI_Request request;
|
||||
bool sendData_p = false;
|
||||
|
||||
if (info.state == Slice::Fetch) sendData_p = true;
|
||||
if (info.state == Slice<F>::Fetch) sendData_p = true;
|
||||
// TODO: remove this because I have SelfSufficient
|
||||
if (otherRank == info.from.rank) sendData_p = false;
|
||||
if (!sendData_p) return;
|
||||
|
||||
MPI_Isend( sources[info.from.source].data()
|
||||
, sources[info.from.source].size()
|
||||
, MPI_DOUBLE /* TODO: adapt this with traits */
|
||||
, traits::mpi::datatypeOf<F>()
|
||||
, otherRank
|
||||
, tag
|
||||
, universe
|
||||
@ -445,19 +447,19 @@ namespace atrip {
|
||||
/**
|
||||
* \brief Receive asynchronously only if the state is Fetch
|
||||
*/
|
||||
void receive(Slice::Info const& info, size_t tag) noexcept {
|
||||
auto& slice = Slice::findByInfo(slices, info);
|
||||
void receive(typename Slice<F>::Info const& info, size_t tag) noexcept {
|
||||
auto& slice = Slice<F>::findByInfo(slices, info);
|
||||
|
||||
if (Atrip::rank == info.from.rank) return;
|
||||
|
||||
if (slice.info.state == Slice::Fetch) {
|
||||
if (slice.info.state == Slice<F>::Fetch) {
|
||||
// TODO: do it through the slice class
|
||||
slice.info.state = Slice::Dispatched;
|
||||
slice.info.state = Slice<F>::Dispatched;
|
||||
MPI_Request request;
|
||||
slice.request = request;
|
||||
MPI_Irecv( slice.data
|
||||
, slice.size
|
||||
, MPI_DOUBLE // TODO: Adapt this with traits
|
||||
, traits::mpi::datatypeOf<F>()
|
||||
, info.from.rank
|
||||
, tag
|
||||
, universe
|
||||
@ -471,42 +473,42 @@ namespace atrip {
|
||||
for (auto type: sliceTypes) unwrapSlice(type, abc);
|
||||
}
|
||||
|
||||
F* unwrapSlice(Slice::Type type, ABCTuple const& abc) {
|
||||
F* unwrapSlice(typename Slice<F>::Type type, ABCTuple const& abc) {
|
||||
WITH_CRAZY_DEBUG
|
||||
WITH_RANK << "__unwrap__:slice " << type << " w n "
|
||||
<< name
|
||||
<< " abc" << pretty_print(abc)
|
||||
<< "\n";
|
||||
auto& slice = Slice::findByTypeAbc(slices, type, abc);
|
||||
WITH_RANK << "__unwrap__:info " << slice.info << "\n";
|
||||
auto& slice = Slice<F>::findByTypeAbc(slices, type, abc);
|
||||
//WITH_RANK << "__unwrap__:info " << slice.info << "\n";
|
||||
switch (slice.info.state) {
|
||||
case Slice::Dispatched:
|
||||
case Slice<F>::Dispatched:
|
||||
WITH_RANK << "__unwrap__:Fetch: " << &slice
|
||||
<< " info " << pretty_print(slice.info)
|
||||
<< "\n";
|
||||
slice.unwrapAndMarkReady();
|
||||
return slice.data;
|
||||
break;
|
||||
case Slice::SelfSufficient:
|
||||
case Slice<F>::SelfSufficient:
|
||||
WITH_RANK << "__unwrap__:SelfSufficient: " << &slice
|
||||
<< " info " << pretty_print(slice.info)
|
||||
<< "\n";
|
||||
return slice.data;
|
||||
break;
|
||||
case Slice::Ready:
|
||||
case Slice<F>::Ready:
|
||||
WITH_RANK << "__unwrap__:READY: UNWRAPPED ALREADY" << &slice
|
||||
<< " info " << pretty_print(slice.info)
|
||||
<< "\n";
|
||||
return slice.data;
|
||||
break;
|
||||
case Slice::Recycled:
|
||||
case Slice<F>::Recycled:
|
||||
WITH_RANK << "__unwrap__:RECYCLED " << &slice
|
||||
<< " info " << pretty_print(slice.info)
|
||||
<< "\n";
|
||||
return unwrapSlice(slice.info.recycling, abc);
|
||||
break;
|
||||
case Slice::Fetch:
|
||||
case Slice::Acceptor:
|
||||
case Slice<F>::Fetch:
|
||||
case Slice<F>::Acceptor:
|
||||
throw std::domain_error("Can't unwrap an acceptor or fetch slice!");
|
||||
break;
|
||||
default:
|
||||
@ -515,24 +517,26 @@ namespace atrip {
|
||||
return slice.data;
|
||||
}
|
||||
|
||||
const RankMap rankMap;
|
||||
const RankMap<F> rankMap;
|
||||
const MPI_Comm world;
|
||||
const MPI_Comm universe;
|
||||
const std::vector<size_t> sliceLength;
|
||||
std::vector< std::vector<F> > sources;
|
||||
std::vector< Slice > slices;
|
||||
Slice::Name name;
|
||||
const std::vector<Slice::Type> sliceTypes;
|
||||
std::vector< Slice<F> > slices;
|
||||
typename Slice<F>::Name name;
|
||||
const std::vector<typename Slice<F>::Type> sliceTypes;
|
||||
std::vector< std::vector<F> > sliceBuffers;
|
||||
std::set<F*> freePointers;
|
||||
|
||||
};
|
||||
|
||||
SliceUnion&
|
||||
unionByName(std::vector<SliceUnion*> const& unions, Slice::Name name) {
|
||||
template <typename F=double>
|
||||
SliceUnion<F>&
|
||||
unionByName(std::vector<SliceUnion<F>*> const& unions,
|
||||
typename Slice<F>::Name name) {
|
||||
const auto sliceUnionIt
|
||||
= std::find_if(unions.begin(), unions.end(),
|
||||
[&name](SliceUnion const* s) {
|
||||
[&name](SliceUnion<F> const* s) {
|
||||
return name == s->name;
|
||||
});
|
||||
if (sliceUnionIt == unions.end())
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*Tuples][Tuples:1]]
|
||||
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Tuples][Tuples:1]]
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
@ -1,15 +1,16 @@
|
||||
// [[file:../../atrip.org::*Unions][Unions:1]]
|
||||
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Unions][Unions:1]]
|
||||
#pragma once
|
||||
#include <atrip/SliceUnion.hpp>
|
||||
|
||||
namespace atrip {
|
||||
|
||||
template <typename F=double>
|
||||
void sliceIntoVector
|
||||
( std::vector<double> &v
|
||||
, CTF::Tensor<double> &toSlice
|
||||
( std::vector<F> &v
|
||||
, CTF::Tensor<F> &toSlice
|
||||
, std::vector<int64_t> const low
|
||||
, std::vector<int64_t> const up
|
||||
, CTF::Tensor<double> const& origin
|
||||
, CTF::Tensor<F> const& origin
|
||||
, std::vector<int64_t> const originLow
|
||||
, std::vector<int64_t> const originUp
|
||||
) {
|
||||
@ -36,155 +37,159 @@ namespace atrip {
|
||||
, origin_.low.data()
|
||||
, origin_.up.data()
|
||||
, 1.0);
|
||||
memcpy(v.data(), toSlice.data, sizeof(double) * v.size());
|
||||
memcpy(v.data(), toSlice.data, sizeof(F) * v.size());
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
struct TAPHH : public SliceUnion {
|
||||
TAPHH( Tensor const& sourceTensor
|
||||
template <typename F=double>
|
||||
struct TAPHH : public SliceUnion<F> {
|
||||
TAPHH( CTF::Tensor<F> const& sourceTensor
|
||||
, size_t No
|
||||
, size_t Nv
|
||||
, size_t np
|
||||
, MPI_Comm child_world
|
||||
, MPI_Comm global_world
|
||||
) : SliceUnion( sourceTensor
|
||||
, {Slice::A, Slice::B, Slice::C}
|
||||
, {Nv, No, No} // size of the slices
|
||||
, {Nv}
|
||||
, np
|
||||
, child_world
|
||||
, global_world
|
||||
, Slice::TA
|
||||
, 4) {
|
||||
) : SliceUnion<F>( sourceTensor
|
||||
, {Slice<F>::A, Slice<F>::B, Slice<F>::C}
|
||||
, {Nv, No, No} // size of the slices
|
||||
, {Nv}
|
||||
, np
|
||||
, child_world
|
||||
, global_world
|
||||
, Slice<F>::TA
|
||||
, 4) {
|
||||
init(sourceTensor);
|
||||
}
|
||||
|
||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override
|
||||
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override
|
||||
{
|
||||
const int Nv = sliceLength[0]
|
||||
, No = sliceLength[1]
|
||||
, a = rankMap.find({static_cast<size_t>(Atrip::rank), it});
|
||||
const int Nv = this->sliceLength[0]
|
||||
, No = this->sliceLength[1]
|
||||
, a = this->rankMap.find({static_cast<size_t>(Atrip::rank), it});
|
||||
;
|
||||
|
||||
|
||||
sliceIntoVector( sources[it]
|
||||
, to, {0, 0, 0}, {Nv, No, No}
|
||||
, from, {a, 0, 0, 0}, {a+1, Nv, No, No}
|
||||
);
|
||||
sliceIntoVector<F>( this->sources[it]
|
||||
, to, {0, 0, 0}, {Nv, No, No}
|
||||
, from, {a, 0, 0, 0}, {a+1, Nv, No, No}
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
struct HHHA : public SliceUnion {
|
||||
HHHA( Tensor const& sourceTensor
|
||||
template <typename F=double>
|
||||
struct HHHA : public SliceUnion<F> {
|
||||
HHHA( CTF::Tensor<F> const& sourceTensor
|
||||
, size_t No
|
||||
, size_t Nv
|
||||
, size_t np
|
||||
, MPI_Comm child_world
|
||||
, MPI_Comm global_world
|
||||
) : SliceUnion( sourceTensor
|
||||
, {Slice::A, Slice::B, Slice::C}
|
||||
, {No, No, No} // size of the slices
|
||||
, {Nv} // size of the parametrization
|
||||
, np
|
||||
, child_world
|
||||
, global_world
|
||||
, Slice::VIJKA
|
||||
, 4) {
|
||||
) : SliceUnion<F>( sourceTensor
|
||||
, {Slice<F>::A, Slice<F>::B, Slice<F>::C}
|
||||
, {No, No, No} // size of the slices
|
||||
, {Nv} // size of the parametrization
|
||||
, np
|
||||
, child_world
|
||||
, global_world
|
||||
, Slice<F>::VIJKA
|
||||
, 4) {
|
||||
init(sourceTensor);
|
||||
}
|
||||
|
||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override
|
||||
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override
|
||||
{
|
||||
|
||||
const int No = sliceLength[0]
|
||||
, a = rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
||||
const int No = this->sliceLength[0]
|
||||
, a = this->rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
||||
;
|
||||
|
||||
sliceIntoVector( sources[it]
|
||||
, to, {0, 0, 0}, {No, No, No}
|
||||
, from, {0, 0, 0, a}, {No, No, No, a+1}
|
||||
);
|
||||
sliceIntoVector<F>( this->sources[it]
|
||||
, to, {0, 0, 0}, {No, No, No}
|
||||
, from, {0, 0, 0, a}, {No, No, No, a+1}
|
||||
);
|
||||
|
||||
}
|
||||
};
|
||||
|
||||
struct ABPH : public SliceUnion {
|
||||
ABPH( Tensor const& sourceTensor
|
||||
template <typename F=double>
|
||||
struct ABPH : public SliceUnion<F> {
|
||||
ABPH( CTF::Tensor<F> const& sourceTensor
|
||||
, size_t No
|
||||
, size_t Nv
|
||||
, size_t np
|
||||
, MPI_Comm child_world
|
||||
, MPI_Comm global_world
|
||||
) : SliceUnion( sourceTensor
|
||||
, { Slice::AB, Slice::BC, Slice::AC
|
||||
, Slice::BA, Slice::CB, Slice::CA
|
||||
}
|
||||
, {Nv, No} // size of the slices
|
||||
, {Nv, Nv} // size of the parametrization
|
||||
, np
|
||||
, child_world
|
||||
, global_world
|
||||
, Slice::VABCI
|
||||
, 2*6) {
|
||||
) : SliceUnion<F>( sourceTensor
|
||||
, { Slice<F>::AB, Slice<F>::BC, Slice<F>::AC
|
||||
, Slice<F>::BA, Slice<F>::CB, Slice<F>::CA
|
||||
}
|
||||
, {Nv, No} // size of the slices
|
||||
, {Nv, Nv} // size of the parametrization
|
||||
, np
|
||||
, child_world
|
||||
, global_world
|
||||
, Slice<F>::VABCI
|
||||
, 2*6) {
|
||||
init(sourceTensor);
|
||||
}
|
||||
|
||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override {
|
||||
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override {
|
||||
|
||||
const int Nv = sliceLength[0]
|
||||
, No = sliceLength[1]
|
||||
, el = rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
||||
const int Nv = this->sliceLength[0]
|
||||
, No = this->sliceLength[1]
|
||||
, el = this->rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
||||
, a = el % Nv
|
||||
, b = el / Nv
|
||||
;
|
||||
|
||||
|
||||
sliceIntoVector( sources[it]
|
||||
, to, {0, 0}, {Nv, No}
|
||||
, from, {a, b, 0, 0}, {a+1, b+1, Nv, No}
|
||||
);
|
||||
sliceIntoVector<F>( this->sources[it]
|
||||
, to, {0, 0}, {Nv, No}
|
||||
, from, {a, b, 0, 0}, {a+1, b+1, Nv, No}
|
||||
);
|
||||
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
struct ABHH : public SliceUnion {
|
||||
ABHH( Tensor const& sourceTensor
|
||||
template <typename F=double>
|
||||
struct ABHH : public SliceUnion<F> {
|
||||
ABHH( CTF::Tensor<F> const& sourceTensor
|
||||
, size_t No
|
||||
, size_t Nv
|
||||
, size_t np
|
||||
, MPI_Comm child_world
|
||||
, MPI_Comm global_world
|
||||
) : SliceUnion( sourceTensor
|
||||
, {Slice::AB, Slice::BC, Slice::AC}
|
||||
, {No, No} // size of the slices
|
||||
, {Nv, Nv} // size of the parametrization
|
||||
, np
|
||||
, child_world
|
||||
, global_world
|
||||
, Slice::VABIJ
|
||||
, 6) {
|
||||
) : SliceUnion<F>( sourceTensor
|
||||
, {Slice<F>::AB, Slice<F>::BC, Slice<F>::AC}
|
||||
, {No, No} // size of the slices
|
||||
, {Nv, Nv} // size of the parametrization
|
||||
, np
|
||||
, child_world
|
||||
, global_world
|
||||
, Slice<F>::VABIJ
|
||||
, 6) {
|
||||
init(sourceTensor);
|
||||
}
|
||||
|
||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override {
|
||||
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override {
|
||||
|
||||
const int Nv = from.lens[0]
|
||||
, No = sliceLength[1]
|
||||
, el = rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
||||
, No = this->sliceLength[1]
|
||||
, el = this->rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
||||
, a = el % Nv
|
||||
, b = el / Nv
|
||||
;
|
||||
|
||||
sliceIntoVector( sources[it]
|
||||
, to, {0, 0}, {No, No}
|
||||
, from, {a, b, 0, 0}, {a+1, b+1, No, No}
|
||||
);
|
||||
sliceIntoVector<F>( this->sources[it]
|
||||
, to, {0, 0}, {No, No}
|
||||
, from, {a, b, 0, 0}, {a+1, b+1, No, No}
|
||||
);
|
||||
|
||||
|
||||
}
|
||||
@ -192,39 +197,40 @@ namespace atrip {
|
||||
};
|
||||
|
||||
|
||||
struct TABHH : public SliceUnion {
|
||||
TABHH( Tensor const& sourceTensor
|
||||
template <typename F=double>
|
||||
struct TABHH : public SliceUnion<F> {
|
||||
TABHH( CTF::Tensor<F> const& sourceTensor
|
||||
, size_t No
|
||||
, size_t Nv
|
||||
, size_t np
|
||||
, MPI_Comm child_world
|
||||
, MPI_Comm global_world
|
||||
) : SliceUnion( sourceTensor
|
||||
, {Slice::AB, Slice::BC, Slice::AC}
|
||||
, {No, No} // size of the slices
|
||||
, {Nv, Nv} // size of the parametrization
|
||||
, np
|
||||
, child_world
|
||||
, global_world
|
||||
, Slice::TABIJ
|
||||
, 6) {
|
||||
) : SliceUnion<F>( sourceTensor
|
||||
, {Slice<F>::AB, Slice<F>::BC, Slice<F>::AC}
|
||||
, {No, No} // size of the slices
|
||||
, {Nv, Nv} // size of the parametrization
|
||||
, np
|
||||
, child_world
|
||||
, global_world
|
||||
, Slice<F>::TABIJ
|
||||
, 6) {
|
||||
init(sourceTensor);
|
||||
}
|
||||
|
||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override {
|
||||
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override {
|
||||
// TODO: maybe generalize this with ABHH
|
||||
|
||||
const int Nv = from.lens[0]
|
||||
, No = sliceLength[1]
|
||||
, el = rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
||||
, No = this->sliceLength[1]
|
||||
, el = this->rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
||||
, a = el % Nv
|
||||
, b = el / Nv
|
||||
;
|
||||
|
||||
sliceIntoVector( sources[it]
|
||||
, to, {0, 0}, {No, No}
|
||||
, from, {a, b, 0, 0}, {a+1, b+1, No, No}
|
||||
);
|
||||
sliceIntoVector<F>( this->sources[it]
|
||||
, to, {0, 0}, {No, No}
|
||||
, from, {a, b, 0, 0}, {a+1, b+1, No, No}
|
||||
);
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*Utils][Utils:1]]
|
||||
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Utils][Utils:1]]
|
||||
#pragma once
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
// [[file:../../atrip.org::*Main][Main:1]]
|
||||
// [[file:~/cc4s/src/atrip/complex/atrip.org::*Main][Main:1]]
|
||||
#include <iomanip>
|
||||
|
||||
#include <atrip/Atrip.hpp>
|
||||
@ -23,7 +23,8 @@ void Atrip::init() {
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &Atrip::np);
|
||||
}
|
||||
|
||||
Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
template <typename F>
|
||||
Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
||||
|
||||
const int np = Atrip::np;
|
||||
const int rank = Atrip::rank;
|
||||
@ -38,14 +39,14 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
LOG(0,"Atrip") << "Nv: " << Nv << "\n";
|
||||
|
||||
// allocate the three scratches, see piecuch
|
||||
std::vector<double> Tijk(No*No*No) // doubles only (see piecuch)
|
||||
, Zijk(No*No*No) // singles + doubles (see piecuch)
|
||||
// we need local copies of the following tensors on every
|
||||
// rank
|
||||
, epsi(No)
|
||||
, epsa(Nv)
|
||||
, Tai(No * Nv)
|
||||
;
|
||||
std::vector<F> Tijk(No*No*No) // doubles only (see piecuch)
|
||||
, Zijk(No*No*No) // singles + doubles (see piecuch)
|
||||
// we need local copies of the following tensors on every
|
||||
// rank
|
||||
, epsi(No)
|
||||
, epsa(Nv)
|
||||
, Tai(No * Nv)
|
||||
;
|
||||
|
||||
in.ei->read_all(epsi.data());
|
||||
in.ea->read_all(epsa.data());
|
||||
@ -74,20 +75,20 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
chrono["nv-slices"].start();
|
||||
// BUILD SLICES PARAMETRIZED BY NV ==================================={{{1
|
||||
LOG(0,"Atrip") << "BUILD NV-SLICES\n";
|
||||
TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
TAPHH<F> taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
HHHA<F> hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
chrono["nv-slices"].stop();
|
||||
|
||||
chrono["nv-nv-slices"].start();
|
||||
// BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1
|
||||
LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n";
|
||||
ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
ABPH<F> abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
ABHH<F> abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
TABHH<F> tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||
chrono["nv-nv-slices"].stop();
|
||||
|
||||
// all tensors
|
||||
std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh};
|
||||
std::vector< SliceUnion<F>* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh};
|
||||
|
||||
//CONSTRUCT TUPLE LIST ==============================================={{{1
|
||||
LOG(0,"Atrip") << "BUILD TUPLE LIST\n";
|
||||
@ -121,18 +122,20 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
= [&tuplesList](size_t const i) { return i >= tuplesList.size(); };
|
||||
|
||||
|
||||
using Database = typename Slice<F>::Database;
|
||||
using LocalDatabase = typename Slice<F>::LocalDatabase;
|
||||
auto communicateDatabase
|
||||
= [ &unions
|
||||
, np
|
||||
, &chrono
|
||||
] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database {
|
||||
] (ABCTuple const& abc, MPI_Comm const& c) -> Database {
|
||||
|
||||
chrono["db:comm:type:do"].start();
|
||||
auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement();
|
||||
auto MPI_LDB_ELEMENT = Slice<F>::mpi::localDatabaseElement();
|
||||
chrono["db:comm:type:do"].stop();
|
||||
|
||||
chrono["db:comm:ldb"].start();
|
||||
Slice::LocalDatabase ldb;
|
||||
LocalDatabase ldb;
|
||||
|
||||
for (auto const& tensor: unions) {
|
||||
auto const& tensorDb = tensor->buildLocalDatabase(abc);
|
||||
@ -140,7 +143,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
}
|
||||
chrono["db:comm:ldb"].stop();
|
||||
|
||||
Slice::Database db(np * ldb.size(), ldb[0]);
|
||||
Database db(np * ldb.size(), ldb[0]);
|
||||
|
||||
chrono["oneshot-db:comm:allgather"].start();
|
||||
chrono["db:comm:allgather"].start();
|
||||
@ -162,7 +165,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
};
|
||||
|
||||
auto doIOPhase
|
||||
= [&unions, &rank, &np, &universe, &chrono] (Slice::Database const& db) {
|
||||
= [&unions, &rank, &np, &universe, &chrono] (Database const& db) {
|
||||
|
||||
const size_t localDBLength = db.size() / np;
|
||||
|
||||
@ -212,7 +215,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
;
|
||||
for (auto it = begin; it != end; ++it) {
|
||||
sendTag++;
|
||||
Slice::LocalDatabaseElement const& el = *it;
|
||||
typename Slice<F>::LocalDatabaseElement const& el = *it;
|
||||
|
||||
if (el.info.from.rank != rank) continue;
|
||||
|
||||
@ -261,14 +264,13 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
|
||||
// START MAIN LOOP ======================================================{{{1
|
||||
|
||||
Slice::Database db;
|
||||
|
||||
for ( size_t i = abcIndex.first, iteration = 1
|
||||
; i < abcIndex.second
|
||||
; i++, iteration++
|
||||
) {
|
||||
chrono["iterations"].start();
|
||||
|
||||
|
||||
// check overhead from chrono over all iterations
|
||||
chrono["start:stop"].start(); chrono["start:stop"].stop();
|
||||
|
||||
@ -347,7 +349,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
WITH_RANK << "__comm__:" << iteration << "th communicating database\n";
|
||||
chrono["db:comm"].start();
|
||||
//const auto db = communicateDatabase(*abcNext, universe);
|
||||
db = communicateDatabase(*abcNext, universe);
|
||||
Database db = communicateDatabase(*abcNext, universe);
|
||||
chrono["db:comm"].stop();
|
||||
chrono["db:io"].start();
|
||||
doIOPhase(db);
|
||||
@ -368,30 +370,30 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
)))
|
||||
chrono["oneshot-doubles"].start();
|
||||
chrono["doubles"].start();
|
||||
doublesContribution( abc, (size_t)No, (size_t)Nv
|
||||
// -- VABCI
|
||||
, abph.unwrapSlice(Slice::AB, abc)
|
||||
, abph.unwrapSlice(Slice::AC, abc)
|
||||
, abph.unwrapSlice(Slice::BC, abc)
|
||||
, abph.unwrapSlice(Slice::BA, abc)
|
||||
, abph.unwrapSlice(Slice::CA, abc)
|
||||
, abph.unwrapSlice(Slice::CB, abc)
|
||||
// -- VHHHA
|
||||
, hhha.unwrapSlice(Slice::A, abc)
|
||||
, hhha.unwrapSlice(Slice::B, abc)
|
||||
, hhha.unwrapSlice(Slice::C, abc)
|
||||
// -- TA
|
||||
, taphh.unwrapSlice(Slice::A, abc)
|
||||
, taphh.unwrapSlice(Slice::B, abc)
|
||||
, taphh.unwrapSlice(Slice::C, abc)
|
||||
// -- TABIJ
|
||||
, tabhh.unwrapSlice(Slice::AB, abc)
|
||||
, tabhh.unwrapSlice(Slice::AC, abc)
|
||||
, tabhh.unwrapSlice(Slice::BC, abc)
|
||||
// -- TIJK
|
||||
, Tijk.data()
|
||||
, chrono
|
||||
);
|
||||
doublesContribution<F>( abc, (size_t)No, (size_t)Nv
|
||||
// -- VABCI
|
||||
, abph.unwrapSlice(Slice<F>::AB, abc)
|
||||
, abph.unwrapSlice(Slice<F>::AC, abc)
|
||||
, abph.unwrapSlice(Slice<F>::BC, abc)
|
||||
, abph.unwrapSlice(Slice<F>::BA, abc)
|
||||
, abph.unwrapSlice(Slice<F>::CA, abc)
|
||||
, abph.unwrapSlice(Slice<F>::CB, abc)
|
||||
// -- VHHHA
|
||||
, hhha.unwrapSlice(Slice<F>::A, abc)
|
||||
, hhha.unwrapSlice(Slice<F>::B, abc)
|
||||
, hhha.unwrapSlice(Slice<F>::C, abc)
|
||||
// -- TA
|
||||
, taphh.unwrapSlice(Slice<F>::A, abc)
|
||||
, taphh.unwrapSlice(Slice<F>::B, abc)
|
||||
, taphh.unwrapSlice(Slice<F>::C, abc)
|
||||
// -- TABIJ
|
||||
, tabhh.unwrapSlice(Slice<F>::AB, abc)
|
||||
, tabhh.unwrapSlice(Slice<F>::AC, abc)
|
||||
, tabhh.unwrapSlice(Slice<F>::BC, abc)
|
||||
// -- TIJK
|
||||
, Tijk.data()
|
||||
, chrono
|
||||
);
|
||||
WITH_RANK << iteration << "-th doubles done\n";
|
||||
chrono["doubles"].stop();
|
||||
chrono["oneshot-doubles"].stop();
|
||||
@ -409,12 +411,12 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
|
||||
chrono["reorder"].stop();
|
||||
chrono["singles"].start();
|
||||
singlesContribution( No, Nv, abc
|
||||
, Tai.data()
|
||||
, abhh.unwrapSlice(Slice::AB, abc)
|
||||
, abhh.unwrapSlice(Slice::AC, abc)
|
||||
, abhh.unwrapSlice(Slice::BC, abc)
|
||||
, Zijk.data());
|
||||
singlesContribution<F>( No, Nv, abc
|
||||
, Tai.data()
|
||||
, abhh.unwrapSlice(Slice<F>::AB, abc)
|
||||
, abhh.unwrapSlice(Slice<F>::AC, abc)
|
||||
, abhh.unwrapSlice(Slice<F>::BC, abc)
|
||||
, Zijk.data());
|
||||
chrono["singles"].stop();
|
||||
}
|
||||
|
||||
@ -426,13 +428,13 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
int distinct(0);
|
||||
if (abc[0] == abc[1]) distinct++;
|
||||
if (abc[1] == abc[2]) distinct--;
|
||||
const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]);
|
||||
const F epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]);
|
||||
|
||||
chrono["energy"].start();
|
||||
if ( distinct == 0)
|
||||
tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk);
|
||||
tupleEnergy = getEnergyDistinct<F>(epsabc, epsi, Tijk, Zijk);
|
||||
else
|
||||
tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk);
|
||||
tupleEnergy = getEnergySame<F>(epsabc, epsi, Tijk, Zijk);
|
||||
chrono["energy"].stop();
|
||||
|
||||
#if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES)
|
||||
@ -473,8 +475,8 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
<< " :abc " << pretty_print(abc)
|
||||
<< " :abcN " << pretty_print(*abcNext)
|
||||
<< "\n";
|
||||
for (auto const& slice: u->slices)
|
||||
WITH_RANK << "__gc__:guts:" << slice.info << "\n";
|
||||
// for (auto const& slice: u->slices)
|
||||
// WITH_RANK << "__gc__:guts:" << slice.info << "\n";
|
||||
u->clearUnusedSlicesForNext(*abcNext);
|
||||
|
||||
WITH_RANK << "__gc__: checking validity\n";
|
||||
@ -482,13 +484,13 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
#ifdef HAVE_OCD
|
||||
// check for validity of the slices
|
||||
for (auto type: u->sliceTypes) {
|
||||
auto tuple = Slice::subtupleBySlice(abc, type);
|
||||
auto tuple = Slice<F>::subtupleBySlice(abc, type);
|
||||
for (auto& slice: u->slices) {
|
||||
if ( slice.info.type == type
|
||||
&& slice.info.tuple == tuple
|
||||
&& slice.isDirectlyFetchable()
|
||||
) {
|
||||
if (slice.info.state == Slice::Dispatched)
|
||||
if (slice.info.state == Slice<F>::Dispatched)
|
||||
throw std::domain_error( "This slice should not be undispatched! "
|
||||
+ pretty_print(slice.info));
|
||||
}
|
||||
@ -555,4 +557,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
||||
return { - globalEnergy };
|
||||
|
||||
}
|
||||
// instantiate
|
||||
template Atrip::Output Atrip::run(Atrip::Input<double> const& in);
|
||||
template Atrip::Output Atrip::run(Atrip::Input<Complex> const& in);
|
||||
// Main:1 ends here
|
||||
|
||||
Loading…
Reference in New Issue
Block a user