338 lines
12 KiB
C++
338 lines
12 KiB
C++
// [[file:../../atrip.org::*Equations][Equations:1]]
|
|
#pragma once
|
|
|
|
#include<atrip/Slice.hpp>
|
|
#include<atrip/Blas.hpp>
|
|
|
|
namespace atrip {
|
|
|
|
double getEnergyDistinct
|
|
( const double epsabc
|
|
, std::vector<double> const& epsi
|
|
, std::vector<double> const& Tijk_
|
|
, std::vector<double> const& Zijk_
|
|
) {
|
|
constexpr size_t blockSize=16;
|
|
double energy(0.);
|
|
const size_t No = epsi.size();
|
|
for (size_t kk=0; kk<No; kk+=blockSize){
|
|
const size_t kend( std::min(No, kk+blockSize) );
|
|
for (size_t jj(kk); jj<No; jj+=blockSize){
|
|
const size_t jend( std::min( No, jj+blockSize) );
|
|
for (size_t ii(jj); ii<No; ii+=blockSize){
|
|
const size_t iend( std::min( No, ii+blockSize) );
|
|
for (size_t k(kk); k < kend; k++){
|
|
const double ek(epsi[k]);
|
|
const size_t jstart = jj > k ? jj : k;
|
|
for (size_t j(jstart); j < jend; j++){
|
|
const double ej(epsi[j]);
|
|
double facjk( j == k ? 0.5 : 1.0);
|
|
size_t istart = ii > j ? ii : j;
|
|
for (size_t i(istart); i < iend; i++){
|
|
const double ei(epsi[i]);
|
|
double facij ( i==j ? 0.5 : 1.0);
|
|
double denominator(epsabc - ei - ej - ek);
|
|
double U(Zijk_[i + No*j + No*No*k]);
|
|
double V(Zijk_[i + No*k + No*No*j]);
|
|
double W(Zijk_[j + No*i + No*No*k]);
|
|
double X(Zijk_[j + No*k + No*No*i]);
|
|
double Y(Zijk_[k + No*i + No*No*j]);
|
|
double Z(Zijk_[k + No*j + No*No*i]);
|
|
|
|
double A(Tijk_[i + No*j + No*No*k]);
|
|
double B(Tijk_[i + No*k + No*No*j]);
|
|
double C(Tijk_[j + No*i + No*No*k]);
|
|
double D(Tijk_[j + No*k + No*No*i]);
|
|
double E(Tijk_[k + No*i + No*No*j]);
|
|
double F(Tijk_[k + No*j + No*No*i]);
|
|
double value(3.0*(A*U+B*V+C*W+D*X+E*Y+F*Z)
|
|
+((U+X+Y)-2.0*(V+W+Z))*(A+D+E)
|
|
+((V+W+Z)-2.0*(U+X+Y))*(B+C+F));
|
|
energy += 2.0*value / denominator * facjk * facij;
|
|
} // i
|
|
} // j
|
|
} // k
|
|
} // ii
|
|
} // jj
|
|
} // kk
|
|
return energy;
|
|
}
|
|
|
|
|
|
double getEnergySame
|
|
( const double epsabc
|
|
, std::vector<double> const& epsi
|
|
, std::vector<double> const& Tijk_
|
|
, std::vector<double> const& Zijk_
|
|
) {
|
|
constexpr size_t blockSize = 16;
|
|
const size_t No = epsi.size();
|
|
double energy(0.);
|
|
for (size_t kk=0; kk<No; kk+=blockSize){
|
|
const size_t kend( std::min( kk+blockSize, No) );
|
|
for (size_t jj(kk); jj<No; jj+=blockSize){
|
|
const size_t jend( std::min( jj+blockSize, No) );
|
|
for (size_t ii(jj); ii<No; ii+=blockSize){
|
|
const size_t iend( std::min( ii+blockSize, No) );
|
|
for (size_t k(kk); k < kend; k++){
|
|
const double ek(epsi[k]);
|
|
const size_t jstart = jj > k ? jj : k;
|
|
for(size_t j(jstart); j < jend; j++){
|
|
const double facjk( j == k ? 0.5 : 1.0);
|
|
const double ej(epsi[j]);
|
|
const size_t istart = ii > j ? ii : j;
|
|
for(size_t i(istart); i < iend; i++){
|
|
double ei(epsi[i]);
|
|
double facij ( i==j ? 0.5 : 1.0);
|
|
double denominator(epsabc - ei - ej - ek);
|
|
double U(Zijk_[i + No*j + No*No*k]);
|
|
double V(Zijk_[j + No*k + No*No*i]);
|
|
double W(Zijk_[k + No*i + No*No*j]);
|
|
double A(Tijk_[i + No*j + No*No*k]);
|
|
double B(Tijk_[j + No*k + No*No*i]);
|
|
double C(Tijk_[k + No*i + No*No*j]);
|
|
double value(3.0*( A*U + B*V + C*W) - (A+B+C)*(U+V+W));
|
|
energy += 2.0*value / denominator * facjk * facij;
|
|
} // i
|
|
} // j
|
|
} // k
|
|
} // ii
|
|
} // jj
|
|
} // kk
|
|
return energy;
|
|
}
|
|
|
|
void singlesContribution
|
|
( size_t No
|
|
, size_t Nv
|
|
, const ABCTuple &abc
|
|
, double const* Tph
|
|
, double const* VABij
|
|
, double const* VACij
|
|
, double const* VBCij
|
|
, double *Zijk
|
|
) {
|
|
const size_t a(abc[0]), b(abc[1]), c(abc[2]);
|
|
for (size_t k=0; k < No; k++)
|
|
for (size_t i=0; i < No; i++)
|
|
for (size_t j=0; j < No; j++) {
|
|
const size_t ijk = i + j*No + k*No*No
|
|
, jk = j + No * k
|
|
;
|
|
Zijk[ijk] += Tph[ a + i * Nv ] * VBCij[ j + k * No ];
|
|
Zijk[ijk] += Tph[ b + j * Nv ] * VACij[ i + k * No ];
|
|
Zijk[ijk] += Tph[ c + k * Nv ] * VABij[ i + j * No ];
|
|
}
|
|
}
|
|
|
|
void doublesContribution
|
|
( const ABCTuple &abc
|
|
, size_t const No
|
|
, size_t const Nv
|
|
// -- VABCI
|
|
, double const* VABph
|
|
, double const* VACph
|
|
, double const* VBCph
|
|
, double const* VBAph
|
|
, double const* VCAph
|
|
, double const* VCBph
|
|
// -- VHHHA
|
|
, double const* VhhhA
|
|
, double const* VhhhB
|
|
, double const* VhhhC
|
|
// -- TA
|
|
, double const* TAphh
|
|
, double const* TBphh
|
|
, double const* TCphh
|
|
// -- TABIJ
|
|
, double const* TABhh
|
|
, double const* TAChh
|
|
, double const* TBChh
|
|
// -- TIJK
|
|
, double *Tijk
|
|
, atrip::Timings& chrono
|
|
) {
|
|
|
|
auto& t_reorder = chrono["doubles:reorder"];
|
|
const size_t a = abc[0], b = abc[1], c = abc[2]
|
|
, NoNo = No*No, NoNv = No*Nv
|
|
;
|
|
|
|
#if defined(ATRIP_USE_DGEMM)
|
|
#define _IJK_(i, j, k) i + j*No + k*NoNo
|
|
#define REORDER(__II, __JJ, __KK) \
|
|
t_reorder.start(); \
|
|
for (size_t k = 0; k < No; k++) \
|
|
for (size_t j = 0; j < No; j++) \
|
|
for (size_t i = 0; i < No; i++) { \
|
|
Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \
|
|
} \
|
|
t_reorder.stop();
|
|
#define DGEMM_PARTICLES(__A, __B) \
|
|
atrip::dgemm_( "T" \
|
|
, "N" \
|
|
, (int const*)&NoNo \
|
|
, (int const*)&No \
|
|
, (int const*)&Nv \
|
|
, &one \
|
|
, __A \
|
|
, (int const*)&Nv \
|
|
, __B \
|
|
, (int const*)&Nv \
|
|
, &zero \
|
|
, _t_buffer.data() \
|
|
, (int const*)&NoNo \
|
|
);
|
|
#define DGEMM_HOLES(__A, __B, __TRANSB) \
|
|
atrip::dgemm_( "N" \
|
|
, __TRANSB \
|
|
, (int const*)&NoNo \
|
|
, (int const*)&No \
|
|
, (int const*)&No \
|
|
, &m_one \
|
|
, __A \
|
|
, (int const*)&NoNo \
|
|
, __B \
|
|
, (int const*)&No \
|
|
, &zero \
|
|
, _t_buffer.data() \
|
|
, (int const*)&NoNo \
|
|
);
|
|
|
|
using F = double;
|
|
const size_t NoNoNo = No*NoNo;
|
|
std::vector<double> _t_buffer;
|
|
_t_buffer.reserve(NoNoNo);
|
|
F one{1.0}, m_one{-1.0}, zero{0.0};
|
|
|
|
t_reorder.start();
|
|
for (size_t k = 0; k < NoNoNo; k++) {
|
|
// zero the Tijk
|
|
Tijk[k] = 0.0;
|
|
}
|
|
t_reorder.stop();
|
|
|
|
chrono["doubles:holes"].start();
|
|
{ // Holes part ============================================================
|
|
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
|
|
chrono["doubles:holes:1"].start();
|
|
DGEMM_HOLES(VhhhC, TABhh, "N")
|
|
REORDER(i, k, j)
|
|
chrono["doubles:holes:1"].stop();
|
|
// VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0
|
|
chrono["doubles:holes:2"].start();
|
|
DGEMM_HOLES(VhhhC, TABhh, "T")
|
|
REORDER(j, k, i)
|
|
chrono["doubles:holes:2"].stop();
|
|
// VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5
|
|
chrono["doubles:holes:3"].start();
|
|
DGEMM_HOLES(VhhhB, TAChh, "N")
|
|
REORDER(i, j, k)
|
|
chrono["doubles:holes:3"].stop();
|
|
// VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3
|
|
chrono["doubles:holes:4"].start();
|
|
DGEMM_HOLES(VhhhB, TAChh, "T")
|
|
REORDER(k, j, i)
|
|
chrono["doubles:holes:4"].stop();
|
|
// VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1
|
|
chrono["doubles:holes:5"].start();
|
|
DGEMM_HOLES(VhhhA, TBChh, "N")
|
|
REORDER(j, i, k)
|
|
chrono["doubles:holes:5"].stop();
|
|
// VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4
|
|
chrono["doubles:holes:6"].start();
|
|
DGEMM_HOLES(VhhhA, TBChh, "T")
|
|
REORDER(k, i, j)
|
|
chrono["doubles:holes:6"].stop();
|
|
}
|
|
chrono["doubles:holes"].stop();
|
|
|
|
chrono["doubles:particles"].start();
|
|
{ // Particle part =========================================================
|
|
// TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0
|
|
chrono["doubles:particles:1"].start();
|
|
DGEMM_PARTICLES(TAphh, VBCph)
|
|
REORDER(i, j, k)
|
|
chrono["doubles:particles:1"].stop();
|
|
// TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3
|
|
chrono["doubles:particles:2"].start();
|
|
DGEMM_PARTICLES(TAphh, VCBph)
|
|
REORDER(i, k, j)
|
|
chrono["doubles:particles:2"].stop();
|
|
// TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5
|
|
chrono["doubles:particles:3"].start();
|
|
DGEMM_PARTICLES(TCphh, VABph)
|
|
REORDER(k, i, j)
|
|
chrono["doubles:particles:3"].stop();
|
|
// TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2
|
|
chrono["doubles:particles:4"].start();
|
|
DGEMM_PARTICLES(TCphh, VBAph)
|
|
REORDER(k, j, i)
|
|
chrono["doubles:particles:4"].stop();
|
|
// TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1
|
|
chrono["doubles:particles:5"].start();
|
|
DGEMM_PARTICLES(TBphh, VACph)
|
|
REORDER(j, i, k)
|
|
chrono["doubles:particles:5"].stop();
|
|
// TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4
|
|
chrono["doubles:particles:6"].start();
|
|
DGEMM_PARTICLES(TBphh, VCAph)
|
|
REORDER(j, k, i)
|
|
chrono["doubles:particles:6"].stop();
|
|
}
|
|
chrono["doubles:particles"].stop();
|
|
|
|
#undef REORDER
|
|
#undef DGEMM_HOLES
|
|
#undef DGEMM_PARTICLES
|
|
#undef _IJK_
|
|
#else
|
|
for (size_t k = 0; k < No; k++)
|
|
for (size_t j = 0; j < No; j++)
|
|
for (size_t i = 0; i < No; i++){
|
|
const size_t ijk = i + j*No + k*NoNo
|
|
, jk = j + k*No
|
|
;
|
|
Tijk[ijk] = 0.0; // :important
|
|
// HOLE DIAGRAMS: TABHH and VHHHA
|
|
for (size_t L = 0; L < No; L++){
|
|
// t[abLj] * V[Lcik] H1
|
|
// t[baLi] * V[Lcjk] H0 TODO: conjugate T for complex
|
|
Tijk[ijk] -= TABhh[L + j*No] * VhhhC[i + k*No + L*NoNo];
|
|
Tijk[ijk] -= TABhh[i + L*No] * VhhhC[j + k*No + L*NoNo];
|
|
|
|
// t[acLk] * V[Lbij] H5
|
|
// t[caLi] * V[Lbkj] H3
|
|
Tijk[ijk] -= TAChh[L + k*No] * VhhhB[i + j*No + L*NoNo];
|
|
Tijk[ijk] -= TAChh[i + L*No] * VhhhB[k + j*No + L*NoNo];
|
|
|
|
// t[bcLk] * V[Laji] H2
|
|
// t[cbLj] * V[Laki] H4
|
|
Tijk[ijk] -= TBChh[L + k*No] * VhhhA[j + i*No + L*NoNo];
|
|
Tijk[ijk] -= TBChh[j + L*No] * VhhhA[k + i*No + L*NoNo];
|
|
}
|
|
// PARTILCE DIAGRAMS: TAPHH and VABPH
|
|
for (size_t E = 0; E < Nv; E++) {
|
|
// t[aEij] * V[bcEk] P0
|
|
// t[aEik] * V[cbEj] P3 // TODO: CHECK THIS ONE, I DONT KNOW
|
|
Tijk[ijk] += TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv];
|
|
Tijk[ijk] += TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv];
|
|
|
|
// t[cEki] * V[abEj] P5
|
|
// t[cEkj] * V[baEi] P2
|
|
Tijk[ijk] += TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv];
|
|
Tijk[ijk] += TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv];
|
|
|
|
// t[bEji] * V[acEk] P1
|
|
// t[bEjk] * V[caEi] P4
|
|
Tijk[ijk] += TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv];
|
|
Tijk[ijk] += TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv];
|
|
}
|
|
|
|
}
|
|
#endif
|
|
}
|
|
|
|
}
|
|
// Equations:1 ends here
|