Merge branch 'complex'
This commit is contained in:
commit
88c03698db
@ -36,12 +36,11 @@ namespace atrip {
|
|||||||
static int rank;
|
static int rank;
|
||||||
static int np;
|
static int np;
|
||||||
static Timings chrono;
|
static Timings chrono;
|
||||||
static size_t networkSend;
|
|
||||||
static size_t localSend;
|
|
||||||
static void init();
|
static void init();
|
||||||
|
|
||||||
|
template <typename F=double>
|
||||||
struct Input {
|
struct Input {
|
||||||
CTF::Tensor<double> *ei = nullptr
|
CTF::Tensor<F> *ei = nullptr
|
||||||
, *ea = nullptr
|
, *ea = nullptr
|
||||||
, *Tph = nullptr
|
, *Tph = nullptr
|
||||||
, *Tpphh = nullptr
|
, *Tpphh = nullptr
|
||||||
@ -49,13 +48,13 @@ namespace atrip {
|
|||||||
, *Vhhhp = nullptr
|
, *Vhhhp = nullptr
|
||||||
, *Vppph = nullptr
|
, *Vppph = nullptr
|
||||||
;
|
;
|
||||||
Input& with_epsilon_i(CTF::Tensor<double> * t) { ei = t; return *this; }
|
Input& with_epsilon_i(CTF::Tensor<F> * t) { ei = t; return *this; }
|
||||||
Input& with_epsilon_a(CTF::Tensor<double> * t) { ea = t; return *this; }
|
Input& with_epsilon_a(CTF::Tensor<F> * t) { ea = t; return *this; }
|
||||||
Input& with_Tai(CTF::Tensor<double> * t) { Tph = t; return *this; }
|
Input& with_Tai(CTF::Tensor<F> * t) { Tph = t; return *this; }
|
||||||
Input& with_Tabij(CTF::Tensor<double> * t) { Tpphh = t; return *this; }
|
Input& with_Tabij(CTF::Tensor<F> * t) { Tpphh = t; return *this; }
|
||||||
Input& with_Vabij(CTF::Tensor<double> * t) { Vpphh = t; return *this; }
|
Input& with_Vabij(CTF::Tensor<F> * t) { Vpphh = t; return *this; }
|
||||||
Input& with_Vijka(CTF::Tensor<double> * t) { Vhhhp = t; return *this; }
|
Input& with_Vijka(CTF::Tensor<F> * t) { Vhhhp = t; return *this; }
|
||||||
Input& with_Vabci(CTF::Tensor<double> * t) { Vppph = t; return *this; }
|
Input& with_Vabci(CTF::Tensor<F> * t) { Vppph = t; return *this; }
|
||||||
|
|
||||||
enum TuplesDistribution {
|
enum TuplesDistribution {
|
||||||
NAIVE,
|
NAIVE,
|
||||||
@ -70,13 +69,13 @@ namespace atrip {
|
|||||||
ADD_ATTRIBUTE(int, percentageMod, -1)
|
ADD_ATTRIBUTE(int, percentageMod, -1)
|
||||||
ADD_ATTRIBUTE(TuplesDistribution, tuplesDistribution, NAIVE)
|
ADD_ATTRIBUTE(TuplesDistribution, tuplesDistribution, NAIVE)
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Output {
|
struct Output {
|
||||||
double energy;
|
double energy;
|
||||||
};
|
};
|
||||||
static Output run(Input const& in);
|
template <typename F=double>
|
||||||
|
static Output run(Input<F> const& in);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -15,6 +15,9 @@
|
|||||||
// [[file:../../atrip.org::*Blas][Blas:1]]
|
// [[file:../../atrip.org::*Blas][Blas:1]]
|
||||||
#pragma once
|
#pragma once
|
||||||
namespace atrip {
|
namespace atrip {
|
||||||
|
|
||||||
|
using Complex = std::complex<double>;
|
||||||
|
|
||||||
extern "C" {
|
extern "C" {
|
||||||
void dgemm_(
|
void dgemm_(
|
||||||
const char *transa,
|
const char *transa,
|
||||||
@ -23,14 +26,73 @@ namespace atrip {
|
|||||||
const int *n,
|
const int *n,
|
||||||
const int *k,
|
const int *k,
|
||||||
double *alpha,
|
double *alpha,
|
||||||
const double *A,
|
const double *a,
|
||||||
const int *lda,
|
const int *lda,
|
||||||
const double *B,
|
const double *b,
|
||||||
const int *ldb,
|
const int *ldb,
|
||||||
double *beta,
|
double *beta,
|
||||||
double *C,
|
double *c,
|
||||||
|
const int *ldc
|
||||||
|
);
|
||||||
|
|
||||||
|
void zgemm_(
|
||||||
|
const char *transa,
|
||||||
|
const char *transb,
|
||||||
|
const int *m,
|
||||||
|
const int *n,
|
||||||
|
const int *k,
|
||||||
|
Complex *alpha,
|
||||||
|
const Complex *A,
|
||||||
|
const int *lda,
|
||||||
|
const Complex *B,
|
||||||
|
const int *ldb,
|
||||||
|
Complex *beta,
|
||||||
|
Complex *C,
|
||||||
const int *ldc
|
const int *ldc
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename F=double>
|
||||||
|
void xgemm(const char *transa,
|
||||||
|
const char *transb,
|
||||||
|
const int *m,
|
||||||
|
const int *n,
|
||||||
|
const int *k,
|
||||||
|
F *alpha,
|
||||||
|
const F *A,
|
||||||
|
const int *lda,
|
||||||
|
const F *B,
|
||||||
|
const int *ldb,
|
||||||
|
F *beta,
|
||||||
|
F *C,
|
||||||
|
const int *ldc) {
|
||||||
|
dgemm_(transa, transb,
|
||||||
|
m, n, k,
|
||||||
|
alpha, A, lda,
|
||||||
|
B, ldb, beta,
|
||||||
|
C, ldc);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
void xgemm(const char *transa,
|
||||||
|
const char *transb,
|
||||||
|
const int *m,
|
||||||
|
const int *n,
|
||||||
|
const int *k,
|
||||||
|
Complex *alpha,
|
||||||
|
const Complex *A,
|
||||||
|
const int *lda,
|
||||||
|
const Complex *B,
|
||||||
|
const int *ldb,
|
||||||
|
Complex *beta,
|
||||||
|
Complex *C,
|
||||||
|
const int *ldc) {
|
||||||
|
zgemm_(transa, transb,
|
||||||
|
m, n, k,
|
||||||
|
alpha, A, lda,
|
||||||
|
B, ldb, beta,
|
||||||
|
C, ldc);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
// Blas:1 ends here
|
// Blas:1 ends here
|
||||||
|
|||||||
@ -17,6 +17,9 @@
|
|||||||
#include <functional>
|
#include <functional>
|
||||||
#define ATRIP_BENCHMARK
|
#define ATRIP_BENCHMARK
|
||||||
//#define ATRIP_DONT_SLICE
|
//#define ATRIP_DONT_SLICE
|
||||||
|
#ifndef ATRIP_DEBUG
|
||||||
|
# define ATRIP_DEBUG 1
|
||||||
|
#endif
|
||||||
//#define ATRIP_WORKLOAD_DUMP
|
//#define ATRIP_WORKLOAD_DUMP
|
||||||
#define ATRIP_USE_DGEMM
|
#define ATRIP_USE_DGEMM
|
||||||
//#define ATRIP_PRINT_TUPLES
|
//#define ATRIP_PRINT_TUPLES
|
||||||
@ -52,7 +55,6 @@
|
|||||||
# define DBG(...) dbg(__VA_ARGS__)
|
# define DBG(...) dbg(__VA_ARGS__)
|
||||||
#elif ATRIP_DEBUG == 2
|
#elif ATRIP_DEBUG == 2
|
||||||
# pragma message("WARNING: You have some debugging info for ABC triples")
|
# pragma message("WARNING: You have some debugging info for ABC triples")
|
||||||
# include <dbg.h>
|
|
||||||
# define OCD_Barrier(com)
|
# define OCD_Barrier(com)
|
||||||
# define WITH_OCD if (false)
|
# define WITH_OCD if (false)
|
||||||
# define WITH_ROOT if (atrip::Atrip::rank == 0)
|
# define WITH_ROOT if (atrip::Atrip::rank == 0)
|
||||||
|
|||||||
@ -20,14 +20,15 @@
|
|||||||
|
|
||||||
namespace atrip {
|
namespace atrip {
|
||||||
|
|
||||||
|
template <typename F=double>
|
||||||
double getEnergyDistinct
|
double getEnergyDistinct
|
||||||
( const double epsabc
|
( const F epsabc
|
||||||
, std::vector<double> const& epsi
|
, std::vector<F> const& epsi
|
||||||
, std::vector<double> const& Tijk_
|
, std::vector<F> const& Tijk_
|
||||||
, std::vector<double> const& Zijk_
|
, std::vector<F> const& Zijk_
|
||||||
) {
|
) {
|
||||||
constexpr size_t blockSize=16;
|
constexpr size_t blockSize=16;
|
||||||
double energy(0.);
|
F energy(0.);
|
||||||
const size_t No = epsi.size();
|
const size_t No = epsi.size();
|
||||||
for (size_t kk=0; kk<No; kk+=blockSize){
|
for (size_t kk=0; kk<No; kk+=blockSize){
|
||||||
const size_t kend( std::min(No, kk+blockSize) );
|
const size_t kend( std::min(No, kk+blockSize) );
|
||||||
@ -36,52 +37,64 @@ namespace atrip {
|
|||||||
for (size_t ii(jj); ii<No; ii+=blockSize){
|
for (size_t ii(jj); ii<No; ii+=blockSize){
|
||||||
const size_t iend( std::min( No, ii+blockSize) );
|
const size_t iend( std::min( No, ii+blockSize) );
|
||||||
for (size_t k(kk); k < kend; k++){
|
for (size_t k(kk); k < kend; k++){
|
||||||
const double ek(epsi[k]);
|
const F ek(epsi[k]);
|
||||||
const size_t jstart = jj > k ? jj : k;
|
const size_t jstart = jj > k ? jj : k;
|
||||||
for (size_t j(jstart); j < jend; j++){
|
for (size_t j(jstart); j < jend; j++){
|
||||||
const double ej(epsi[j]);
|
F const ej(epsi[j]);
|
||||||
double facjk( j == k ? 0.5 : 1.0);
|
F const facjk = j == k ? F(0.5) : F(1.0);
|
||||||
size_t istart = ii > j ? ii : j;
|
size_t istart = ii > j ? ii : j;
|
||||||
for (size_t i(istart); i < iend; i++){
|
for (size_t i(istart); i < iend; i++){
|
||||||
const double ei(epsi[i]);
|
const F
|
||||||
double facij ( i==j ? 0.5 : 1.0);
|
ei(epsi[i])
|
||||||
double denominator(epsabc - ei - ej - ek);
|
, facij = i == j ? F(0.5) : F(1.0)
|
||||||
double U(Zijk_[i + No*j + No*No*k]);
|
, denominator(epsabc - ei - ej - ek)
|
||||||
double V(Zijk_[i + No*k + No*No*j]);
|
, U(Zijk_[i + No*j + No*No*k])
|
||||||
double W(Zijk_[j + No*i + No*No*k]);
|
, V(Zijk_[i + No*k + No*No*j])
|
||||||
double X(Zijk_[j + No*k + No*No*i]);
|
, W(Zijk_[j + No*i + No*No*k])
|
||||||
double Y(Zijk_[k + No*i + No*No*j]);
|
, X(Zijk_[j + No*k + No*No*i])
|
||||||
double Z(Zijk_[k + No*j + No*No*i]);
|
, Y(Zijk_[k + No*i + No*No*j])
|
||||||
|
, Z(Zijk_[k + No*j + No*No*i])
|
||||||
double A(Tijk_[i + No*j + No*No*k]);
|
, A(maybeConjugate<F>(Tijk_[i + No*j + No*No*k]))
|
||||||
double B(Tijk_[i + No*k + No*No*j]);
|
, B(maybeConjugate<F>(Tijk_[i + No*k + No*No*j]))
|
||||||
double C(Tijk_[j + No*i + No*No*k]);
|
, C(maybeConjugate<F>(Tijk_[j + No*i + No*No*k]))
|
||||||
double D(Tijk_[j + No*k + No*No*i]);
|
, D(maybeConjugate<F>(Tijk_[j + No*k + No*No*i]))
|
||||||
double E(Tijk_[k + No*i + No*No*j]);
|
, E(maybeConjugate<F>(Tijk_[k + No*i + No*No*j]))
|
||||||
double F(Tijk_[k + No*j + No*No*i]);
|
, _F(maybeConjugate<F>(Tijk_[k + No*j + No*No*i]))
|
||||||
double value(3.0*(A*U+B*V+C*W+D*X+E*Y+F*Z)
|
, value
|
||||||
+((U+X+Y)-2.0*(V+W+Z))*(A+D+E)
|
= 3.0 * ( A * U
|
||||||
+((V+W+Z)-2.0*(U+X+Y))*(B+C+F));
|
+ B * V
|
||||||
energy += 2.0*value / denominator * facjk * facij;
|
+ C * W
|
||||||
|
+ D * X
|
||||||
|
+ E * Y
|
||||||
|
+ _F * Z )
|
||||||
|
+ ( ( U + X + Y )
|
||||||
|
- 2.0 * ( V + W + Z )
|
||||||
|
) * ( A + D + E )
|
||||||
|
+ ( ( V + W + Z )
|
||||||
|
- 2.0 * ( U + X + Y )
|
||||||
|
) * ( B + C + _F )
|
||||||
|
;
|
||||||
|
energy += 2.0 * value / denominator * facjk * facij;
|
||||||
} // i
|
} // i
|
||||||
} // j
|
} // j
|
||||||
} // k
|
} // k
|
||||||
} // ii
|
} // ii
|
||||||
} // jj
|
} // jj
|
||||||
} // kk
|
} // kk
|
||||||
return energy;
|
return std::real(energy);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename F=double>
|
||||||
double getEnergySame
|
double getEnergySame
|
||||||
( const double epsabc
|
( const F epsabc
|
||||||
, std::vector<double> const& epsi
|
, std::vector<F> const& epsi
|
||||||
, std::vector<double> const& Tijk_
|
, std::vector<F> const& Tijk_
|
||||||
, std::vector<double> const& Zijk_
|
, std::vector<F> const& Zijk_
|
||||||
) {
|
) {
|
||||||
constexpr size_t blockSize = 16;
|
constexpr size_t blockSize = 16;
|
||||||
const size_t No = epsi.size();
|
const size_t No = epsi.size();
|
||||||
double energy(0.);
|
F energy = F(0.);
|
||||||
for (size_t kk=0; kk<No; kk+=blockSize){
|
for (size_t kk=0; kk<No; kk+=blockSize){
|
||||||
const size_t kend( std::min( kk+blockSize, No) );
|
const size_t kend( std::min( kk+blockSize, No) );
|
||||||
for (size_t jj(kk); jj<No; jj+=blockSize){
|
for (size_t jj(kk); jj<No; jj+=blockSize){
|
||||||
@ -89,42 +102,50 @@ namespace atrip {
|
|||||||
for (size_t ii(jj); ii<No; ii+=blockSize){
|
for (size_t ii(jj); ii<No; ii+=blockSize){
|
||||||
const size_t iend( std::min( ii+blockSize, No) );
|
const size_t iend( std::min( ii+blockSize, No) );
|
||||||
for (size_t k(kk); k < kend; k++){
|
for (size_t k(kk); k < kend; k++){
|
||||||
const double ek(epsi[k]);
|
const F ek(epsi[k]);
|
||||||
const size_t jstart = jj > k ? jj : k;
|
const size_t jstart = jj > k ? jj : k;
|
||||||
for(size_t j(jstart); j < jend; j++){
|
for(size_t j(jstart); j < jend; j++){
|
||||||
const double facjk( j == k ? 0.5 : 1.0);
|
const F facjk( j == k ? F(0.5) : F(1.0));
|
||||||
const double ej(epsi[j]);
|
const F ej(epsi[j]);
|
||||||
const size_t istart = ii > j ? ii : j;
|
const size_t istart = ii > j ? ii : j;
|
||||||
for(size_t i(istart); i < iend; i++){
|
for(size_t i(istart); i < iend; i++){
|
||||||
double ei(epsi[i]);
|
const F
|
||||||
double facij ( i==j ? 0.5 : 1.0);
|
ei(epsi[i])
|
||||||
double denominator(epsabc - ei - ej - ek);
|
, facij ( i==j ? F(0.5) : F(1.0))
|
||||||
double U(Zijk_[i + No*j + No*No*k]);
|
, denominator(epsabc - ei - ej - ek)
|
||||||
double V(Zijk_[j + No*k + No*No*i]);
|
, U(Zijk_[i + No*j + No*No*k])
|
||||||
double W(Zijk_[k + No*i + No*No*j]);
|
, V(Zijk_[j + No*k + No*No*i])
|
||||||
double A(Tijk_[i + No*j + No*No*k]);
|
, W(Zijk_[k + No*i + No*No*j])
|
||||||
double B(Tijk_[j + No*k + No*No*i]);
|
, A(maybeConjugate<F>(Tijk_[i + No*j + No*No*k]))
|
||||||
double C(Tijk_[k + No*i + No*No*j]);
|
, B(maybeConjugate<F>(Tijk_[j + No*k + No*No*i]))
|
||||||
double value(3.0*( A*U + B*V + C*W) - (A+B+C)*(U+V+W));
|
, C(maybeConjugate<F>(Tijk_[k + No*i + No*No*j]))
|
||||||
energy += 2.0*value / denominator * facjk * facij;
|
, value
|
||||||
|
= F(3.0) * ( A * U
|
||||||
|
+ B * V
|
||||||
|
+ C * W
|
||||||
|
)
|
||||||
|
- ( A + B + C ) * ( U + V + W )
|
||||||
|
;
|
||||||
|
energy += F(2.0) * value / denominator * facjk * facij;
|
||||||
} // i
|
} // i
|
||||||
} // j
|
} // j
|
||||||
} // k
|
} // k
|
||||||
} // ii
|
} // ii
|
||||||
} // jj
|
} // jj
|
||||||
} // kk
|
} // kk
|
||||||
return energy;
|
return std::real(energy);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename F=double>
|
||||||
void singlesContribution
|
void singlesContribution
|
||||||
( size_t No
|
( size_t No
|
||||||
, size_t Nv
|
, size_t Nv
|
||||||
, const ABCTuple &abc
|
, const ABCTuple &abc
|
||||||
, double const* Tph
|
, F const* Tph
|
||||||
, double const* VABij
|
, F const* VABij
|
||||||
, double const* VACij
|
, F const* VACij
|
||||||
, double const* VBCij
|
, F const* VBCij
|
||||||
, double *Zijk
|
, F *Zijk
|
||||||
) {
|
) {
|
||||||
const size_t a(abc[0]), b(abc[1]), c(abc[2]);
|
const size_t a(abc[0]), b(abc[1]), c(abc[2]);
|
||||||
for (size_t k=0; k < No; k++)
|
for (size_t k=0; k < No; k++)
|
||||||
@ -139,80 +160,84 @@ namespace atrip {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename F=double>
|
||||||
void doublesContribution
|
void doublesContribution
|
||||||
( const ABCTuple &abc
|
( const ABCTuple &abc
|
||||||
, size_t const No
|
, size_t const No
|
||||||
, size_t const Nv
|
, size_t const Nv
|
||||||
// -- VABCI
|
// -- VABCI
|
||||||
, double const* VABph
|
, F const* VABph
|
||||||
, double const* VACph
|
, F const* VACph
|
||||||
, double const* VBCph
|
, F const* VBCph
|
||||||
, double const* VBAph
|
, F const* VBAph
|
||||||
, double const* VCAph
|
, F const* VCAph
|
||||||
, double const* VCBph
|
, F const* VCBph
|
||||||
// -- VHHHA
|
// -- VHHHA
|
||||||
, double const* VhhhA
|
, F const* VhhhA
|
||||||
, double const* VhhhB
|
, F const* VhhhB
|
||||||
, double const* VhhhC
|
, F const* VhhhC
|
||||||
// -- TA
|
// -- TA
|
||||||
, double const* TAphh
|
, F const* TAphh
|
||||||
, double const* TBphh
|
, F const* TBphh
|
||||||
, double const* TCphh
|
, F const* TCphh
|
||||||
// -- TABIJ
|
// -- TABIJ
|
||||||
, double const* TABhh
|
, F const* TABhh
|
||||||
, double const* TAChh
|
, F const* TAChh
|
||||||
, double const* TBChh
|
, F const* TBChh
|
||||||
// -- TIJK
|
// -- TIJK
|
||||||
, double *Tijk
|
, F *Tijk
|
||||||
) {
|
) {
|
||||||
|
|
||||||
const size_t a = abc[0], b = abc[1], c = abc[2]
|
const size_t a = abc[0], b = abc[1], c = abc[2]
|
||||||
, NoNo = No*No, NoNv = No*Nv
|
, NoNo = No*No, NoNv = No*Nv
|
||||||
;
|
;
|
||||||
|
|
||||||
#if defined(ATRIP_USE_DGEMM)
|
#if defined(ATRIP_USE_DGEMM)
|
||||||
#define _IJK_(i, j, k) i + j*No + k*NoNo
|
#define _IJK_(i, j, k) i + j*No + k*NoNo
|
||||||
#define REORDER(__II, __JJ, __KK) \
|
#define REORDER(__II, __JJ, __KK) \
|
||||||
WITH_CHRONO("double:reorder", \
|
WITH_CHRONO("doubles:reorder", \
|
||||||
for (size_t k = 0; k < No; k++) \
|
for (size_t k = 0; k < No; k++) \
|
||||||
for (size_t j = 0; j < No; j++) \
|
for (size_t j = 0; j < No; j++) \
|
||||||
for (size_t i = 0; i < No; i++) { \
|
for (size_t i = 0; i < No; i++) { \
|
||||||
Tijk[_IJK_(i, j, k)] \
|
Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \
|
||||||
+= _t_buffer[_IJK_(__II, __JJ, __KK)]; \
|
} \
|
||||||
} \
|
)
|
||||||
)
|
#define DGEMM_PARTICLES(__A, __B) \
|
||||||
#define DGEMM_PARTICLES(__A, __B) \
|
atrip::xgemm<F>( "T" \
|
||||||
atrip::dgemm_("T", \
|
, "N" \
|
||||||
"N", \
|
, (int const*)&NoNo \
|
||||||
(int const*)&NoNo, \
|
, (int const*)&No \
|
||||||
(int const*)&No, \
|
, (int const*)&Nv \
|
||||||
(int const*)&Nv, \
|
, &one \
|
||||||
&one, \
|
, __A \
|
||||||
__A, \
|
, (int const*)&Nv \
|
||||||
(int const*)&Nv, \
|
, __B \
|
||||||
__B, \
|
, (int const*)&Nv \
|
||||||
(int const*)&Nv, \
|
, &zero \
|
||||||
&zero, \
|
, _t_buffer.data() \
|
||||||
_t_buffer.data(), \
|
, (int const*)&NoNo \
|
||||||
(int const*)&NoNo);
|
);
|
||||||
#define DGEMM_HOLES(__A, __B, __TRANSB) \
|
#define DGEMM_HOLES(__A, __B, __TRANSB) \
|
||||||
atrip::dgemm_("N", \
|
atrip::xgemm<F>( "N" \
|
||||||
__TRANSB, \
|
, __TRANSB \
|
||||||
(int const*)&NoNo, \
|
, (int const*)&NoNo \
|
||||||
(int const*)&No, \
|
, (int const*)&No \
|
||||||
(int const*)&No, \
|
, (int const*)&No \
|
||||||
&m_one, \
|
, &m_one \
|
||||||
__A, \
|
, __A \
|
||||||
(int const*)&NoNo, \
|
, (int const*)&NoNo \
|
||||||
__B, \
|
, __B \
|
||||||
(int const*)&No, \
|
, (int const*)&No \
|
||||||
&zero, \
|
, &zero \
|
||||||
_t_buffer.data(), \
|
, _t_buffer.data() \
|
||||||
(int const*)&NoNo);
|
, (int const*)&NoNo \
|
||||||
|
);
|
||||||
|
#define MAYBE_CONJ(_conj, _buffer) \
|
||||||
|
for (size_t __i = 0; __i < NoNoNo; ++__i) \
|
||||||
|
_conj[__i] = maybeConjugate<F>(_buffer[__i]); \
|
||||||
|
|
||||||
using F = double;
|
|
||||||
const size_t NoNoNo = No*NoNo;
|
const size_t NoNoNo = No*NoNo;
|
||||||
std::vector<double> _t_buffer;
|
std::vector<F> _t_buffer;
|
||||||
_t_buffer.reserve(NoNoNo);
|
_t_buffer.reserve(NoNoNo);
|
||||||
F one{1.0}, m_one{-1.0}, zero{0.0};
|
F one{1.0}, m_one{-1.0}, zero{0.0};
|
||||||
|
|
||||||
@ -221,81 +246,92 @@ namespace atrip {
|
|||||||
Tijk[k] = 0.0;
|
Tijk[k] = 0.0;
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// TOMERGE: replace chronos
|
||||||
WITH_CHRONO("doubles:holes",
|
WITH_CHRONO("doubles:holes",
|
||||||
{ // Holes part ================================================
|
{ // Holes part %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
|
|
||||||
WITH_CHRONO("doubles:holes:1",
|
|
||||||
DGEMM_HOLES(VhhhC, TABhh, "N")
|
|
||||||
REORDER(i, k, j)
|
|
||||||
)
|
|
||||||
// VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0
|
|
||||||
WITH_CHRONO("doubles:holes:2",
|
|
||||||
DGEMM_HOLES(VhhhC, TABhh, "T")
|
|
||||||
REORDER(j, k, i)
|
|
||||||
)
|
|
||||||
// VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5
|
|
||||||
WITH_CHRONO("doubles:holes:3",
|
|
||||||
DGEMM_HOLES(VhhhB, TAChh, "N")
|
|
||||||
REORDER(i, j, k)
|
|
||||||
)
|
|
||||||
// VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3
|
|
||||||
WITH_CHRONO("doubles:holes:4",
|
|
||||||
DGEMM_HOLES(VhhhB, TAChh, "T")
|
|
||||||
REORDER(k, j, i)
|
|
||||||
)
|
|
||||||
// VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1
|
|
||||||
WITH_CHRONO("doubles:holes:5",
|
|
||||||
DGEMM_HOLES(VhhhA, TBChh, "N")
|
|
||||||
REORDER(j, i, k)
|
|
||||||
)
|
|
||||||
// VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4
|
|
||||||
WITH_CHRONO("doubles:holes:6",
|
|
||||||
DGEMM_HOLES(VhhhA, TBChh, "T")
|
|
||||||
REORDER(k, i, j)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
WITH_CHRONO("doubles:particles",
|
std::vector<F> _vhhh(NoNoNo);
|
||||||
{ // Particle part ===========================================
|
|
||||||
// TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0
|
|
||||||
WITH_CHRONO("doubles:particles:1",
|
|
||||||
DGEMM_PARTICLES(TAphh, VBCph)
|
|
||||||
REORDER(i, j, k)
|
|
||||||
)
|
|
||||||
// TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3
|
|
||||||
WITH_CHRONO("doubles:particles:2",
|
|
||||||
DGEMM_PARTICLES(TAphh, VCBph)
|
|
||||||
REORDER(i, k, j)
|
|
||||||
)
|
|
||||||
// TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5
|
|
||||||
WITH_CHRONO("doubles:particles:3",
|
|
||||||
DGEMM_PARTICLES(TCphh, VABph)
|
|
||||||
REORDER(k, i, j)
|
|
||||||
)
|
|
||||||
// TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2
|
|
||||||
WITH_CHRONO("doubles:particles:4",
|
|
||||||
DGEMM_PARTICLES(TCphh, VBAph)
|
|
||||||
REORDER(k, j, i)
|
|
||||||
)
|
|
||||||
// TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1
|
|
||||||
WITH_CHRONO("doubles:particles:5",
|
|
||||||
DGEMM_PARTICLES(TBphh, VACph)
|
|
||||||
REORDER(j, i, k)
|
|
||||||
)
|
|
||||||
// TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4
|
|
||||||
WITH_CHRONO("doubles:particles:6",
|
|
||||||
DGEMM_PARTICLES(TBphh, VCAph)
|
|
||||||
REORDER(j, k, i)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
#undef REORDER
|
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
|
||||||
#undef DGEMM_HOLES
|
MAYBE_CONJ(_vhhh, VhhhC)
|
||||||
#undef DGEMM_PARTICLES
|
WITH_CHRONO("doubles:holes:1",
|
||||||
#undef _IJK_
|
DGEMM_HOLES(_vhhh.data(), TABhh, "N")
|
||||||
#else
|
REORDER(i, k, j)
|
||||||
|
)
|
||||||
|
// VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0
|
||||||
|
WITH_CHRONO("doubles:holes:2",
|
||||||
|
DGEMM_HOLES(_vhhh.data(), TABhh, "T")
|
||||||
|
REORDER(j, k, i)
|
||||||
|
)
|
||||||
|
|
||||||
|
// VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5
|
||||||
|
MAYBE_CONJ(_vhhh, VhhhB)
|
||||||
|
WITH_CHRONO("doubles:holes:3",
|
||||||
|
DGEMM_HOLES(_vhhh.data(), TAChh, "N")
|
||||||
|
REORDER(i, j, k)
|
||||||
|
)
|
||||||
|
// VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3
|
||||||
|
WITH_CHRONO("doubles:holes:4",
|
||||||
|
DGEMM_HOLES(_vhhh.data(), TAChh, "T")
|
||||||
|
REORDER(k, j, i)
|
||||||
|
)
|
||||||
|
|
||||||
|
// VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1
|
||||||
|
MAYBE_CONJ(_vhhh, VhhhA)
|
||||||
|
WITH_CHRONO("doubles:holes:5",
|
||||||
|
DGEMM_HOLES(_vhhh.data(), TBChh, "N")
|
||||||
|
REORDER(j, i, k)
|
||||||
|
)
|
||||||
|
// VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4
|
||||||
|
WITH_CHRONO("doubles:holes:6",
|
||||||
|
DGEMM_HOLES(_vhhh.data(), TBChh, "T")
|
||||||
|
REORDER(k, i, j)
|
||||||
|
)
|
||||||
|
|
||||||
|
}
|
||||||
|
)
|
||||||
|
#undef MAYBE_CONJ
|
||||||
|
|
||||||
|
WITH_CHRONO("doubles:particles",
|
||||||
|
{ // Particle part %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
// TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0
|
||||||
|
WITH_CHRONO("doubles:particles:1",
|
||||||
|
DGEMM_PARTICLES(TAphh, VBCph)
|
||||||
|
REORDER(i, j, k)
|
||||||
|
)
|
||||||
|
// TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3
|
||||||
|
WITH_CHRONO("doubles:particles:2",
|
||||||
|
DGEMM_PARTICLES(TAphh, VCBph)
|
||||||
|
REORDER(i, k, j)
|
||||||
|
)
|
||||||
|
// TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5
|
||||||
|
WITH_CHRONO("doubles:particles:3",
|
||||||
|
DGEMM_PARTICLES(TCphh, VABph)
|
||||||
|
REORDER(k, i, j)
|
||||||
|
)
|
||||||
|
// TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2
|
||||||
|
WITH_CHRONO("doubles:particles:4",
|
||||||
|
DGEMM_PARTICLES(TCphh, VBAph)
|
||||||
|
REORDER(k, j, i)
|
||||||
|
)
|
||||||
|
// TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1
|
||||||
|
WITH_CHRONO("doubles:particles:5",
|
||||||
|
DGEMM_PARTICLES(TBphh, VACph)
|
||||||
|
REORDER(j, i, k)
|
||||||
|
)
|
||||||
|
// TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4
|
||||||
|
WITH_CHRONO("doubles:particles:6",
|
||||||
|
DGEMM_PARTICLES(TBphh, VCAph)
|
||||||
|
REORDER(j, k, i)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
#undef REORDER
|
||||||
|
#undef DGEMM_HOLES
|
||||||
|
#undef DGEMM_PARTICLES
|
||||||
|
#undef _IJK_
|
||||||
|
#else
|
||||||
for (size_t k = 0; k < No; k++)
|
for (size_t k = 0; k < No; k++)
|
||||||
for (size_t j = 0; j < No; j++)
|
for (size_t j = 0; j < No; j++)
|
||||||
for (size_t i = 0; i < No; i++){
|
for (size_t i = 0; i < No; i++){
|
||||||
|
|||||||
@ -22,6 +22,8 @@
|
|||||||
#include <atrip/Tuples.hpp>
|
#include <atrip/Tuples.hpp>
|
||||||
|
|
||||||
namespace atrip {
|
namespace atrip {
|
||||||
|
|
||||||
|
template <typename F=double>
|
||||||
struct RankMap {
|
struct RankMap {
|
||||||
|
|
||||||
static bool RANK_ROUND_ROBIN;
|
static bool RANK_ROUND_ROBIN;
|
||||||
@ -37,7 +39,7 @@ namespace atrip {
|
|||||||
, clusterInfo(getClusterInfo(comm))
|
, clusterInfo(getClusterInfo(comm))
|
||||||
{ assert(lengths.size() <= 2); }
|
{ assert(lengths.size() <= 2); }
|
||||||
|
|
||||||
size_t find(Slice::Location const& p) const noexcept {
|
size_t find(typename Slice<F>::Location const& p) const noexcept {
|
||||||
if (RANK_ROUND_ROBIN) {
|
if (RANK_ROUND_ROBIN) {
|
||||||
return p.source * np + p.rank;
|
return p.source * np + p.rank;
|
||||||
} else {
|
} else {
|
||||||
@ -67,11 +69,11 @@ namespace atrip {
|
|||||||
return source == nSources() && isPaddingRank(rank);
|
return source == nSources() && isPaddingRank(rank);
|
||||||
}
|
}
|
||||||
|
|
||||||
Slice::Location
|
typename Slice<F>::Location
|
||||||
find(ABCTuple const& abc, Slice::Type sliceType) const {
|
find(ABCTuple const& abc, typename Slice<F>::Type sliceType) const {
|
||||||
// tuple = {11, 8} when abc = {11, 8, 9} and sliceType = AB
|
// tuple = {11, 8} when abc = {11, 8, 9} and sliceType = AB
|
||||||
// tuple = {11, 0} when abc = {11, 8, 9} and sliceType = A
|
// tuple = {11, 0} when abc = {11, 8, 9} and sliceType = A
|
||||||
const auto tuple = Slice::subtupleBySlice(abc, sliceType);
|
const auto tuple = Slice<F>::subtupleBySlice(abc, sliceType);
|
||||||
|
|
||||||
const size_t index
|
const size_t index
|
||||||
= tuple[0]
|
= tuple[0]
|
||||||
|
|||||||
@ -21,13 +21,26 @@
|
|||||||
|
|
||||||
#include <atrip/Tuples.hpp>
|
#include <atrip/Tuples.hpp>
|
||||||
#include <atrip/Utils.hpp>
|
#include <atrip/Utils.hpp>
|
||||||
|
#include <atrip/Blas.hpp>
|
||||||
|
|
||||||
namespace atrip {
|
namespace atrip {
|
||||||
|
|
||||||
|
template <typename FF> FF maybeConjugate(const FF a) { return a; }
|
||||||
|
template <> Complex maybeConjugate(const Complex a) { return std::conj(a); }
|
||||||
|
|
||||||
|
namespace traits {
|
||||||
|
template <typename FF> bool isComplex() { return false; }
|
||||||
|
template <> bool isComplex<Complex>() { return true; }
|
||||||
|
namespace mpi {
|
||||||
|
template <typename FF> MPI_Datatype datatypeOf(void);
|
||||||
|
template <> MPI_Datatype datatypeOf<double>() { return MPI_DOUBLE; }
|
||||||
|
template <> MPI_Datatype datatypeOf<Complex>() { return MPI_DOUBLE_COMPLEX; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename F=double>
|
||||||
struct Slice {
|
struct Slice {
|
||||||
|
|
||||||
using F = double;
|
|
||||||
// Prolog:1 ends here
|
// Prolog:1 ends here
|
||||||
|
|
||||||
// [[file:../../atrip.org::*Location][Location:1]]
|
// [[file:../../atrip.org::*Location][Location:1]]
|
||||||
@ -99,8 +112,8 @@ enum Name
|
|||||||
|
|
||||||
// [[file:../../atrip.org::*Database][Database:1]]
|
// [[file:../../atrip.org::*Database][Database:1]]
|
||||||
struct LocalDatabaseElement {
|
struct LocalDatabaseElement {
|
||||||
Slice::Name name;
|
Slice<F>::Name name;
|
||||||
Slice::Info info;
|
Slice<F>::Info info;
|
||||||
};
|
};
|
||||||
// Database:1 ends here
|
// Database:1 ends here
|
||||||
|
|
||||||
@ -123,50 +136,60 @@ struct mpi {
|
|||||||
constexpr int n = 2;
|
constexpr int n = 2;
|
||||||
// create a sliceLocation to measure in the current architecture
|
// create a sliceLocation to measure in the current architecture
|
||||||
// the packing of the struct
|
// the packing of the struct
|
||||||
Slice::Location measure;
|
Slice<F>::Location measure;
|
||||||
MPI_Datatype dt;
|
MPI_Datatype dt;
|
||||||
const std::vector<int> lengths(n, 1);
|
const std::vector<int> lengths(n, 1);
|
||||||
const MPI_Datatype types[n] = {usizeDt(), usizeDt()};
|
const MPI_Datatype types[n] = {usizeDt(), usizeDt()};
|
||||||
|
|
||||||
|
static_assert(sizeof(Slice<F>::Location) == 2 * sizeof(size_t),
|
||||||
|
"The Location packing is wrong in your compiler");
|
||||||
|
|
||||||
// measure the displacements in the struct
|
// measure the displacements in the struct
|
||||||
size_t j = 0;
|
size_t j = 0;
|
||||||
MPI_Aint displacements[n];
|
MPI_Aint base_address, displacements[n];
|
||||||
|
MPI_Get_address(&measure, &base_address);
|
||||||
MPI_Get_address(&measure.rank, &displacements[j++]);
|
MPI_Get_address(&measure.rank, &displacements[j++]);
|
||||||
MPI_Get_address(&measure.source, &displacements[j++]);
|
MPI_Get_address(&measure.source, &displacements[j++]);
|
||||||
for (size_t i = 1; i < n; i++) displacements[i] -= displacements[0];
|
for (size_t i = 0; i < n; i++)
|
||||||
displacements[0] = 0;
|
displacements[i] = MPI_Aint_diff(displacements[i], base_address);
|
||||||
|
|
||||||
MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt);
|
MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt);
|
||||||
MPI_Type_commit(&dt);
|
MPI_Type_commit(&dt);
|
||||||
return dt;
|
return dt;
|
||||||
}
|
}
|
||||||
|
|
||||||
static MPI_Datatype enumDt() { return MPI_INT; }
|
|
||||||
static MPI_Datatype usizeDt() { return MPI_UINT64_T; }
|
static MPI_Datatype usizeDt() { return MPI_UINT64_T; }
|
||||||
|
|
||||||
static MPI_Datatype sliceInfo () {
|
static MPI_Datatype sliceInfo () {
|
||||||
constexpr int n = 5;
|
constexpr int n = 5;
|
||||||
MPI_Datatype dt;
|
MPI_Datatype dt;
|
||||||
Slice::Info measure;
|
Slice<F>::Info measure;
|
||||||
const std::vector<int> lengths(n, 1);
|
const std::vector<int> lengths(n, 1);
|
||||||
const MPI_Datatype types[n]
|
const MPI_Datatype types[n]
|
||||||
= { vector(2, usizeDt())
|
= { vector(2, usizeDt())
|
||||||
, enumDt()
|
, vector(sizeof(enum Type), MPI_CHAR)
|
||||||
, enumDt()
|
, vector(sizeof(enum State), MPI_CHAR)
|
||||||
, sliceLocation()
|
, sliceLocation()
|
||||||
, enumDt()
|
, vector(sizeof(enum Type), MPI_CHAR)
|
||||||
|
// TODO: Why this does not work on intel mpi?
|
||||||
|
/*, MPI_UINT64_T*/
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static_assert(sizeof(enum Type) == 4, "Enum type not 4 bytes long");
|
||||||
|
static_assert(sizeof(enum State) == 4, "Enum State not 4 bytes long");
|
||||||
|
static_assert(sizeof(enum Name) == 4, "Enum Name not 4 bytes long");
|
||||||
|
|
||||||
// create the displacements from the info measurement struct
|
// create the displacements from the info measurement struct
|
||||||
size_t j = 0;
|
size_t j = 0;
|
||||||
MPI_Aint displacements[n];
|
MPI_Aint base_address, displacements[n];
|
||||||
MPI_Get_address(measure.tuple.data(), &displacements[j++]);
|
MPI_Get_address(&measure, &base_address);
|
||||||
|
MPI_Get_address(&measure.tuple[0], &displacements[j++]);
|
||||||
MPI_Get_address(&measure.type, &displacements[j++]);
|
MPI_Get_address(&measure.type, &displacements[j++]);
|
||||||
MPI_Get_address(&measure.state, &displacements[j++]);
|
MPI_Get_address(&measure.state, &displacements[j++]);
|
||||||
MPI_Get_address(&measure.from, &displacements[j++]);
|
MPI_Get_address(&measure.from, &displacements[j++]);
|
||||||
MPI_Get_address(&measure.recycling, &displacements[j++]);
|
MPI_Get_address(&measure.recycling, &displacements[j++]);
|
||||||
for (size_t i = 1; i < n; i++) displacements[i] -= displacements[0];
|
for (size_t i = 0; i < n; i++)
|
||||||
displacements[0] = 0;
|
displacements[i] = MPI_Aint_diff(displacements[i], base_address);
|
||||||
|
|
||||||
MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt);
|
MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt);
|
||||||
MPI_Type_commit(&dt);
|
MPI_Type_commit(&dt);
|
||||||
@ -179,20 +202,26 @@ struct mpi {
|
|||||||
LocalDatabaseElement measure;
|
LocalDatabaseElement measure;
|
||||||
const std::vector<int> lengths(n, 1);
|
const std::vector<int> lengths(n, 1);
|
||||||
const MPI_Datatype types[n]
|
const MPI_Datatype types[n]
|
||||||
= { enumDt()
|
= { vector(sizeof(enum Name), MPI_CHAR)
|
||||||
, sliceInfo()
|
, sliceInfo()
|
||||||
};
|
};
|
||||||
|
|
||||||
// measure the displacements in the struct
|
// measure the displacements in the struct
|
||||||
size_t j = 0;
|
size_t j = 0;
|
||||||
MPI_Aint displacements[n];
|
MPI_Aint base_address, displacements[n];
|
||||||
|
MPI_Get_address(&measure, &base_address);
|
||||||
MPI_Get_address(&measure.name, &displacements[j++]);
|
MPI_Get_address(&measure.name, &displacements[j++]);
|
||||||
MPI_Get_address(&measure.info, &displacements[j++]);
|
MPI_Get_address(&measure.info, &displacements[j++]);
|
||||||
for (size_t i = 1; i < n; i++) displacements[i] -= displacements[0];
|
for (size_t i = 0; i < n; i++)
|
||||||
displacements[0] = 0;
|
displacements[i] = MPI_Aint_diff(displacements[i], base_address);
|
||||||
|
|
||||||
|
static_assert( sizeof(LocalDatabaseElement) == sizeof(measure)
|
||||||
|
, "Measure has bad size");
|
||||||
|
|
||||||
MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt);
|
MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt);
|
||||||
MPI_Type_commit(&dt);
|
MPI_Type_commit(&dt);
|
||||||
|
return vector(sizeof(LocalDatabaseElement), MPI_CHAR);
|
||||||
|
// TODO: write tests in order to know if this works
|
||||||
return dt;
|
return dt;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -218,32 +247,32 @@ PartialTuple subtupleBySlice(ABCTuple abc, Type sliceType) {
|
|||||||
// Static utilities:1 ends here
|
// Static utilities:1 ends here
|
||||||
|
|
||||||
// [[file:../../atrip.org::*Static utilities][Static utilities:2]]
|
// [[file:../../atrip.org::*Static utilities][Static utilities:2]]
|
||||||
static std::vector<Slice*> hasRecycledReferencingToIt
|
static std::vector<Slice<F>*> hasRecycledReferencingToIt
|
||||||
( std::vector<Slice> &slices
|
( std::vector<Slice<F>> &slices
|
||||||
, Info const& info
|
, Info const& info
|
||||||
) {
|
) {
|
||||||
std::vector<Slice*> result;
|
std::vector<Slice<F>*> result;
|
||||||
|
|
||||||
for (auto& s: slices)
|
for (auto& s: slices)
|
||||||
if ( s.info.recycling == info.type
|
if ( s.info.recycling == info.type
|
||||||
&& s.info.tuple == info.tuple
|
&& s.info.tuple == info.tuple
|
||||||
&& s.info.state == Recycled
|
&& s.info.state == Recycled
|
||||||
) result.push_back(&s);
|
) result.push_back(&s);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
// Static utilities:2 ends here
|
// Static utilities:2 ends here
|
||||||
|
|
||||||
// [[file:../../atrip.org::*Static utilities][Static utilities:3]]
|
// [[file:../../atrip.org::*Static utilities][Static utilities:3]]
|
||||||
static Slice& findOneByType(std::vector<Slice> &slices, Slice::Type type) {
|
static Slice<F>& findOneByType(std::vector<Slice<F>> &slices, Slice<F>::Type type) {
|
||||||
const auto sliceIt
|
const auto sliceIt
|
||||||
= std::find_if(slices.begin(), slices.end(),
|
= std::find_if(slices.begin(), slices.end(),
|
||||||
[&type](Slice const& s) {
|
[&type](Slice<F> const& s) {
|
||||||
return type == s.info.type;
|
return type == s.info.type;
|
||||||
});
|
});
|
||||||
WITH_CRAZY_DEBUG
|
WITH_CRAZY_DEBUG
|
||||||
WITH_RANK
|
WITH_RANK
|
||||||
<< "__slice__:find:looking for " << type << "\n";
|
<< "\t__ looking for " << type << "\n";
|
||||||
if (sliceIt == slices.end())
|
if (sliceIt == slices.end())
|
||||||
throw std::domain_error("Slice by type not found!");
|
throw std::domain_error("Slice by type not found!");
|
||||||
return *sliceIt;
|
return *sliceIt;
|
||||||
@ -251,80 +280,80 @@ static Slice& findOneByType(std::vector<Slice> &slices, Slice::Type type) {
|
|||||||
// Static utilities:3 ends here
|
// Static utilities:3 ends here
|
||||||
|
|
||||||
// [[file:../../atrip.org::*Static utilities][Static utilities:4]]
|
// [[file:../../atrip.org::*Static utilities][Static utilities:4]]
|
||||||
static Slice&
|
static Slice<F>&
|
||||||
findRecycledSource (std::vector<Slice> &slices, Slice::Info info) {
|
findRecycledSource (std::vector<Slice<F>> &slices, Slice<F>::Info info) {
|
||||||
const auto sliceIt
|
const auto sliceIt
|
||||||
= std::find_if(slices.begin(), slices.end(),
|
= std::find_if(slices.begin(), slices.end(),
|
||||||
[&info](Slice const& s) {
|
[&info](Slice<F> const& s) {
|
||||||
return info.recycling == s.info.type
|
return info.recycling == s.info.type
|
||||||
&& info.tuple == s.info.tuple
|
&& info.tuple == s.info.tuple
|
||||||
&& State::Recycled != s.info.state
|
&& State::Recycled != s.info.state
|
||||||
;
|
;
|
||||||
});
|
});
|
||||||
|
|
||||||
WITH_CRAZY_DEBUG
|
WITH_CRAZY_DEBUG
|
||||||
WITH_RANK << "__slice__:find: recycling source of "
|
WITH_RANK << "__slice__:find: recycling source of "
|
||||||
<< pretty_print(info) << "\n";
|
<< pretty_print(info) << "\n";
|
||||||
if (sliceIt == slices.end())
|
if (sliceIt == slices.end())
|
||||||
throw std::domain_error( "Slice not found: "
|
throw std::domain_error( "Slice not found: "
|
||||||
+ pretty_print(info)
|
+ pretty_print(info)
|
||||||
+ " rank: "
|
+ " rank: "
|
||||||
+ pretty_print(Atrip::rank)
|
+ pretty_print(Atrip::rank)
|
||||||
);
|
);
|
||||||
WITH_RANK << "__slice__:find: " << pretty_print(sliceIt->info) << "\n";
|
WITH_RANK << "__slice__:find: " << pretty_print(sliceIt->info) << "\n";
|
||||||
return *sliceIt;
|
return *sliceIt;
|
||||||
}
|
}
|
||||||
// Static utilities:4 ends here
|
// Static utilities:4 ends here
|
||||||
|
|
||||||
// [[file:../../atrip.org::*Static utilities][Static utilities:5]]
|
// [[file:../../atrip.org::*Static utilities][Static utilities:5]]
|
||||||
static Slice& findByTypeAbc
|
static Slice<F>& findByTypeAbc
|
||||||
( std::vector<Slice> &slices
|
( std::vector<Slice<F>> &slices
|
||||||
, Slice::Type type
|
, Slice<F>::Type type
|
||||||
, ABCTuple const& abc
|
, ABCTuple const& abc
|
||||||
) {
|
) {
|
||||||
const auto tuple = Slice::subtupleBySlice(abc, type);
|
const auto tuple = Slice<F>::subtupleBySlice(abc, type);
|
||||||
const auto sliceIt
|
const auto sliceIt
|
||||||
= std::find_if(slices.begin(), slices.end(),
|
= std::find_if(slices.begin(), slices.end(),
|
||||||
[&type, &tuple](Slice const& s) {
|
[&type, &tuple](Slice<F> const& s) {
|
||||||
return type == s.info.type
|
return type == s.info.type
|
||||||
&& tuple == s.info.tuple
|
&& tuple == s.info.tuple
|
||||||
;
|
;
|
||||||
});
|
});
|
||||||
WITH_CRAZY_DEBUG
|
WITH_CRAZY_DEBUG
|
||||||
WITH_RANK << "__slice__:find:" << type << " and tuple "
|
WITH_RANK << "__slice__:find:" << type << " and tuple "
|
||||||
<< pretty_print(tuple)
|
<< pretty_print(tuple)
|
||||||
<< "\n";
|
<< "\n";
|
||||||
if (sliceIt == slices.end())
|
if (sliceIt == slices.end())
|
||||||
throw std::domain_error( "Slice not found: "
|
throw std::domain_error( "Slice not found: "
|
||||||
+ pretty_print(tuple)
|
+ pretty_print(tuple)
|
||||||
+ ", "
|
+ ", "
|
||||||
+ pretty_print(type)
|
+ pretty_print(type)
|
||||||
+ " rank: "
|
+ " rank: "
|
||||||
+ pretty_print(Atrip::rank)
|
+ pretty_print(Atrip::rank)
|
||||||
);
|
);
|
||||||
return *sliceIt;
|
return *sliceIt;
|
||||||
}
|
}
|
||||||
// Static utilities:5 ends here
|
// Static utilities:5 ends here
|
||||||
|
|
||||||
// [[file:../../atrip.org::*Static utilities][Static utilities:6]]
|
// [[file:../../atrip.org::*Static utilities][Static utilities:6]]
|
||||||
static Slice& findByInfo(std::vector<Slice> &slices,
|
static Slice<F>& findByInfo(std::vector<Slice<F>> &slices,
|
||||||
Slice::Info const& info) {
|
Slice<F>::Info const& info) {
|
||||||
const auto sliceIt
|
const auto sliceIt
|
||||||
= std::find_if(slices.begin(), slices.end(),
|
= std::find_if(slices.begin(), slices.end(),
|
||||||
[&info](Slice const& s) {
|
[&info](Slice<F> const& s) {
|
||||||
// TODO: maybe implement comparison in Info struct
|
// TODO: maybe implement comparison in Info struct
|
||||||
return info.type == s.info.type
|
return info.type == s.info.type
|
||||||
&& info.state == s.info.state
|
&& info.state == s.info.state
|
||||||
&& info.tuple == s.info.tuple
|
&& info.tuple == s.info.tuple
|
||||||
&& info.from.rank == s.info.from.rank
|
&& info.from.rank == s.info.from.rank
|
||||||
&& info.from.source == s.info.from.source
|
&& info.from.source == s.info.from.source
|
||||||
;
|
;
|
||||||
});
|
});
|
||||||
WITH_CRAZY_DEBUG
|
WITH_CRAZY_DEBUG
|
||||||
WITH_RANK << "__slice__:find:looking for " << pretty_print(info) << "\n";
|
WITH_RANK << "__slice__:find:looking for " << pretty_print(info) << "\n";
|
||||||
if (sliceIt == slices.end())
|
if (sliceIt == slices.end())
|
||||||
throw std::domain_error( "Slice by info not found: "
|
throw std::domain_error( "Slice by info not found: "
|
||||||
+ pretty_print(info));
|
+ pretty_print(info));
|
||||||
return *sliceIt;
|
return *sliceIt;
|
||||||
}
|
}
|
||||||
// Static utilities:6 ends here
|
// Static utilities:6 ends here
|
||||||
@ -457,13 +486,15 @@ Slice(size_t size_)
|
|||||||
// Epilog:1 ends here
|
// Epilog:1 ends here
|
||||||
|
|
||||||
// [[file:../../atrip.org::*Debug][Debug:1]]
|
// [[file:../../atrip.org::*Debug][Debug:1]]
|
||||||
std::ostream& operator<<(std::ostream& out, Slice::Location const& v) {
|
template <typename F=double>
|
||||||
|
std::ostream& operator<<(std::ostream& out, typename Slice<F>::Location const& v) {
|
||||||
// TODO: remove me
|
// TODO: remove me
|
||||||
out << "{.r(" << v.rank << "), .s(" << v.source << ")};";
|
out << "{.r(" << v.rank << "), .s(" << v.source << ")};";
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::ostream& operator<<(std::ostream& out, Slice::Info const& i) {
|
template <typename F=double>
|
||||||
|
std::ostream& operator<<(std::ostream& out, typename Slice<F>::Info const& i) {
|
||||||
out << "«t" << i.type << ", s" << i.state << "»"
|
out << "«t" << i.type << ", s" << i.state << "»"
|
||||||
<< " ⊙ {" << i.from.rank << ", " << i.from.source << "}"
|
<< " ⊙ {" << i.from.rank << ", " << i.from.source << "}"
|
||||||
<< " ∴ {" << i.tuple[0] << ", " << i.tuple[1] << "}"
|
<< " ∴ {" << i.tuple[0] << ", " << i.tuple[1] << "}"
|
||||||
|
|||||||
@ -20,8 +20,8 @@
|
|||||||
|
|
||||||
namespace atrip {
|
namespace atrip {
|
||||||
|
|
||||||
|
template <typename F=double>
|
||||||
struct SliceUnion {
|
struct SliceUnion {
|
||||||
using F = double;
|
|
||||||
using Tensor = CTF::Tensor<F>;
|
using Tensor = CTF::Tensor<F>;
|
||||||
|
|
||||||
virtual void
|
virtual void
|
||||||
@ -34,7 +34,7 @@ namespace atrip {
|
|||||||
* This means that there can be at most one slice with a given Ty_x_Tu.
|
* This means that there can be at most one slice with a given Ty_x_Tu.
|
||||||
*/
|
*/
|
||||||
void checkForDuplicates() const {
|
void checkForDuplicates() const {
|
||||||
std::vector<Slice::Ty_x_Tu> tytus;
|
std::vector<typename Slice<F>::Ty_x_Tu> tytus;
|
||||||
for (auto const& s: slices) {
|
for (auto const& s: slices) {
|
||||||
if (s.isFree()) continue;
|
if (s.isFree()) continue;
|
||||||
tytus.push_back({s.info.type, s.info.tuple});
|
tytus.push_back({s.info.type, s.info.tuple});
|
||||||
@ -47,13 +47,13 @@ namespace atrip {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<Slice::Ty_x_Tu> neededSlices(ABCTuple const& abc) {
|
std::vector<typename Slice<F>::Ty_x_Tu> neededSlices(ABCTuple const& abc) {
|
||||||
std::vector<Slice::Ty_x_Tu> needed(sliceTypes.size());
|
std::vector<typename Slice<F>::Ty_x_Tu> needed(sliceTypes.size());
|
||||||
// build the needed vector
|
// build the needed vector
|
||||||
std::transform(sliceTypes.begin(), sliceTypes.end(),
|
std::transform(sliceTypes.begin(), sliceTypes.end(),
|
||||||
needed.begin(),
|
needed.begin(),
|
||||||
[&abc](Slice::Type const type) {
|
[&abc](typename Slice<F>::Type const type) {
|
||||||
auto tuple = Slice::subtupleBySlice(abc, type);
|
auto tuple = Slice<F>::subtupleBySlice(abc, type);
|
||||||
return std::make_pair(type, tuple);
|
return std::make_pair(type, tuple);
|
||||||
});
|
});
|
||||||
return needed;
|
return needed;
|
||||||
@ -78,8 +78,9 @@ namespace atrip {
|
|||||||
* slices.
|
* slices.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
Slice::LocalDatabase buildLocalDatabase(ABCTuple const& abc) {
|
typename
|
||||||
Slice::LocalDatabase result;
|
Slice<F>::LocalDatabase buildLocalDatabase(ABCTuple const& abc) {
|
||||||
|
typename Slice<F>::LocalDatabase result;
|
||||||
|
|
||||||
auto const needed = neededSlices(abc);
|
auto const needed = neededSlices(abc);
|
||||||
|
|
||||||
@ -109,7 +110,7 @@ namespace atrip {
|
|||||||
// need
|
// need
|
||||||
auto const& it
|
auto const& it
|
||||||
= std::find_if(slices.begin(), slices.end(),
|
= std::find_if(slices.begin(), slices.end(),
|
||||||
[&tuple, &type](Slice const& other) {
|
[&tuple, &type](Slice<F> const& other) {
|
||||||
return other.info.tuple == tuple
|
return other.info.tuple == tuple
|
||||||
&& other.info.type == type
|
&& other.info.type == type
|
||||||
// we only want another slice when it
|
// we only want another slice when it
|
||||||
@ -135,7 +136,7 @@ namespace atrip {
|
|||||||
// tuple and that has a valid data pointer.
|
// tuple and that has a valid data pointer.
|
||||||
auto const& recycleIt
|
auto const& recycleIt
|
||||||
= std::find_if(slices.begin(), slices.end(),
|
= std::find_if(slices.begin(), slices.end(),
|
||||||
[&tuple, &type](Slice const& other) {
|
[&tuple, &type](Slice<F> const& other) {
|
||||||
return other.info.tuple == tuple
|
return other.info.tuple == tuple
|
||||||
&& other.info.type != type
|
&& other.info.type != type
|
||||||
&& other.isRecyclable()
|
&& other.isRecyclable()
|
||||||
@ -146,13 +147,13 @@ namespace atrip {
|
|||||||
// (which should exist by construction :THINK)
|
// (which should exist by construction :THINK)
|
||||||
//
|
//
|
||||||
if (recycleIt != slices.end()) {
|
if (recycleIt != slices.end()) {
|
||||||
auto& blank = Slice::findOneByType(slices, Slice::Blank);
|
auto& blank = Slice<F>::findOneByType(slices, Slice<F>::Blank);
|
||||||
// TODO: formalize this through a method to copy information
|
// TODO: formalize this through a method to copy information
|
||||||
// from another slice
|
// from another slice
|
||||||
blank.data = recycleIt->data;
|
blank.data = recycleIt->data;
|
||||||
blank.info.type = type;
|
blank.info.type = type;
|
||||||
blank.info.tuple = tuple;
|
blank.info.tuple = tuple;
|
||||||
blank.info.state = Slice::Recycled;
|
blank.info.state = Slice<F>::Recycled;
|
||||||
blank.info.from = from;
|
blank.info.from = from;
|
||||||
blank.info.recycling = recycleIt->info.type;
|
blank.info.recycling = recycleIt->info.type;
|
||||||
result.push_back({name, blank.info});
|
result.push_back({name, blank.info});
|
||||||
@ -179,17 +180,17 @@ namespace atrip {
|
|||||||
<< " for tuple " << tuple[0] << ", " << tuple[1]
|
<< " for tuple " << tuple[0] << ", " << tuple[1]
|
||||||
<< "\n"
|
<< "\n"
|
||||||
;
|
;
|
||||||
auto& blank = Slice::findOneByType(slices, Slice::Blank);
|
auto& blank = Slice<F>::findOneByType(slices, Slice<F>::Blank);
|
||||||
blank.info.type = type;
|
blank.info.type = type;
|
||||||
blank.info.tuple = tuple;
|
blank.info.tuple = tuple;
|
||||||
blank.info.from = from;
|
blank.info.from = from;
|
||||||
|
|
||||||
// Handle self sufficiency
|
// Handle self sufficiency
|
||||||
blank.info.state = Atrip::rank == from.rank
|
blank.info.state = Atrip::rank == from.rank
|
||||||
? Slice::SelfSufficient
|
? Slice<F>::SelfSufficient
|
||||||
: Slice::Fetch
|
: Slice<F>::Fetch
|
||||||
;
|
;
|
||||||
if (blank.info.state == Slice::SelfSufficient) {
|
if (blank.info.state == Slice<F>::SelfSufficient) {
|
||||||
blank.data = sources[from.source].data();
|
blank.data = sources[from.source].data();
|
||||||
} else {
|
} else {
|
||||||
if (freePointers.size() == 0) {
|
if (freePointers.size() == 0) {
|
||||||
@ -239,7 +240,7 @@ namespace atrip {
|
|||||||
// try to find the slice in the needed slices list
|
// try to find the slice in the needed slices list
|
||||||
auto const found
|
auto const found
|
||||||
= std::find_if(needed.begin(), needed.end(),
|
= std::find_if(needed.begin(), needed.end(),
|
||||||
[&slice] (Slice::Ty_x_Tu const& tytu) {
|
[&slice] (typename Slice<F>::Ty_x_Tu const& tytu) {
|
||||||
return slice.info.tuple == tytu.second
|
return slice.info.tuple == tytu.second
|
||||||
&& slice.info.type == tytu.first
|
&& slice.info.type == tytu.first
|
||||||
;
|
;
|
||||||
@ -258,7 +259,7 @@ namespace atrip {
|
|||||||
|
|
||||||
// allow to gc unwrapped and recycled, never Fetch,
|
// allow to gc unwrapped and recycled, never Fetch,
|
||||||
// if we have a Fetch slice then something has gone very wrong.
|
// if we have a Fetch slice then something has gone very wrong.
|
||||||
if (!slice.isUnwrapped() && slice.info.state != Slice::Recycled)
|
if (!slice.isUnwrapped() && slice.info.state != Slice<F>::Recycled)
|
||||||
throw
|
throw
|
||||||
std::domain_error("Trying to garbage collect "
|
std::domain_error("Trying to garbage collect "
|
||||||
" a non-unwrapped slice! "
|
" a non-unwrapped slice! "
|
||||||
@ -279,13 +280,13 @@ namespace atrip {
|
|||||||
// - we should make sure that the data pointer of slice
|
// - we should make sure that the data pointer of slice
|
||||||
// does not get freed.
|
// does not get freed.
|
||||||
//
|
//
|
||||||
if (slice.info.state == Slice::Ready) {
|
if (slice.info.state == Slice<F>::Ready) {
|
||||||
WITH_OCD WITH_RANK
|
WITH_OCD WITH_RANK
|
||||||
<< "__gc__:" << "checking for data recycled dependencies\n";
|
<< "__gc__:" << "checking for data recycled dependencies\n";
|
||||||
auto recycled
|
auto recycled
|
||||||
= Slice::hasRecycledReferencingToIt(slices, slice.info);
|
= Slice<F>::hasRecycledReferencingToIt(slices, slice.info);
|
||||||
if (recycled.size()) {
|
if (recycled.size()) {
|
||||||
Slice* newReady = recycled[0];
|
Slice<F>* newReady = recycled[0];
|
||||||
WITH_OCD WITH_RANK
|
WITH_OCD WITH_RANK
|
||||||
<< "__gc__:" << "swaping recycled "
|
<< "__gc__:" << "swaping recycled "
|
||||||
<< pretty_print(newReady->info)
|
<< pretty_print(newReady->info)
|
||||||
@ -310,8 +311,8 @@ namespace atrip {
|
|||||||
|
|
||||||
// if the slice is self sufficient, do not dare touching the
|
// if the slice is self sufficient, do not dare touching the
|
||||||
// pointer, since it is a pointer to our sources in our rank.
|
// pointer, since it is a pointer to our sources in our rank.
|
||||||
if ( slice.info.state == Slice::SelfSufficient
|
if ( slice.info.state == Slice<F>::SelfSufficient
|
||||||
|| slice.info.state == Slice::Recycled
|
|| slice.info.state == Slice<F>::Recycled
|
||||||
) {
|
) {
|
||||||
freeSlicePointer = false;
|
freeSlicePointer = false;
|
||||||
}
|
}
|
||||||
@ -333,7 +334,9 @@ namespace atrip {
|
|||||||
// at this point, let us blank the slice
|
// at this point, let us blank the slice
|
||||||
WITH_RANK << "~~~:cl(" << name << ")"
|
WITH_RANK << "~~~:cl(" << name << ")"
|
||||||
<< " freeing up slice "
|
<< " freeing up slice "
|
||||||
<< " info " << slice.info
|
// TODO: make this possible because of Templates
|
||||||
|
// TODO: there is a deduction error here
|
||||||
|
// << " info " << slice.info
|
||||||
<< "\n";
|
<< "\n";
|
||||||
slice.free();
|
slice.free();
|
||||||
}
|
}
|
||||||
@ -343,13 +346,13 @@ namespace atrip {
|
|||||||
|
|
||||||
// CONSTRUCTOR
|
// CONSTRUCTOR
|
||||||
SliceUnion( Tensor const& sourceTensor
|
SliceUnion( Tensor const& sourceTensor
|
||||||
, std::vector<Slice::Type> sliceTypes_
|
, std::vector<typename Slice<F>::Type> sliceTypes_
|
||||||
, std::vector<size_t> sliceLength_
|
, std::vector<size_t> sliceLength_
|
||||||
, std::vector<size_t> paramLength
|
, std::vector<size_t> paramLength
|
||||||
, size_t np
|
, size_t np
|
||||||
, MPI_Comm child_world
|
, MPI_Comm child_world
|
||||||
, MPI_Comm global_world
|
, MPI_Comm global_world
|
||||||
, Slice::Name name_
|
, typename Slice<F>::Name name_
|
||||||
, size_t nSliceBuffers = 4
|
, size_t nSliceBuffers = 4
|
||||||
)
|
)
|
||||||
: rankMap(paramLength, np, global_world)
|
: rankMap(paramLength, np, global_world)
|
||||||
@ -364,14 +367,14 @@ namespace atrip {
|
|||||||
, name(name_)
|
, name(name_)
|
||||||
, sliceTypes(sliceTypes_)
|
, sliceTypes(sliceTypes_)
|
||||||
, sliceBuffers(nSliceBuffers, sources[0])
|
, sliceBuffers(nSliceBuffers, sources[0])
|
||||||
//, slices(2 * sliceTypes.size(), Slice{ sources[0].size() })
|
//, slices(2 * sliceTypes.size(), Slice<F>{ sources[0].size() })
|
||||||
{ // constructor begin
|
{ // constructor begin
|
||||||
|
|
||||||
LOG(0,"Atrip") << "INIT SliceUnion: " << name << "\n";
|
LOG(0,"Atrip") << "INIT SliceUnion: " << name << "\n";
|
||||||
|
|
||||||
slices
|
slices
|
||||||
= std::vector<Slice>(2 * sliceTypes.size(), { sources[0].size() });
|
= std::vector<Slice<F>>(2 * sliceTypes.size(), { sources[0].size() });
|
||||||
// TODO: think exactly ^------------------- about this number
|
// TODO: think exactly ^------------------- about this number
|
||||||
|
|
||||||
// initialize the freePointers with the pointers to the buffers
|
// initialize the freePointers with the pointers to the buffers
|
||||||
std::transform(sliceBuffers.begin(), sliceBuffers.end(),
|
std::transform(sliceBuffers.begin(), sliceBuffers.end(),
|
||||||
@ -439,30 +442,20 @@ namespace atrip {
|
|||||||
* \brief Send asynchronously only if the state is Fetch
|
* \brief Send asynchronously only if the state is Fetch
|
||||||
*/
|
*/
|
||||||
void send( size_t otherRank
|
void send( size_t otherRank
|
||||||
, Slice::LocalDatabaseElement const& el
|
, typename Slice<F>::LocalDatabaseElement const& el
|
||||||
, size_t tag) const noexcept {
|
, size_t tag) const noexcept {
|
||||||
MPI_Request request;
|
MPI_Request request;
|
||||||
bool sendData_p = false;
|
bool sendData_p = false;
|
||||||
auto const& info = el.info;
|
auto const& info = el.info;
|
||||||
|
|
||||||
if (info.state == Slice::Fetch) sendData_p = true;
|
if (info.state == Slice<F>::Fetch) sendData_p = true;
|
||||||
// TODO: remove this because I have SelfSufficient
|
// TODO: remove this because I have SelfSufficient
|
||||||
if (otherRank == info.from.rank) sendData_p = false;
|
if (otherRank == info.from.rank) sendData_p = false;
|
||||||
if (!sendData_p) return;
|
if (!sendData_p) return;
|
||||||
|
|
||||||
switch (el.name) {
|
|
||||||
case Slice::Name::TA:
|
|
||||||
case Slice::Name::VIJKA:
|
|
||||||
if (otherRank / 48 == Atrip::rank / 48) {
|
|
||||||
Atrip::localSend++;
|
|
||||||
} else {
|
|
||||||
Atrip::networkSend++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MPI_Isend( sources[info.from.source].data()
|
MPI_Isend( sources[info.from.source].data()
|
||||||
, sources[info.from.source].size()
|
, sources[info.from.source].size()
|
||||||
, MPI_DOUBLE /* TODO: adapt this with traits */
|
, traits::mpi::datatypeOf<F>()
|
||||||
, otherRank
|
, otherRank
|
||||||
, tag
|
, tag
|
||||||
, universe
|
, universe
|
||||||
@ -476,19 +469,19 @@ namespace atrip {
|
|||||||
/**
|
/**
|
||||||
* \brief Receive asynchronously only if the state is Fetch
|
* \brief Receive asynchronously only if the state is Fetch
|
||||||
*/
|
*/
|
||||||
void receive(Slice::Info const& info, size_t tag) noexcept {
|
void receive(typename Slice<F>::Info const& info, size_t tag) noexcept {
|
||||||
auto& slice = Slice::findByInfo(slices, info);
|
auto& slice = Slice<F>::findByInfo(slices, info);
|
||||||
|
|
||||||
if (Atrip::rank == info.from.rank) return;
|
if (Atrip::rank == info.from.rank) return;
|
||||||
|
|
||||||
if (slice.info.state == Slice::Fetch) {
|
if (slice.info.state == Slice<F>::Fetch) {
|
||||||
// TODO: do it through the slice class
|
// TODO: do it through the slice class
|
||||||
slice.info.state = Slice::Dispatched;
|
slice.info.state = Slice<F>::Dispatched;
|
||||||
MPI_Request request;
|
MPI_Request request;
|
||||||
slice.request = request;
|
slice.request = request;
|
||||||
MPI_Irecv( slice.data
|
MPI_Irecv( slice.data
|
||||||
, slice.size
|
, slice.size
|
||||||
, MPI_DOUBLE // TODO: Adapt this with traits
|
, traits::mpi::datatypeOf<F>()
|
||||||
, info.from.rank
|
, info.from.rank
|
||||||
, tag
|
, tag
|
||||||
, universe
|
, universe
|
||||||
@ -502,42 +495,42 @@ namespace atrip {
|
|||||||
for (auto type: sliceTypes) unwrapSlice(type, abc);
|
for (auto type: sliceTypes) unwrapSlice(type, abc);
|
||||||
}
|
}
|
||||||
|
|
||||||
F* unwrapSlice(Slice::Type type, ABCTuple const& abc) {
|
F* unwrapSlice(typename Slice<F>::Type type, ABCTuple const& abc) {
|
||||||
WITH_CRAZY_DEBUG
|
WITH_CRAZY_DEBUG
|
||||||
WITH_RANK << "__unwrap__:slice " << type << " w n "
|
WITH_RANK << "__unwrap__:slice " << type << " w n "
|
||||||
<< name
|
<< name
|
||||||
<< " abc" << pretty_print(abc)
|
<< " abc" << pretty_print(abc)
|
||||||
<< "\n";
|
<< "\n";
|
||||||
auto& slice = Slice::findByTypeAbc(slices, type, abc);
|
auto& slice = Slice<F>::findByTypeAbc(slices, type, abc);
|
||||||
WITH_RANK << "__unwrap__:info " << slice.info << "\n";
|
//WITH_RANK << "__unwrap__:info " << slice.info << "\n";
|
||||||
switch (slice.info.state) {
|
switch (slice.info.state) {
|
||||||
case Slice::Dispatched:
|
case Slice<F>::Dispatched:
|
||||||
WITH_RANK << "__unwrap__:Fetch: " << &slice
|
WITH_RANK << "__unwrap__:Fetch: " << &slice
|
||||||
<< " info " << pretty_print(slice.info)
|
<< " info " << pretty_print(slice.info)
|
||||||
<< "\n";
|
<< "\n";
|
||||||
slice.unwrapAndMarkReady();
|
slice.unwrapAndMarkReady();
|
||||||
return slice.data;
|
return slice.data;
|
||||||
break;
|
break;
|
||||||
case Slice::SelfSufficient:
|
case Slice<F>::SelfSufficient:
|
||||||
WITH_RANK << "__unwrap__:SelfSufficient: " << &slice
|
WITH_RANK << "__unwrap__:SelfSufficient: " << &slice
|
||||||
<< " info " << pretty_print(slice.info)
|
<< " info " << pretty_print(slice.info)
|
||||||
<< "\n";
|
<< "\n";
|
||||||
return slice.data;
|
return slice.data;
|
||||||
break;
|
break;
|
||||||
case Slice::Ready:
|
case Slice<F>::Ready:
|
||||||
WITH_RANK << "__unwrap__:READY: UNWRAPPED ALREADY" << &slice
|
WITH_RANK << "__unwrap__:READY: UNWRAPPED ALREADY" << &slice
|
||||||
<< " info " << pretty_print(slice.info)
|
<< " info " << pretty_print(slice.info)
|
||||||
<< "\n";
|
<< "\n";
|
||||||
return slice.data;
|
return slice.data;
|
||||||
break;
|
break;
|
||||||
case Slice::Recycled:
|
case Slice<F>::Recycled:
|
||||||
WITH_RANK << "__unwrap__:RECYCLED " << &slice
|
WITH_RANK << "__unwrap__:RECYCLED " << &slice
|
||||||
<< " info " << pretty_print(slice.info)
|
<< " info " << pretty_print(slice.info)
|
||||||
<< "\n";
|
<< "\n";
|
||||||
return unwrapSlice(slice.info.recycling, abc);
|
return unwrapSlice(slice.info.recycling, abc);
|
||||||
break;
|
break;
|
||||||
case Slice::Fetch:
|
case Slice<F>::Fetch:
|
||||||
case Slice::Acceptor:
|
case Slice<F>::Acceptor:
|
||||||
throw std::domain_error("Can't unwrap an acceptor or fetch slice!");
|
throw std::domain_error("Can't unwrap an acceptor or fetch slice!");
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -546,24 +539,26 @@ namespace atrip {
|
|||||||
return slice.data;
|
return slice.data;
|
||||||
}
|
}
|
||||||
|
|
||||||
const RankMap rankMap;
|
const RankMap<F> rankMap;
|
||||||
const MPI_Comm world;
|
const MPI_Comm world;
|
||||||
const MPI_Comm universe;
|
const MPI_Comm universe;
|
||||||
const std::vector<size_t> sliceLength;
|
const std::vector<size_t> sliceLength;
|
||||||
std::vector< std::vector<F> > sources;
|
std::vector< std::vector<F> > sources;
|
||||||
std::vector< Slice > slices;
|
std::vector< Slice<F> > slices;
|
||||||
Slice::Name name;
|
typename Slice<F>::Name name;
|
||||||
const std::vector<Slice::Type> sliceTypes;
|
const std::vector<typename Slice<F>::Type> sliceTypes;
|
||||||
std::vector< std::vector<F> > sliceBuffers;
|
std::vector< std::vector<F> > sliceBuffers;
|
||||||
std::set<F*> freePointers;
|
std::set<F*> freePointers;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
SliceUnion&
|
template <typename F=double>
|
||||||
unionByName(std::vector<SliceUnion*> const& unions, Slice::Name name) {
|
SliceUnion<F>&
|
||||||
|
unionByName(std::vector<SliceUnion<F>*> const& unions,
|
||||||
|
typename Slice<F>::Name name) {
|
||||||
const auto sliceUnionIt
|
const auto sliceUnionIt
|
||||||
= std::find_if(unions.begin(), unions.end(),
|
= std::find_if(unions.begin(), unions.end(),
|
||||||
[&name](SliceUnion const* s) {
|
[&name](SliceUnion<F> const* s) {
|
||||||
return name == s->name;
|
return name == s->name;
|
||||||
});
|
});
|
||||||
if (sliceUnionIt == unions.end()) {
|
if (sliceUnionIt == unions.end()) {
|
||||||
|
|||||||
@ -255,7 +255,7 @@ ABCTuples specialDistribution(Info const& info, ABCTuples const& allTuples) {
|
|||||||
, container3d(nNodes * nNodes * nNodes)
|
, container3d(nNodes * nNodes * nNodes)
|
||||||
;
|
;
|
||||||
|
|
||||||
if (info.nodeId == 0)
|
WITH_DBG if (info.nodeId == 0)
|
||||||
std::cout << "\tGoing through all "
|
std::cout << "\tGoing through all "
|
||||||
<< allTuples.size()
|
<< allTuples.size()
|
||||||
<< " tuples in "
|
<< " tuples in "
|
||||||
@ -287,7 +287,7 @@ ABCTuples specialDistribution(Info const& info, ABCTuples const& allTuples) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (info.nodeId == 0)
|
WITH_DBG if (info.nodeId == 0)
|
||||||
std::cout << "\tBuilding 1-d containers\n";
|
std::cout << "\tBuilding 1-d containers\n";
|
||||||
// DISTRIBUTE 1-d containers
|
// DISTRIBUTE 1-d containers
|
||||||
// every tuple which is only located at one node belongs to this node
|
// every tuple which is only located at one node belongs to this node
|
||||||
@ -297,7 +297,7 @@ ABCTuples specialDistribution(Info const& info, ABCTuples const& allTuples) {
|
|||||||
std::copy(_tuples.begin(), _tuples.end(), nodeTuples.begin());
|
std::copy(_tuples.begin(), _tuples.end(), nodeTuples.begin());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (info.nodeId == 0)
|
WITH_DBG if (info.nodeId == 0)
|
||||||
std::cout << "\tBuilding 2-d containers\n";
|
std::cout << "\tBuilding 2-d containers\n";
|
||||||
// DISTRIBUTE 2-d containers
|
// DISTRIBUTE 2-d containers
|
||||||
//the tuples which are located at two nodes are half/half given to these nodes
|
//the tuples which are located at two nodes are half/half given to these nodes
|
||||||
@ -332,7 +332,7 @@ ABCTuples specialDistribution(Info const& info, ABCTuples const& allTuples) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (info.nodeId == 0)
|
WITH_DBG if (info.nodeId == 0)
|
||||||
std::cout << "\tBuilding 3-d containers\n";
|
std::cout << "\tBuilding 3-d containers\n";
|
||||||
// DISTRIBUTE 3-d containers
|
// DISTRIBUTE 3-d containers
|
||||||
for (size_t zyx = 0; zyx < container3d.size(); zyx++) {
|
for (size_t zyx = 0; zyx < container3d.size(); zyx++) {
|
||||||
@ -371,7 +371,7 @@ ABCTuples specialDistribution(Info const& info, ABCTuples const& allTuples) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (info.nodeId == 0) std::cout << "\tswapping tuples...\n";
|
WITH_DBG if (info.nodeId == 0) std::cout << "\tswapping tuples...\n";
|
||||||
/*
|
/*
|
||||||
* sort part of group-and-sort algorithm
|
* sort part of group-and-sort algorithm
|
||||||
* every tuple on a given node is sorted in a way that
|
* every tuple on a given node is sorted in a way that
|
||||||
@ -401,16 +401,16 @@ ABCTuples specialDistribution(Info const& info, ABCTuples const& allTuples) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (info.nodeId == 0) std::cout << "\tsorting list of tuples...\n";
|
WITH_DBG if (info.nodeId == 0) std::cout << "\tsorting list of tuples...\n";
|
||||||
//now we sort the list of tuples
|
//now we sort the list of tuples
|
||||||
std::sort(nodeTuples.begin(), nodeTuples.end());
|
std::sort(nodeTuples.begin(), nodeTuples.end());
|
||||||
|
|
||||||
if (info.nodeId == 0) std::cout << "\trestoring tuples...\n";
|
WITH_DBG if (info.nodeId == 0) std::cout << "\trestoring tuples...\n";
|
||||||
// we bring the tuples abc back in the order a<b<c
|
// we bring the tuples abc back in the order a<b<c
|
||||||
for (auto &t: nodeTuples) std::sort(t.begin(), t.end());
|
for (auto &t: nodeTuples) std::sort(t.begin(), t.end());
|
||||||
|
|
||||||
#if ATRIP_DEBUG > 1
|
#if ATRIP_DEBUG > 1
|
||||||
if (info.nodeId == 0)
|
WITH_DBG if (info.nodeId == 0)
|
||||||
std::cout << "checking for validity of " << nodeTuples.size() << std::endl;
|
std::cout << "checking for validity of " << nodeTuples.size() << std::endl;
|
||||||
const bool anyInvalid
|
const bool anyInvalid
|
||||||
= std::any_of(nodeTuples.begin(),
|
= std::any_of(nodeTuples.begin(),
|
||||||
@ -419,6 +419,7 @@ ABCTuples specialDistribution(Info const& info, ABCTuples const& allTuples) {
|
|||||||
if (anyInvalid) throw "Some tuple is invalid in group-and-sort algorithm";
|
if (anyInvalid) throw "Some tuple is invalid in group-and-sort algorithm";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
WITH_DBG if (info.nodeId == 0) std::cout << "\treturning tuples...\n";
|
||||||
return nodeTuples;
|
return nodeTuples;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -18,12 +18,13 @@
|
|||||||
|
|
||||||
namespace atrip {
|
namespace atrip {
|
||||||
|
|
||||||
|
template <typename F=double>
|
||||||
void sliceIntoVector
|
void sliceIntoVector
|
||||||
( std::vector<double> &v
|
( std::vector<F> &v
|
||||||
, CTF::Tensor<double> &toSlice
|
, CTF::Tensor<F> &toSlice
|
||||||
, std::vector<int64_t> const low
|
, std::vector<int64_t> const low
|
||||||
, std::vector<int64_t> const up
|
, std::vector<int64_t> const up
|
||||||
, CTF::Tensor<double> const& origin
|
, CTF::Tensor<F> const& origin
|
||||||
, std::vector<int64_t> const originLow
|
, std::vector<int64_t> const originLow
|
||||||
, std::vector<int64_t> const originUp
|
, std::vector<int64_t> const originUp
|
||||||
) {
|
) {
|
||||||
@ -50,155 +51,159 @@ namespace atrip {
|
|||||||
, origin_.low.data()
|
, origin_.low.data()
|
||||||
, origin_.up.data()
|
, origin_.up.data()
|
||||||
, 1.0);
|
, 1.0);
|
||||||
memcpy(v.data(), toSlice.data, sizeof(double) * v.size());
|
memcpy(v.data(), toSlice.data, sizeof(F) * v.size());
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
struct TAPHH : public SliceUnion {
|
template <typename F=double>
|
||||||
TAPHH( Tensor const& sourceTensor
|
struct TAPHH : public SliceUnion<F> {
|
||||||
|
TAPHH( CTF::Tensor<F> const& sourceTensor
|
||||||
, size_t No
|
, size_t No
|
||||||
, size_t Nv
|
, size_t Nv
|
||||||
, size_t np
|
, size_t np
|
||||||
, MPI_Comm child_world
|
, MPI_Comm child_world
|
||||||
, MPI_Comm global_world
|
, MPI_Comm global_world
|
||||||
) : SliceUnion( sourceTensor
|
) : SliceUnion<F>( sourceTensor
|
||||||
, {Slice::A, Slice::B, Slice::C}
|
, {Slice<F>::A, Slice<F>::B, Slice<F>::C}
|
||||||
, {Nv, No, No} // size of the slices
|
, {Nv, No, No} // size of the slices
|
||||||
, {Nv}
|
, {Nv}
|
||||||
, np
|
, np
|
||||||
, child_world
|
, child_world
|
||||||
, global_world
|
, global_world
|
||||||
, Slice::TA
|
, Slice<F>::TA
|
||||||
, 6) {
|
, 6) {
|
||||||
init(sourceTensor);
|
this->init(sourceTensor);
|
||||||
}
|
}
|
||||||
|
|
||||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override
|
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override
|
||||||
{
|
{
|
||||||
const int Nv = sliceLength[0]
|
const int Nv = this->sliceLength[0]
|
||||||
, No = sliceLength[1]
|
, No = this->sliceLength[1]
|
||||||
, a = rankMap.find({static_cast<size_t>(Atrip::rank), it});
|
, a = this->rankMap.find({static_cast<size_t>(Atrip::rank), it});
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
||||||
sliceIntoVector( sources[it]
|
sliceIntoVector<F>( this->sources[it]
|
||||||
, to, {0, 0, 0}, {Nv, No, No}
|
, to, {0, 0, 0}, {Nv, No, No}
|
||||||
, from, {a, 0, 0, 0}, {a+1, Nv, No, No}
|
, from, {a, 0, 0, 0}, {a+1, Nv, No, No}
|
||||||
);
|
);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct HHHA : public SliceUnion {
|
template <typename F=double>
|
||||||
HHHA( Tensor const& sourceTensor
|
struct HHHA : public SliceUnion<F> {
|
||||||
|
HHHA( CTF::Tensor<F> const& sourceTensor
|
||||||
, size_t No
|
, size_t No
|
||||||
, size_t Nv
|
, size_t Nv
|
||||||
, size_t np
|
, size_t np
|
||||||
, MPI_Comm child_world
|
, MPI_Comm child_world
|
||||||
, MPI_Comm global_world
|
, MPI_Comm global_world
|
||||||
) : SliceUnion( sourceTensor
|
) : SliceUnion<F>( sourceTensor
|
||||||
, {Slice::A, Slice::B, Slice::C}
|
, {Slice<F>::A, Slice<F>::B, Slice<F>::C}
|
||||||
, {No, No, No} // size of the slices
|
, {No, No, No} // size of the slices
|
||||||
, {Nv} // size of the parametrization
|
, {Nv} // size of the parametrization
|
||||||
, np
|
, np
|
||||||
, child_world
|
, child_world
|
||||||
, global_world
|
, global_world
|
||||||
, Slice::VIJKA
|
, Slice<F>::VIJKA
|
||||||
, 6) {
|
, 6) {
|
||||||
init(sourceTensor);
|
this->init(sourceTensor);
|
||||||
}
|
}
|
||||||
|
|
||||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override
|
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override
|
||||||
{
|
{
|
||||||
|
|
||||||
const int No = sliceLength[0]
|
const int No = this->sliceLength[0]
|
||||||
, a = rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
, a = this->rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
||||||
;
|
;
|
||||||
|
|
||||||
sliceIntoVector( sources[it]
|
sliceIntoVector<F>( this->sources[it]
|
||||||
, to, {0, 0, 0}, {No, No, No}
|
, to, {0, 0, 0}, {No, No, No}
|
||||||
, from, {0, 0, 0, a}, {No, No, No, a+1}
|
, from, {0, 0, 0, a}, {No, No, No, a+1}
|
||||||
);
|
);
|
||||||
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ABPH : public SliceUnion {
|
template <typename F=double>
|
||||||
ABPH( Tensor const& sourceTensor
|
struct ABPH : public SliceUnion<F> {
|
||||||
|
ABPH( CTF::Tensor<F> const& sourceTensor
|
||||||
, size_t No
|
, size_t No
|
||||||
, size_t Nv
|
, size_t Nv
|
||||||
, size_t np
|
, size_t np
|
||||||
, MPI_Comm child_world
|
, MPI_Comm child_world
|
||||||
, MPI_Comm global_world
|
, MPI_Comm global_world
|
||||||
) : SliceUnion( sourceTensor
|
) : SliceUnion<F>( sourceTensor
|
||||||
, { Slice::AB, Slice::BC, Slice::AC
|
, { Slice<F>::AB, Slice<F>::BC, Slice<F>::AC
|
||||||
, Slice::BA, Slice::CB, Slice::CA
|
, Slice<F>::BA, Slice<F>::CB, Slice<F>::CA
|
||||||
}
|
}
|
||||||
, {Nv, No} // size of the slices
|
, {Nv, No} // size of the slices
|
||||||
, {Nv, Nv} // size of the parametrization
|
, {Nv, Nv} // size of the parametrization
|
||||||
, np
|
, np
|
||||||
, child_world
|
, child_world
|
||||||
, global_world
|
, global_world
|
||||||
, Slice::VABCI
|
, Slice<F>::VABCI
|
||||||
, 2*6) {
|
, 2*6) {
|
||||||
init(sourceTensor);
|
this->init(sourceTensor);
|
||||||
}
|
}
|
||||||
|
|
||||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override {
|
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override {
|
||||||
|
|
||||||
const int Nv = sliceLength[0]
|
const int Nv = this->sliceLength[0]
|
||||||
, No = sliceLength[1]
|
, No = this->sliceLength[1]
|
||||||
, el = rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
, el = this->rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
||||||
, a = el % Nv
|
, a = el % Nv
|
||||||
, b = el / Nv
|
, b = el / Nv
|
||||||
;
|
;
|
||||||
|
|
||||||
|
|
||||||
sliceIntoVector( sources[it]
|
sliceIntoVector<F>( this->sources[it]
|
||||||
, to, {0, 0}, {Nv, No}
|
, to, {0, 0}, {Nv, No}
|
||||||
, from, {a, b, 0, 0}, {a+1, b+1, Nv, No}
|
, from, {a, b, 0, 0}, {a+1, b+1, Nv, No}
|
||||||
);
|
);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ABHH : public SliceUnion {
|
template <typename F=double>
|
||||||
ABHH( Tensor const& sourceTensor
|
struct ABHH : public SliceUnion<F> {
|
||||||
|
ABHH( CTF::Tensor<F> const& sourceTensor
|
||||||
, size_t No
|
, size_t No
|
||||||
, size_t Nv
|
, size_t Nv
|
||||||
, size_t np
|
, size_t np
|
||||||
, MPI_Comm child_world
|
, MPI_Comm child_world
|
||||||
, MPI_Comm global_world
|
, MPI_Comm global_world
|
||||||
) : SliceUnion( sourceTensor
|
) : SliceUnion<F>( sourceTensor
|
||||||
, {Slice::AB, Slice::BC, Slice::AC}
|
, {Slice<F>::AB, Slice<F>::BC, Slice<F>::AC}
|
||||||
, {No, No} // size of the slices
|
, {No, No} // size of the slices
|
||||||
, {Nv, Nv} // size of the parametrization
|
, {Nv, Nv} // size of the parametrization
|
||||||
, np
|
, np
|
||||||
, child_world
|
, child_world
|
||||||
, global_world
|
, global_world
|
||||||
, Slice::VABIJ
|
, Slice<F>::VABIJ
|
||||||
, 6) {
|
, 6) {
|
||||||
init(sourceTensor);
|
this->init(sourceTensor);
|
||||||
}
|
}
|
||||||
|
|
||||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override {
|
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override {
|
||||||
|
|
||||||
const int Nv = from.lens[0]
|
const int Nv = from.lens[0]
|
||||||
, No = sliceLength[1]
|
, No = this->sliceLength[1]
|
||||||
, el = rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
, el = this->rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
||||||
, a = el % Nv
|
, a = el % Nv
|
||||||
, b = el / Nv
|
, b = el / Nv
|
||||||
;
|
;
|
||||||
|
|
||||||
sliceIntoVector( sources[it]
|
sliceIntoVector<F>( this->sources[it]
|
||||||
, to, {0, 0}, {No, No}
|
, to, {0, 0}, {No, No}
|
||||||
, from, {a, b, 0, 0}, {a+1, b+1, No, No}
|
, from, {a, b, 0, 0}, {a+1, b+1, No, No}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -206,39 +211,40 @@ namespace atrip {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct TABHH : public SliceUnion {
|
template <typename F=double>
|
||||||
TABHH( Tensor const& sourceTensor
|
struct TABHH : public SliceUnion<F> {
|
||||||
|
TABHH( CTF::Tensor<F> const& sourceTensor
|
||||||
, size_t No
|
, size_t No
|
||||||
, size_t Nv
|
, size_t Nv
|
||||||
, size_t np
|
, size_t np
|
||||||
, MPI_Comm child_world
|
, MPI_Comm child_world
|
||||||
, MPI_Comm global_world
|
, MPI_Comm global_world
|
||||||
) : SliceUnion( sourceTensor
|
) : SliceUnion<F>( sourceTensor
|
||||||
, {Slice::AB, Slice::BC, Slice::AC}
|
, {Slice<F>::AB, Slice<F>::BC, Slice<F>::AC}
|
||||||
, {No, No} // size of the slices
|
, {No, No} // size of the slices
|
||||||
, {Nv, Nv} // size of the parametrization
|
, {Nv, Nv} // size of the parametrization
|
||||||
, np
|
, np
|
||||||
, child_world
|
, child_world
|
||||||
, global_world
|
, global_world
|
||||||
, Slice::TABIJ
|
, Slice<F>::TABIJ
|
||||||
, 6) {
|
, 6) {
|
||||||
init(sourceTensor);
|
this->init(sourceTensor);
|
||||||
}
|
}
|
||||||
|
|
||||||
void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override {
|
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override {
|
||||||
// TODO: maybe generalize this with ABHH
|
// TODO: maybe generalize this with ABHH
|
||||||
|
|
||||||
const int Nv = from.lens[0]
|
const int Nv = from.lens[0]
|
||||||
, No = sliceLength[1]
|
, No = this->sliceLength[1]
|
||||||
, el = rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
, el = this->rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
||||||
, a = el % Nv
|
, a = el % Nv
|
||||||
, b = el / Nv
|
, b = el / Nv
|
||||||
;
|
;
|
||||||
|
|
||||||
sliceIntoVector( sources[it]
|
sliceIntoVector<F>( this->sources[it]
|
||||||
, to, {0, 0}, {No, No}
|
, to, {0, 0}, {No, No}
|
||||||
, from, {a, b, 0, 0}, {a+1, b+1, No, No}
|
, from, {a, b, 0, 0}, {a+1, b+1, No, No}
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@ -29,7 +29,7 @@ namespace atrip {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
std::string pretty_print(T&& value) {
|
std::string pretty_print(T&& value) {
|
||||||
std::stringstream stream;
|
std::stringstream stream;
|
||||||
#if ATRIP_DEBUG > 1
|
#if ATRIP_DEBUG > 2
|
||||||
dbg::pretty_print(stream, std::forward<T>(value));
|
dbg::pretty_print(stream, std::forward<T>(value));
|
||||||
#endif
|
#endif
|
||||||
return stream.str();
|
return stream.str();
|
||||||
|
|||||||
@ -23,12 +23,12 @@
|
|||||||
|
|
||||||
using namespace atrip;
|
using namespace atrip;
|
||||||
|
|
||||||
bool RankMap::RANK_ROUND_ROBIN;
|
template <typename F> bool RankMap<F>::RANK_ROUND_ROBIN;
|
||||||
|
template bool RankMap<double>::RANK_ROUND_ROBIN;
|
||||||
|
template bool RankMap<Complex>::RANK_ROUND_ROBIN;
|
||||||
int Atrip::rank;
|
int Atrip::rank;
|
||||||
int Atrip::np;
|
int Atrip::np;
|
||||||
Timings Atrip::chrono;
|
Timings Atrip::chrono;
|
||||||
size_t Atrip::networkSend;
|
|
||||||
size_t Atrip::localSend;
|
|
||||||
|
|
||||||
// user printing block
|
// user printing block
|
||||||
IterationDescriptor IterationDescription::descriptor;
|
IterationDescriptor IterationDescription::descriptor;
|
||||||
@ -39,11 +39,10 @@ void atrip::registerIterationDescriptor(IterationDescriptor d) {
|
|||||||
void Atrip::init() {
|
void Atrip::init() {
|
||||||
MPI_Comm_rank(MPI_COMM_WORLD, &Atrip::rank);
|
MPI_Comm_rank(MPI_COMM_WORLD, &Atrip::rank);
|
||||||
MPI_Comm_size(MPI_COMM_WORLD, &Atrip::np);
|
MPI_Comm_size(MPI_COMM_WORLD, &Atrip::np);
|
||||||
Atrip::networkSend = 0;
|
|
||||||
Atrip::localSend = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Atrip::Output Atrip::run(Atrip::Input const& in) {
|
template <typename F>
|
||||||
|
Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
||||||
|
|
||||||
const int np = Atrip::np;
|
const int np = Atrip::np;
|
||||||
const int rank = Atrip::rank;
|
const int rank = Atrip::rank;
|
||||||
@ -56,21 +55,21 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
LOG(0,"Atrip") << "np: " << np << "\n";
|
LOG(0,"Atrip") << "np: " << np << "\n";
|
||||||
|
|
||||||
// allocate the three scratches, see piecuch
|
// allocate the three scratches, see piecuch
|
||||||
std::vector<double> Tijk(No*No*No) // doubles only (see piecuch)
|
std::vector<F> Tijk(No*No*No) // doubles only (see piecuch)
|
||||||
, Zijk(No*No*No) // singles + doubles (see piecuch)
|
, Zijk(No*No*No) // singles + doubles (see piecuch)
|
||||||
// we need local copies of the following tensors on every
|
// we need local copies of the following tensors on every
|
||||||
// rank
|
// rank
|
||||||
, epsi(No)
|
, epsi(No)
|
||||||
, epsa(Nv)
|
, epsa(Nv)
|
||||||
, Tai(No * Nv)
|
, Tai(No * Nv)
|
||||||
;
|
;
|
||||||
|
|
||||||
in.ei->read_all(epsi.data());
|
in.ei->read_all(epsi.data());
|
||||||
in.ea->read_all(epsa.data());
|
in.ea->read_all(epsa.data());
|
||||||
in.Tph->read_all(Tai.data());
|
in.Tph->read_all(Tai.data());
|
||||||
|
|
||||||
RankMap::RANK_ROUND_ROBIN = in.rankRoundRobin;
|
RankMap<F>::RANK_ROUND_ROBIN = in.rankRoundRobin;
|
||||||
if (RankMap::RANK_ROUND_ROBIN) {
|
if (RankMap<F>::RANK_ROUND_ROBIN) {
|
||||||
LOG(0,"Atrip") << "Doing rank round robin slices distribution" << "\n";
|
LOG(0,"Atrip") << "Doing rank round robin slices distribution" << "\n";
|
||||||
} else {
|
} else {
|
||||||
LOG(0,"Atrip")
|
LOG(0,"Atrip")
|
||||||
@ -101,25 +100,25 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
// BUILD SLICES PARAMETRIZED BY NV ==================================={{{1
|
// BUILD SLICES PARAMETRIZED BY NV ==================================={{{1
|
||||||
WITH_CHRONO("nv-slices",
|
WITH_CHRONO("nv-slices",
|
||||||
LOG(0,"Atrip") << "BUILD NV-SLICES\n";
|
LOG(0,"Atrip") << "BUILD NV-SLICES\n";
|
||||||
TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
TAPHH<F> taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||||
HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
HHHA<F> hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||||
)
|
)
|
||||||
|
|
||||||
// BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1
|
// BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1
|
||||||
WITH_CHRONO("nv-nv-slices",
|
WITH_CHRONO("nv-nv-slices",
|
||||||
LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n";
|
LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n";
|
||||||
ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
ABPH<F> abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||||
ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
ABHH<F> abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||||
TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
TABHH<F> tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe);
|
||||||
)
|
)
|
||||||
|
|
||||||
// all tensors
|
// all tensors
|
||||||
std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh};
|
std::vector< SliceUnion<F>* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh};
|
||||||
|
|
||||||
// get tuples for the current rank
|
// get tuples for the current rank
|
||||||
TuplesDistribution *distribution;
|
TuplesDistribution *distribution;
|
||||||
|
|
||||||
if (in.tuplesDistribution == Atrip::Input::TuplesDistribution::NAIVE) {
|
if (in.tuplesDistribution == Atrip::Input<F>::TuplesDistribution::NAIVE) {
|
||||||
LOG(0,"Atrip") << "Using the naive distribution\n";
|
LOG(0,"Atrip") << "Using the naive distribution\n";
|
||||||
distribution = new NaiveDistribution();
|
distribution = new NaiveDistribution();
|
||||||
} else {
|
} else {
|
||||||
@ -157,34 +156,36 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
using Database = typename Slice<F>::Database;
|
||||||
|
using LocalDatabase = typename Slice<F>::LocalDatabase;
|
||||||
auto communicateDatabase
|
auto communicateDatabase
|
||||||
= [ &unions
|
= [ &unions
|
||||||
, np
|
, np
|
||||||
] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database {
|
] (ABCTuple const& abc, MPI_Comm const& c) -> Database {
|
||||||
|
|
||||||
WITH_CHRONO("db:comm:type:do",
|
WITH_CHRONO("db:comm:type:do",
|
||||||
auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement();
|
auto MPI_LDB_ELEMENT = Slice<F>::mpi::localDatabaseElement();
|
||||||
)
|
)
|
||||||
|
|
||||||
WITH_CHRONO("db:comm:ldb",
|
WITH_CHRONO("db:comm:ldb",
|
||||||
Slice::LocalDatabase ldb;
|
typename Slice<F>::LocalDatabase ldb;
|
||||||
for (auto const& tensor: unions) {
|
for (auto const& tensor: unions) {
|
||||||
auto const& tensorDb = tensor->buildLocalDatabase(abc);
|
auto const& tensorDb = tensor->buildLocalDatabase(abc);
|
||||||
ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end());
|
ldb.insert(ldb.end(), tensorDb.begin(), tensorDb.end());
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
Slice::Database db(np * ldb.size(), ldb[0]);
|
Database db(np * ldb.size(), ldb[0]);
|
||||||
|
|
||||||
WITH_CHRONO("oneshot-db:comm:allgather",
|
WITH_CHRONO("oneshot-db:comm:allgather",
|
||||||
WITH_CHRONO("db:comm:allgather",
|
WITH_CHRONO("db:comm:allgather",
|
||||||
MPI_Allgather( ldb.data()
|
MPI_Allgather( ldb.data()
|
||||||
, ldb.size()
|
, ldb.size()
|
||||||
, MPI_LDB_ELEMENT
|
, MPI_LDB_ELEMENT
|
||||||
, db.data()
|
, db.data()
|
||||||
, ldb.size()
|
, ldb.size()
|
||||||
, MPI_LDB_ELEMENT
|
, MPI_LDB_ELEMENT
|
||||||
, c);
|
, c);
|
||||||
))
|
))
|
||||||
|
|
||||||
WITH_CHRONO("db:comm:type:free",
|
WITH_CHRONO("db:comm:type:free",
|
||||||
@ -195,7 +196,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
};
|
};
|
||||||
|
|
||||||
auto doIOPhase
|
auto doIOPhase
|
||||||
= [&unions, &rank, &np, &universe] (Slice::Database const& db) {
|
= [&unions, &rank, &np, &universe] (Database const& db) {
|
||||||
|
|
||||||
const size_t localDBLength = db.size() / np;
|
const size_t localDBLength = db.size() / np;
|
||||||
|
|
||||||
@ -245,7 +246,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
;
|
;
|
||||||
for (auto it = begin; it != end; ++it) {
|
for (auto it = begin; it != end; ++it) {
|
||||||
sendTag++;
|
sendTag++;
|
||||||
Slice::LocalDatabaseElement const& el = *it;
|
typename Slice<F>::LocalDatabaseElement const& el = *it;
|
||||||
|
|
||||||
if (el.info.from.rank != rank) continue;
|
if (el.info.from.rank != rank) continue;
|
||||||
|
|
||||||
@ -287,8 +288,9 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
* double(No)
|
* double(No)
|
||||||
* double(No)
|
* double(No)
|
||||||
* (double(No) + double(Nv))
|
* (double(No) + double(Nv))
|
||||||
* 2
|
* 2.0
|
||||||
* 6
|
* (traits::isComplex<F>() ? 2.0 : 1.0)
|
||||||
|
* 6.0
|
||||||
/ 1e9
|
/ 1e9
|
||||||
;
|
;
|
||||||
|
|
||||||
@ -321,24 +323,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t networkSend;
|
|
||||||
MPI_Reduce(&Atrip::networkSend,
|
|
||||||
&networkSend,
|
|
||||||
1,
|
|
||||||
MPI_UINT64_T,
|
|
||||||
MPI_SUM,
|
|
||||||
0,
|
|
||||||
universe);
|
|
||||||
|
|
||||||
size_t localSend;
|
|
||||||
MPI_Reduce(&Atrip::localSend,
|
|
||||||
&localSend,
|
|
||||||
1,
|
|
||||||
MPI_UINT64_T,
|
|
||||||
MPI_SUM,
|
|
||||||
0,
|
|
||||||
universe);
|
|
||||||
|
|
||||||
LOG(0,"Atrip")
|
LOG(0,"Atrip")
|
||||||
<< "iteration " << iteration
|
<< "iteration " << iteration
|
||||||
<< " [" << 100 * iteration / nIterations << "%]"
|
<< " [" << 100 * iteration / nIterations << "%]"
|
||||||
@ -346,10 +330,6 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
<< "GF)"
|
<< "GF)"
|
||||||
<< " (" << doublesFlops * iteration / Atrip::chrono["iterations"].count()
|
<< " (" << doublesFlops * iteration / Atrip::chrono["iterations"].count()
|
||||||
<< "GF)"
|
<< "GF)"
|
||||||
<< " :net " << networkSend
|
|
||||||
<< " :loc " << localSend
|
|
||||||
<< " :loc/net " << (double(localSend) / double(networkSend))
|
|
||||||
//<< " ===========================\n"
|
|
||||||
<< "\n";
|
<< "\n";
|
||||||
|
|
||||||
|
|
||||||
@ -418,34 +398,34 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
)))
|
)))
|
||||||
WITH_CHRONO("oneshot-doubles",
|
WITH_CHRONO("oneshot-doubles",
|
||||||
WITH_CHRONO("doubles",
|
WITH_CHRONO("doubles",
|
||||||
doublesContribution( abc, (size_t)No, (size_t)Nv
|
doublesContribution<F>( abc, (size_t)No, (size_t)Nv
|
||||||
// -- VABCI
|
// -- VABCI
|
||||||
, abph.unwrapSlice(Slice::AB, abc)
|
, abph.unwrapSlice(Slice<F>::AB, abc)
|
||||||
, abph.unwrapSlice(Slice::AC, abc)
|
, abph.unwrapSlice(Slice<F>::AC, abc)
|
||||||
, abph.unwrapSlice(Slice::BC, abc)
|
, abph.unwrapSlice(Slice<F>::BC, abc)
|
||||||
, abph.unwrapSlice(Slice::BA, abc)
|
, abph.unwrapSlice(Slice<F>::BA, abc)
|
||||||
, abph.unwrapSlice(Slice::CA, abc)
|
, abph.unwrapSlice(Slice<F>::CA, abc)
|
||||||
, abph.unwrapSlice(Slice::CB, abc)
|
, abph.unwrapSlice(Slice<F>::CB, abc)
|
||||||
// -- VHHHA
|
// -- VHHHA
|
||||||
, hhha.unwrapSlice(Slice::A, abc)
|
, hhha.unwrapSlice(Slice<F>::A, abc)
|
||||||
, hhha.unwrapSlice(Slice::B, abc)
|
, hhha.unwrapSlice(Slice<F>::B, abc)
|
||||||
, hhha.unwrapSlice(Slice::C, abc)
|
, hhha.unwrapSlice(Slice<F>::C, abc)
|
||||||
// -- TA
|
// -- TA
|
||||||
, taphh.unwrapSlice(Slice::A, abc)
|
, taphh.unwrapSlice(Slice<F>::A, abc)
|
||||||
, taphh.unwrapSlice(Slice::B, abc)
|
, taphh.unwrapSlice(Slice<F>::B, abc)
|
||||||
, taphh.unwrapSlice(Slice::C, abc)
|
, taphh.unwrapSlice(Slice<F>::C, abc)
|
||||||
// -- TABIJ
|
// -- TABIJ
|
||||||
, tabhh.unwrapSlice(Slice::AB, abc)
|
, tabhh.unwrapSlice(Slice<F>::AB, abc)
|
||||||
, tabhh.unwrapSlice(Slice::AC, abc)
|
, tabhh.unwrapSlice(Slice<F>::AC, abc)
|
||||||
, tabhh.unwrapSlice(Slice::BC, abc)
|
, tabhh.unwrapSlice(Slice<F>::BC, abc)
|
||||||
// -- TIJK
|
// -- TIJK
|
||||||
, Tijk.data()
|
, Tijk.data()
|
||||||
);
|
);
|
||||||
WITH_RANK << iteration << "-th doubles done\n";
|
WITH_RANK << iteration << "-th doubles done\n";
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
// COMPUTE SINGLES =================================================== {{{1
|
// COMPUTE SINGLES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% {{{1
|
||||||
OCD_Barrier(universe);
|
OCD_Barrier(universe);
|
||||||
if (!isFakeTuple(i)) {
|
if (!isFakeTuple(i)) {
|
||||||
WITH_CHRONO("oneshot-unwrap",
|
WITH_CHRONO("oneshot-unwrap",
|
||||||
@ -457,30 +437,30 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
|
for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I];
|
||||||
)
|
)
|
||||||
WITH_CHRONO("singles",
|
WITH_CHRONO("singles",
|
||||||
singlesContribution( No, Nv, abc
|
singlesContribution<F>( No, Nv, abc
|
||||||
, Tai.data()
|
, Tai.data()
|
||||||
, abhh.unwrapSlice(Slice::AB, abc)
|
, abhh.unwrapSlice(Slice<F>::AB, abc)
|
||||||
, abhh.unwrapSlice(Slice::AC, abc)
|
, abhh.unwrapSlice(Slice<F>::AC, abc)
|
||||||
, abhh.unwrapSlice(Slice::BC, abc)
|
, abhh.unwrapSlice(Slice<F>::BC, abc)
|
||||||
, Zijk.data());
|
, Zijk.data());
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// COMPUTE ENERGY ==================================================== {{{1
|
// COMPUTE ENERGY %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% {{{1
|
||||||
if (!isFakeTuple(i)) {
|
if (!isFakeTuple(i)) {
|
||||||
double tupleEnergy(0.);
|
double tupleEnergy(0.);
|
||||||
|
|
||||||
int distinct(0);
|
int distinct(0);
|
||||||
if (abc[0] == abc[1]) distinct++;
|
if (abc[0] == abc[1]) distinct++;
|
||||||
if (abc[1] == abc[2]) distinct--;
|
if (abc[1] == abc[2]) distinct--;
|
||||||
const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]);
|
const F epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]);
|
||||||
|
|
||||||
WITH_CHRONO("energy",
|
WITH_CHRONO("energy",
|
||||||
if ( distinct == 0)
|
if ( distinct == 0)
|
||||||
tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk);
|
tupleEnergy = getEnergyDistinct<F>(epsabc, epsi, Tijk, Zijk);
|
||||||
else
|
else
|
||||||
tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk);
|
tupleEnergy = getEnergySame<F>(epsabc, epsi, Tijk, Zijk);
|
||||||
)
|
)
|
||||||
|
|
||||||
#if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES)
|
#if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES)
|
||||||
@ -510,7 +490,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
// CLEANUP UNIONS ===================================================={{{1
|
// CLEANUP UNIONS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%{{{1
|
||||||
OCD_Barrier(universe);
|
OCD_Barrier(universe);
|
||||||
if (abcNext) {
|
if (abcNext) {
|
||||||
WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n";
|
WITH_RANK << "__gc__:" << iteration << "-th cleaning up.......\n";
|
||||||
@ -521,8 +501,8 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
<< " :abc " << pretty_print(abc)
|
<< " :abc " << pretty_print(abc)
|
||||||
<< " :abcN " << pretty_print(*abcNext)
|
<< " :abcN " << pretty_print(*abcNext)
|
||||||
<< "\n";
|
<< "\n";
|
||||||
for (auto const& slice: u->slices)
|
// for (auto const& slice: u->slices)
|
||||||
WITH_RANK << "__gc__:guts:" << slice.info << "\n";
|
// WITH_RANK << "__gc__:guts:" << slice.info << "\n";
|
||||||
u->clearUnusedSlicesForNext(*abcNext);
|
u->clearUnusedSlicesForNext(*abcNext);
|
||||||
|
|
||||||
WITH_RANK << "__gc__: checking validity\n";
|
WITH_RANK << "__gc__: checking validity\n";
|
||||||
@ -530,13 +510,13 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
#ifdef HAVE_OCD
|
#ifdef HAVE_OCD
|
||||||
// check for validity of the slices
|
// check for validity of the slices
|
||||||
for (auto type: u->sliceTypes) {
|
for (auto type: u->sliceTypes) {
|
||||||
auto tuple = Slice::subtupleBySlice(abc, type);
|
auto tuple = Slice<F>::subtupleBySlice(abc, type);
|
||||||
for (auto& slice: u->slices) {
|
for (auto& slice: u->slices) {
|
||||||
if ( slice.info.type == type
|
if ( slice.info.type == type
|
||||||
&& slice.info.tuple == tuple
|
&& slice.info.tuple == tuple
|
||||||
&& slice.isDirectlyFetchable()
|
&& slice.isDirectlyFetchable()
|
||||||
) {
|
) {
|
||||||
if (slice.info.state == Slice::Dispatched)
|
if (slice.info.state == Slice<F>::Dispatched)
|
||||||
throw std::domain_error( "This slice should not be undispatched! "
|
throw std::domain_error( "This slice should not be undispatched! "
|
||||||
+ pretty_print(slice.info));
|
+ pretty_print(slice.info));
|
||||||
}
|
}
|
||||||
@ -551,14 +531,14 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
WITH_RANK << iteration << "-th cleaning up....... DONE\n";
|
WITH_RANK << iteration << "-th cleaning up....... DONE\n";
|
||||||
|
|
||||||
Atrip::chrono["iterations"].stop();
|
Atrip::chrono["iterations"].stop();
|
||||||
// ITERATION END ====================================================={{{1
|
// ITERATION END %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%{{{1
|
||||||
|
|
||||||
}
|
}
|
||||||
// END OF MAIN LOOP
|
// END OF MAIN LOOP
|
||||||
|
|
||||||
MPI_Barrier(universe);
|
MPI_Barrier(universe);
|
||||||
|
|
||||||
// PRINT TUPLES ========================================================={{{1
|
// PRINT TUPLES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%{{{1
|
||||||
#if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES)
|
#if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES)
|
||||||
LOG(0,"Atrip") << "tuple energies" << "\n";
|
LOG(0,"Atrip") << "tuple energies" << "\n";
|
||||||
for (size_t i = 0; i < np; i++) {
|
for (size_t i = 0; i < np; i++) {
|
||||||
@ -576,7 +556,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// COMMUNICATE THE ENERGIES ============================================={{{1
|
// COMMUNICATE THE ENERGIES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%{{{1
|
||||||
LOG(0,"Atrip") << "COMMUNICATING ENERGIES \n";
|
LOG(0,"Atrip") << "COMMUNICATING ENERGIES \n";
|
||||||
double globalEnergy = 0;
|
double globalEnergy = 0;
|
||||||
MPI_Reduce(&energy, &globalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, universe);
|
MPI_Reduce(&energy, &globalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, universe);
|
||||||
@ -602,4 +582,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) {
|
|||||||
return { - globalEnergy };
|
return { - globalEnergy };
|
||||||
|
|
||||||
}
|
}
|
||||||
|
// instantiate
|
||||||
|
template Atrip::Output Atrip::run(Atrip::Input<double> const& in);
|
||||||
|
template Atrip::Output Atrip::run(Atrip::Input<Complex> const& in);
|
||||||
// Main:1 ends here
|
// Main:1 ends here
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user