From c2b1c78c67d756c29b6a0f9b884248491a38f4d7 Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Mon, 7 Feb 2022 22:29:47 +0100 Subject: [PATCH] Tanlge source files for complex --- include/atrip.hpp | 2 +- include/atrip/Atrip.hpp | 24 ++-- include/atrip/Blas.hpp | 70 +++++++++- include/atrip/Debug.hpp | 12 +- include/atrip/Equations.hpp | 257 ++++++++++++++++++++--------------- include/atrip/RankMap.hpp | 12 +- include/atrip/Slice.hpp | 64 +++++---- include/atrip/SliceUnion.hpp | 110 +++++++-------- include/atrip/Tuples.hpp | 2 +- include/atrip/Unions.hpp | 204 +++++++++++++-------------- include/atrip/Utils.hpp | 2 +- src/atrip/Atrip.cxx | 129 +++++++++--------- 12 files changed, 511 insertions(+), 377 deletions(-) diff --git a/include/atrip.hpp b/include/atrip.hpp index b3ef823..8ecf6ce 100644 --- a/include/atrip.hpp +++ b/include/atrip.hpp @@ -1,4 +1,4 @@ -// [[file:../atrip.org::*Include header][Include header:1]] +// [[file:~/cc4s/src/atrip/complex/atrip.org::*Include%20header][Include header:1]] #pragma once #include diff --git a/include/atrip/Atrip.hpp b/include/atrip/Atrip.hpp index a8bcd78..6f3859c 100644 --- a/include/atrip/Atrip.hpp +++ b/include/atrip/Atrip.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*Atrip][Atrip:1]] +// [[file:~/cc4s/src/atrip/complex/atrip.org::*Header][Header:1]] #pragma once #include #include @@ -15,8 +15,9 @@ namespace atrip { static int np; static void init(); + template struct Input { - CTF::Tensor *ei = nullptr + CTF::Tensor *ei = nullptr , *ea = nullptr , *Tph = nullptr , *Tpphh = nullptr @@ -27,13 +28,13 @@ namespace atrip { int maxIterations = 0, iterationMod = -1, percentageMod = -1; bool barrier = false; bool chrono = false; - Input& with_epsilon_i(CTF::Tensor * t) { ei = t; return *this; } - Input& with_epsilon_a(CTF::Tensor * t) { ea = t; return *this; } - Input& with_Tai(CTF::Tensor * t) { Tph = t; return *this; } - Input& with_Tabij(CTF::Tensor * t) { Tpphh = t; return *this; } - Input& with_Vabij(CTF::Tensor * t) { Vpphh = t; return *this; } - Input& with_Vijka(CTF::Tensor * t) { Vhhhp = t; return *this; } - Input& with_Vabci(CTF::Tensor * t) { Vppph = t; return *this; } + Input& with_epsilon_i(CTF::Tensor * t) { ei = t; return *this; } + Input& with_epsilon_a(CTF::Tensor * t) { ea = t; return *this; } + Input& with_Tai(CTF::Tensor * t) { Tph = t; return *this; } + Input& with_Tabij(CTF::Tensor * t) { Tpphh = t; return *this; } + Input& with_Vabij(CTF::Tensor * t) { Vpphh = t; return *this; } + Input& with_Vijka(CTF::Tensor * t) { Vhhhp = t; return *this; } + Input& with_Vabci(CTF::Tensor * t) { Vppph = t; return *this; } Input& with_maxIterations(int i) { maxIterations = i; return *this; } Input& with_iterationMod(int i) { iterationMod = i; return *this; } Input& with_percentageMod(int i) { percentageMod = i; return *this; } @@ -44,8 +45,9 @@ namespace atrip { struct Output { double energy; }; - static Output run(Input const& in); + template + static Output run(Input const& in); }; } -// Atrip:1 ends here +// Header:1 ends here diff --git a/include/atrip/Blas.hpp b/include/atrip/Blas.hpp index fa63028..df81d74 100644 --- a/include/atrip/Blas.hpp +++ b/include/atrip/Blas.hpp @@ -1,6 +1,9 @@ -// [[file:../../atrip.org::*Blas][Blas:1]] +// [[file:~/cc4s/src/atrip/complex/atrip.org::*Blas][Blas:1]] #pragma once namespace atrip { + + using Complex = std::complex; + extern "C" { void dgemm_( const char *transa, @@ -9,14 +12,73 @@ namespace atrip { const int *n, const int *k, double *alpha, - const double *A, + const double *a, const int *lda, - const double *B, + const double *b, const int *ldb, double *beta, - double *C, + double *c, + const int *ldc + ); + + void zgemm_( + const char *transa, + const char *transb, + const int *m, + const int *n, + const int *k, + Complex *alpha, + const Complex *A, + const int *lda, + const Complex *B, + const int *ldb, + Complex *beta, + Complex *C, const int *ldc ); } + + + template + void xgemm(const char *transa, + const char *transb, + const int *m, + const int *n, + const int *k, + F *alpha, + const F *A, + const int *lda, + const F *B, + const int *ldb, + F *beta, + F *C, + const int *ldc) { + dgemm_(transa, transb, + m, n, k, + alpha, A, lda, + B, ldb, beta, + C, ldc); + } + + template <> + void xgemm(const char *transa, + const char *transb, + const int *m, + const int *n, + const int *k, + Complex *alpha, + const Complex *A, + const int *lda, + const Complex *B, + const int *ldb, + Complex *beta, + Complex *C, + const int *ldc) { + zgemm_(transa, transb, + m, n, k, + alpha, A, lda, + B, ldb, beta, + C, ldc); + } } // Blas:1 ends here diff --git a/include/atrip/Debug.hpp b/include/atrip/Debug.hpp index 6bdfde2..4347824 100644 --- a/include/atrip/Debug.hpp +++ b/include/atrip/Debug.hpp @@ -1,9 +1,11 @@ -// [[file:../../atrip.org::*Macros][Macros:1]] +// [[file:~/cc4s/src/atrip/complex/atrip.org::*Macros][Macros:1]] #pragma once #include #define ATRIP_BENCHMARK //#define ATRIP_DONT_SLICE -#define ATRIP_DEBUG 1 +#ifndef ATRIP_DEBUG +# define ATRIP_DEBUG 1 +#endif //#define ATRIP_WORKLOAD_DUMP #define ATRIP_USE_DGEMM //#define ATRIP_PRINT_TUPLES @@ -60,20 +62,20 @@ #endif // Macros:1 ends here -// [[file:../../atrip.org::*Macros][Macros:2]] +// [[file:~/cc4s/src/atrip/complex/atrip.org::*Macros][Macros:2]] #ifndef LOG #define LOG(level, name) if (Atrip::rank == 0) std::cout << name << ": " #endif // Macros:2 ends here -// [[file:../../atrip.org::*Macros][Macros:3]] +// [[file:~/cc4s/src/atrip/complex/atrip.org::*Macros][Macros:3]] #ifdef ATRIP_NO_OUTPUT # undef LOG # define LOG(level, name) if (false) std::cout << name << ": " #endif // Macros:3 ends here -// [[file:../../atrip.org::IterationDescriptor][IterationDescriptor]] +// [[file:~/cc4s/src/atrip/complex/atrip.org::IterationDescriptor][IterationDescriptor]] namespace atrip { struct IterationDescription; diff --git a/include/atrip/Equations.hpp b/include/atrip/Equations.hpp index b8496f6..2b90736 100644 --- a/include/atrip/Equations.hpp +++ b/include/atrip/Equations.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*Equations][Equations:1]] +// [[file:~/cc4s/src/atrip/complex/atrip.org::*Equations][Equations:1]] #pragma once #include @@ -6,14 +6,15 @@ namespace atrip { + template double getEnergyDistinct - ( const double epsabc - , std::vector const& epsi - , std::vector const& Tijk_ - , std::vector const& Zijk_ + ( const F epsabc + , std::vector const& epsi + , std::vector const& Tijk_ + , std::vector const& Zijk_ ) { constexpr size_t blockSize=16; - double energy(0.); + F energy(0.); const size_t No = epsi.size(); for (size_t kk=0; kk k ? jj : k; for (size_t j(jstart); j < jend; j++){ - const double ej(epsi[j]); - double facjk( j == k ? 0.5 : 1.0); + F const ej(epsi[j]); + F const facjk = j == k ? F(0.5) : F(1.0); size_t istart = ii > j ? ii : j; for (size_t i(istart); i < iend; i++){ - const double ei(epsi[i]); - double facij ( i==j ? 0.5 : 1.0); - double denominator(epsabc - ei - ej - ek); - double U(Zijk_[i + No*j + No*No*k]); - double V(Zijk_[i + No*k + No*No*j]); - double W(Zijk_[j + No*i + No*No*k]); - double X(Zijk_[j + No*k + No*No*i]); - double Y(Zijk_[k + No*i + No*No*j]); - double Z(Zijk_[k + No*j + No*No*i]); - - double A(Tijk_[i + No*j + No*No*k]); - double B(Tijk_[i + No*k + No*No*j]); - double C(Tijk_[j + No*i + No*No*k]); - double D(Tijk_[j + No*k + No*No*i]); - double E(Tijk_[k + No*i + No*No*j]); - double F(Tijk_[k + No*j + No*No*i]); - double value(3.0*(A*U+B*V+C*W+D*X+E*Y+F*Z) - +((U+X+Y)-2.0*(V+W+Z))*(A+D+E) - +((V+W+Z)-2.0*(U+X+Y))*(B+C+F)); - energy += 2.0*value / denominator * facjk * facij; + const F + ei(epsi[i]) + , facij = i == j ? F(0.5) : F(1.0) + , denominator(epsabc - ei - ej - ek) + , U(Zijk_[i + No*j + No*No*k]) + , V(Zijk_[i + No*k + No*No*j]) + , W(Zijk_[j + No*i + No*No*k]) + , X(Zijk_[j + No*k + No*No*i]) + , Y(Zijk_[k + No*i + No*No*j]) + , Z(Zijk_[k + No*j + No*No*i]) + , A(std::conj(Tijk_[i + No*j + No*No*k])) + , B(std::conj(Tijk_[i + No*k + No*No*j])) + , C(std::conj(Tijk_[j + No*i + No*No*k])) + , D(std::conj(Tijk_[j + No*k + No*No*i])) + , E(std::conj(Tijk_[k + No*i + No*No*j])) + , F(std::conj(Tijk_[k + No*j + No*No*i])) + , value + = 3.0 * ( A * U + + B * V + + C * W + + D * X + + E * Y + + F * Z ) + + ( ( U + X + Y ) + - 2.0 * ( V + W + Z ) + ) * ( A + D + E ) + + ( ( V + W + Z ) + - 2.0 * ( U + X + Y ) + ) * ( B + C + F ) + ; + energy += 2.0 * value / denominator * facjk * facij; } // i } // j } // k } // ii } // jj } // kk - return energy; + return std::real(energy); } + template double getEnergySame - ( const double epsabc - , std::vector const& epsi - , std::vector const& Tijk_ - , std::vector const& Zijk_ + ( const F epsabc + , std::vector const& epsi + , std::vector const& Tijk_ + , std::vector const& Zijk_ ) { constexpr size_t blockSize = 16; const size_t No = epsi.size(); - double energy(0.); + F energy = F(0.); for (size_t kk=0; kk k ? jj : k; for(size_t j(jstart); j < jend; j++){ - const double facjk( j == k ? 0.5 : 1.0); - const double ej(epsi[j]); + const F facjk( j == k ? F(0.5) : F(1.0)); + const F ej(epsi[j]); const size_t istart = ii > j ? ii : j; for(size_t i(istart); i < iend; i++){ - double ei(epsi[i]); - double facij ( i==j ? 0.5 : 1.0); - double denominator(epsabc - ei - ej - ek); - double U(Zijk_[i + No*j + No*No*k]); - double V(Zijk_[j + No*k + No*No*i]); - double W(Zijk_[k + No*i + No*No*j]); - double A(Tijk_[i + No*j + No*No*k]); - double B(Tijk_[j + No*k + No*No*i]); - double C(Tijk_[k + No*i + No*No*j]); - double value(3.0*( A*U + B*V + C*W) - (A+B+C)*(U+V+W)); - energy += 2.0*value / denominator * facjk * facij; + const F + ei(epsi[i]) + , facij ( i==j ? F(0.5) : F(1.0)) + , denominator(epsabc - ei - ej - ek) + , U(Zijk_[i + No*j + No*No*k]) + , V(Zijk_[j + No*k + No*No*i]) + , W(Zijk_[k + No*i + No*No*j]) + , A(std::conj(Tijk_[i + No*j + No*No*k])) + , B(std::conj(Tijk_[j + No*k + No*No*i])) + , C(std::conj(Tijk_[k + No*i + No*No*j])) + , value + = F(3.0) * ( A * U + + B * V + + C * W + ) + - ( A + B + C ) * ( U + V + W ) + ; + energy += F(2.0) * value / denominator * facjk * facij; } // i } // j } // k } // ii } // jj } // kk - return energy; + return std::real(energy); } + template void singlesContribution ( size_t No , size_t Nv , const ABCTuple &abc - , double const* Tph - , double const* VABij - , double const* VACij - , double const* VBCij - , double *Zijk + , F const* Tph + , F const* VABij + , F const* VACij + , F const* VBCij + , F *Zijk ) { const size_t a(abc[0]), b(abc[1]), c(abc[2]); for (size_t k=0; k < No; k++) @@ -125,31 +146,32 @@ namespace atrip { } } + template void doublesContribution ( const ABCTuple &abc , size_t const No , size_t const Nv // -- VABCI - , double const* VABph - , double const* VACph - , double const* VBCph - , double const* VBAph - , double const* VCAph - , double const* VCBph + , F const* VABph + , F const* VACph + , F const* VBCph + , F const* VBAph + , F const* VCAph + , F const* VCBph // -- VHHHA - , double const* VhhhA - , double const* VhhhB - , double const* VhhhC + , F const* VhhhA + , F const* VhhhB + , F const* VhhhC // -- TA - , double const* TAphh - , double const* TBphh - , double const* TCphh + , F const* TAphh + , F const* TBphh + , F const* TCphh // -- TABIJ - , double const* TABhh - , double const* TAChh - , double const* TBChh + , F const* TABhh + , F const* TAChh + , F const* TBChh // -- TIJK - , double *Tijk + , F *Tijk , atrip::Timings& chrono ) { @@ -168,40 +190,47 @@ namespace atrip { Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \ } \ t_reorder.stop(); - #define DGEMM_PARTICLES(__A, __B) \ - atrip::dgemm_( "T" \ - , "N" \ - , (int const*)&NoNo \ - , (int const*)&No \ - , (int const*)&Nv \ - , &one \ - , __A \ - , (int const*)&Nv \ - , __B \ - , (int const*)&Nv \ - , &zero \ - , _t_buffer.data() \ - , (int const*)&NoNo \ - ); - #define DGEMM_HOLES(__A, __B, __TRANSB) \ - atrip::dgemm_( "N" \ - , __TRANSB \ - , (int const*)&NoNo \ - , (int const*)&No \ - , (int const*)&No \ - , &m_one \ - , __A \ - , (int const*)&NoNo \ - , __B \ - , (int const*)&No \ - , &zero \ - , _t_buffer.data() \ - , (int const*)&NoNo \ - ); + #define DGEMM_PARTICLES(__A, __B) \ + atrip::xgemm( "T" \ + , "N" \ + , (int const*)&NoNo \ + , (int const*)&No \ + , (int const*)&Nv \ + , &one \ + , __A \ + , (int const*)&Nv \ + , __B \ + , (int const*)&Nv \ + , &zero \ + , _t_buffer.data() \ + , (int const*)&NoNo \ + ); + #define DGEMM_HOLES(__A, __B, __TRANSB) \ + atrip::xgemm( "N" \ + , __TRANSB \ + , (int const*)&NoNo \ + , (int const*)&No \ + , (int const*)&No \ + , &m_one \ + , __A \ + , (int const*)&NoNo \ + , __B \ + , (int const*)&No \ + , &zero \ + , _t_buffer.data() \ + , (int const*)&NoNo \ + ); + #define MAYBE_CONJ(_conj, _buffer) \ + if (traits::isComplex()) { \ + for (size_t __i = 0; __i < NoNoNo; ++__i) \ + _conj[__i] = std::conj(_buffer[__i]); \ + } else { \ + for (size_t __i = 0; __i < NoNoNo; ++__i) \ + _conj[__i] = _buffer[__i]; \ + } - using F = double; const size_t NoNoNo = No*NoNo; - std::vector _t_buffer; + std::vector _t_buffer; _t_buffer.reserve(NoNoNo); F one{1.0}, m_one{-1.0}, zero{0.0}; @@ -214,38 +243,48 @@ namespace atrip { chrono["doubles:holes"].start(); { // Holes part ============================================================ + + std::vector _vhhh(NoNoNo); + // VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1 + MAYBE_CONJ(_vhhh, VhhhC) chrono["doubles:holes:1"].start(); - DGEMM_HOLES(VhhhC, TABhh, "N") + DGEMM_HOLES(_vhhh.data(), TABhh, "N") REORDER(i, k, j) chrono["doubles:holes:1"].stop(); // VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0 chrono["doubles:holes:2"].start(); - DGEMM_HOLES(VhhhC, TABhh, "T") + DGEMM_HOLES(_vhhh.data(), TABhh, "T") REORDER(j, k, i) chrono["doubles:holes:2"].stop(); + // VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5 + MAYBE_CONJ(_vhhh, VhhhB) chrono["doubles:holes:3"].start(); - DGEMM_HOLES(VhhhB, TAChh, "N") + DGEMM_HOLES(_vhhh.data(), TAChh, "N") REORDER(i, j, k) chrono["doubles:holes:3"].stop(); // VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3 chrono["doubles:holes:4"].start(); - DGEMM_HOLES(VhhhB, TAChh, "T") + DGEMM_HOLES(_vhhh.data(), TAChh, "T") REORDER(k, j, i) chrono["doubles:holes:4"].stop(); + // VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1 + MAYBE_CONJ(_vhhh, VhhhA) chrono["doubles:holes:5"].start(); - DGEMM_HOLES(VhhhA, TBChh, "N") + DGEMM_HOLES(_vhhh.data(), TBChh, "N") REORDER(j, i, k) chrono["doubles:holes:5"].stop(); // VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4 chrono["doubles:holes:6"].start(); - DGEMM_HOLES(VhhhA, TBChh, "T") + DGEMM_HOLES(_vhhh.data(), TBChh, "T") REORDER(k, i, j) chrono["doubles:holes:6"].stop(); + } chrono["doubles:holes"].stop(); + #undef MAYBE_CONJ chrono["doubles:particles"].start(); { // Particle part ========================================================= diff --git a/include/atrip/RankMap.hpp b/include/atrip/RankMap.hpp index 82bb674..8564f9e 100644 --- a/include/atrip/RankMap.hpp +++ b/include/atrip/RankMap.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*The rank mapping][The rank mapping:1]] +// [[file:~/cc4s/src/atrip/complex/atrip.org::*The%20rank%20mapping][The rank mapping:1]] #pragma once #include @@ -7,6 +7,8 @@ #include namespace atrip { + + template struct RankMap { std::vector const lengths; @@ -19,7 +21,7 @@ namespace atrip { 1UL, std::multiplies())) { assert(lengths.size() <= 2); } - size_t find(Slice::Location const& p) const noexcept { + size_t find(typename Slice::Location const& p) const noexcept { return p.source * np + p.rank; } @@ -39,10 +41,10 @@ namespace atrip { return source == nSources() && isPaddingRank(rank); } - Slice::Location - find(ABCTuple const& abc, Slice::Type sliceType) const noexcept { + typename Slice::Location + find(ABCTuple const& abc, typename Slice::Type sliceType) const noexcept { // tuple = {11, 8} when abc = {11, 8, 9} and sliceType = AB - const auto tuple = Slice::subtupleBySlice(abc, sliceType); + const auto tuple = Slice::subtupleBySlice(abc, sliceType); const size_t index = tuple[0] diff --git a/include/atrip/Slice.hpp b/include/atrip/Slice.hpp index a7a5363..877d72a 100644 --- a/include/atrip/Slice.hpp +++ b/include/atrip/Slice.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*The slice][The slice:1]] +// [[file:~/cc4s/src/atrip/complex/atrip.org::*The%20slice][The slice:1]] #pragma once #include #include @@ -7,16 +7,26 @@ #include #include +#include namespace atrip { +namespace traits { + template bool isComplex() { return false; }; + template <> bool isComplex() { return true; }; +namespace mpi { + template MPI_Datatype datatypeOf(void); + template <> MPI_Datatype datatypeOf() { return MPI_DOUBLE; } + template <> MPI_Datatype datatypeOf() { return MPI_DOUBLE_COMPLEX; } +} +} + +template struct Slice { - - using F = double; // The slice:1 ends here -// [[file:../../atrip.org::*The slice][The slice:2]] +// [[file:~/cc4s/src/atrip/complex/atrip.org::*The%20slice][The slice:2]] // ASSOCIATED TYPES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% struct Location { size_t rank; size_t source; }; @@ -93,8 +103,8 @@ struct Slice { // DATABASE ==========================================================={{{1 struct LocalDatabaseElement { - Slice::Name name; - Slice::Info info; + Slice::Name name; + Slice::Info info; }; using LocalDatabase = std::vector; using Database = LocalDatabase; @@ -117,7 +127,7 @@ struct Slice { constexpr int n = 2; // create a sliceLocation to measure in the current architecture // the packing of the struct - Slice::Location measure; + Slice::Location measure; MPI_Datatype dt; const std::vector lengths(n, 1); const MPI_Datatype types[n] = {usizeDt(), usizeDt()}; @@ -141,7 +151,7 @@ struct Slice { static MPI_Datatype sliceInfo () { constexpr int n = 5; MPI_Datatype dt; - Slice::Info measure; + Slice::Info measure; const std::vector lengths(n, 1); const MPI_Datatype types[n] = { vector(2, usizeDt()) @@ -213,10 +223,10 @@ struct Slice { * It is important here to return a reference to a Slice * not to accidentally copy the associated buffer of the slice. */ - static Slice& findOneByType(std::vector &slices, Slice::Type type) { + static Slice& findOneByType(std::vector> &slices, Slice::Type type) { const auto sliceIt = std::find_if(slices.begin(), slices.end(), - [&type](Slice const& s) { + [&type](Slice const& s) { return type == s.info.type; }); WITH_CRAZY_DEBUG @@ -231,11 +241,11 @@ struct Slice { * Check if an info has * */ - static std::vector hasRecycledReferencingToIt - ( std::vector &slices + static std::vector*> hasRecycledReferencingToIt + ( std::vector> &slices , Info const& info ) { - std::vector result; + std::vector*> result; for (auto& s: slices) if ( s.info.recycling == info.type @@ -246,11 +256,11 @@ struct Slice { return result; } - static Slice& - findRecycledSource (std::vector &slices, Slice::Info info) { + static Slice& + findRecycledSource (std::vector> &slices, Slice::Info info) { const auto sliceIt = std::find_if(slices.begin(), slices.end(), - [&info](Slice const& s) { + [&info](Slice const& s) { return info.recycling == s.info.type && info.tuple == s.info.tuple && State::Recycled != s.info.state @@ -270,15 +280,15 @@ struct Slice { return *sliceIt; } - static Slice& findByTypeAbc - ( std::vector &slices - , Slice::Type type + static Slice& findByTypeAbc + ( std::vector> &slices + , Slice::Type type , ABCTuple const& abc ) { - const auto tuple = Slice::subtupleBySlice(abc, type); + const auto tuple = Slice::subtupleBySlice(abc, type); const auto sliceIt = std::find_if(slices.begin(), slices.end(), - [&type, &tuple](Slice const& s) { + [&type, &tuple](Slice const& s) { return type == s.info.type && tuple == s.info.tuple ; @@ -298,11 +308,11 @@ struct Slice { return *sliceIt; } - static Slice& findByInfo(std::vector &slices, - Slice::Info const& info) { + static Slice& findByInfo(std::vector> &slices, + Slice::Info const& info) { const auto sliceIt = std::find_if(slices.begin(), slices.end(), - [&info](Slice const& s) { + [&info](Slice const& s) { // TODO: maybe implement comparison in Info struct return info.type == s.info.type && info.state == s.info.state @@ -448,13 +458,15 @@ struct Slice { }; // struct Slice -std::ostream& operator<<(std::ostream& out, Slice::Location const& v) { +template +std::ostream& operator<<(std::ostream& out, typename Slice::Location const& v) { // TODO: remove me out << "{.r(" << v.rank << "), .s(" << v.source << ")};"; return out; } -std::ostream& operator<<(std::ostream& out, Slice::Info const& i) { +template +std::ostream& operator<<(std::ostream& out, typename Slice::Info const& i) { out << "«t" << i.type << ", s" << i.state << "»" << " ⊙ {" << i.from.rank << ", " << i.from.source << "}" << " ∴ {" << i.tuple[0] << ", " << i.tuple[1] << "}" diff --git a/include/atrip/SliceUnion.hpp b/include/atrip/SliceUnion.hpp index 060dcc2..ec7aff6 100644 --- a/include/atrip/SliceUnion.hpp +++ b/include/atrip/SliceUnion.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*The slice union][The slice union:1]] +// [[file:~/cc4s/src/atrip/complex/atrip.org::*The%20slice%20union][The slice union:1]] #pragma once #include #include @@ -6,8 +6,8 @@ namespace atrip { + template struct SliceUnion { - using F = double; using Tensor = CTF::Tensor; virtual void @@ -20,7 +20,7 @@ namespace atrip { * This means that there can be at most one slice with a given Ty_x_Tu. */ void checkForDuplicates() const { - std::vector tytus; + std::vector::Ty_x_Tu> tytus; for (auto const& s: slices) { if (s.isFree()) continue; tytus.push_back({s.info.type, s.info.tuple}); @@ -33,13 +33,13 @@ namespace atrip { } - std::vector neededSlices(ABCTuple const& abc) { - std::vector needed(sliceTypes.size()); + std::vector::Ty_x_Tu> neededSlices(ABCTuple const& abc) { + std::vector::Ty_x_Tu> needed(sliceTypes.size()); // build the needed vector std::transform(sliceTypes.begin(), sliceTypes.end(), needed.begin(), - [&abc](Slice::Type const type) { - auto tuple = Slice::subtupleBySlice(abc, type); + [&abc](typename Slice::Type const type) { + auto tuple = Slice::subtupleBySlice(abc, type); return std::make_pair(type, tuple); }); return needed; @@ -64,8 +64,9 @@ namespace atrip { * slices. * */ - Slice::LocalDatabase buildLocalDatabase(ABCTuple const& abc) { - Slice::LocalDatabase result; + typename + Slice::LocalDatabase buildLocalDatabase(ABCTuple const& abc) { + typename Slice::LocalDatabase result; auto const needed = neededSlices(abc); @@ -95,7 +96,7 @@ namespace atrip { // need auto const& it = std::find_if(slices.begin(), slices.end(), - [&tuple, &type](Slice const& other) { + [&tuple, &type](Slice const& other) { return other.info.tuple == tuple && other.info.type == type // we only want another slice when it @@ -121,7 +122,7 @@ namespace atrip { // tuple and that has a valid data pointer. auto const& recycleIt = std::find_if(slices.begin(), slices.end(), - [&tuple, &type](Slice const& other) { + [&tuple, &type](Slice const& other) { return other.info.tuple == tuple && other.info.type != type && other.isRecyclable() @@ -132,13 +133,13 @@ namespace atrip { // (which should exist by construction :THINK) // if (recycleIt != slices.end()) { - auto& blank = Slice::findOneByType(slices, Slice::Blank); + auto& blank = Slice::findOneByType(slices, Slice::Blank); // TODO: formalize this through a method to copy information // from another slice blank.data = recycleIt->data; blank.info.type = type; blank.info.tuple = tuple; - blank.info.state = Slice::Recycled; + blank.info.state = Slice::Recycled; blank.info.from = from; blank.info.recycling = recycleIt->info.type; result.push_back({name, blank.info}); @@ -165,17 +166,17 @@ namespace atrip { << " for tuple " << tuple[0] << ", " << tuple[1] << "\n" ; - auto& blank = Slice::findOneByType(slices, Slice::Blank); + auto& blank = Slice::findOneByType(slices, Slice::Blank); blank.info.type = type; blank.info.tuple = tuple; blank.info.from = from; // Handle self sufficiency blank.info.state = Atrip::rank == from.rank - ? Slice::SelfSufficient - : Slice::Fetch + ? Slice::SelfSufficient + : Slice::Fetch ; - if (blank.info.state == Slice::SelfSufficient) { + if (blank.info.state == Slice::SelfSufficient) { blank.data = sources[from.source].data(); } else { if (freePointers.size() == 0) @@ -219,7 +220,7 @@ namespace atrip { // try to find the slice in the needed slices list auto const found = std::find_if(needed.begin(), needed.end(), - [&slice] (Slice::Ty_x_Tu const& tytu) { + [&slice] (typename Slice::Ty_x_Tu const& tytu) { return slice.info.tuple == tytu.second && slice.info.type == tytu.first ; @@ -238,7 +239,7 @@ namespace atrip { // allow to gc unwrapped and recycled, never Fetch, // if we have a Fetch slice then something has gone very wrong. - if (!slice.isUnwrapped() && slice.info.state != Slice::Recycled) + if (!slice.isUnwrapped() && slice.info.state != Slice::Recycled) throw std::domain_error("Trying to garbage collect " " a non-unwrapped slice! " @@ -259,13 +260,13 @@ namespace atrip { // - we should make sure that the data pointer of slice // does not get freed. // - if (slice.info.state == Slice::Ready) { + if (slice.info.state == Slice::Ready) { WITH_OCD WITH_RANK << "__gc__:" << "checking for data recycled dependencies\n"; auto recycled - = Slice::hasRecycledReferencingToIt(slices, slice.info); + = Slice::hasRecycledReferencingToIt(slices, slice.info); if (recycled.size()) { - Slice* newReady = recycled[0]; + Slice* newReady = recycled[0]; WITH_OCD WITH_RANK << "__gc__:" << "swaping recycled " << pretty_print(newReady->info) @@ -290,8 +291,8 @@ namespace atrip { // if the slice is self sufficient, do not dare touching the // pointer, since it is a pointer to our sources in our rank. - if ( slice.info.state == Slice::SelfSufficient - || slice.info.state == Slice::Recycled + if ( slice.info.state == Slice::SelfSufficient + || slice.info.state == Slice::Recycled ) { freeSlicePointer = false; } @@ -313,7 +314,8 @@ namespace atrip { // at this point, let us blank the slice WITH_RANK << "~~~:cl(" << name << ")" << " freeing up slice " - << " info " << slice.info + // TODO: make this possible + // << " info " << slice.info << "\n"; slice.free(); } @@ -323,13 +325,13 @@ namespace atrip { // CONSTRUCTOR SliceUnion( Tensor const& sourceTensor - , std::vector sliceTypes_ + , std::vector::Type> sliceTypes_ , std::vector sliceLength_ , std::vector paramLength , size_t np , MPI_Comm child_world , MPI_Comm global_world - , Slice::Name name_ + , typename Slice::Name name_ , size_t nSliceBuffers = 4 ) : rankMap(paramLength, np) @@ -344,13 +346,13 @@ namespace atrip { , name(name_) , sliceTypes(sliceTypes_) , sliceBuffers(nSliceBuffers, sources[0]) - //, slices(2 * sliceTypes.size(), Slice{ sources[0].size() }) + //, slices(2 * sliceTypes.size(), Slice{ sources[0].size() }) { // constructor begin LOG(0,"Atrip") << "INIT SliceUnion: " << name << "\n"; slices - = std::vector(2 * sliceTypes.size(), { sources[0].size() }); + = std::vector>(2 * sliceTypes.size(), { sources[0].size() }); // TODO: think exactly ^------------------- about this number // initialize the freePointers with the pointers to the buffers @@ -419,19 +421,19 @@ namespace atrip { * \brief Send asynchronously only if the state is Fetch */ void send( size_t otherRank - , Slice::Info const& info + , typename Slice::Info const& info , size_t tag) const noexcept { MPI_Request request; bool sendData_p = false; - if (info.state == Slice::Fetch) sendData_p = true; + if (info.state == Slice::Fetch) sendData_p = true; // TODO: remove this because I have SelfSufficient if (otherRank == info.from.rank) sendData_p = false; if (!sendData_p) return; MPI_Isend( sources[info.from.source].data() , sources[info.from.source].size() - , MPI_DOUBLE /* TODO: adapt this with traits */ + , traits::mpi::datatypeOf() , otherRank , tag , universe @@ -445,19 +447,19 @@ namespace atrip { /** * \brief Receive asynchronously only if the state is Fetch */ - void receive(Slice::Info const& info, size_t tag) noexcept { - auto& slice = Slice::findByInfo(slices, info); + void receive(typename Slice::Info const& info, size_t tag) noexcept { + auto& slice = Slice::findByInfo(slices, info); if (Atrip::rank == info.from.rank) return; - if (slice.info.state == Slice::Fetch) { + if (slice.info.state == Slice::Fetch) { // TODO: do it through the slice class - slice.info.state = Slice::Dispatched; + slice.info.state = Slice::Dispatched; MPI_Request request; slice.request = request; MPI_Irecv( slice.data , slice.size - , MPI_DOUBLE // TODO: Adapt this with traits + , traits::mpi::datatypeOf() , info.from.rank , tag , universe @@ -471,42 +473,42 @@ namespace atrip { for (auto type: sliceTypes) unwrapSlice(type, abc); } - F* unwrapSlice(Slice::Type type, ABCTuple const& abc) { + F* unwrapSlice(typename Slice::Type type, ABCTuple const& abc) { WITH_CRAZY_DEBUG WITH_RANK << "__unwrap__:slice " << type << " w n " << name << " abc" << pretty_print(abc) << "\n"; - auto& slice = Slice::findByTypeAbc(slices, type, abc); - WITH_RANK << "__unwrap__:info " << slice.info << "\n"; + auto& slice = Slice::findByTypeAbc(slices, type, abc); + //WITH_RANK << "__unwrap__:info " << slice.info << "\n"; switch (slice.info.state) { - case Slice::Dispatched: + case Slice::Dispatched: WITH_RANK << "__unwrap__:Fetch: " << &slice << " info " << pretty_print(slice.info) << "\n"; slice.unwrapAndMarkReady(); return slice.data; break; - case Slice::SelfSufficient: + case Slice::SelfSufficient: WITH_RANK << "__unwrap__:SelfSufficient: " << &slice << " info " << pretty_print(slice.info) << "\n"; return slice.data; break; - case Slice::Ready: + case Slice::Ready: WITH_RANK << "__unwrap__:READY: UNWRAPPED ALREADY" << &slice << " info " << pretty_print(slice.info) << "\n"; return slice.data; break; - case Slice::Recycled: + case Slice::Recycled: WITH_RANK << "__unwrap__:RECYCLED " << &slice << " info " << pretty_print(slice.info) << "\n"; return unwrapSlice(slice.info.recycling, abc); break; - case Slice::Fetch: - case Slice::Acceptor: + case Slice::Fetch: + case Slice::Acceptor: throw std::domain_error("Can't unwrap an acceptor or fetch slice!"); break; default: @@ -515,24 +517,26 @@ namespace atrip { return slice.data; } - const RankMap rankMap; + const RankMap rankMap; const MPI_Comm world; const MPI_Comm universe; const std::vector sliceLength; std::vector< std::vector > sources; - std::vector< Slice > slices; - Slice::Name name; - const std::vector sliceTypes; + std::vector< Slice > slices; + typename Slice::Name name; + const std::vector::Type> sliceTypes; std::vector< std::vector > sliceBuffers; std::set freePointers; }; - SliceUnion& - unionByName(std::vector const& unions, Slice::Name name) { + template + SliceUnion& + unionByName(std::vector*> const& unions, + typename Slice::Name name) { const auto sliceUnionIt = std::find_if(unions.begin(), unions.end(), - [&name](SliceUnion const* s) { + [&name](SliceUnion const* s) { return name == s->name; }); if (sliceUnionIt == unions.end()) diff --git a/include/atrip/Tuples.hpp b/include/atrip/Tuples.hpp index 090eb9b..5d4b69f 100644 --- a/include/atrip/Tuples.hpp +++ b/include/atrip/Tuples.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*Tuples][Tuples:1]] +// [[file:~/cc4s/src/atrip/complex/atrip.org::*Tuples][Tuples:1]] #pragma once #include diff --git a/include/atrip/Unions.hpp b/include/atrip/Unions.hpp index de924ee..db3b6b7 100644 --- a/include/atrip/Unions.hpp +++ b/include/atrip/Unions.hpp @@ -1,15 +1,16 @@ -// [[file:../../atrip.org::*Unions][Unions:1]] +// [[file:~/cc4s/src/atrip/complex/atrip.org::*Unions][Unions:1]] #pragma once #include namespace atrip { + template void sliceIntoVector - ( std::vector &v - , CTF::Tensor &toSlice + ( std::vector &v + , CTF::Tensor &toSlice , std::vector const low , std::vector const up - , CTF::Tensor const& origin + , CTF::Tensor const& origin , std::vector const originLow , std::vector const originUp ) { @@ -36,155 +37,159 @@ namespace atrip { , origin_.low.data() , origin_.up.data() , 1.0); - memcpy(v.data(), toSlice.data, sizeof(double) * v.size()); + memcpy(v.data(), toSlice.data, sizeof(F) * v.size()); #endif } - struct TAPHH : public SliceUnion { - TAPHH( Tensor const& sourceTensor + template + struct TAPHH : public SliceUnion { + TAPHH( CTF::Tensor const& sourceTensor , size_t No , size_t Nv , size_t np , MPI_Comm child_world , MPI_Comm global_world - ) : SliceUnion( sourceTensor - , {Slice::A, Slice::B, Slice::C} - , {Nv, No, No} // size of the slices - , {Nv} - , np - , child_world - , global_world - , Slice::TA - , 4) { + ) : SliceUnion( sourceTensor + , {Slice::A, Slice::B, Slice::C} + , {Nv, No, No} // size of the slices + , {Nv} + , np + , child_world + , global_world + , Slice::TA + , 4) { init(sourceTensor); } - void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override + void sliceIntoBuffer(size_t it, CTF::Tensor &to, CTF::Tensor const& from) override { - const int Nv = sliceLength[0] - , No = sliceLength[1] - , a = rankMap.find({static_cast(Atrip::rank), it}); + const int Nv = this->sliceLength[0] + , No = this->sliceLength[1] + , a = this->rankMap.find({static_cast(Atrip::rank), it}); ; - sliceIntoVector( sources[it] - , to, {0, 0, 0}, {Nv, No, No} - , from, {a, 0, 0, 0}, {a+1, Nv, No, No} - ); + sliceIntoVector( this->sources[it] + , to, {0, 0, 0}, {Nv, No, No} + , from, {a, 0, 0, 0}, {a+1, Nv, No, No} + ); } }; - struct HHHA : public SliceUnion { - HHHA( Tensor const& sourceTensor + template + struct HHHA : public SliceUnion { + HHHA( CTF::Tensor const& sourceTensor , size_t No , size_t Nv , size_t np , MPI_Comm child_world , MPI_Comm global_world - ) : SliceUnion( sourceTensor - , {Slice::A, Slice::B, Slice::C} - , {No, No, No} // size of the slices - , {Nv} // size of the parametrization - , np - , child_world - , global_world - , Slice::VIJKA - , 4) { + ) : SliceUnion( sourceTensor + , {Slice::A, Slice::B, Slice::C} + , {No, No, No} // size of the slices + , {Nv} // size of the parametrization + , np + , child_world + , global_world + , Slice::VIJKA + , 4) { init(sourceTensor); } - void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override + void sliceIntoBuffer(size_t it, CTF::Tensor &to, CTF::Tensor const& from) override { - const int No = sliceLength[0] - , a = rankMap.find({static_cast(Atrip::rank), it}) + const int No = this->sliceLength[0] + , a = this->rankMap.find({static_cast(Atrip::rank), it}) ; - sliceIntoVector( sources[it] - , to, {0, 0, 0}, {No, No, No} - , from, {0, 0, 0, a}, {No, No, No, a+1} - ); + sliceIntoVector( this->sources[it] + , to, {0, 0, 0}, {No, No, No} + , from, {0, 0, 0, a}, {No, No, No, a+1} + ); } }; - struct ABPH : public SliceUnion { - ABPH( Tensor const& sourceTensor + template + struct ABPH : public SliceUnion { + ABPH( CTF::Tensor const& sourceTensor , size_t No , size_t Nv , size_t np , MPI_Comm child_world , MPI_Comm global_world - ) : SliceUnion( sourceTensor - , { Slice::AB, Slice::BC, Slice::AC - , Slice::BA, Slice::CB, Slice::CA - } - , {Nv, No} // size of the slices - , {Nv, Nv} // size of the parametrization - , np - , child_world - , global_world - , Slice::VABCI - , 2*6) { + ) : SliceUnion( sourceTensor + , { Slice::AB, Slice::BC, Slice::AC + , Slice::BA, Slice::CB, Slice::CA + } + , {Nv, No} // size of the slices + , {Nv, Nv} // size of the parametrization + , np + , child_world + , global_world + , Slice::VABCI + , 2*6) { init(sourceTensor); } - void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override { + void sliceIntoBuffer(size_t it, CTF::Tensor &to, CTF::Tensor const& from) override { - const int Nv = sliceLength[0] - , No = sliceLength[1] - , el = rankMap.find({static_cast(Atrip::rank), it}) + const int Nv = this->sliceLength[0] + , No = this->sliceLength[1] + , el = this->rankMap.find({static_cast(Atrip::rank), it}) , a = el % Nv , b = el / Nv ; - sliceIntoVector( sources[it] - , to, {0, 0}, {Nv, No} - , from, {a, b, 0, 0}, {a+1, b+1, Nv, No} - ); + sliceIntoVector( this->sources[it] + , to, {0, 0}, {Nv, No} + , from, {a, b, 0, 0}, {a+1, b+1, Nv, No} + ); } }; - struct ABHH : public SliceUnion { - ABHH( Tensor const& sourceTensor + template + struct ABHH : public SliceUnion { + ABHH( CTF::Tensor const& sourceTensor , size_t No , size_t Nv , size_t np , MPI_Comm child_world , MPI_Comm global_world - ) : SliceUnion( sourceTensor - , {Slice::AB, Slice::BC, Slice::AC} - , {No, No} // size of the slices - , {Nv, Nv} // size of the parametrization - , np - , child_world - , global_world - , Slice::VABIJ - , 6) { + ) : SliceUnion( sourceTensor + , {Slice::AB, Slice::BC, Slice::AC} + , {No, No} // size of the slices + , {Nv, Nv} // size of the parametrization + , np + , child_world + , global_world + , Slice::VABIJ + , 6) { init(sourceTensor); } - void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override { + void sliceIntoBuffer(size_t it, CTF::Tensor &to, CTF::Tensor const& from) override { const int Nv = from.lens[0] - , No = sliceLength[1] - , el = rankMap.find({static_cast(Atrip::rank), it}) + , No = this->sliceLength[1] + , el = this->rankMap.find({static_cast(Atrip::rank), it}) , a = el % Nv , b = el / Nv ; - sliceIntoVector( sources[it] - , to, {0, 0}, {No, No} - , from, {a, b, 0, 0}, {a+1, b+1, No, No} - ); + sliceIntoVector( this->sources[it] + , to, {0, 0}, {No, No} + , from, {a, b, 0, 0}, {a+1, b+1, No, No} + ); } @@ -192,39 +197,40 @@ namespace atrip { }; - struct TABHH : public SliceUnion { - TABHH( Tensor const& sourceTensor + template + struct TABHH : public SliceUnion { + TABHH( CTF::Tensor const& sourceTensor , size_t No , size_t Nv , size_t np , MPI_Comm child_world , MPI_Comm global_world - ) : SliceUnion( sourceTensor - , {Slice::AB, Slice::BC, Slice::AC} - , {No, No} // size of the slices - , {Nv, Nv} // size of the parametrization - , np - , child_world - , global_world - , Slice::TABIJ - , 6) { + ) : SliceUnion( sourceTensor + , {Slice::AB, Slice::BC, Slice::AC} + , {No, No} // size of the slices + , {Nv, Nv} // size of the parametrization + , np + , child_world + , global_world + , Slice::TABIJ + , 6) { init(sourceTensor); } - void sliceIntoBuffer(size_t it, Tensor &to, Tensor const& from) override { + void sliceIntoBuffer(size_t it, CTF::Tensor &to, CTF::Tensor const& from) override { // TODO: maybe generalize this with ABHH const int Nv = from.lens[0] - , No = sliceLength[1] - , el = rankMap.find({static_cast(Atrip::rank), it}) + , No = this->sliceLength[1] + , el = this->rankMap.find({static_cast(Atrip::rank), it}) , a = el % Nv , b = el / Nv ; - sliceIntoVector( sources[it] - , to, {0, 0}, {No, No} - , from, {a, b, 0, 0}, {a+1, b+1, No, No} - ); + sliceIntoVector( this->sources[it] + , to, {0, 0}, {No, No} + , from, {a, b, 0, 0}, {a+1, b+1, No, No} + ); } diff --git a/include/atrip/Utils.hpp b/include/atrip/Utils.hpp index a6bd743..bff3d19 100644 --- a/include/atrip/Utils.hpp +++ b/include/atrip/Utils.hpp @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*Utils][Utils:1]] +// [[file:~/cc4s/src/atrip/complex/atrip.org::*Utils][Utils:1]] #pragma once #include #include diff --git a/src/atrip/Atrip.cxx b/src/atrip/Atrip.cxx index 64dea9b..fc613b6 100644 --- a/src/atrip/Atrip.cxx +++ b/src/atrip/Atrip.cxx @@ -1,4 +1,4 @@ -// [[file:../../atrip.org::*Main][Main:1]] +// [[file:~/cc4s/src/atrip/complex/atrip.org::*Main][Main:1]] #include #include @@ -23,7 +23,8 @@ void Atrip::init() { MPI_Comm_size(MPI_COMM_WORLD, &Atrip::np); } -Atrip::Output Atrip::run(Atrip::Input const& in) { +template +Atrip::Output Atrip::run(Atrip::Input const& in) { const int np = Atrip::np; const int rank = Atrip::rank; @@ -38,14 +39,14 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { LOG(0,"Atrip") << "Nv: " << Nv << "\n"; // allocate the three scratches, see piecuch - std::vector Tijk(No*No*No) // doubles only (see piecuch) - , Zijk(No*No*No) // singles + doubles (see piecuch) - // we need local copies of the following tensors on every - // rank - , epsi(No) - , epsa(Nv) - , Tai(No * Nv) - ; + std::vector Tijk(No*No*No) // doubles only (see piecuch) + , Zijk(No*No*No) // singles + doubles (see piecuch) + // we need local copies of the following tensors on every + // rank + , epsi(No) + , epsa(Nv) + , Tai(No * Nv) + ; in.ei->read_all(epsi.data()); in.ea->read_all(epsa.data()); @@ -74,20 +75,20 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { chrono["nv-slices"].start(); // BUILD SLICES PARAMETRIZED BY NV ==================================={{{1 LOG(0,"Atrip") << "BUILD NV-SLICES\n"; - TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); + TAPHH taphh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); + HHHA hhha(*in.Vhhhp, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); chrono["nv-slices"].stop(); chrono["nv-nv-slices"].start(); // BUILD SLICES PARAMETRIZED BY NV x NV =============================={{{1 LOG(0,"Atrip") << "BUILD NV x NV-SLICES\n"; - ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); - TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); + ABPH abph(*in.Vppph, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); + ABHH abhh(*in.Vpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); + TABHH tabhh(*in.Tpphh, (size_t)No, (size_t)Nv, (size_t)np, child_comm, universe); chrono["nv-nv-slices"].stop(); // all tensors - std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh}; + std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh}; //CONSTRUCT TUPLE LIST ==============================================={{{1 LOG(0,"Atrip") << "BUILD TUPLE LIST\n"; @@ -121,18 +122,20 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { = [&tuplesList](size_t const i) { return i >= tuplesList.size(); }; + using Database = typename Slice::Database; + using LocalDatabase = typename Slice::LocalDatabase; auto communicateDatabase = [ &unions , np , &chrono - ] (ABCTuple const& abc, MPI_Comm const& c) -> Slice::Database { + ] (ABCTuple const& abc, MPI_Comm const& c) -> Database { chrono["db:comm:type:do"].start(); - auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement(); + auto MPI_LDB_ELEMENT = Slice::mpi::localDatabaseElement(); chrono["db:comm:type:do"].stop(); chrono["db:comm:ldb"].start(); - Slice::LocalDatabase ldb; + LocalDatabase ldb; for (auto const& tensor: unions) { auto const& tensorDb = tensor->buildLocalDatabase(abc); @@ -140,7 +143,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { } chrono["db:comm:ldb"].stop(); - Slice::Database db(np * ldb.size(), ldb[0]); + Database db(np * ldb.size(), ldb[0]); chrono["oneshot-db:comm:allgather"].start(); chrono["db:comm:allgather"].start(); @@ -162,7 +165,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { }; auto doIOPhase - = [&unions, &rank, &np, &universe, &chrono] (Slice::Database const& db) { + = [&unions, &rank, &np, &universe, &chrono] (Database const& db) { const size_t localDBLength = db.size() / np; @@ -212,7 +215,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { ; for (auto it = begin; it != end; ++it) { sendTag++; - Slice::LocalDatabaseElement const& el = *it; + typename Slice::LocalDatabaseElement const& el = *it; if (el.info.from.rank != rank) continue; @@ -261,14 +264,13 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { // START MAIN LOOP ======================================================{{{1 - Slice::Database db; - for ( size_t i = abcIndex.first, iteration = 1 ; i < abcIndex.second ; i++, iteration++ ) { chrono["iterations"].start(); + // check overhead from chrono over all iterations chrono["start:stop"].start(); chrono["start:stop"].stop(); @@ -347,7 +349,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { WITH_RANK << "__comm__:" << iteration << "th communicating database\n"; chrono["db:comm"].start(); //const auto db = communicateDatabase(*abcNext, universe); - db = communicateDatabase(*abcNext, universe); + Database db = communicateDatabase(*abcNext, universe); chrono["db:comm"].stop(); chrono["db:io"].start(); doIOPhase(db); @@ -368,30 +370,30 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { ))) chrono["oneshot-doubles"].start(); chrono["doubles"].start(); - doublesContribution( abc, (size_t)No, (size_t)Nv - // -- VABCI - , abph.unwrapSlice(Slice::AB, abc) - , abph.unwrapSlice(Slice::AC, abc) - , abph.unwrapSlice(Slice::BC, abc) - , abph.unwrapSlice(Slice::BA, abc) - , abph.unwrapSlice(Slice::CA, abc) - , abph.unwrapSlice(Slice::CB, abc) - // -- VHHHA - , hhha.unwrapSlice(Slice::A, abc) - , hhha.unwrapSlice(Slice::B, abc) - , hhha.unwrapSlice(Slice::C, abc) - // -- TA - , taphh.unwrapSlice(Slice::A, abc) - , taphh.unwrapSlice(Slice::B, abc) - , taphh.unwrapSlice(Slice::C, abc) - // -- TABIJ - , tabhh.unwrapSlice(Slice::AB, abc) - , tabhh.unwrapSlice(Slice::AC, abc) - , tabhh.unwrapSlice(Slice::BC, abc) - // -- TIJK - , Tijk.data() - , chrono - ); + doublesContribution( abc, (size_t)No, (size_t)Nv + // -- VABCI + , abph.unwrapSlice(Slice::AB, abc) + , abph.unwrapSlice(Slice::AC, abc) + , abph.unwrapSlice(Slice::BC, abc) + , abph.unwrapSlice(Slice::BA, abc) + , abph.unwrapSlice(Slice::CA, abc) + , abph.unwrapSlice(Slice::CB, abc) + // -- VHHHA + , hhha.unwrapSlice(Slice::A, abc) + , hhha.unwrapSlice(Slice::B, abc) + , hhha.unwrapSlice(Slice::C, abc) + // -- TA + , taphh.unwrapSlice(Slice::A, abc) + , taphh.unwrapSlice(Slice::B, abc) + , taphh.unwrapSlice(Slice::C, abc) + // -- TABIJ + , tabhh.unwrapSlice(Slice::AB, abc) + , tabhh.unwrapSlice(Slice::AC, abc) + , tabhh.unwrapSlice(Slice::BC, abc) + // -- TIJK + , Tijk.data() + , chrono + ); WITH_RANK << iteration << "-th doubles done\n"; chrono["doubles"].stop(); chrono["oneshot-doubles"].stop(); @@ -409,12 +411,12 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { for (size_t I(0); I < Zijk.size(); I++) Zijk[I] = Tijk[I]; chrono["reorder"].stop(); chrono["singles"].start(); - singlesContribution( No, Nv, abc - , Tai.data() - , abhh.unwrapSlice(Slice::AB, abc) - , abhh.unwrapSlice(Slice::AC, abc) - , abhh.unwrapSlice(Slice::BC, abc) - , Zijk.data()); + singlesContribution( No, Nv, abc + , Tai.data() + , abhh.unwrapSlice(Slice::AB, abc) + , abhh.unwrapSlice(Slice::AC, abc) + , abhh.unwrapSlice(Slice::BC, abc) + , Zijk.data()); chrono["singles"].stop(); } @@ -426,13 +428,13 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { int distinct(0); if (abc[0] == abc[1]) distinct++; if (abc[1] == abc[2]) distinct--; - const double epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]); + const F epsabc(epsa[abc[0]] + epsa[abc[1]] + epsa[abc[2]]); chrono["energy"].start(); if ( distinct == 0) - tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk); + tupleEnergy = getEnergyDistinct(epsabc, epsi, Tijk, Zijk); else - tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk); + tupleEnergy = getEnergySame(epsabc, epsi, Tijk, Zijk); chrono["energy"].stop(); #if defined(HAVE_OCD) || defined(ATRIP_PRINT_TUPLES) @@ -473,8 +475,8 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { << " :abc " << pretty_print(abc) << " :abcN " << pretty_print(*abcNext) << "\n"; - for (auto const& slice: u->slices) - WITH_RANK << "__gc__:guts:" << slice.info << "\n"; + // for (auto const& slice: u->slices) + // WITH_RANK << "__gc__:guts:" << slice.info << "\n"; u->clearUnusedSlicesForNext(*abcNext); WITH_RANK << "__gc__: checking validity\n"; @@ -482,13 +484,13 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { #ifdef HAVE_OCD // check for validity of the slices for (auto type: u->sliceTypes) { - auto tuple = Slice::subtupleBySlice(abc, type); + auto tuple = Slice::subtupleBySlice(abc, type); for (auto& slice: u->slices) { if ( slice.info.type == type && slice.info.tuple == tuple && slice.isDirectlyFetchable() ) { - if (slice.info.state == Slice::Dispatched) + if (slice.info.state == Slice::Dispatched) throw std::domain_error( "This slice should not be undispatched! " + pretty_print(slice.info)); } @@ -555,4 +557,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { return { - globalEnergy }; } +// instantiate +template Atrip::Output Atrip::run(Atrip::Input const& in); +template Atrip::Output Atrip::run(Atrip::Input const& in); // Main:1 ends here