From c7e3fa45bd07a860471b80aec6d15df9a5a2da3e Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Wed, 25 Jan 2023 12:50:19 +0100 Subject: [PATCH] Add old version of energies and only generate code for doubles --- include/atrip/Operations.hpp | 2 + include/atrip/SliceUnion.hpp | 7 +- src/atrip/Atrip.cxx | 2 +- src/atrip/Equations.cxx | 129 +++++++++++++++++++++++++++++++++++ 4 files changed, 137 insertions(+), 3 deletions(-) diff --git a/include/atrip/Operations.hpp b/include/atrip/Operations.hpp index 40e3430..69ef5aa 100644 --- a/include/atrip/Operations.hpp +++ b/include/atrip/Operations.hpp @@ -38,6 +38,8 @@ namespace acc { __MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__ F maybeConjugateScalar(const F &a) { return a; } + // TODO: instantiate for std::complex + #if defined(HAVE_CUDA) template <> __MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__ diff --git a/include/atrip/SliceUnion.hpp b/include/atrip/SliceUnion.hpp index 06b9f92..fa79a54 100644 --- a/include/atrip/SliceUnion.hpp +++ b/include/atrip/SliceUnion.hpp @@ -573,12 +573,15 @@ template // TODO: do it through the slice class slice.info.state = Slice::Dispatched; #if defined(HAVE_CUDA) && defined(ATRIP_SOURCES_IN_GPU) -# if !defined(ATRIP_CUDA_AWARE_MPI) +# if !defined(ATRIP_CUDA_AWARE_MPI) # error "You need CUDA aware MPI to have slices on the GPU" # endif MPI_Irecv((void*)slice.data, +#elif defined(HAVE_CUDA) && !defined(ATRIP_SOURCES_IN_GPU) + slice.mpi_data = (F*)malloc(sizeof(F) * slice.size); + MPI_Irecv(slice.mpi_data, #else - MPI_Irecv(slice.data, + MPI_Irecv((void*)slice.data, #endif slice.size, traits::mpi::datatypeOf(), diff --git a/src/atrip/Atrip.cxx b/src/atrip/Atrip.cxx index 6c3045f..51cad53 100644 --- a/src/atrip/Atrip.cxx +++ b/src/atrip/Atrip.cxx @@ -903,5 +903,5 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { } // instantiate template Atrip::Output Atrip::run(Atrip::Input const& in); -template Atrip::Output Atrip::run(Atrip::Input const& in); +// template Atrip::Output Atrip::run(Atrip::Input const& in); // Main:1 ends here diff --git a/src/atrip/Equations.cxx b/src/atrip/Equations.cxx index a1b0c4b..efe4ce4 100644 --- a/src/atrip/Equations.cxx +++ b/src/atrip/Equations.cxx @@ -102,6 +102,7 @@ namespace atrip { # define MIN(a, b) std::min((a), (b)) #endif +#if defined(ATRIP_NEW_ENERGY) // [[file:~/cuda/atrip/atrip.org::*Energy][Energy:2]] template @@ -250,6 +251,131 @@ void getEnergySame } // Energy:2 ends here +#else + +// [[file:~/cuda/atrip/atrip.org::*Energy][Energy:2]] +template +__MAYBE_GLOBAL__ +void getEnergyDistinct + ( F const epsabc + , size_t const No + , F* const epsi + , F* const Tijk + , F* const Zijk + , double* _energy + ) { + constexpr size_t blockSize=16; + F energy(0.); + for (size_t kk=0; kk k ? jj : k; + for (size_t j(jstart); j < jend; j++){ + F const ej(epsi[j]); + F const facjk = j == k ? F(0.5) : F(1.0); + size_t istart = ii > j ? ii : j; + for (size_t i(istart); i < iend; i++){ + const F + ei(epsi[i]) + , facij = i == j ? F(0.5) : F(1.0) + , denominator(epsabc - ei - ej - ek) + , U(Zijk[i + No*j + No*No*k]) + , V(Zijk[i + No*k + No*No*j]) + , W(Zijk[j + No*i + No*No*k]) + , X(Zijk[j + No*k + No*No*i]) + , Y(Zijk[k + No*i + No*No*j]) + , Z(Zijk[k + No*j + No*No*i]) + , A(acc::maybeConjugateScalar(Tijk[i + No*j + No*No*k])) + , B(acc::maybeConjugateScalar(Tijk[i + No*k + No*No*j])) + , C(acc::maybeConjugateScalar(Tijk[j + No*i + No*No*k])) + , D(acc::maybeConjugateScalar(Tijk[j + No*k + No*No*i])) + , E(acc::maybeConjugateScalar(Tijk[k + No*i + No*No*j])) + , _F(acc::maybeConjugateScalar(Tijk[k + No*j + No*No*i])) + , value + = 3.0 * ( A * U + + B * V + + C * W + + D * X + + E * Y + + _F * Z ) + + ( ( U + X + Y ) + - 2.0 * ( V + W + Z ) + ) * ( A + D + E ) + + ( ( V + W + Z ) + - 2.0 * ( U + X + Y ) + ) * ( B + C + _F ) + ; + energy += 2.0 * value / denominator * facjk * facij; + } // i + } // j + } // k + } // ii + } // jj + } // kk + *_energy = acc::real(energy); +} + + +template +__MAYBE_GLOBAL__ +void getEnergySame + ( F const epsabc + , size_t const No + , F* const epsi + , F* const Tijk + , F* const Zijk + , double* _energy + ) { + constexpr size_t blockSize = 16; + F energy = F(0.); + for (size_t kk=0; kk k ? jj : k; + for(size_t j(jstart); j < jend; j++){ + const F facjk( j == k ? F(0.5) : F(1.0)); + const F ej(epsi[j]); + const size_t istart = ii > j ? ii : j; + for(size_t i(istart); i < iend; i++){ + const F + ei(epsi[i]) + , facij ( i==j ? F(0.5) : F(1.0)) + , denominator(epsabc - ei - ej - ek) + , U(Zijk[i + No*j + No*No*k]) + , V(Zijk[j + No*k + No*No*i]) + , W(Zijk[k + No*i + No*No*j]) + , A(acc::maybeConjugateScalar(Tijk[i + No*j + No*No*k])) + , B(acc::maybeConjugateScalar(Tijk[j + No*k + No*No*i])) + , C(acc::maybeConjugateScalar(Tijk[k + No*i + No*No*j])) + , value + = F(3.0) * ( A * U + + B * V + + C * W + ) + - ( A + B + C ) * ( U + V + W ) + ; + energy += F(2.0) * value / denominator * facjk * facij; + } // i + } // j + } // k + } // ii + } // jj + } // kk + *_energy = acc::real(energy); +} +// Energy:2 ends here +#endif /* defined(ATRIP_NEW_ENERGY) */ + // [[file:~/cuda/atrip/atrip.org::*Energy][Energy:3]] // instantiate double template @@ -274,6 +400,8 @@ void getEnergySame , DataFieldType* energy ); +// TODO: put this back in +#if defined(ATRIP_WITH_COMPLEX) // instantiate Complex template __MAYBE_GLOBAL__ @@ -297,6 +425,7 @@ void getEnergySame , DataFieldType* energy ); // Energy:3 ends here +#endif // [[file:~/cuda/atrip/atrip.org::*Singles%20contribution][Singles contribution:2]] template __MAYBE_GLOBAL__