Fix problem with complex numbers

This commit is contained in:
Alejandro Gallo 2022-07-14 00:17:24 +02:00
parent 565fb1dcc8
commit c757c4650c
2 changed files with 245 additions and 144 deletions

377
atrip.org
View File

@ -214,17 +214,49 @@ of a GPU architecture.
#+begin_src c++ :tangle (atrip-types-h) #+begin_src c++ :tangle (atrip-types-h)
#pragma once #pragma once
#include <atrip/Complex.hpp>
#include <atrip/Atrip.hpp>
namespace atrip {
template <typename F> template <typename F>
struct DataField;
template <>
struct DataField<double> {
using type = double;
};
#if defined(HAVE_CUDA) #if defined(HAVE_CUDA)
template <typename F>
using DataPtr = CUdeviceptr; using DataPtr = CUdeviceptr;
#define DataNullPtr 0x00 #define DataNullPtr 0x00
#define _AT_(_array, _idx) ((F*)(_array))[(_idx)]
template <>
struct DataField<Complex> {
using type = cuDoubleComplex;
};
#else #else
template <typename F>
using DataPtr = F*; using DataPtr = F*;
#define DataNullPtr nullptr #define DataNullPtr nullptr
#define _AT_(_array, _idx) (_array)[(_idx)]
template <>
struct DataField<Complex> {
using type = Complex;
};
#endif #endif
template <typename F>
using DataFieldType = typename DataField<F>::type;
}
#+end_src #+end_src
@ -748,7 +780,7 @@ The main behaviour of the function should
#if defined(HAVE_CUDA) #if defined(HAVE_CUDA)
// copy the retrieved mpi data to the device // copy the retrieved mpi data to the device
cuMemcpy((DataPtr<F>)mpi_data, data, size); cuMemcpyHtoD(data, (void*)mpi_data, sizeof(F) * size);
std::free(mpi_data); std::free(mpi_data);
#endif #endif
@ -2558,6 +2590,10 @@ tensor contractions.
#include<atrip/Blas.hpp> #include<atrip/Blas.hpp>
#include<atrip/Utils.hpp> #include<atrip/Utils.hpp>
#if defined(HAVE_CUDA)
#include<thrust/device_vector.h>
#endif
namespace atrip { namespace atrip {
using ABCTuple = std::array<size_t, 3>; using ABCTuple = std::array<size_t, 3>;
@ -2886,26 +2922,27 @@ V^{{\color{blue}ab}}_{{\color{red}e}i} T^{{\color{blue}c}{\color{red}e}}_{ij} \
, size_t const No , size_t const No
, size_t const Nv , size_t const Nv
// -- VABCI // -- VABCI
, F* const VABph , DataPtr<F> const VABph
, F* const VACph , DataPtr<F> const VACph
, F* const VBCph , DataPtr<F> const VBCph
, F* const VBAph , DataPtr<F> const VBAph
, F* const VCAph , DataPtr<F> const VCAph
, F* const VCBph , DataPtr<F> const VCBph
// -- VHHHA // -- VHHHA
, F* const VhhhA , DataPtr<F> const VhhhA
, F* const VhhhB , DataPtr<F> const VhhhB
, F* const VhhhC , DataPtr<F> const VhhhC
// -- TA // -- TA
, F* const TAphh , DataPtr<F> const TAphh
, F* const TBphh , DataPtr<F> const TBphh
, F* const TCphh , DataPtr<F> const TCphh
// -- TABIJ // -- TABIJ
, F* const TABhh , DataPtr<F> const TABhh
, F* const TAChh , DataPtr<F> const TAChh
, F* const TBChh , DataPtr<F> const TBChh
// -- TIJK // -- TIJK
, F* Tijk // , DataPtr<F> Tijk
, DataFieldType<F>* Tijk_
); );
#+end_src #+end_src
@ -2919,91 +2956,141 @@ V^{{\color{blue}ab}}_{{\color{red}e}i} T^{{\color{blue}c}{\color{red}e}}_{ij} \
, size_t const No , size_t const No
, size_t const Nv , size_t const Nv
// -- VABCI // -- VABCI
, F* const VABph , DataPtr<F> const VABph
, F* const VACph , DataPtr<F> const VACph
, F* const VBCph , DataPtr<F> const VBCph
, F* const VBAph , DataPtr<F> const VBAph
, F* const VCAph , DataPtr<F> const VCAph
, F* const VCBph , DataPtr<F> const VCBph
// -- VHHHA // -- VHHHA
, F* const VhhhA , DataPtr<F> const VhhhA
, F* const VhhhB , DataPtr<F> const VhhhB
, F* const VhhhC , DataPtr<F> const VhhhC
// -- TA // -- TA
, F* const TAphh , DataPtr<F> const TAphh
, F* const TBphh , DataPtr<F> const TBphh
, F* const TCphh , DataPtr<F> const TCphh
// -- TABIJ // -- TABIJ
, F* const TABhh , DataPtr<F> const TABhh
, F* const TAChh , DataPtr<F> const TAChh
, F* const TBChh , DataPtr<F> const TBChh
// -- TIJK // -- TIJK
, F* Tijk // , DataPtr<F> Tijk_
, DataFieldType<F>* Tijk_
) { ) {
const size_t a = abc[0], b = abc[1], c = abc[2] const size_t a = abc[0], b = abc[1], c = abc[2]
, NoNo = No*No, NoNv = No*Nv , NoNo = No*No, NoNv = No*Nv
; ;
#if defined(ATRIP_USE_DGEMM) typename DataField<F>::type* Tijk = (typename DataField<F>::type*) Tijk_;
#define _IJK_(i, j, k) i + j*No + k*NoNo
#define REORDER(__II, __JJ, __KK) \ #if defined(ATRIP_USE_DGEMM)
#define _IJK_(i, j, k) i + j*No + k*NoNo
#define REORDER(__II, __JJ, __KK) \
WITH_CHRONO("doubles:reorder", \ WITH_CHRONO("doubles:reorder", \
for (size_t k = 0; k < No; k++) \ for (size_t k = 0; k < No; k++) \
for (size_t j = 0; j < No; j++) \ for (size_t j = 0; j < No; j++) \
for (size_t i = 0; i < No; i++) { \ for (size_t i = 0; i < No; i++) { \
Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \ Tijk[_IJK_(i, j, k)] += _t_buffer_p[_IJK_(__II, __JJ, __KK)]; \
} \ } \
) )
#define DGEMM_PARTICLES(__A, __B) \ #if defined(HAVE_CUDA)
atrip::xgemm<F>( "T" \ #define __TO_DEVICEPTR(_v) (DataFieldType<F>*)(CUdeviceptr)thrust::raw_pointer_cast((_v))
, "N" \ #define DGEMM_PARTICLES(__A, __B) \
, (int const*)&NoNo \ atrip::xgemm<F>("T", \
, (int const*)&No \ "N", \
, (int const*)&Nv \ (int const*)&NoNo, \
, &one \ (int const*)&No, \
, __A \ (int const*)&Nv, \
, (int const*)&Nv \ &one, \
, __B \ (DataFieldType<F>*)__A, \
, (int const*)&Nv \ (int const*)&Nv, \
, &zero \ (DataFieldType<F>*)__B, \
, _t_buffer.data() \ (int const*)&Nv, \
, (int const*)&NoNo \ &zero, \
_t_buffer_p, \
(int const*)&NoNo);
#define DGEMM_HOLES(__A, __B, __TRANSB) \
atrip::xgemm<F>("N", \
__TRANSB, \
(int const*)&NoNo, \
(int const*)&No, \
(int const*)&No, \
&m_one, \
__TO_DEVICEPTR(__A), \
(int const*)&NoNo, \
(DataFieldType<F>*)__B, \
(int const*)&No, \
&zero, \
_t_buffer_p, \
(int const*)&NoNo \
); );
#define DGEMM_HOLES(__A, __B, __TRANSB) \ #define MAYBE_CONJ(_conj, _buffer) \
atrip::xgemm<F>( "N" \
, __TRANSB \
, (int const*)&NoNo \
, (int const*)&No \
, (int const*)&No \
, &m_one \
, __A \
, (int const*)&NoNo \
, __B \
, (int const*)&No \
, &zero \
, _t_buffer.data() \
, (int const*)&NoNo \
);
#define MAYBE_CONJ(_conj, _buffer) \
for (size_t __i = 0; __i < NoNoNo; ++__i) \ for (size_t __i = 0; __i < NoNoNo; ++__i) \
_conj[__i] = maybeConjugate<F>(_buffer[__i]); \ _conj[__i] = \
maybeConjugate<DataFieldType<F>>(((DataFieldType<F>*)_buffer)[__i]);
#else
#define __TO_DEVICEPTR(_v) (_v)
#define DGEMM_PARTICLES(__A, __B) \
atrip::xgemm<F>("T", \
"N", \
(int const*)&NoNo, \
(int const*)&No, \
(int const*)&Nv, \
&one, \
__A, \
(int const*)&Nv, \
__B, \
(int const*)&Nv, \
&zero, \
_t_buffer_p, \
(int const*)&NoNo \
);
#define DGEMM_HOLES(__A, __B, __TRANSB) \
atrip::xgemm<F>("N", \
__TRANSB, \
(int const*)&NoNo, \
(int const*)&No, \
(int const*)&No, \
&m_one, \
__A, \
(int const*)&NoNo, \
__B, \
(int const*)&No, \
&zero, \
_t_buffer_p, \
(int const*)&NoNo \
);
#define MAYBE_CONJ(_conj, _buffer) \
for (size_t __i = 0; __i < NoNoNo; ++__i) \
_conj[__i] = maybeConjugate<F>(_buffer[__i]);
#endif
const size_t NoNoNo = No*NoNo; const size_t NoNoNo = No*NoNo;
#ifdef HAVE_CUDA
thrust::device_vector< DataFieldType<F> > _t_buffer;
#else
std::vector<F> _t_buffer; std::vector<F> _t_buffer;
#endif
_t_buffer.reserve(NoNoNo); _t_buffer.reserve(NoNoNo);
DataFieldType<F>* _t_buffer_p = __TO_DEVICEPTR(_t_buffer.data());
F one{1.0}, m_one{-1.0}, zero{0.0}; F one{1.0}, m_one{-1.0}, zero{0.0};
WITH_CHRONO("double:reorder", WITH_CHRONO("double:reorder",
for (size_t k = 0; k < NoNoNo; k++) { for (size_t k = 0; k < NoNoNo; k++) {
Tijk[k] = 0.0; Tijk[k] = DataFieldType<F>{0.0};
}) })
// TOMERGE: replace chronos // TOMERGE: replace chronos
WITH_CHRONO("doubles:holes", WITH_CHRONO("doubles:holes",
{ // Holes part %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% { // Holes part %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#ifdef HAVE_CUDA
thrust::device_vector< DataFieldType<F> > _vhhh(NoNoNo);
#else
std::vector<F> _vhhh(NoNoNo); std::vector<F> _vhhh(NoNoNo);
#endif
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1 // VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
MAYBE_CONJ(_vhhh, VhhhC) MAYBE_CONJ(_vhhh, VhhhC)
@ -3135,59 +3222,59 @@ V^{{\color{blue}ab}}_{{\color{red}e}i} T^{{\color{blue}c}{\color{red}e}}_{ij} \
// instantiate templates // instantiate templates
template template
void doublesContribution void doublesContribution<double>
( const ABCTuple &abc ( const ABCTuple &abc
, size_t const No , size_t const No
, size_t const Nv , size_t const Nv
// -- VABCI // -- VABCI
, double* const VABph , DataPtr<double> const VABph
, double* const VACph , DataPtr<double> const VACph
, double* const VBCph , DataPtr<double> const VBCph
, double* const VBAph , DataPtr<double> const VBAph
, double* const VCAph , DataPtr<double> const VCAph
, double* const VCBph , DataPtr<double> const VCBph
// -- VHHHA // -- VHHHA
, double* const VhhhA , DataPtr<double> const VhhhA
, double* const VhhhB , DataPtr<double> const VhhhB
, double* const VhhhC , DataPtr<double> const VhhhC
// -- TA // -- TA
, double* const TAphh , DataPtr<double> const TAphh
, double* const TBphh , DataPtr<double> const TBphh
, double* const TCphh , DataPtr<double> const TCphh
// -- TABIJ // -- TABIJ
, double* const TABhh , DataPtr<double> const TABhh
, double* const TAChh , DataPtr<double> const TAChh
, double* const TBChh , DataPtr<double> const TBChh
// -- TIJK // -- TIJK
, double* Tijk , DataFieldType<double>* Tijk
); );
template template
void doublesContribution void doublesContribution<Complex>
( const ABCTuple &abc ( const ABCTuple &abc
, size_t const No , size_t const No
, size_t const Nv , size_t const Nv
// -- VABCI // -- VABCI
, Complex* const VABph , DataPtr<Complex> const VABph
, Complex* const VACph , DataPtr<Complex> const VACph
, Complex* const VBCph , DataPtr<Complex> const VBCph
, Complex* const VBAph , DataPtr<Complex> const VBAph
, Complex* const VCAph , DataPtr<Complex> const VCAph
, Complex* const VCBph , DataPtr<Complex> const VCBph
// -- VHHHA // -- VHHHA
, Complex* const VhhhA , DataPtr<Complex> const VhhhA
, Complex* const VhhhB , DataPtr<Complex> const VhhhB
, Complex* const VhhhC , DataPtr<Complex> const VhhhC
// -- TA // -- TA
, Complex* const TAphh , DataPtr<Complex> const TAphh
, Complex* const TBphh , DataPtr<Complex> const TBphh
, Complex* const TCphh , DataPtr<Complex> const TCphh
// -- TABIJ // -- TABIJ
, Complex* const TABhh , DataPtr<Complex> const TABhh
, Complex* const TAChh , DataPtr<Complex> const TAChh
, Complex* const TBChh , DataPtr<Complex> const TBChh
// -- TIJK // -- TIJK
, Complex* Tijk , DataFieldType<Complex>* Tijk
); );
#+end_src #+end_src
@ -3212,6 +3299,7 @@ is mainly using the =DGEMM= function, which we declare as
#pragma once #pragma once
#include <atrip/Complex.hpp> #include <atrip/Complex.hpp>
#include <atrip/Types.hpp>
#include "config.h" #include "config.h"
namespace atrip { namespace atrip {
@ -3259,12 +3347,12 @@ namespace atrip {
const int *n, const int *n,
const int *k, const int *k,
F *alpha, F *alpha,
const F *A, const DataFieldType<F> *A,
const int *lda, const int *lda,
const F *B, const DataFieldType<F> *B,
const int *ldb, const int *ldb,
F *beta, F *beta,
F *C, DataFieldType<F> *C,
const int *ldc); const int *ldc);
} }
#+end_src #+end_src
@ -3292,18 +3380,18 @@ namespace atrip {
template <> template <>
void xgemm(const char *transa, void xgemm<double>(const char *transa,
const char *transb, const char *transb,
const int *m, const int *m,
const int *n, const int *n,
const int *k, const int *k,
double *alpha, double *alpha,
const double *A, const typename DataField<double>::type *A,
const int *lda, const int *lda,
const double *B, const typename DataField<double>::type *B,
const int *ldb, const int *ldb,
double *beta, double *beta,
double *C, typename DataField<double>::type *C,
const int *ldc) { const int *ldc) {
#if defined(HAVE_CUDA) #if defined(HAVE_CUDA)
cublasDgemm(Atrip::cuda.handle, cublasDgemm(Atrip::cuda.handle,
@ -3323,20 +3411,21 @@ namespace atrip {
} }
template <> template <>
void xgemm(const char *transa, void xgemm<Complex>(const char *transa,
const char *transb, const char *transb,
const int *m, const int *m,
const int *n, const int *n,
const int *k, const int *k,
Complex *alpha, Complex *alpha,
const Complex *A, const typename DataField<Complex>::type *A,
const int *lda, const int *lda,
const Complex *B, const typename DataField<Complex>::type *B,
const int *ldb, const int *ldb,
Complex *beta, Complex *beta,
Complex *C, typename DataField<Complex>::type *C,
const int *ldc) { const int *ldc) {
#if defined(HAVE_CUDA) #if defined(HAVE_CUDA)
#pragma warning HAVE_CUDA
cuDoubleComplex cuDoubleComplex
cu_alpha = {std::real(*alpha), std::imag(*alpha)}, cu_alpha = {std::real(*alpha), std::imag(*alpha)},
cu_beta = {std::real(*beta), std::imag(*beta)}; cu_beta = {std::real(*beta), std::imag(*beta)};
@ -3345,10 +3434,11 @@ namespace atrip {
char_to_cublasOperation(transb), char_to_cublasOperation(transb),
,*m, *n, *k, ,*m, *n, *k,
&cu_alpha, &cu_alpha,
reinterpret_cast<const cuDoubleComplex*>(A), *lda,
reinterpret_cast<const cuDoubleComplex*>(B), *ldb, A, *lda,
B, *ldb,
&cu_beta, &cu_beta,
reinterpret_cast<const cuDoubleComplex*>(C), *ldc); C, *ldc);
#else #else
zgemm_(transa, transb, zgemm_(transa, transb,
m, n, k, m, n, k,
@ -3531,9 +3621,9 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
cuMemAlloc(&epsi, sizeof(F) * _epsi.size()); cuMemAlloc(&epsi, sizeof(F) * _epsi.size());
cuMemAlloc(&epsa, sizeof(F) * _epsa.size()); cuMemAlloc(&epsa, sizeof(F) * _epsa.size());
cuMemcpy(Tai, (DataPtr<F>)_Tai.data(), sizeof(F) * _Tai.size()); cuMemcpyHtoD(Tai, (void*)_Tai.data(), sizeof(F) * _Tai.size());
cuMemcpy(epsi,(DataPtr<F>)_epsi.data(), sizeof(F) * _epsi.size()); cuMemcpyHtoD(epsi,(void*)_epsi.data(), sizeof(F) * _epsi.size());
cuMemcpy(epsa, (DataPtr<F>)_epsa.data(), sizeof(F) * _epsa.size()); cuMemcpyHtoD(epsa, (void*)_epsa.data(), sizeof(F) * _epsa.size());
DataPtr<F> Tijk, Zijk; DataPtr<F> Tijk, Zijk;
cuMemAlloc(&Tijk, sizeof(F) * No * No * No); cuMemAlloc(&Tijk, sizeof(F) * No * No * No);
@ -3930,27 +4020,27 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
WITH_CHRONO("doubles", WITH_CHRONO("doubles",
doublesContribution<F>( abc, (size_t)No, (size_t)Nv doublesContribution<F>( abc, (size_t)No, (size_t)Nv
// -- VABCI // -- VABCI
, (F*)abph.unwrapSlice(Slice<F>::AB, abc) , abph.unwrapSlice(Slice<F>::AB, abc)
, (F*)abph.unwrapSlice(Slice<F>::AC, abc) , abph.unwrapSlice(Slice<F>::AC, abc)
, (F*)abph.unwrapSlice(Slice<F>::BC, abc) , abph.unwrapSlice(Slice<F>::BC, abc)
, (F*)abph.unwrapSlice(Slice<F>::BA, abc) , abph.unwrapSlice(Slice<F>::BA, abc)
, (F*)abph.unwrapSlice(Slice<F>::CA, abc) , abph.unwrapSlice(Slice<F>::CA, abc)
, (F*)abph.unwrapSlice(Slice<F>::CB, abc) , abph.unwrapSlice(Slice<F>::CB, abc)
// -- VHHHA // -- VHHHA
, (F*)hhha.unwrapSlice(Slice<F>::A, abc) , hhha.unwrapSlice(Slice<F>::A, abc)
, (F*)hhha.unwrapSlice(Slice<F>::B, abc) , hhha.unwrapSlice(Slice<F>::B, abc)
, (F*)hhha.unwrapSlice(Slice<F>::C, abc) , hhha.unwrapSlice(Slice<F>::C, abc)
// -- TA // -- TA
, (F*)taphh.unwrapSlice(Slice<F>::A, abc) , taphh.unwrapSlice(Slice<F>::A, abc)
, (F*)taphh.unwrapSlice(Slice<F>::B, abc) , taphh.unwrapSlice(Slice<F>::B, abc)
, (F*)taphh.unwrapSlice(Slice<F>::C, abc) , taphh.unwrapSlice(Slice<F>::C, abc)
// -- TABIJ // -- TABIJ
, (F*)tabhh.unwrapSlice(Slice<F>::AB, abc) , tabhh.unwrapSlice(Slice<F>::AB, abc)
, (F*)tabhh.unwrapSlice(Slice<F>::AC, abc) , tabhh.unwrapSlice(Slice<F>::AC, abc)
, (F*)tabhh.unwrapSlice(Slice<F>::BC, abc) , tabhh.unwrapSlice(Slice<F>::BC, abc)
// -- TIJK // -- TIJK
#if defined(HAVE_CUDA) #if defined(HAVE_CUDA)
, (F*)Tijk , (DataFieldType<F>*)Tijk
#else #else
, Tijk.data() , Tijk.data()
#endif #endif
@ -4423,6 +4513,10 @@ namespace atrip {
template <typename F> F maybeConjugate(const F); template <typename F> F maybeConjugate(const F);
#if defined(HAVE_CUDA)
void operator+=(cuDoubleComplex& lz, cuDoubleComplex const& rz);
#endif
namespace traits { namespace traits {
template <typename FF> bool isComplex(); template <typename FF> bool isComplex();
@ -4445,6 +4539,13 @@ namespace atrip {
template <> double maybeConjugate(const double a) { return a; } template <> double maybeConjugate(const double a) { return a; }
template <> Complex maybeConjugate(const Complex a) { return std::conj(a); } template <> Complex maybeConjugate(const Complex a) { return std::conj(a); }
#if defined(HAVE_CUDA)
void operator+=(cuDoubleComplex& lz, cuDoubleComplex const& rz) {
lz.x += rz.x;
lz.y += rz.y;
}
#endif
namespace traits { namespace traits {
template <typename F> bool isComplex() { return false; } template <typename F> bool isComplex() { return false; }

View File

@ -7,19 +7,19 @@
let let
mkl = import ./etc/nix/mkl.nix { pkgs = (import <nixpkgs> { unfree-pkgs = import <nixpkgs> {
config.allowUnfree = true; config.allowUnfree = true;
}); }; };
openblas = import ./etc/nix/openblas.nix { inherit pkgs; }; openblas = import ./etc/nix/openblas.nix { inherit pkgs; };
cuda-pkg = if cuda then (import ./cuda.nix { inherit pkgs; }) else {}; mkl = import ./etc/nix/mkl.nix { pkgs = unfree-pkgs; };
cuda-pkg = if cuda then (import ./cuda.nix { pkgs = unfree-pkgs; }) else {};
in in
pkgs.mkShell rec { pkgs.mkShell rec {
compiler-pkg compiler-pkg
= if compiler == "gcc11" then pkgs.gcc11 = if compiler == "gcc11" then pkgs.gcc11
else if compiler == "gcc10" then pkgs.gcc10 else if compiler == "gcc10" then pkgs.gcc10