Fix problem with complex numbers
This commit is contained in:
parent
565fb1dcc8
commit
c757c4650c
377
atrip.org
377
atrip.org
@ -214,17 +214,49 @@ of a GPU architecture.
|
|||||||
|
|
||||||
#+begin_src c++ :tangle (atrip-types-h)
|
#+begin_src c++ :tangle (atrip-types-h)
|
||||||
#pragma once
|
#pragma once
|
||||||
|
#include <atrip/Complex.hpp>
|
||||||
|
#include <atrip/Atrip.hpp>
|
||||||
|
|
||||||
|
namespace atrip {
|
||||||
|
|
||||||
template <typename F>
|
template <typename F>
|
||||||
|
struct DataField;
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct DataField<double> {
|
||||||
|
using type = double;
|
||||||
|
};
|
||||||
|
|
||||||
#if defined(HAVE_CUDA)
|
#if defined(HAVE_CUDA)
|
||||||
|
|
||||||
|
template <typename F>
|
||||||
using DataPtr = CUdeviceptr;
|
using DataPtr = CUdeviceptr;
|
||||||
#define DataNullPtr 0x00
|
#define DataNullPtr 0x00
|
||||||
#define _AT_(_array, _idx) ((F*)(_array))[(_idx)]
|
|
||||||
|
template <>
|
||||||
|
struct DataField<Complex> {
|
||||||
|
using type = cuDoubleComplex;
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
template <typename F>
|
||||||
using DataPtr = F*;
|
using DataPtr = F*;
|
||||||
#define DataNullPtr nullptr
|
#define DataNullPtr nullptr
|
||||||
#define _AT_(_array, _idx) (_array)[(_idx)]
|
|
||||||
|
template <>
|
||||||
|
struct DataField<Complex> {
|
||||||
|
using type = Complex;
|
||||||
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
template <typename F>
|
||||||
|
using DataFieldType = typename DataField<F>::type;
|
||||||
|
|
||||||
|
}
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
|
|
||||||
@ -748,7 +780,7 @@ The main behaviour of the function should
|
|||||||
|
|
||||||
#if defined(HAVE_CUDA)
|
#if defined(HAVE_CUDA)
|
||||||
// copy the retrieved mpi data to the device
|
// copy the retrieved mpi data to the device
|
||||||
cuMemcpy((DataPtr<F>)mpi_data, data, size);
|
cuMemcpyHtoD(data, (void*)mpi_data, sizeof(F) * size);
|
||||||
std::free(mpi_data);
|
std::free(mpi_data);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -2558,6 +2590,10 @@ tensor contractions.
|
|||||||
#include<atrip/Blas.hpp>
|
#include<atrip/Blas.hpp>
|
||||||
#include<atrip/Utils.hpp>
|
#include<atrip/Utils.hpp>
|
||||||
|
|
||||||
|
#if defined(HAVE_CUDA)
|
||||||
|
#include<thrust/device_vector.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
namespace atrip {
|
namespace atrip {
|
||||||
using ABCTuple = std::array<size_t, 3>;
|
using ABCTuple = std::array<size_t, 3>;
|
||||||
@ -2886,26 +2922,27 @@ V^{{\color{blue}ab}}_{{\color{red}e}i} T^{{\color{blue}c}{\color{red}e}}_{ij} \
|
|||||||
, size_t const No
|
, size_t const No
|
||||||
, size_t const Nv
|
, size_t const Nv
|
||||||
// -- VABCI
|
// -- VABCI
|
||||||
, F* const VABph
|
, DataPtr<F> const VABph
|
||||||
, F* const VACph
|
, DataPtr<F> const VACph
|
||||||
, F* const VBCph
|
, DataPtr<F> const VBCph
|
||||||
, F* const VBAph
|
, DataPtr<F> const VBAph
|
||||||
, F* const VCAph
|
, DataPtr<F> const VCAph
|
||||||
, F* const VCBph
|
, DataPtr<F> const VCBph
|
||||||
// -- VHHHA
|
// -- VHHHA
|
||||||
, F* const VhhhA
|
, DataPtr<F> const VhhhA
|
||||||
, F* const VhhhB
|
, DataPtr<F> const VhhhB
|
||||||
, F* const VhhhC
|
, DataPtr<F> const VhhhC
|
||||||
// -- TA
|
// -- TA
|
||||||
, F* const TAphh
|
, DataPtr<F> const TAphh
|
||||||
, F* const TBphh
|
, DataPtr<F> const TBphh
|
||||||
, F* const TCphh
|
, DataPtr<F> const TCphh
|
||||||
// -- TABIJ
|
// -- TABIJ
|
||||||
, F* const TABhh
|
, DataPtr<F> const TABhh
|
||||||
, F* const TAChh
|
, DataPtr<F> const TAChh
|
||||||
, F* const TBChh
|
, DataPtr<F> const TBChh
|
||||||
// -- TIJK
|
// -- TIJK
|
||||||
, F* Tijk
|
// , DataPtr<F> Tijk
|
||||||
|
, DataFieldType<F>* Tijk_
|
||||||
);
|
);
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
@ -2919,91 +2956,141 @@ V^{{\color{blue}ab}}_{{\color{red}e}i} T^{{\color{blue}c}{\color{red}e}}_{ij} \
|
|||||||
, size_t const No
|
, size_t const No
|
||||||
, size_t const Nv
|
, size_t const Nv
|
||||||
// -- VABCI
|
// -- VABCI
|
||||||
, F* const VABph
|
, DataPtr<F> const VABph
|
||||||
, F* const VACph
|
, DataPtr<F> const VACph
|
||||||
, F* const VBCph
|
, DataPtr<F> const VBCph
|
||||||
, F* const VBAph
|
, DataPtr<F> const VBAph
|
||||||
, F* const VCAph
|
, DataPtr<F> const VCAph
|
||||||
, F* const VCBph
|
, DataPtr<F> const VCBph
|
||||||
// -- VHHHA
|
// -- VHHHA
|
||||||
, F* const VhhhA
|
, DataPtr<F> const VhhhA
|
||||||
, F* const VhhhB
|
, DataPtr<F> const VhhhB
|
||||||
, F* const VhhhC
|
, DataPtr<F> const VhhhC
|
||||||
// -- TA
|
// -- TA
|
||||||
, F* const TAphh
|
, DataPtr<F> const TAphh
|
||||||
, F* const TBphh
|
, DataPtr<F> const TBphh
|
||||||
, F* const TCphh
|
, DataPtr<F> const TCphh
|
||||||
// -- TABIJ
|
// -- TABIJ
|
||||||
, F* const TABhh
|
, DataPtr<F> const TABhh
|
||||||
, F* const TAChh
|
, DataPtr<F> const TAChh
|
||||||
, F* const TBChh
|
, DataPtr<F> const TBChh
|
||||||
// -- TIJK
|
// -- TIJK
|
||||||
, F* Tijk
|
// , DataPtr<F> Tijk_
|
||||||
|
, DataFieldType<F>* Tijk_
|
||||||
) {
|
) {
|
||||||
|
|
||||||
const size_t a = abc[0], b = abc[1], c = abc[2]
|
const size_t a = abc[0], b = abc[1], c = abc[2]
|
||||||
, NoNo = No*No, NoNv = No*Nv
|
, NoNo = No*No, NoNv = No*Nv
|
||||||
;
|
;
|
||||||
|
|
||||||
#if defined(ATRIP_USE_DGEMM)
|
typename DataField<F>::type* Tijk = (typename DataField<F>::type*) Tijk_;
|
||||||
#define _IJK_(i, j, k) i + j*No + k*NoNo
|
|
||||||
#define REORDER(__II, __JJ, __KK) \
|
#if defined(ATRIP_USE_DGEMM)
|
||||||
|
#define _IJK_(i, j, k) i + j*No + k*NoNo
|
||||||
|
#define REORDER(__II, __JJ, __KK) \
|
||||||
WITH_CHRONO("doubles:reorder", \
|
WITH_CHRONO("doubles:reorder", \
|
||||||
for (size_t k = 0; k < No; k++) \
|
for (size_t k = 0; k < No; k++) \
|
||||||
for (size_t j = 0; j < No; j++) \
|
for (size_t j = 0; j < No; j++) \
|
||||||
for (size_t i = 0; i < No; i++) { \
|
for (size_t i = 0; i < No; i++) { \
|
||||||
Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \
|
Tijk[_IJK_(i, j, k)] += _t_buffer_p[_IJK_(__II, __JJ, __KK)]; \
|
||||||
} \
|
} \
|
||||||
)
|
)
|
||||||
#define DGEMM_PARTICLES(__A, __B) \
|
#if defined(HAVE_CUDA)
|
||||||
atrip::xgemm<F>( "T" \
|
#define __TO_DEVICEPTR(_v) (DataFieldType<F>*)(CUdeviceptr)thrust::raw_pointer_cast((_v))
|
||||||
, "N" \
|
#define DGEMM_PARTICLES(__A, __B) \
|
||||||
, (int const*)&NoNo \
|
atrip::xgemm<F>("T", \
|
||||||
, (int const*)&No \
|
"N", \
|
||||||
, (int const*)&Nv \
|
(int const*)&NoNo, \
|
||||||
, &one \
|
(int const*)&No, \
|
||||||
, __A \
|
(int const*)&Nv, \
|
||||||
, (int const*)&Nv \
|
&one, \
|
||||||
, __B \
|
(DataFieldType<F>*)__A, \
|
||||||
, (int const*)&Nv \
|
(int const*)&Nv, \
|
||||||
, &zero \
|
(DataFieldType<F>*)__B, \
|
||||||
, _t_buffer.data() \
|
(int const*)&Nv, \
|
||||||
, (int const*)&NoNo \
|
&zero, \
|
||||||
|
_t_buffer_p, \
|
||||||
|
(int const*)&NoNo);
|
||||||
|
#define DGEMM_HOLES(__A, __B, __TRANSB) \
|
||||||
|
atrip::xgemm<F>("N", \
|
||||||
|
__TRANSB, \
|
||||||
|
(int const*)&NoNo, \
|
||||||
|
(int const*)&No, \
|
||||||
|
(int const*)&No, \
|
||||||
|
&m_one, \
|
||||||
|
__TO_DEVICEPTR(__A), \
|
||||||
|
(int const*)&NoNo, \
|
||||||
|
(DataFieldType<F>*)__B, \
|
||||||
|
(int const*)&No, \
|
||||||
|
&zero, \
|
||||||
|
_t_buffer_p, \
|
||||||
|
(int const*)&NoNo \
|
||||||
);
|
);
|
||||||
#define DGEMM_HOLES(__A, __B, __TRANSB) \
|
#define MAYBE_CONJ(_conj, _buffer) \
|
||||||
atrip::xgemm<F>( "N" \
|
|
||||||
, __TRANSB \
|
|
||||||
, (int const*)&NoNo \
|
|
||||||
, (int const*)&No \
|
|
||||||
, (int const*)&No \
|
|
||||||
, &m_one \
|
|
||||||
, __A \
|
|
||||||
, (int const*)&NoNo \
|
|
||||||
, __B \
|
|
||||||
, (int const*)&No \
|
|
||||||
, &zero \
|
|
||||||
, _t_buffer.data() \
|
|
||||||
, (int const*)&NoNo \
|
|
||||||
);
|
|
||||||
#define MAYBE_CONJ(_conj, _buffer) \
|
|
||||||
for (size_t __i = 0; __i < NoNoNo; ++__i) \
|
for (size_t __i = 0; __i < NoNoNo; ++__i) \
|
||||||
_conj[__i] = maybeConjugate<F>(_buffer[__i]); \
|
_conj[__i] = \
|
||||||
|
maybeConjugate<DataFieldType<F>>(((DataFieldType<F>*)_buffer)[__i]);
|
||||||
|
#else
|
||||||
|
#define __TO_DEVICEPTR(_v) (_v)
|
||||||
|
#define DGEMM_PARTICLES(__A, __B) \
|
||||||
|
atrip::xgemm<F>("T", \
|
||||||
|
"N", \
|
||||||
|
(int const*)&NoNo, \
|
||||||
|
(int const*)&No, \
|
||||||
|
(int const*)&Nv, \
|
||||||
|
&one, \
|
||||||
|
__A, \
|
||||||
|
(int const*)&Nv, \
|
||||||
|
__B, \
|
||||||
|
(int const*)&Nv, \
|
||||||
|
&zero, \
|
||||||
|
_t_buffer_p, \
|
||||||
|
(int const*)&NoNo \
|
||||||
|
);
|
||||||
|
#define DGEMM_HOLES(__A, __B, __TRANSB) \
|
||||||
|
atrip::xgemm<F>("N", \
|
||||||
|
__TRANSB, \
|
||||||
|
(int const*)&NoNo, \
|
||||||
|
(int const*)&No, \
|
||||||
|
(int const*)&No, \
|
||||||
|
&m_one, \
|
||||||
|
__A, \
|
||||||
|
(int const*)&NoNo, \
|
||||||
|
__B, \
|
||||||
|
(int const*)&No, \
|
||||||
|
&zero, \
|
||||||
|
_t_buffer_p, \
|
||||||
|
(int const*)&NoNo \
|
||||||
|
);
|
||||||
|
#define MAYBE_CONJ(_conj, _buffer) \
|
||||||
|
for (size_t __i = 0; __i < NoNoNo; ++__i) \
|
||||||
|
_conj[__i] = maybeConjugate<F>(_buffer[__i]);
|
||||||
|
#endif
|
||||||
|
|
||||||
const size_t NoNoNo = No*NoNo;
|
const size_t NoNoNo = No*NoNo;
|
||||||
|
#ifdef HAVE_CUDA
|
||||||
|
thrust::device_vector< DataFieldType<F> > _t_buffer;
|
||||||
|
#else
|
||||||
std::vector<F> _t_buffer;
|
std::vector<F> _t_buffer;
|
||||||
|
#endif
|
||||||
_t_buffer.reserve(NoNoNo);
|
_t_buffer.reserve(NoNoNo);
|
||||||
|
DataFieldType<F>* _t_buffer_p = __TO_DEVICEPTR(_t_buffer.data());
|
||||||
F one{1.0}, m_one{-1.0}, zero{0.0};
|
F one{1.0}, m_one{-1.0}, zero{0.0};
|
||||||
|
|
||||||
WITH_CHRONO("double:reorder",
|
WITH_CHRONO("double:reorder",
|
||||||
for (size_t k = 0; k < NoNoNo; k++) {
|
for (size_t k = 0; k < NoNoNo; k++) {
|
||||||
Tijk[k] = 0.0;
|
Tijk[k] = DataFieldType<F>{0.0};
|
||||||
})
|
})
|
||||||
|
|
||||||
// TOMERGE: replace chronos
|
// TOMERGE: replace chronos
|
||||||
WITH_CHRONO("doubles:holes",
|
WITH_CHRONO("doubles:holes",
|
||||||
{ // Holes part %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
{ // Holes part %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
|
#ifdef HAVE_CUDA
|
||||||
|
thrust::device_vector< DataFieldType<F> > _vhhh(NoNoNo);
|
||||||
|
#else
|
||||||
std::vector<F> _vhhh(NoNoNo);
|
std::vector<F> _vhhh(NoNoNo);
|
||||||
|
#endif
|
||||||
|
|
||||||
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
|
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
|
||||||
MAYBE_CONJ(_vhhh, VhhhC)
|
MAYBE_CONJ(_vhhh, VhhhC)
|
||||||
@ -3135,59 +3222,59 @@ V^{{\color{blue}ab}}_{{\color{red}e}i} T^{{\color{blue}c}{\color{red}e}}_{ij} \
|
|||||||
|
|
||||||
// instantiate templates
|
// instantiate templates
|
||||||
template
|
template
|
||||||
void doublesContribution
|
void doublesContribution<double>
|
||||||
( const ABCTuple &abc
|
( const ABCTuple &abc
|
||||||
, size_t const No
|
, size_t const No
|
||||||
, size_t const Nv
|
, size_t const Nv
|
||||||
// -- VABCI
|
// -- VABCI
|
||||||
, double* const VABph
|
, DataPtr<double> const VABph
|
||||||
, double* const VACph
|
, DataPtr<double> const VACph
|
||||||
, double* const VBCph
|
, DataPtr<double> const VBCph
|
||||||
, double* const VBAph
|
, DataPtr<double> const VBAph
|
||||||
, double* const VCAph
|
, DataPtr<double> const VCAph
|
||||||
, double* const VCBph
|
, DataPtr<double> const VCBph
|
||||||
// -- VHHHA
|
// -- VHHHA
|
||||||
, double* const VhhhA
|
, DataPtr<double> const VhhhA
|
||||||
, double* const VhhhB
|
, DataPtr<double> const VhhhB
|
||||||
, double* const VhhhC
|
, DataPtr<double> const VhhhC
|
||||||
// -- TA
|
// -- TA
|
||||||
, double* const TAphh
|
, DataPtr<double> const TAphh
|
||||||
, double* const TBphh
|
, DataPtr<double> const TBphh
|
||||||
, double* const TCphh
|
, DataPtr<double> const TCphh
|
||||||
// -- TABIJ
|
// -- TABIJ
|
||||||
, double* const TABhh
|
, DataPtr<double> const TABhh
|
||||||
, double* const TAChh
|
, DataPtr<double> const TAChh
|
||||||
, double* const TBChh
|
, DataPtr<double> const TBChh
|
||||||
// -- TIJK
|
// -- TIJK
|
||||||
, double* Tijk
|
, DataFieldType<double>* Tijk
|
||||||
);
|
);
|
||||||
|
|
||||||
template
|
template
|
||||||
void doublesContribution
|
void doublesContribution<Complex>
|
||||||
( const ABCTuple &abc
|
( const ABCTuple &abc
|
||||||
, size_t const No
|
, size_t const No
|
||||||
, size_t const Nv
|
, size_t const Nv
|
||||||
// -- VABCI
|
// -- VABCI
|
||||||
, Complex* const VABph
|
, DataPtr<Complex> const VABph
|
||||||
, Complex* const VACph
|
, DataPtr<Complex> const VACph
|
||||||
, Complex* const VBCph
|
, DataPtr<Complex> const VBCph
|
||||||
, Complex* const VBAph
|
, DataPtr<Complex> const VBAph
|
||||||
, Complex* const VCAph
|
, DataPtr<Complex> const VCAph
|
||||||
, Complex* const VCBph
|
, DataPtr<Complex> const VCBph
|
||||||
// -- VHHHA
|
// -- VHHHA
|
||||||
, Complex* const VhhhA
|
, DataPtr<Complex> const VhhhA
|
||||||
, Complex* const VhhhB
|
, DataPtr<Complex> const VhhhB
|
||||||
, Complex* const VhhhC
|
, DataPtr<Complex> const VhhhC
|
||||||
// -- TA
|
// -- TA
|
||||||
, Complex* const TAphh
|
, DataPtr<Complex> const TAphh
|
||||||
, Complex* const TBphh
|
, DataPtr<Complex> const TBphh
|
||||||
, Complex* const TCphh
|
, DataPtr<Complex> const TCphh
|
||||||
// -- TABIJ
|
// -- TABIJ
|
||||||
, Complex* const TABhh
|
, DataPtr<Complex> const TABhh
|
||||||
, Complex* const TAChh
|
, DataPtr<Complex> const TAChh
|
||||||
, Complex* const TBChh
|
, DataPtr<Complex> const TBChh
|
||||||
// -- TIJK
|
// -- TIJK
|
||||||
, Complex* Tijk
|
, DataFieldType<Complex>* Tijk
|
||||||
);
|
);
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
@ -3212,6 +3299,7 @@ is mainly using the =DGEMM= function, which we declare as
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <atrip/Complex.hpp>
|
#include <atrip/Complex.hpp>
|
||||||
|
#include <atrip/Types.hpp>
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
namespace atrip {
|
namespace atrip {
|
||||||
@ -3259,12 +3347,12 @@ namespace atrip {
|
|||||||
const int *n,
|
const int *n,
|
||||||
const int *k,
|
const int *k,
|
||||||
F *alpha,
|
F *alpha,
|
||||||
const F *A,
|
const DataFieldType<F> *A,
|
||||||
const int *lda,
|
const int *lda,
|
||||||
const F *B,
|
const DataFieldType<F> *B,
|
||||||
const int *ldb,
|
const int *ldb,
|
||||||
F *beta,
|
F *beta,
|
||||||
F *C,
|
DataFieldType<F> *C,
|
||||||
const int *ldc);
|
const int *ldc);
|
||||||
}
|
}
|
||||||
#+end_src
|
#+end_src
|
||||||
@ -3292,18 +3380,18 @@ namespace atrip {
|
|||||||
|
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
void xgemm(const char *transa,
|
void xgemm<double>(const char *transa,
|
||||||
const char *transb,
|
const char *transb,
|
||||||
const int *m,
|
const int *m,
|
||||||
const int *n,
|
const int *n,
|
||||||
const int *k,
|
const int *k,
|
||||||
double *alpha,
|
double *alpha,
|
||||||
const double *A,
|
const typename DataField<double>::type *A,
|
||||||
const int *lda,
|
const int *lda,
|
||||||
const double *B,
|
const typename DataField<double>::type *B,
|
||||||
const int *ldb,
|
const int *ldb,
|
||||||
double *beta,
|
double *beta,
|
||||||
double *C,
|
typename DataField<double>::type *C,
|
||||||
const int *ldc) {
|
const int *ldc) {
|
||||||
#if defined(HAVE_CUDA)
|
#if defined(HAVE_CUDA)
|
||||||
cublasDgemm(Atrip::cuda.handle,
|
cublasDgemm(Atrip::cuda.handle,
|
||||||
@ -3323,20 +3411,21 @@ namespace atrip {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
void xgemm(const char *transa,
|
void xgemm<Complex>(const char *transa,
|
||||||
const char *transb,
|
const char *transb,
|
||||||
const int *m,
|
const int *m,
|
||||||
const int *n,
|
const int *n,
|
||||||
const int *k,
|
const int *k,
|
||||||
Complex *alpha,
|
Complex *alpha,
|
||||||
const Complex *A,
|
const typename DataField<Complex>::type *A,
|
||||||
const int *lda,
|
const int *lda,
|
||||||
const Complex *B,
|
const typename DataField<Complex>::type *B,
|
||||||
const int *ldb,
|
const int *ldb,
|
||||||
Complex *beta,
|
Complex *beta,
|
||||||
Complex *C,
|
typename DataField<Complex>::type *C,
|
||||||
const int *ldc) {
|
const int *ldc) {
|
||||||
#if defined(HAVE_CUDA)
|
#if defined(HAVE_CUDA)
|
||||||
|
#pragma warning HAVE_CUDA
|
||||||
cuDoubleComplex
|
cuDoubleComplex
|
||||||
cu_alpha = {std::real(*alpha), std::imag(*alpha)},
|
cu_alpha = {std::real(*alpha), std::imag(*alpha)},
|
||||||
cu_beta = {std::real(*beta), std::imag(*beta)};
|
cu_beta = {std::real(*beta), std::imag(*beta)};
|
||||||
@ -3345,10 +3434,11 @@ namespace atrip {
|
|||||||
char_to_cublasOperation(transb),
|
char_to_cublasOperation(transb),
|
||||||
,*m, *n, *k,
|
,*m, *n, *k,
|
||||||
&cu_alpha,
|
&cu_alpha,
|
||||||
reinterpret_cast<const cuDoubleComplex*>(A), *lda,
|
|
||||||
reinterpret_cast<const cuDoubleComplex*>(B), *ldb,
|
A, *lda,
|
||||||
|
B, *ldb,
|
||||||
&cu_beta,
|
&cu_beta,
|
||||||
reinterpret_cast<const cuDoubleComplex*>(C), *ldc);
|
C, *ldc);
|
||||||
#else
|
#else
|
||||||
zgemm_(transa, transb,
|
zgemm_(transa, transb,
|
||||||
m, n, k,
|
m, n, k,
|
||||||
@ -3531,9 +3621,9 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
|||||||
cuMemAlloc(&epsi, sizeof(F) * _epsi.size());
|
cuMemAlloc(&epsi, sizeof(F) * _epsi.size());
|
||||||
cuMemAlloc(&epsa, sizeof(F) * _epsa.size());
|
cuMemAlloc(&epsa, sizeof(F) * _epsa.size());
|
||||||
|
|
||||||
cuMemcpy(Tai, (DataPtr<F>)_Tai.data(), sizeof(F) * _Tai.size());
|
cuMemcpyHtoD(Tai, (void*)_Tai.data(), sizeof(F) * _Tai.size());
|
||||||
cuMemcpy(epsi,(DataPtr<F>)_epsi.data(), sizeof(F) * _epsi.size());
|
cuMemcpyHtoD(epsi,(void*)_epsi.data(), sizeof(F) * _epsi.size());
|
||||||
cuMemcpy(epsa, (DataPtr<F>)_epsa.data(), sizeof(F) * _epsa.size());
|
cuMemcpyHtoD(epsa, (void*)_epsa.data(), sizeof(F) * _epsa.size());
|
||||||
|
|
||||||
DataPtr<F> Tijk, Zijk;
|
DataPtr<F> Tijk, Zijk;
|
||||||
cuMemAlloc(&Tijk, sizeof(F) * No * No * No);
|
cuMemAlloc(&Tijk, sizeof(F) * No * No * No);
|
||||||
@ -3930,27 +4020,27 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
|||||||
WITH_CHRONO("doubles",
|
WITH_CHRONO("doubles",
|
||||||
doublesContribution<F>( abc, (size_t)No, (size_t)Nv
|
doublesContribution<F>( abc, (size_t)No, (size_t)Nv
|
||||||
// -- VABCI
|
// -- VABCI
|
||||||
, (F*)abph.unwrapSlice(Slice<F>::AB, abc)
|
, abph.unwrapSlice(Slice<F>::AB, abc)
|
||||||
, (F*)abph.unwrapSlice(Slice<F>::AC, abc)
|
, abph.unwrapSlice(Slice<F>::AC, abc)
|
||||||
, (F*)abph.unwrapSlice(Slice<F>::BC, abc)
|
, abph.unwrapSlice(Slice<F>::BC, abc)
|
||||||
, (F*)abph.unwrapSlice(Slice<F>::BA, abc)
|
, abph.unwrapSlice(Slice<F>::BA, abc)
|
||||||
, (F*)abph.unwrapSlice(Slice<F>::CA, abc)
|
, abph.unwrapSlice(Slice<F>::CA, abc)
|
||||||
, (F*)abph.unwrapSlice(Slice<F>::CB, abc)
|
, abph.unwrapSlice(Slice<F>::CB, abc)
|
||||||
// -- VHHHA
|
// -- VHHHA
|
||||||
, (F*)hhha.unwrapSlice(Slice<F>::A, abc)
|
, hhha.unwrapSlice(Slice<F>::A, abc)
|
||||||
, (F*)hhha.unwrapSlice(Slice<F>::B, abc)
|
, hhha.unwrapSlice(Slice<F>::B, abc)
|
||||||
, (F*)hhha.unwrapSlice(Slice<F>::C, abc)
|
, hhha.unwrapSlice(Slice<F>::C, abc)
|
||||||
// -- TA
|
// -- TA
|
||||||
, (F*)taphh.unwrapSlice(Slice<F>::A, abc)
|
, taphh.unwrapSlice(Slice<F>::A, abc)
|
||||||
, (F*)taphh.unwrapSlice(Slice<F>::B, abc)
|
, taphh.unwrapSlice(Slice<F>::B, abc)
|
||||||
, (F*)taphh.unwrapSlice(Slice<F>::C, abc)
|
, taphh.unwrapSlice(Slice<F>::C, abc)
|
||||||
// -- TABIJ
|
// -- TABIJ
|
||||||
, (F*)tabhh.unwrapSlice(Slice<F>::AB, abc)
|
, tabhh.unwrapSlice(Slice<F>::AB, abc)
|
||||||
, (F*)tabhh.unwrapSlice(Slice<F>::AC, abc)
|
, tabhh.unwrapSlice(Slice<F>::AC, abc)
|
||||||
, (F*)tabhh.unwrapSlice(Slice<F>::BC, abc)
|
, tabhh.unwrapSlice(Slice<F>::BC, abc)
|
||||||
// -- TIJK
|
// -- TIJK
|
||||||
#if defined(HAVE_CUDA)
|
#if defined(HAVE_CUDA)
|
||||||
, (F*)Tijk
|
, (DataFieldType<F>*)Tijk
|
||||||
#else
|
#else
|
||||||
, Tijk.data()
|
, Tijk.data()
|
||||||
#endif
|
#endif
|
||||||
@ -4423,6 +4513,10 @@ namespace atrip {
|
|||||||
|
|
||||||
template <typename F> F maybeConjugate(const F);
|
template <typename F> F maybeConjugate(const F);
|
||||||
|
|
||||||
|
#if defined(HAVE_CUDA)
|
||||||
|
void operator+=(cuDoubleComplex& lz, cuDoubleComplex const& rz);
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace traits {
|
namespace traits {
|
||||||
|
|
||||||
template <typename FF> bool isComplex();
|
template <typename FF> bool isComplex();
|
||||||
@ -4445,6 +4539,13 @@ namespace atrip {
|
|||||||
template <> double maybeConjugate(const double a) { return a; }
|
template <> double maybeConjugate(const double a) { return a; }
|
||||||
template <> Complex maybeConjugate(const Complex a) { return std::conj(a); }
|
template <> Complex maybeConjugate(const Complex a) { return std::conj(a); }
|
||||||
|
|
||||||
|
#if defined(HAVE_CUDA)
|
||||||
|
void operator+=(cuDoubleComplex& lz, cuDoubleComplex const& rz) {
|
||||||
|
lz.x += rz.x;
|
||||||
|
lz.y += rz.y;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
namespace traits {
|
namespace traits {
|
||||||
template <typename F> bool isComplex() { return false; }
|
template <typename F> bool isComplex() { return false; }
|
||||||
|
|||||||
@ -7,19 +7,19 @@
|
|||||||
|
|
||||||
let
|
let
|
||||||
|
|
||||||
mkl = import ./etc/nix/mkl.nix { pkgs = (import <nixpkgs> {
|
unfree-pkgs = import <nixpkgs> {
|
||||||
config.allowUnfree = true;
|
config.allowUnfree = true;
|
||||||
}); };
|
};
|
||||||
|
|
||||||
openblas = import ./etc/nix/openblas.nix { inherit pkgs; };
|
openblas = import ./etc/nix/openblas.nix { inherit pkgs; };
|
||||||
|
|
||||||
cuda-pkg = if cuda then (import ./cuda.nix { inherit pkgs; }) else {};
|
mkl = import ./etc/nix/mkl.nix { pkgs = unfree-pkgs; };
|
||||||
|
cuda-pkg = if cuda then (import ./cuda.nix { pkgs = unfree-pkgs; }) else {};
|
||||||
|
|
||||||
in
|
in
|
||||||
|
|
||||||
pkgs.mkShell rec {
|
pkgs.mkShell rec {
|
||||||
|
|
||||||
|
|
||||||
compiler-pkg
|
compiler-pkg
|
||||||
= if compiler == "gcc11" then pkgs.gcc11
|
= if compiler == "gcc11" then pkgs.gcc11
|
||||||
else if compiler == "gcc10" then pkgs.gcc10
|
else if compiler == "gcc10" then pkgs.gcc10
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user