diff --git a/atrip.org b/atrip.org index 85f5186..f9cafb7 100644 --- a/atrip.org +++ b/atrip.org @@ -2916,6 +2916,9 @@ V^{{\color{blue}ab}}_{{\color{red}e}i} T^{{\color{blue}c}{\color{red}e}}_{ij} \ #+begin_src c++ :tangle (atrip-equations-h) +#if defined(HAVE_CUDA) + __device__ +#endif template void doublesContribution ( const ABCTuple &abc @@ -2950,6 +2953,9 @@ V^{{\color{blue}ab}}_{{\color{red}e}i} T^{{\color{blue}c}{\color{red}e}}_{ij} \ #+begin_src c++ :tangle (atrip-equations-cxx) +#if defined(HAVE_CUDA) + __device__ +#endif template void doublesContribution ( const ABCTuple &abc @@ -3225,6 +3231,9 @@ V^{{\color{blue}ab}}_{{\color{red}e}i} T^{{\color{blue}c}{\color{red}e}}_{ij} \ // instantiate templates +#if defined(HAVE_CUDA) + __device__ +#endif template void doublesContribution ( const ABCTuple &abc @@ -3253,6 +3262,9 @@ V^{{\color{blue}ab}}_{{\color{red}e}i} T^{{\color{blue}c}{\color{red}e}}_{ij} \ , DataFieldType* Tijk ); +#if defined(HAVE_CUDA) + __device__ +#endif template void doublesContribution ( const ABCTuple &abc @@ -4510,6 +4522,10 @@ using namespace atrip; #include #include +#include "config.h" +#if defined(HAVE_CUDA) +#include +#endif namespace atrip { @@ -4518,7 +4534,7 @@ namespace atrip { template F maybeConjugate(const F); #if defined(HAVE_CUDA) - void operator+=(cuDoubleComplex& lz, cuDoubleComplex const& rz); + cuDoubleComplex& operator+=(cuDoubleComplex& lz, cuDoubleComplex const& rz); #endif namespace traits { @@ -4544,9 +4560,31 @@ namespace atrip { template <> Complex maybeConjugate(const Complex a) { return std::conj(a); } #if defined(HAVE_CUDA) - void operator+=(cuDoubleComplex& lz, cuDoubleComplex const& rz) { + /* + __device__ + template <> double2 + maybeConjugate(const double2 a) { + return {a.x, -a.y}; + } + ,*/ + __device__ + template <> cuDoubleComplex + maybeConjugate(const cuDoubleComplex a) { + return {a.x, -a.y}; + } + /* + __device__ + double2& operator+=(double2& lz, double2 const& rz) { lz.x += rz.x; lz.y += rz.y; + 
return lz; + } + ,*/ + __device__ + cuDoubleComplex& operator+=(cuDoubleComplex& lz, cuDoubleComplex const& rz) { + lz.x += rz.x; + lz.y += rz.y; + return lz; } #endif diff --git a/bench/Makefile.am b/bench/Makefile.am index 39ce3c3..d6e591e 100644 --- a/bench/Makefile.am +++ b/bench/Makefile.am @@ -18,5 +18,5 @@ endif if WITH_CUDA test_main_CXXFLAGS = $(CUDA_CXXFLAGS) -test_main_LDADD += $(CUDA_LDFLAGS) +test_main_LDADD += $(CUDA_LDFLAGS) -lcublas -lcudadevrt endif diff --git a/include/atrip/Atrip.hpp b/include/atrip/Atrip.hpp index ed2eaba..7a94208 100644 --- a/include/atrip/Atrip.hpp +++ b/include/atrip/Atrip.hpp @@ -18,9 +18,17 @@ #include #include #include +#include "config.h" +#if defined(HAVE_CUDA) +#include +#define CUBLASAPI +#include +#include +#endif #include +#include #define ADD_ATTRIBUTE(_type, _name, _default) \ _type _name = _default; \