Initial compiling implementation of the energy kernel
This commit is contained in:
@@ -11,11 +11,22 @@
|
||||
// Function-qualifier macros.
// When HAVE_CUDA is defined and the translation unit is processed by a CUDA
// compiler (__CUDACC__ defined) they expand to the matching CUDA qualifiers;
// otherwise the three __MAYBE_*__ macros expand to nothing and __INLINE__
// expands to the plain `inline` keyword.
#if defined(HAVE_CUDA) && defined(__CUDACC__)
|
||||
# define __MAYBE_GLOBAL__ __global__
|
||||
# define __MAYBE_DEVICE__ __device__
|
||||
# define __MAYBE_HOST__ __host__
|
||||
# define __INLINE__ __inline__
|
||||
// Fallback branch: no CUDA qualifiers are emitted.
#else
|
||||
# define __MAYBE_GLOBAL__
|
||||
# define __MAYBE_DEVICE__
|
||||
# define __MAYBE_HOST__
|
||||
# define __INLINE__ inline
|
||||
#endif
|
||||
|
||||
/*
 * ACC_FUNCALL(fname, i, j, ...) invokes fname with the trailing arguments.
 *
 * With HAVE_CUDA defined and a CUDA compiler in use (__CUDACC__), the call is
 * a kernel launch using (i) and (j) as the first two launch-configuration
 * arguments. In every other case i and j are ignored and fname is called
 * directly.
 *
 * The guard deliberately mirrors the one used for __MAYBE_GLOBAL__: when
 * __CUDACC__ is not defined that macro expands to nothing, so the functions
 * are ordinary functions and the <<<...>>> syntax would not even parse.
 */
#if defined(HAVE_CUDA) && defined(__CUDACC__)
#define ACC_FUNCALL(fname, i, j, ...) fname<<<(i), (j)>>>(__VA_ARGS__)
#else
#define ACC_FUNCALL(fname, i, j, ...) fname(__VA_ARGS__)
#endif /* defined(HAVE_CUDA) && defined(__CUDACC__) */
|
||||
|
||||
|
||||
#define _CHECK_CUDA_SUCCESS(message, ...) \
|
||||
do { \
|
||||
CUresult result = __VA_ARGS__; \
|
||||
|
||||
@@ -23,6 +23,8 @@
|
||||
#include<thrust/device_vector.h>
|
||||
#endif
|
||||
|
||||
#include<atrip/CUDA.hpp>
|
||||
|
||||
|
||||
namespace atrip {
|
||||
using ABCTuple = std::array<size_t, 3>;
|
||||
@@ -32,21 +34,25 @@ using ABCTuples = std::vector<ABCTuple>;
|
||||
|
||||
// [[file:~/cuda/atrip/atrip.org::*Energy][Energy:1]]
|
||||
template <typename F=double>
|
||||
double getEnergyDistinct
|
||||
__MAYBE_GLOBAL__
|
||||
void getEnergyDistinct
|
||||
( F const epsabc
|
||||
, size_t const No
|
||||
, F* const epsi
|
||||
, F* const Tijk
|
||||
, F* const Zijk
|
||||
, double* energy
|
||||
);
|
||||
|
||||
template <typename F=double>
|
||||
double getEnergySame
|
||||
__MAYBE_GLOBAL__
|
||||
void getEnergySame
|
||||
( F const epsabc
|
||||
, size_t const No
|
||||
, F* const epsi
|
||||
, F* const Tijk
|
||||
, F* const Zijk
|
||||
, double* energy
|
||||
);
|
||||
// Energy:1 ends here
|
||||
|
||||
|
||||
171
include/atrip/Operations.hpp
Normal file
171
include/atrip/Operations.hpp
Normal file
@@ -0,0 +1,171 @@
|
||||
// Copyright 2022 Alejandro Gallo
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifndef OPERATIONS_HPP_
|
||||
#define OPERATIONS_HPP_
|
||||
|
||||
#include <atrip/CUDA.hpp>
|
||||
#include <atrip/Types.hpp>
|
||||
#include <atrip/Complex.hpp>
|
||||
|
||||
namespace atrip {
|
||||
namespace acc {
|
||||
|
||||
// cuda kernels
|
||||
|
||||
template <typename F>
|
||||
__MAYBE_GLOBAL__
|
||||
void zeroing(F* a, size_t n) {
|
||||
F zero = {0};
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
a[i] = zero;
|
||||
}
|
||||
}
|
||||
|
||||
////
|
||||
template <typename F>
|
||||
__MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
|
||||
F maybeConjugateScalar(const F &a) { return a; }
|
||||
|
||||
#if defined(HAVE_CUDA)
// cuDoubleComplex case: keep the x member and flip the sign of the y member.
template <>
__MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
cuDoubleComplex maybeConjugateScalar(const cuDoubleComplex &a) {
  cuDoubleComplex conjugated = a;
  conjugated.y = -a.y;
  return conjugated;
}
#endif /* defined(HAVE_CUDA) */
|
||||
|
||||
template <typename F>
|
||||
__MAYBE_DEVICE__ __MAYBE_HOST__
|
||||
void maybeConjugate(F* to, F* from, size_t n) {
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
to[i] = maybeConjugateScalar<F>(from[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename F>
|
||||
__MAYBE_DEVICE__ __MAYBE_HOST__
|
||||
void reorder(F* to, F* from, size_t size, size_t I, size_t J, size_t K) {
|
||||
size_t idx = 0;
|
||||
const size_t IDX = I + J*size + K*size*size;
|
||||
for (size_t k = 0; k < size; k++)
|
||||
for (size_t j = 0; j < size; j++)
|
||||
for (size_t i = 0; i < size; i++, idx++)
|
||||
to[idx] += from[IDX];
|
||||
}
|
||||
|
||||
// Multiplication operation
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename F>
|
||||
__MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
|
||||
F prod(const F &a, const F &b) { return a * b; }
|
||||
|
||||
#if defined(HAVE_CUDA)
// cuDoubleComplex product, delegated to cuCmul.
template <>
__MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
cuDoubleComplex prod(const cuDoubleComplex &a, const cuDoubleComplex &b) {
  const cuDoubleComplex product = cuCmul(a, b);
  return product;
}
#endif /* defined(HAVE_CUDA) */
|
||||
|
||||
// Division operation
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename F>
|
||||
__MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
|
||||
F div(const F &a, const F &b) { return a / b; }
|
||||
|
||||
#if defined(HAVE_CUDA)
// cuDoubleComplex quotient a / b, delegated to cuCdiv.
template <>
__MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
cuDoubleComplex div(const cuDoubleComplex &a, const cuDoubleComplex &b) {
  const cuDoubleComplex quotient = cuCdiv(a, b);
  return quotient;
}
#endif /* defined(HAVE_CUDA) */
|
||||
|
||||
// Real part
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename F>
|
||||
__MAYBE_HOST__ __INLINE__
|
||||
double real(F &a) { return std::real(a); }
|
||||
|
||||
template <>
|
||||
__MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
|
||||
double real(double &a) {
|
||||
return a;
|
||||
}
|
||||
|
||||
#if defined(HAVE_CUDA)
// Real part of a cuDoubleComplex, delegated to cuCreal.
template <>
__MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
double real(cuDoubleComplex &a) {
  const double realPart = cuCreal(a);
  return realPart;
}
#endif /* defined(HAVE_CUDA) */
|
||||
|
||||
// Subtraction operator
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename F>
|
||||
__MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
|
||||
F sub(const F &a, const F &b) { return a - b; }
|
||||
|
||||
#if defined(HAVE_CUDA)
// cuDoubleComplex difference a - b, delegated to cuCsub.
template <>
__MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
cuDoubleComplex sub(const cuDoubleComplex &a, const cuDoubleComplex &b) {
  const cuDoubleComplex difference = cuCsub(a, b);
  return difference;
}
#endif /* defined(HAVE_CUDA) */
|
||||
|
||||
// Addition operator
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename F>
|
||||
__MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
|
||||
F add(const F &a, const F &b) { return a + b; }
|
||||
|
||||
#if defined(HAVE_CUDA)
// cuDoubleComplex sum a + b, delegated to cuCadd.
template <>
__MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
cuDoubleComplex add(const cuDoubleComplex &a, const cuDoubleComplex &b) {
  const cuDoubleComplex total = cuCadd(a, b);
  return total;
}
#endif /* defined(HAVE_CUDA) */
|
||||
|
||||
// Sum in place operator
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename F>
|
||||
__MAYBE_DEVICE__ __MAYBE_HOST__
|
||||
void sum_in_place(F* to, const F* from) { *to += *from; }
|
||||
|
||||
#if defined(HAVE_CUDA)
// cuDoubleComplex case: the x and y members are accumulated separately,
// x first and then y.
template <>
__MAYBE_DEVICE__ __MAYBE_HOST__
void sum_in_place(cuDoubleComplex* to, const cuDoubleComplex* from) {
  cuDoubleComplex &accumulator = *to;
  const cuDoubleComplex &increment = *from;
  accumulator.x += increment.x;
  accumulator.y += increment.y;
}
#endif /* defined(HAVE_CUDA) */
|
||||
|
||||
|
||||
} // namespace acc
|
||||
} // namespace atrip
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user