atrip/include/atrip/Operations.hpp

// Copyright 2022 Alejandro Gallo
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef OPERATIONS_HPP_
#define OPERATIONS_HPP_

#include <atrip/CUDA.hpp>
#include <atrip/Types.hpp>
#include <atrip/Complex.hpp>

namespace atrip {
namespace acc {

  // cuda kernels

  template <typename F>
  __MAYBE_GLOBAL__
  void zeroing(F* a, size_t n) {
    F zero = {0};
    for (size_t i = 0; i < n; i++) {
      a[i] = zero;
    }
  }

  ////
  template <typename F>
  __MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
  F maybeConjugateScalar(const F &a) { return a; }

#if defined(HAVE_CUDA)
  template <>
  __MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
  cuDoubleComplex maybeConjugateScalar(const cuDoubleComplex &a) {
    return {a.x, -a.y};
  }
#endif /*  defined(HAVE_CUDA) */

  template <typename F>
  __MAYBE_GLOBAL__
  void maybeConjugate(F* to, F* from, size_t n) {
    for (size_t i = 0; i < n; ++i) {
      to[i] = maybeConjugateScalar<F>(from[i]);
    }
  }


  template <typename F>
  __MAYBE_DEVICE__ __MAYBE_HOST__
  void reorder(F* to, F* from, size_t size, size_t I, size_t J, size_t K) {
    size_t idx = 0;
    const size_t IDX = I + J*size + K*size*size;
    for (size_t k = 0; k < size; k++)
    for (size_t j = 0; j < size; j++)
    for (size_t i = 0; i < size; i++, idx++)
      to[idx] += from[IDX];
  }

  // Multiplication operation
  //////////////////////////////////////////////////////////////////////////////

  template <typename F>
  __MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
  F prod(const F &a, const F &b) { return a * b; }

#if defined(HAVE_CUDA)
  template <>
  __MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
  cuDoubleComplex prod(const cuDoubleComplex &a, const cuDoubleComplex &b) {
    return cuCmul(a, b);
  }
#endif /*  defined(HAVE_CUDA) */

  // Division operation
  //////////////////////////////////////////////////////////////////////////////

  template <typename F>
  __MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
  F div(const F &a, const F &b) { return a / b; }

#if defined(HAVE_CUDA)
  template <>
  __MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
  cuDoubleComplex div(const cuDoubleComplex &a, const cuDoubleComplex &b) {
    return cuCdiv(a, b);
  }
#endif /*  defined(HAVE_CUDA) */

  // Real part
  //////////////////////////////////////////////////////////////////////////////

  template <typename F>
  __MAYBE_HOST__ __INLINE__
  double real(F &a) { return std::real(a); }

  template <>
  __MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
  double real(double &a) {
    return a;
  }

#if defined(HAVE_CUDA)
  template <>
  __MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
  double real(cuDoubleComplex &a) {
    return cuCreal(a);
  }
#endif /*  defined(HAVE_CUDA) */

  // Substraction operator
  //////////////////////////////////////////////////////////////////////////////

  template <typename F>
  __MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
  F sub(const F &a, const F &b) { return a - b; }

#if defined(HAVE_CUDA)
  template <>
  __MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
  cuDoubleComplex sub(const cuDoubleComplex &a,
                               const cuDoubleComplex &b) {
    return cuCsub(a, b);
  }
#endif /*  defined(HAVE_CUDA) */

  // Addition operator
  //////////////////////////////////////////////////////////////////////////////

  template <typename F>
  __MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
  F add(const F &a, const F &b) { return a + b; }

#if defined(HAVE_CUDA)
  template <>
  __MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__
  cuDoubleComplex add(const cuDoubleComplex &a, const cuDoubleComplex &b) {
    return cuCadd(a, b);
  }
#endif /*  defined(HAVE_CUDA) */

  // Sum in place operator
  //////////////////////////////////////////////////////////////////////////////

  template <typename F>
  __MAYBE_DEVICE__ __MAYBE_HOST__
  void sum_in_place(F* to, const F* from) { *to += *from; }

#if defined(HAVE_CUDA)
  template <>
  __MAYBE_DEVICE__ __MAYBE_HOST__
  void sum_in_place(cuDoubleComplex* to, const cuDoubleComplex* from) {
    to->x += from->x;
    to->y += from->y;
  }
#endif /*  defined(HAVE_CUDA) */


}  // namespace acc
}  // namespace atrip

#endif