From 77e1aaabeb73c101335e3eba5f7d8e137f49fc13 Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Thu, 5 Jan 2023 00:06:37 +0100 Subject: [PATCH] Add bureaucracy for openacc in autotools --- .github/workflows/main.yml | 1 + configure.ac | 10 ++++- etc/env/raven/cuda-openacc | 53 +++++++++++++++++++++++++ etc/m4/atrip_openacc.m4 | 70 +++++++++++++++++++++++++++++++++ etc/m4/ax_cxx_compile_stdcxx.m4 | 18 +++++---- 5 files changed, 143 insertions(+), 9 deletions(-) create mode 100644 etc/env/raven/cuda-openacc create mode 100644 etc/m4/atrip_openacc.m4 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d214457..64c89a3 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,6 +18,7 @@ jobs: strategy: matrix: compiler: + - gcc12 - gcc11 - gcc10 - gcc9 diff --git a/configure.ac b/configure.ac index eb75fbe..827d0a7 100644 --- a/configure.ac +++ b/configure.ac @@ -48,8 +48,9 @@ AM_CONDITIONAL([WITH_CLANG_CHECK], [test x${clang_check} = xYES]) AC_ARG_ENABLE([cuda], [AS_HELP_STRING([--enable-cuda], [Build with cuda])], - [WITH_CUDA=yes], - [WITH_CUDA=no]) + [WITH_CUDA=yes + WITH_OPENACC=yes], + [WITH_CUDA=no]) AC_ARG_VAR([NVCC], [Path to the nvidia cuda compiler.]) AC_ARG_VAR([CUDA_LDFLAGS], [LDFLAGS to find libraries -lcuda, -lcudart, -lcublas.]) AC_ARG_VAR([CUDA_CXXFLAGS], [CXXFLAGS to find the CUDA headers]) @@ -176,6 +177,11 @@ fi dnl CUDA NVIDIA ----------------------------------------------------------- AM_CONDITIONAL([WITH_CUDA], [test x${WITH_CUDA} = xyes]) +AM_CONDITIONAL([WITH_OPENACC], [test x${WITH_OPENACC} = xyes]) +if test x${WITH_OPENACC} = xyes; then +ATRIP_OPENACC([CXXFLAGS="${CXXFLAGS} -fopenacc"], + [AC_MSG_ERROR([I can't use -fopenacc, aborting])]) +fi if test x${WITH_CUDA} = xyes; then AC_MSG_RESULT([ CUDA SUPPORT IS ENABLED diff --git a/etc/env/raven/cuda-openacc b/etc/env/raven/cuda-openacc new file mode 100644 index 0000000..6fc5308 --- /dev/null +++ b/etc/env/raven/cuda-openacc @@ -0,0 
+1,53 @@ +mods=( + cuda/11.6 + gcc/12 + openmpi + mkl/2020.4 + autoconf/2.69 + automake/1.15 + libtool/2.4.6 +) + + +module purge +module load ${mods[@]} +LIB_PATH="${CUDA_HOME}/lib64" +export CUDA_ROOT=${CUDA_HOME} +export CUDA_LDFLAGS="-L${LIB_PATH} -lcuda -L${LIB_PATH} -lcudart -L${LIB_PATH} -lcublas" +export CUDA_CXXFLAGS="-I${CUDA_HOME}/include" + +export LD_LIBRARY_PATH="${MKL_HOME}/lib/intel64:${LD_LIBRARY_PATH}" + +ls ${LIB_PATH}/libcublas.so +ls ${LIB_PATH}/libcudart.so + +cat < +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +dnl ATRIP_OPENACC([ACTION-IF-OK], [ACTION-IF-FAILED]) +dnl +dnl Compile _ATRIP_OPENACC_SOURCE with -fopenacc temporarily appended to +dnl CXXFLAGS, so that the flag named in the message is the flag under test. +dnl CXXFLAGS is restored before either action runs, so ACTION-IF-OK is the +dnl place to add -fopenacc permanently. When ACTION-IF-FAILED is empty, +dnl configure aborts with an error. +dnl NOTE(review): assumes the current AC_LANG is C++ (the caller appends to +dnl CXXFLAGS) - confirm. +AC_DEFUN([ATRIP_OPENACC], +[ +AC_MSG_CHECKING([that the compiler works with the -fopenacc]) +atrip_openacc_save_CXXFLAGS="${CXXFLAGS}" +CXXFLAGS="${CXXFLAGS} -fopenacc" +AC_COMPILE_IFELSE([AC_LANG_SOURCE([_ATRIP_OPENACC_SOURCE])], + [ + CXXFLAGS="${atrip_openacc_save_CXXFLAGS}" + AC_MSG_RESULT([yes]) + $1 + ], + [ + CXXFLAGS="${atrip_openacc_save_CXXFLAGS}" + AC_MSG_RESULT([no]) + m4_default([$2], [AC_MSG_ERROR([the compiler does not accept -fopenacc])]) + ]) +])dnl DEFUN + +m4_define([_ATRIP_OPENACC_SOURCE], [[ +#include +#include +#include + +#define SIZE 10 + +int main(int argc, char **argv) { + float matrix[SIZE * SIZE]; + float result[SIZE * SIZE]; + + // Initialize the matrix with random values + for (int i = 0; i < SIZE * SIZE; i++) { + matrix[i] = rand() / (float)RAND_MAX; + } + +#pragma acc data \ + copy(matrix[0:SIZE * SIZE]) \ + copyout(result[0:SIZE * SIZE]) + { + // Calculate the matrix multiplication +#pragma acc parallel loop collapse(2) + for (int i = 0; i < SIZE; i++) { + for (int j = 0; j < SIZE; j++) { + float sum = 0.0f; + for (int k = 0; k < SIZE; k++) { + sum += matrix[i * SIZE + k] * matrix[j * SIZE + k]; + } + result[i * SIZE + j] = sum; + } + } + } + return 0; +} +]]) diff --git a/etc/m4/ax_cxx_compile_stdcxx.m4 b/etc/m4/ax_cxx_compile_stdcxx.m4 index 51a3505..a3d964c 100644 --- a/etc/m4/ax_cxx_compile_stdcxx.m4 +++ b/etc/m4/ax_cxx_compile_stdcxx.m4 @@ -43,7 +43,7 @@ # and this notice are preserved. This file is offered as-is, without any # warranty. 
-#serial 14 +#serial 15 dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro dnl (serial version number 13). @@ -189,7 +189,11 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[ #error "This is not a C++ compiler" -#elif __cplusplus < 201103L +// MSVC always sets __cplusplus to 199711L in older versions; newer versions +// only set it correctly if /Zc:__cplusplus is specified as well as a +// /std:c++NN switch: +// https://devblogs.microsoft.com/cppblog/msvc-now-correctly-reports-__cplusplus/ +#elif __cplusplus < 201103L && !defined _MSC_VER #error "This is not a C++11 compiler" @@ -480,7 +484,7 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_14], [[ #error "This is not a C++ compiler" -#elif __cplusplus < 201402L +#elif __cplusplus < 201402L && !defined _MSC_VER #error "This is not a C++14 compiler" @@ -604,7 +608,7 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_17], [[ #error "This is not a C++ compiler" -#elif __cplusplus < 201703L +#elif __cplusplus < 201703L && !defined _MSC_VER #error "This is not a C++17 compiler" @@ -970,7 +974,7 @@ namespace cxx17 } // namespace cxx17 -#endif // __cplusplus < 201703L +#endif // __cplusplus < 201703L && !defined _MSC_VER ]]) @@ -983,7 +987,7 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_20], [[ #error "This is not a C++ compiler" -#elif __cplusplus < 202002L +#elif __cplusplus < 202002L && !defined _MSC_VER #error "This is not a C++20 compiler" @@ -1000,6 +1004,6 @@ namespace cxx20 } // namespace cxx20 -#endif // __cplusplus < 202002L +#endif // __cplusplus < 202002L && !defined _MSC_VER ]])