From 4101c899071c5184ff756c6d0b866511e33d1250 Mon Sep 17 00:00:00 2001
From: Gallo Alejandro
Date: Thu, 11 Aug 2022 13:55:52 +0200
Subject: [PATCH] Improve cuda m4

---
 bench/test_main.cxx    |   2 +-
 etc/m4/atrip_cublas.m4 | 101 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 101 insertions(+), 2 deletions(-)

diff --git a/bench/test_main.cxx b/bench/test_main.cxx
index 63786ac..abc2a61 100644
--- a/bench/test_main.cxx
+++ b/bench/test_main.cxx
@@ -17,7 +17,7 @@ int main(int argc, char** argv) {
   MPI_Init(&argc, &argv);
 
   size_t checkpoint_it;
-  int no(10), nv(10), itMod(-1), percentageMod(10);
+  int no(10), nv(100), itMod(-1), percentageMod(10);
   float checkpoint_percentage;
   bool
     nochrono(false), barrier(false), rankRoundRobin(false),
diff --git a/etc/m4/atrip_cublas.m4 b/etc/m4/atrip_cublas.m4
index cfc182c..ab250e5 100644
--- a/etc/m4/atrip_cublas.m4
+++ b/etc/m4/atrip_cublas.m4
@@ -21,7 +21,7 @@ ac_save_LDFLAGS="$LDFLAGS"
 CXXFLAGS="${CXXFLAGS} $CUDA_CXXFLAGS"
 LDFLAGS="${LDFLAGS} $CUDA_LDFLAGS"
 
-AC_MSG_CHECKING([that cublas works with $CXX])
+AC_MSG_CHECKING([that cublas works with the CXX compiler])
 AC_COMPILE_IFELSE([AC_LANG_SOURCE([_ATRIP_CUBLAS_SOURCE])],
                   [
 
@@ -33,6 +33,16 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([_ATRIP_CUBLAS_SOURCE])],
                   AC_MSG_ERROR([Does not work!])
                   ])
 
+AC_MSG_CHECKING([that the detected graphic cards have memory available])
+AC_RUN_IFELSE([AC_LANG_SOURCE([_ATRIP_CUDA_MEMORY_OF_DEVICES])],
+              [
+              atrip_success=yes
+              ],
+              [
+              atrip_success=no
+              AC_MSG_ERROR([An available device reports zero memory available!])
+              ])
+
 CXX="$ac_save_CXX"
 CXXFLAGS="$ac_save_CXXFLAGS"
 LDFLAGS="$ac_save_LDFLAGS"
@@ -79,3 +89,92 @@ int main() {
   return 0;
 }
 ]])
+
+
+dnl Configure-time test program: prints the name and memory figures of
+dnl every visible CUDA device and exits non-zero when a driver call fails
+dnl or a device reports zero free or total memory.
+m4_define([_ATRIP_CUDA_MEMORY_OF_DEVICES], [[
+#include <mpi.h>
+#include <cuda.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// Report a failed CUDA driver call on stderr.
+// Returns 1 if status is an error and 0 if it is CUDA_SUCCESS.
+static int failed(CUresult status, const char *what) {
+  if (status == CUDA_SUCCESS) {
+    return 0;
+  }
+  fprintf(stderr, "%s failed with CUresult %d\n", what, (int)status);
+  return 1;
+}
+
+// Print the memory figures of the device with the given ordinal.
+// Returns 0 if every query succeeded and no figure is zero, otherwise 1.
+static int check_card(int ordinal) {
+  CUcontext ctx;
+  CUdevice dev;
+  CUdevprop prop;
+  size_t free_mem = 0, total = 0, total2 = 0;
+  int status = 1;
+  char *name = (char*)malloc(256);
+
+  if (name == NULL) {
+    return 1;
+  }
+
+  // cuCtxCreate makes the new context current, which cuMemGetInfo needs.
+  if (failed(cuDeviceGet(&dev, ordinal), "cuDeviceGet")
+      || failed(cuCtxCreate(&ctx, 0, dev), "cuCtxCreate")) {
+    free(name);
+    return 1;
+  }
+
+  if (!failed(cuDeviceGetProperties(&prop, dev), "cuDeviceGetProperties")
+      && !failed(cuMemGetInfo(&free_mem, &total), "cuMemGetInfo")
+      && !failed(cuDeviceGetName(name, 256, dev), "cuDeviceGetName")
+      && !failed(cuDeviceTotalMem(&total2, dev), "cuDeviceTotalMem")) {
+    printf("\n"
+           "CUDA CARD RANK %d\n"
+           "=================\n"
+           "\tname: %s\n"
+           "\tShared Mem Per Block (KB): %f\n"
+           "\tFree/Total mem (GB): %f/%f\n"
+           "\ttotal2 mem (GB): %f\n"
+           "\n",
+           dev,
+           name,
+           prop.sharedMemPerBlock / 1024.0,
+           free_mem / 1024.0 / 1024.0 / 1024.0,
+           total / 1024.0 / 1024.0 / 1024.0,
+           total2 / 1024.0 / 1024.0 / 1024.0);
+    status = (free_mem == 0 || total == 0 || total2 == 0);
+  }
+
+  // Release per-device resources so contexts do not pile up across cards.
+  cuCtxDestroy(ctx);
+  free(name);
+  return status;
+}
+
+int main() {
+  int ngcards = 0;
+  int status = 0;
+
+  MPI_Init(NULL, NULL);
+
+  // The driver API has to be initialised before any other cu* call.
+  if (failed(cuInit(0), "cuInit")
+      || failed(cuDeviceGetCount(&ngcards), "cuDeviceGetCount")) {
+    status = 1;
+  }
+
+  for (int card = 0; status == 0 && card < ngcards; card++) {
+    status = check_card(card);
+  }
+
+  MPI_Finalize();
+  return status;
+}
+]])