Improve cuda m4
This commit is contained in:
parent
f06cd7f562
commit
4101c89907
@ -17,7 +17,7 @@ int main(int argc, char** argv) {
|
||||
MPI_Init(&argc, &argv);
|
||||
|
||||
size_t checkpoint_it;
|
||||
int no(10), nv(10), itMod(-1), percentageMod(10);
|
||||
int no(10), nv(100), itMod(-1), percentageMod(10);
|
||||
float checkpoint_percentage;
|
||||
bool
|
||||
nochrono(false), barrier(false), rankRoundRobin(false),
|
||||
|
||||
@ -21,7 +21,7 @@ ac_save_LDFLAGS="$LDFLAGS"
|
||||
CXXFLAGS="${CXXFLAGS} $CUDA_CXXFLAGS"
|
||||
LDFLAGS="${LDFLAGS} $CUDA_LDFLAGS"
|
||||
|
||||
AC_MSG_CHECKING([that cublas works with $CXX])
|
||||
AC_MSG_CHECKING([that cublas works with the CXX compiler])
|
||||
|
||||
AC_COMPILE_IFELSE([AC_LANG_SOURCE([_ATRIP_CUBLAS_SOURCE])],
|
||||
[
|
||||
@ -33,6 +33,16 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([_ATRIP_CUBLAS_SOURCE])],
|
||||
AC_MSG_ERROR([Does not work!])
|
||||
])
|
||||
|
||||
dnl Compile, link and run the _ATRIP_CUDA_MEMORY_OF_DEVICES test program.
dnl The first branch is taken when the program exits with status zero, the
dnl second one when it cannot be built or exits with a non-zero status.
dnl Every AC_MSG_CHECKING is paired with an AC_MSG_RESULT so that the
dnl "checking ..." line in the configure output is always terminated.
AC_MSG_CHECKING([that the detected graphic cards have memory available])
AC_RUN_IFELSE([AC_LANG_SOURCE([_ATRIP_CUDA_MEMORY_OF_DEVICES])],
              [
              atrip_success=yes
              AC_MSG_RESULT([yes])
              ],
              [
              atrip_success=no
              AC_MSG_RESULT([no])
              AC_MSG_ERROR([An available device reports zero memory available!])
              ])
|
||||
|
||||
CXX="$ac_save_CXX"
|
||||
CXXFLAGS="$ac_save_CXXFLAGS"
|
||||
LDFLAGS="$ac_save_LDFLAGS"
|
||||
@ -79,3 +89,61 @@ int main() {
|
||||
return 0;
|
||||
}
|
||||
]])
|
||||
|
||||
|
||||
dnl Source of a configure-time test program: it initialises MPI and the
dnl CUDA driver API, prints a short report for every visible device and
dnl exits with status 1 when a driver call fails or a device reports zero
dnl free or total memory, and with status 0 otherwise.
dnl
dnl NOTE: the program text goes through m4 quoting and an unquoted shell
dnl here-document, so it must not contain square brackets, dollar signs,
dnl backticks or backslash-newline continuations.
m4_define([_ATRIP_CUDA_MEMORY_OF_DEVICES], [[
#include <mpi.h>
#include <iostream>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <string.h>
#include <cuda.h>

// Report a failed driver call on stderr.
// Returns true when status is CUDA_SUCCESS, false otherwise.
static bool cuda_ok(CUresult status, const char *what) {
  if (status == CUDA_SUCCESS) return true;
  const char *msg = NULL;
  if (cuGetErrorString(status, &msg) != CUDA_SUCCESS || msg == NULL)
    msg = "unknown error";
  fprintf(stderr, "%s failed: %s\n", what, msg);
  return false;
}

// Query and print the properties of the device with the given ordinal.
// Returns 0 when every query succeeds and the device reports non-zero
// free and total memory, 1 otherwise.
static int check_device(int ordinal) {
  CUdevice dev;
  CUcontext ctx;

  if (!cuda_ok(cuDeviceGet(&dev, ordinal), "cuDeviceGet")) return 1;
  // cuMemGetInfo needs a current context on the device.
  if (!cuda_ok(cuCtxCreate(&ctx, 0, dev), "cuCtxCreate")) return 1;

  CUdevprop_st prop;
  size_t free_bytes = 0, total = 0, total2 = 0;
  // Heap buffer on purpose: an array declaration would need square
  // brackets, which are the quote characters of the enclosing m4 macro.
  char *name = (char*)malloc(256);

  bool ok = name != NULL
    && cuda_ok(cuCtxSetCurrent(ctx), "cuCtxSetCurrent")
    && cuda_ok(cuDeviceGetProperties(&prop, dev), "cuDeviceGetProperties")
    && cuda_ok(cuMemGetInfo(&free_bytes, &total), "cuMemGetInfo")
    && cuda_ok(cuDeviceGetName(name, 256, dev), "cuDeviceGetName")
    && cuda_ok(cuDeviceTotalMem(&total2, dev), "cuDeviceTotalMem");

  if (ok) {
    printf("\n"
           "CUDA CARD RANK %d\n"
           "=================\n"
           "\tname: %s\n"
           "\tShared Mem Per Block (KB): %f\n"
           "\tFree/Total mem (GB): %f/%f\n"
           "\ttotal2 mem (GB): %f\n"
           "\n",
           dev,
           name,
           prop.sharedMemPerBlock / 1024.0,
           free_bytes / 1024.0 / 1024.0 / 1024.0,
           total / 1024.0 / 1024.0 / 1024.0,
           total2 / 1024.0 / 1024.0 / 1024.0
           );

    if (free_bytes == 0 || total == 0 || total2 == 0) {
      fprintf(stderr, "device %d reports zero memory\n", ordinal);
      ok = false;
    }
  }

  // Release per-device resources on the success and the failure path.
  free(name);
  cuCtxDestroy(ctx);

  return ok ? 0 : 1;
}

int main() {
  MPI_Init(NULL, NULL);

  int status = 0;
  int ngcards = 0;

  // The driver API must be initialised before any other driver call.
  if (!cuda_ok(cuInit(0), "cuInit")
      || !cuda_ok(cuDeviceGetCount(&ngcards), "cuDeviceGetCount"))
    status = 1;

  // Stop at the first device that fails the check.
  for (int ordinal = 0; status == 0 && ordinal < ngcards; ordinal++)
    status = check_device(ordinal);

  MPI_Finalize();
  return status;
}
]])
|
||||
|
||||
Loading…
Reference in New Issue
Block a user