Fix AniaBug #1: cublasCreate after context setting

This commit is contained in:
Gallo Alejandro 2022-09-12 19:17:52 +02:00
parent 23ad87214f
commit 5483325626

View File

@ -24,7 +24,7 @@
using namespace atrip; using namespace atrip;
#if defined(HAVE_CUDA) #if defined(HAVE_CUDA)
#include <cuda.h> #include <atrip/CUDA.hpp>
#endif #endif
template <typename F> bool RankMap<F>::RANK_ROUND_ROBIN; template <typename F> bool RankMap<F>::RANK_ROUND_ROBIN;
@ -49,11 +49,6 @@ void Atrip::init(MPI_Comm world) {
Atrip::communicator = world; Atrip::communicator = world;
MPI_Comm_rank(world, (int*)&Atrip::rank); MPI_Comm_rank(world, (int*)&Atrip::rank);
MPI_Comm_size(world, (int*)&Atrip::np); MPI_Comm_size(world, (int*)&Atrip::np);
#if defined(HAVE_CUDA)
Atrip::cuda.status = cublasCreate(&Atrip::cuda.handle);
#endif
} }
template <typename F> template <typename F>
@ -71,18 +66,24 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
#if defined(HAVE_CUDA) #if defined(HAVE_CUDA)
int ngcards; int ngcards;
_CHECK_CUDA_SUCCESS("initializing cuda",
cuInit(0));
_CHECK_CUDA_SUCCESS("getting device count",
cuDeviceGetCount(&ngcards));
const auto clusterInfo = getClusterInfo(Atrip::communicator); const auto clusterInfo = getClusterInfo(Atrip::communicator);
cuDeviceGetCount(&ngcards);
LOG(0,"Atrip") << "ngcards: " << ngcards << "\n"; LOG(0,"Atrip") << "ngcards: " << ngcards << "\n";
if (clusterInfo.ranksPerNode > ngcards) { if (clusterInfo.ranksPerNode > ngcards) {
std::cerr << "ATRIP: You are running on more ranks per node than the number of graphic cards\n" const auto msg
<< "You have " << ngcards << " cards at your disposal\n"; = _FORMAT("ATRIP: You are running on more ranks per node than the number of graphic cards\n"
throw ""; "You have %d cards at your disposal\n", ngcards);
} std::cerr << msg;
if (clusterInfo.ranksPerNode < ngcards) { throw msg;
std::cerr << "You have " << ngcards << " cards at your disposal\n" } else if (clusterInfo.ranksPerNode < ngcards) {
<< "You will be only using " << clusterInfo.ranksPerNode const auto msg
<< ", i.e., the nubmer of ranks.\n"; = _FORMAT("You have %d cards at your disposal.\n"
"You will be only using %d, i.e, the number of ranks\n",
ngcards, clusterInfo.ranksPerNode);
std::cerr << msg;
} }
@ -94,16 +95,27 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
struct { struct { size_t free, total; } avail; size_t total; } memory; struct { struct { size_t free, total; } avail; size_t total; } memory;
char *name = (char*)malloc(256); char *name = (char*)malloc(256);
// - TODO :: we should check that the assignment of graphic cards
// to nodes works as expected, i.e., node k should get device indices
// from 0 to ngcards - 1 with the formula =rank % ngcards=.
// set current device // set current device
cuDeviceGet(&dev, rank); _CHECK_CUDA_SUCCESS("getting device for index <rank>",
cuCtxCreate(&ctx, 0, dev); cuDeviceGet(&dev, rank % ngcards));
cuCtxSetCurrent(ctx); _CHECK_CUDA_SUCCESS("creating a cuda context",
cuCtxCreate(&ctx, 0, dev));
_CHECK_CUDA_SUCCESS("setting the context",
cuCtxSetCurrent(ctx));
// get information of the device // get information of the device
cuDeviceGetProperties(&prop, dev); _CHECK_CUDA_SUCCESS("getting properties of current device",
cuMemGetInfo(&memory.avail.free, &memory.avail.total); cuDeviceGetProperties(&prop, dev));
cuDeviceGetName(name, 256, dev); _CHECK_CUDA_SUCCESS("getting memory information",
cuDeviceTotalMem(&memory.total, dev); cuMemGetInfo(&memory.avail.free, &memory.avail.total));
_CHECK_CUDA_SUCCESS("getting name",
cuDeviceGetName(name, 256, dev));
_CHECK_CUDA_SUCCESS("getting total memory",
cuDeviceTotalMem(&memory.total, dev));
printf("\n" printf("\n"
"CUDA CARD RANK %d\n" "CUDA CARD RANK %d\n"
@ -124,6 +136,10 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
memory.total / 1024.0 / 1024.0 / 1024.0 memory.total / 1024.0 / 1024.0 / 1024.0
); );
std::free((void*)name); std::free((void*)name);
_CHECK_CUBLAS_SUCCESS("creating a cublas handle",
cublasCreate(&Atrip::cuda.handle));
} }
MPI_Barrier(universe); MPI_Barrier(universe);
} }
@ -163,17 +179,27 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
#if defined(HAVE_CUDA) #if defined(HAVE_CUDA)
DataPtr<F> Tai, epsi, epsa; DataPtr<F> Tai, epsi, epsa;
//TODO: free memory pointers in the end of the algorithm
cuMemAlloc(&Tai, sizeof(F) * _Tai.size());
cuMemAlloc(&epsi, sizeof(F) * _epsi.size());
cuMemAlloc(&epsa, sizeof(F) * _epsa.size());
cuMemcpyHtoD(Tai, (void*)_Tai.data(), sizeof(F) * _Tai.size()); // TODO: free memory pointers in the end of the algorithm
cuMemcpyHtoD(epsi,(void*)_epsi.data(), sizeof(F) * _epsi.size());
cuMemcpyHtoD(epsa, (void*)_epsa.data(), sizeof(F) * _epsa.size());
cuMemAlloc(&Tijk, sizeof(F) * No * No * No); _CHECK_CUDA_SUCCESS("Tai",
cuMemAlloc(&Zijk, sizeof(F) * No * No * No); cuMemAlloc(&Tai, sizeof(F) * _Tai.size()));
_CHECK_CUDA_SUCCESS("epsi",
cuMemAlloc(&epsi, sizeof(F) * _epsi.size()));
_CHECK_CUDA_SUCCESS("epsa",
cuMemAlloc(&epsa, sizeof(F) * _epsa.size()));
_CHECK_CUDA_SUCCESS("memcpy Tai",
cuMemcpyHtoD(Tai, (void*)_Tai.data(), sizeof(F) * _Tai.size()));
_CHECK_CUDA_SUCCESS("memcpy epsi",
cuMemcpyHtoD(epsi,(void*)_epsi.data(), sizeof(F) * _epsi.size()));
_CHECK_CUDA_SUCCESS("memcpy epsa",
cuMemcpyHtoD(epsa, (void*)_epsa.data(), sizeof(F) * _epsa.size()));
_CHECK_CUDA_SUCCESS("Tijk",
cuMemAlloc(&Tijk, sizeof(F) * No * No * No));
_CHECK_CUDA_SUCCESS("Zijk",
cuMemAlloc(&Zijk, sizeof(F) * No * No * No));
#else #else
std::vector<F> &Tai = _Tai, &epsi = _epsi, &epsa = _epsa; std::vector<F> &Tai = _Tai, &epsi = _epsi, &epsa = _epsa;
Zijk = (DataFieldType<F>*)malloc(No*No*No * sizeof(DataFieldType<F>)); Zijk = (DataFieldType<F>*)malloc(No*No*No * sizeof(DataFieldType<F>));
@ -266,8 +292,8 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
auto const isFakeTuple auto const isFakeTuple
= [&tuplesList, distribution](size_t const i) { = [&tuplesList, distribution](size_t const i) {
return distribution->tupleIsFake(tuplesList[i]); return distribution->tupleIsFake(tuplesList[i]);
}; };
using Database = typename Slice<F>::Database; using Database = typename Slice<F>::Database;