Fix AniaBug #1: cublasCreate after context setting
This commit is contained in:
parent
23ad87214f
commit
5483325626
@ -24,7 +24,7 @@
|
|||||||
|
|
||||||
using namespace atrip;
|
using namespace atrip;
|
||||||
#if defined(HAVE_CUDA)
|
#if defined(HAVE_CUDA)
|
||||||
#include <cuda.h>
|
#include <atrip/CUDA.hpp>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template <typename F> bool RankMap<F>::RANK_ROUND_ROBIN;
|
template <typename F> bool RankMap<F>::RANK_ROUND_ROBIN;
|
||||||
@ -49,11 +49,6 @@ void Atrip::init(MPI_Comm world) {
|
|||||||
Atrip::communicator = world;
|
Atrip::communicator = world;
|
||||||
MPI_Comm_rank(world, (int*)&Atrip::rank);
|
MPI_Comm_rank(world, (int*)&Atrip::rank);
|
||||||
MPI_Comm_size(world, (int*)&Atrip::np);
|
MPI_Comm_size(world, (int*)&Atrip::np);
|
||||||
|
|
||||||
#if defined(HAVE_CUDA)
|
|
||||||
Atrip::cuda.status = cublasCreate(&Atrip::cuda.handle);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename F>
|
template <typename F>
|
||||||
@ -71,18 +66,24 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
|||||||
|
|
||||||
#if defined(HAVE_CUDA)
|
#if defined(HAVE_CUDA)
|
||||||
int ngcards;
|
int ngcards;
|
||||||
|
_CHECK_CUDA_SUCCESS("initializing cuda",
|
||||||
|
cuInit(0));
|
||||||
|
_CHECK_CUDA_SUCCESS("getting device count",
|
||||||
|
cuDeviceGetCount(&ngcards));
|
||||||
const auto clusterInfo = getClusterInfo(Atrip::communicator);
|
const auto clusterInfo = getClusterInfo(Atrip::communicator);
|
||||||
cuDeviceGetCount(&ngcards);
|
|
||||||
LOG(0,"Atrip") << "ngcards: " << ngcards << "\n";
|
LOG(0,"Atrip") << "ngcards: " << ngcards << "\n";
|
||||||
if (clusterInfo.ranksPerNode > ngcards) {
|
if (clusterInfo.ranksPerNode > ngcards) {
|
||||||
std::cerr << "ATRIP: You are running on more ranks per node than the number of graphic cards\n"
|
const auto msg
|
||||||
<< "You have " << ngcards << " cards at your disposal\n";
|
= _FORMAT("ATRIP: You are running on more ranks per node than the number of graphic cards\n"
|
||||||
throw "";
|
"You have %d cards at your disposal\n", ngcards);
|
||||||
}
|
std::cerr << msg;
|
||||||
if (clusterInfo.ranksPerNode < ngcards) {
|
throw msg;
|
||||||
std::cerr << "You have " << ngcards << " cards at your disposal\n"
|
} else if (clusterInfo.ranksPerNode < ngcards) {
|
||||||
<< "You will be only using " << clusterInfo.ranksPerNode
|
const auto msg
|
||||||
<< ", i.e., the nubmer of ranks.\n";
|
= _FORMAT("You have %d cards at your disposal.\n"
|
||||||
|
"You will be only using %d, i.e, the number of ranks\n",
|
||||||
|
ngcards, clusterInfo.ranksPerNode);
|
||||||
|
std::cerr << msg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -94,16 +95,27 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
|||||||
struct { struct { size_t free, total; } avail; size_t total; } memory;
|
struct { struct { size_t free, total; } avail; size_t total; } memory;
|
||||||
char *name = (char*)malloc(256);
|
char *name = (char*)malloc(256);
|
||||||
|
|
||||||
|
// - TODO :: we should check that the assignment of graphic cards
|
||||||
|
// to nodes works as expected, i.e., node k should get from 0
|
||||||
|
// to ngcards with the formula =rank % ngcards=.
|
||||||
|
|
||||||
// set current device
|
// set current device
|
||||||
cuDeviceGet(&dev, rank);
|
_CHECK_CUDA_SUCCESS("getting device for index <rank>",
|
||||||
cuCtxCreate(&ctx, 0, dev);
|
cuDeviceGet(&dev, rank % ngcards));
|
||||||
cuCtxSetCurrent(ctx);
|
_CHECK_CUDA_SUCCESS("creating a cuda context",
|
||||||
|
cuCtxCreate(&ctx, 0, dev));
|
||||||
|
_CHECK_CUDA_SUCCESS("setting the context",
|
||||||
|
cuCtxSetCurrent(ctx));
|
||||||
|
|
||||||
// get information of the device
|
// get information of the device
|
||||||
cuDeviceGetProperties(&prop, dev);
|
_CHECK_CUDA_SUCCESS("getting properties of current device",
|
||||||
cuMemGetInfo(&memory.avail.free, &memory.avail.total);
|
cuDeviceGetProperties(&prop, dev));
|
||||||
cuDeviceGetName(name, 256, dev);
|
_CHECK_CUDA_SUCCESS("getting memory information",
|
||||||
cuDeviceTotalMem(&memory.total, dev);
|
cuMemGetInfo(&memory.avail.free, &memory.avail.total));
|
||||||
|
_CHECK_CUDA_SUCCESS("getting name",
|
||||||
|
cuDeviceGetName(name, 256, dev));
|
||||||
|
_CHECK_CUDA_SUCCESS("getting total memory",
|
||||||
|
cuDeviceTotalMem(&memory.total, dev));
|
||||||
|
|
||||||
printf("\n"
|
printf("\n"
|
||||||
"CUDA CARD RANK %d\n"
|
"CUDA CARD RANK %d\n"
|
||||||
@ -124,6 +136,10 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
|||||||
memory.total / 1024.0 / 1024.0 / 1024.0
|
memory.total / 1024.0 / 1024.0 / 1024.0
|
||||||
);
|
);
|
||||||
std::free((void*)name);
|
std::free((void*)name);
|
||||||
|
|
||||||
|
_CHECK_CUBLAS_SUCCESS("creating a cublas handle",
|
||||||
|
cublasCreate(&Atrip::cuda.handle));
|
||||||
|
|
||||||
}
|
}
|
||||||
MPI_Barrier(universe);
|
MPI_Barrier(universe);
|
||||||
}
|
}
|
||||||
@ -163,17 +179,27 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
|||||||
|
|
||||||
#if defined(HAVE_CUDA)
|
#if defined(HAVE_CUDA)
|
||||||
DataPtr<F> Tai, epsi, epsa;
|
DataPtr<F> Tai, epsi, epsa;
|
||||||
//TODO: free memory pointers in the end of the algorithm
|
|
||||||
cuMemAlloc(&Tai, sizeof(F) * _Tai.size());
|
|
||||||
cuMemAlloc(&epsi, sizeof(F) * _epsi.size());
|
|
||||||
cuMemAlloc(&epsa, sizeof(F) * _epsa.size());
|
|
||||||
|
|
||||||
cuMemcpyHtoD(Tai, (void*)_Tai.data(), sizeof(F) * _Tai.size());
|
// TODO: free memory pointers at the end of the algorithm
|
||||||
cuMemcpyHtoD(epsi,(void*)_epsi.data(), sizeof(F) * _epsi.size());
|
|
||||||
cuMemcpyHtoD(epsa, (void*)_epsa.data(), sizeof(F) * _epsa.size());
|
|
||||||
|
|
||||||
cuMemAlloc(&Tijk, sizeof(F) * No * No * No);
|
_CHECK_CUDA_SUCCESS("Tai",
|
||||||
cuMemAlloc(&Zijk, sizeof(F) * No * No * No);
|
cuMemAlloc(&Tai, sizeof(F) * _Tai.size()));
|
||||||
|
_CHECK_CUDA_SUCCESS("epsi",
|
||||||
|
cuMemAlloc(&epsi, sizeof(F) * _epsi.size()));
|
||||||
|
_CHECK_CUDA_SUCCESS("epsa",
|
||||||
|
cuMemAlloc(&epsa, sizeof(F) * _epsa.size()));
|
||||||
|
|
||||||
|
_CHECK_CUDA_SUCCESS("memcpy Tai",
|
||||||
|
cuMemcpyHtoD(Tai, (void*)_Tai.data(), sizeof(F) * _Tai.size()));
|
||||||
|
_CHECK_CUDA_SUCCESS("memcpy epsi",
|
||||||
|
cuMemcpyHtoD(epsi,(void*)_epsi.data(), sizeof(F) * _epsi.size()));
|
||||||
|
_CHECK_CUDA_SUCCESS("memcpy epsa",
|
||||||
|
cuMemcpyHtoD(epsa, (void*)_epsa.data(), sizeof(F) * _epsa.size()));
|
||||||
|
|
||||||
|
_CHECK_CUDA_SUCCESS("Tijk",
|
||||||
|
cuMemAlloc(&Tijk, sizeof(F) * No * No * No));
|
||||||
|
_CHECK_CUDA_SUCCESS("Zijk",
|
||||||
|
cuMemAlloc(&Zijk, sizeof(F) * No * No * No));
|
||||||
#else
|
#else
|
||||||
std::vector<F> &Tai = _Tai, &epsi = _epsi, &epsa = _epsa;
|
std::vector<F> &Tai = _Tai, &epsi = _epsi, &epsa = _epsa;
|
||||||
Zijk = (DataFieldType<F>*)malloc(No*No*No * sizeof(DataFieldType<F>));
|
Zijk = (DataFieldType<F>*)malloc(No*No*No * sizeof(DataFieldType<F>));
|
||||||
@ -266,8 +292,8 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
|||||||
|
|
||||||
auto const isFakeTuple
|
auto const isFakeTuple
|
||||||
= [&tuplesList, distribution](size_t const i) {
|
= [&tuplesList, distribution](size_t const i) {
|
||||||
return distribution->tupleIsFake(tuplesList[i]);
|
return distribution->tupleIsFake(tuplesList[i]);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
using Database = typename Slice<F>::Database;
|
using Database = typename Slice<F>::Database;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user