Update in SliceUnion ATRIP_SOURCES_IN_GPU

This commit is contained in:
Alejandro Gallo 2022-12-05 17:55:23 +01:00
parent 26e2f2d109
commit 658397ebd7

View File

@ -18,6 +18,12 @@
#include <atrip/Slice.hpp> #include <atrip/Slice.hpp>
#include <atrip/RankMap.hpp> #include <atrip/RankMap.hpp>
// SOURCES_DATA(s): expands to the raw data pointer of one element `s` of
// the `sources` container.
//  - With ATRIP_SOURCES_IN_GPU defined, `sources` is declared as
//    std::vector< DataPtr<F> >, so the element is already the pointer and
//    is used as-is.
//  - Otherwise `sources` is std::vector< std::vector<F> >, so the pointer
//    is obtained through std::vector::data().
// The element count is not carried by the pointer form; call sites use
// `sliceSize` for that instead of `sources[i].size()`.
#if defined(ATRIP_SOURCES_IN_GPU)
# define SOURCES_DATA(s) (s)
#else
# define SOURCES_DATA(s) (s).data()
#endif
namespace atrip { namespace atrip {
// Prolog:1 ends here // Prolog:1 ends here
@ -195,7 +201,7 @@ template <typename F=double>
; ;
if (blank.info.state == Slice<F>::SelfSufficient) { if (blank.info.state == Slice<F>::SelfSufficient) {
#if defined(HAVE_CUDA) #if defined(HAVE_CUDA)
const size_t _size = sizeof(F) * sources[from.source].size(); const size_t _size = sizeof(F) * sliceSize;
// TODO: this is code duplication with downstairs // TODO: this is code duplication with downstairs
if (freePointers.size() == 0) { if (freePointers.size() == 0) {
std::stringstream stream; std::stringstream stream;
@ -212,12 +218,12 @@ template <typename F=double>
WITH_CHRONO("cuda:memcpy:self-sufficient", WITH_CHRONO("cuda:memcpy:self-sufficient",
_CHECK_CUDA_SUCCESS("copying mpi data to device", _CHECK_CUDA_SUCCESS("copying mpi data to device",
cuMemcpyHtoD(blank.data, cuMemcpyHtoD(blank.data,
(void*)sources[from.source].data(), (void*)SOURCES_DATA(sources[from.source]),
sizeof(F) * sources[from.source].size())); sizeof(F) * sliceSize));
)) ))
#else #else
blank.data = sources[from.source].data(); blank.data = SOURCES_DATA(sources[from.source]);
#endif #endif
} else { } else {
if (freePointers.size() == 0) { if (freePointers.size() == 0) {
@ -396,15 +402,18 @@ template <typename F=double>
, world(child_world) , world(child_world)
, universe(global_world) , universe(global_world)
, sliceLength(sliceLength_) , sliceLength(sliceLength_)
, sources(rankMap.nSources(), , sliceSize(std::accumulate(sliceLength.begin(),
std::vector<F>
(std::accumulate(sliceLength.begin(),
sliceLength.end(), sliceLength.end(),
1UL, std::multiplies<size_t>()))) 1UL, std::multiplies<size_t>()))
#if defined(ATRIP_SOURCES_IN_GPU)
, sources(rankMap.nSources())
#else
, sources(rankMap.nSources(),
std::vector<F>(sliceSize))
#endif
, name(name_) , name(name_)
, sliceTypes(sliceTypes_) , sliceTypes(sliceTypes_)
, sliceBuffers(nSliceBuffers) , sliceBuffers(nSliceBuffers)
//, slices(2 * sliceTypes.size(), Slice<F>{ sources[0].size() })
{ // constructor begin { // constructor begin
LOG(0,"Atrip") << "INIT SliceUnion: " << name << "\n"; LOG(0,"Atrip") << "INIT SliceUnion: " << name << "\n";
@ -412,7 +421,7 @@ template <typename F=double>
for (auto& ptr: sliceBuffers) { for (auto& ptr: sliceBuffers) {
#if defined(HAVE_CUDA) #if defined(HAVE_CUDA)
const CUresult error = const CUresult error =
cuMemAlloc(&ptr, sizeof(F) * sources[0].size()); cuMemAlloc(&ptr, sizeof(F) * sliceSize);
if (ptr == 0UL) { if (ptr == 0UL) {
throw "UNSUFICCIENT MEMORY ON THE GRAPHIC CARD FOR FREE POINTERS"; throw "UNSUFICCIENT MEMORY ON THE GRAPHIC CARD FOR FREE POINTERS";
} }
@ -423,12 +432,12 @@ template <typename F=double>
throw s.str(); throw s.str();
} }
#else #else
ptr = (DataPtr<F>)malloc(sizeof(F) * sources[0].size()); ptr = (DataPtr<F>)malloc(sizeof(F) * sliceSize);
#endif #endif
} }
slices slices
= std::vector<Slice<F>>(2 * sliceTypes.size(), { sources[0].size() }); = std::vector<Slice<F>>(2 * sliceTypes.size(), { sliceSize });
// TODO: think exactly ^------------------- about this number // TODO: think exactly ^------------------- about this number
// initialize the freePointers with the pointers to the buffers // initialize the freePointers with the pointers to the buffers
@ -441,12 +450,12 @@ template <typename F=double>
LOG(1,"Atrip") << "#slices " << slices.size() << "\n"; LOG(1,"Atrip") << "#slices " << slices.size() << "\n";
WITH_RANK << "#slices[0] " << slices[0].size << "\n"; WITH_RANK << "#slices[0] " << slices[0].size << "\n";
LOG(1,"Atrip") << "#sources " << sources.size() << "\n"; LOG(1,"Atrip") << "#sources " << sources.size() << "\n";
WITH_RANK << "#sources[0] " << sources[0].size() << "\n"; WITH_RANK << "#sources[0] " << sliceSize << "\n";
WITH_RANK << "#freePointers " << freePointers.size() << "\n"; WITH_RANK << "#freePointers " << freePointers.size() << "\n";
LOG(1,"Atrip") << "#sliceBuffers " << sliceBuffers.size() << "\n"; LOG(1,"Atrip") << "#sliceBuffers " << sliceBuffers.size() << "\n";
LOG(1,"Atrip") << "GB*" << np << " " LOG(1,"Atrip") << "GB*" << np << " "
<< double(sources.size() + sliceBuffers.size()) << double(sources.size() + sliceBuffers.size())
* sources[0].size() * sliceSize
* 8 * np * 8 * np
/ 1073741824.0 / 1073741824.0
<< "\n"; << "\n";
@ -495,14 +504,13 @@ template <typename F=double>
if (otherRank == info.from.rank) sendData_p = false; if (otherRank == info.from.rank) sendData_p = false;
if (!sendData_p) return; if (!sendData_p) return;
MPI_Isend( sources[info.from.source].data() MPI_Isend((void*)SOURCES_DATA(sources[info.from.source]),
, sources[info.from.source].size() sliceSize,
, traits::mpi::datatypeOf<F>() traits::mpi::datatypeOf<F>(),
, otherRank otherRank,
, tag tag,
, universe universe,
, &request &request);
);
WITH_CRAZY_DEBUG WITH_CRAZY_DEBUG
WITH_RANK << "sent to " << otherRank << "\n"; WITH_RANK << "sent to " << otherRank << "\n";
@ -516,25 +524,26 @@ template <typename F=double>
if (Atrip::rank == info.from.rank) return; if (Atrip::rank == info.from.rank) return;
if (slice.info.state == Slice<F>::Fetch) { if (slice.info.state == Slice<F>::Fetch) { // if-1
// TODO: do it through the slice class // TODO: do it through the slice class
slice.info.state = Slice<F>::Dispatched; slice.info.state = Slice<F>::Dispatched;
#if defined(HAVE_CUDA) #if defined(HAVE_CUDA)
# if !defined(ATRIP_CUDA_AWARE_MPI) && defined(ATRIP_SOURCES_IN_GPU)
# error "You need CUDA aware MPI to have slices on the GPU"
# endif
slice.mpi_data = (F*)malloc(sizeof(F) * slice.size); slice.mpi_data = (F*)malloc(sizeof(F) * slice.size);
MPI_Irecv( slice.mpi_data MPI_Irecv(slice.mpi_data,
#else #else
MPI_Irecv( slice.data MPI_Irecv(slice.data,
#endif #endif
, slice.size slice.size,
, traits::mpi::datatypeOf<F>() traits::mpi::datatypeOf<F>(),
, info.from.rank info.from.rank,
, tag tag,
, universe universe,
, &slice.request &slice.request);
//, MPI_STATUS_IGNORE } // if-1
); } // receive
}
}
void unwrapAll(ABCTuple const& abc) { void unwrapAll(ABCTuple const& abc) {
for (auto type: sliceTypes) unwrapSlice(type, abc); for (auto type: sliceTypes) unwrapSlice(type, abc);
@ -597,7 +606,12 @@ template <typename F=double>
const MPI_Comm world; const MPI_Comm world;
const MPI_Comm universe; const MPI_Comm universe;
const std::vector<size_t> sliceLength; const std::vector<size_t> sliceLength;
const size_t sliceSize;
#if defined(ATRIP_SOURCES_IN_GPU)
std::vector< DataPtr<F> > sources;
#else
std::vector< std::vector<F> > sources; std::vector< std::vector<F> > sources;
#endif
std::vector< Slice<F> > slices; std::vector< Slice<F> > slices;
typename Slice<F>::Name name; typename Slice<F>::Name name;
const std::vector<typename Slice<F>::Type> sliceTypes; const std::vector<typename Slice<F>::Type> sliceTypes;