Update in SliceUnion ATRIP_SOURCES_IN_GPU
This commit is contained in:
parent
26e2f2d109
commit
658397ebd7
@ -18,6 +18,12 @@
|
|||||||
#include <atrip/Slice.hpp>
|
#include <atrip/Slice.hpp>
|
||||||
#include <atrip/RankMap.hpp>
|
#include <atrip/RankMap.hpp>
|
||||||
|
|
||||||
|
#if defined(ATRIP_SOURCES_IN_GPU)
|
||||||
|
# define SOURCES_DATA(s) (s)
|
||||||
|
#else
|
||||||
|
# define SOURCES_DATA(s) (s).data()
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace atrip {
|
namespace atrip {
|
||||||
// Prolog:1 ends here
|
// Prolog:1 ends here
|
||||||
|
|
||||||
@ -195,7 +201,7 @@ template <typename F=double>
|
|||||||
;
|
;
|
||||||
if (blank.info.state == Slice<F>::SelfSufficient) {
|
if (blank.info.state == Slice<F>::SelfSufficient) {
|
||||||
#if defined(HAVE_CUDA)
|
#if defined(HAVE_CUDA)
|
||||||
const size_t _size = sizeof(F) * sources[from.source].size();
|
const size_t _size = sizeof(F) * sliceSize;
|
||||||
// TODO: this is code duplication with downstairs
|
// TODO: this is code duplication with downstairs
|
||||||
if (freePointers.size() == 0) {
|
if (freePointers.size() == 0) {
|
||||||
std::stringstream stream;
|
std::stringstream stream;
|
||||||
@ -212,12 +218,12 @@ template <typename F=double>
|
|||||||
WITH_CHRONO("cuda:memcpy:self-sufficient",
|
WITH_CHRONO("cuda:memcpy:self-sufficient",
|
||||||
_CHECK_CUDA_SUCCESS("copying mpi data to device",
|
_CHECK_CUDA_SUCCESS("copying mpi data to device",
|
||||||
cuMemcpyHtoD(blank.data,
|
cuMemcpyHtoD(blank.data,
|
||||||
(void*)sources[from.source].data(),
|
(void*)SOURCES_DATA(sources[from.source]),
|
||||||
sizeof(F) * sources[from.source].size()));
|
sizeof(F) * sliceSize));
|
||||||
))
|
))
|
||||||
|
|
||||||
#else
|
#else
|
||||||
blank.data = sources[from.source].data();
|
blank.data = SOURCES_DATA(sources[from.source]);
|
||||||
#endif
|
#endif
|
||||||
} else {
|
} else {
|
||||||
if (freePointers.size() == 0) {
|
if (freePointers.size() == 0) {
|
||||||
@ -396,15 +402,18 @@ template <typename F=double>
|
|||||||
, world(child_world)
|
, world(child_world)
|
||||||
, universe(global_world)
|
, universe(global_world)
|
||||||
, sliceLength(sliceLength_)
|
, sliceLength(sliceLength_)
|
||||||
, sources(rankMap.nSources(),
|
, sliceSize(std::accumulate(sliceLength.begin(),
|
||||||
std::vector<F>
|
|
||||||
(std::accumulate(sliceLength.begin(),
|
|
||||||
sliceLength.end(),
|
sliceLength.end(),
|
||||||
1UL, std::multiplies<size_t>())))
|
1UL, std::multiplies<size_t>()))
|
||||||
|
#if defined(ATRIP_SOURCES_IN_GPU)
|
||||||
|
, sources(rankMap.nSources())
|
||||||
|
#else
|
||||||
|
, sources(rankMap.nSources(),
|
||||||
|
std::vector<F>(sliceSize))
|
||||||
|
#endif
|
||||||
, name(name_)
|
, name(name_)
|
||||||
, sliceTypes(sliceTypes_)
|
, sliceTypes(sliceTypes_)
|
||||||
, sliceBuffers(nSliceBuffers)
|
, sliceBuffers(nSliceBuffers)
|
||||||
//, slices(2 * sliceTypes.size(), Slice<F>{ sources[0].size() })
|
|
||||||
{ // constructor begin
|
{ // constructor begin
|
||||||
|
|
||||||
LOG(0,"Atrip") << "INIT SliceUnion: " << name << "\n";
|
LOG(0,"Atrip") << "INIT SliceUnion: " << name << "\n";
|
||||||
@ -412,7 +421,7 @@ template <typename F=double>
|
|||||||
for (auto& ptr: sliceBuffers) {
|
for (auto& ptr: sliceBuffers) {
|
||||||
#if defined(HAVE_CUDA)
|
#if defined(HAVE_CUDA)
|
||||||
const CUresult error =
|
const CUresult error =
|
||||||
cuMemAlloc(&ptr, sizeof(F) * sources[0].size());
|
cuMemAlloc(&ptr, sizeof(F) * sliceSize);
|
||||||
if (ptr == 0UL) {
|
if (ptr == 0UL) {
|
||||||
throw "UNSUFICCIENT MEMORY ON THE GRAPHIC CARD FOR FREE POINTERS";
|
throw "UNSUFICCIENT MEMORY ON THE GRAPHIC CARD FOR FREE POINTERS";
|
||||||
}
|
}
|
||||||
@ -423,12 +432,12 @@ template <typename F=double>
|
|||||||
throw s.str();
|
throw s.str();
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
ptr = (DataPtr<F>)malloc(sizeof(F) * sources[0].size());
|
ptr = (DataPtr<F>)malloc(sizeof(F) * sliceSize);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
slices
|
slices
|
||||||
= std::vector<Slice<F>>(2 * sliceTypes.size(), { sources[0].size() });
|
= std::vector<Slice<F>>(2 * sliceTypes.size(), { sliceSize });
|
||||||
// TODO: think exactly ^------------------- about this number
|
// TODO: think exactly ^------------------- about this number
|
||||||
|
|
||||||
// initialize the freePointers with the pointers to the buffers
|
// initialize the freePointers with the pointers to the buffers
|
||||||
@ -441,12 +450,12 @@ template <typename F=double>
|
|||||||
LOG(1,"Atrip") << "#slices " << slices.size() << "\n";
|
LOG(1,"Atrip") << "#slices " << slices.size() << "\n";
|
||||||
WITH_RANK << "#slices[0] " << slices[0].size << "\n";
|
WITH_RANK << "#slices[0] " << slices[0].size << "\n";
|
||||||
LOG(1,"Atrip") << "#sources " << sources.size() << "\n";
|
LOG(1,"Atrip") << "#sources " << sources.size() << "\n";
|
||||||
WITH_RANK << "#sources[0] " << sources[0].size() << "\n";
|
WITH_RANK << "#sources[0] " << sliceSize << "\n";
|
||||||
WITH_RANK << "#freePointers " << freePointers.size() << "\n";
|
WITH_RANK << "#freePointers " << freePointers.size() << "\n";
|
||||||
LOG(1,"Atrip") << "#sliceBuffers " << sliceBuffers.size() << "\n";
|
LOG(1,"Atrip") << "#sliceBuffers " << sliceBuffers.size() << "\n";
|
||||||
LOG(1,"Atrip") << "GB*" << np << " "
|
LOG(1,"Atrip") << "GB*" << np << " "
|
||||||
<< double(sources.size() + sliceBuffers.size())
|
<< double(sources.size() + sliceBuffers.size())
|
||||||
* sources[0].size()
|
* sliceSize
|
||||||
* 8 * np
|
* 8 * np
|
||||||
/ 1073741824.0
|
/ 1073741824.0
|
||||||
<< "\n";
|
<< "\n";
|
||||||
@ -495,14 +504,13 @@ template <typename F=double>
|
|||||||
if (otherRank == info.from.rank) sendData_p = false;
|
if (otherRank == info.from.rank) sendData_p = false;
|
||||||
if (!sendData_p) return;
|
if (!sendData_p) return;
|
||||||
|
|
||||||
MPI_Isend( sources[info.from.source].data()
|
MPI_Isend((void*)SOURCES_DATA(sources[info.from.source]),
|
||||||
, sources[info.from.source].size()
|
sliceSize,
|
||||||
, traits::mpi::datatypeOf<F>()
|
traits::mpi::datatypeOf<F>(),
|
||||||
, otherRank
|
otherRank,
|
||||||
, tag
|
tag,
|
||||||
, universe
|
universe,
|
||||||
, &request
|
&request);
|
||||||
);
|
|
||||||
WITH_CRAZY_DEBUG
|
WITH_CRAZY_DEBUG
|
||||||
WITH_RANK << "sent to " << otherRank << "\n";
|
WITH_RANK << "sent to " << otherRank << "\n";
|
||||||
|
|
||||||
@ -516,25 +524,26 @@ template <typename F=double>
|
|||||||
|
|
||||||
if (Atrip::rank == info.from.rank) return;
|
if (Atrip::rank == info.from.rank) return;
|
||||||
|
|
||||||
if (slice.info.state == Slice<F>::Fetch) {
|
if (slice.info.state == Slice<F>::Fetch) { // if-1
|
||||||
// TODO: do it through the slice class
|
// TODO: do it through the slice class
|
||||||
slice.info.state = Slice<F>::Dispatched;
|
slice.info.state = Slice<F>::Dispatched;
|
||||||
#if defined(HAVE_CUDA)
|
#if defined(HAVE_CUDA)
|
||||||
slice.mpi_data = (F*)malloc(sizeof(F) * slice.size);
|
# if !defined(ATRIP_CUDA_AWARE_MPI) && defined(ATRIP_SOURCES_IN_GPU)
|
||||||
MPI_Irecv( slice.mpi_data
|
# error "You need CUDA aware MPI to have slices on the GPU"
|
||||||
#else
|
|
||||||
MPI_Irecv( slice.data
|
|
||||||
# endif
|
# endif
|
||||||
, slice.size
|
slice.mpi_data = (F*)malloc(sizeof(F) * slice.size);
|
||||||
, traits::mpi::datatypeOf<F>()
|
MPI_Irecv(slice.mpi_data,
|
||||||
, info.from.rank
|
#else
|
||||||
, tag
|
MPI_Irecv(slice.data,
|
||||||
, universe
|
#endif
|
||||||
, &slice.request
|
slice.size,
|
||||||
//, MPI_STATUS_IGNORE
|
traits::mpi::datatypeOf<F>(),
|
||||||
);
|
info.from.rank,
|
||||||
}
|
tag,
|
||||||
}
|
universe,
|
||||||
|
&slice.request);
|
||||||
|
} // if-1
|
||||||
|
} // receive
|
||||||
|
|
||||||
void unwrapAll(ABCTuple const& abc) {
|
void unwrapAll(ABCTuple const& abc) {
|
||||||
for (auto type: sliceTypes) unwrapSlice(type, abc);
|
for (auto type: sliceTypes) unwrapSlice(type, abc);
|
||||||
@ -597,7 +606,12 @@ template <typename F=double>
|
|||||||
const MPI_Comm world;
|
const MPI_Comm world;
|
||||||
const MPI_Comm universe;
|
const MPI_Comm universe;
|
||||||
const std::vector<size_t> sliceLength;
|
const std::vector<size_t> sliceLength;
|
||||||
|
const size_t sliceSize;
|
||||||
|
#if defined(ATRIP_SOURCES_IN_GPU)
|
||||||
|
std::vector< DataPtr<F> > sources;
|
||||||
|
#else
|
||||||
std::vector< std::vector<F> > sources;
|
std::vector< std::vector<F> > sources;
|
||||||
|
#endif
|
||||||
std::vector< Slice<F> > slices;
|
std::vector< Slice<F> > slices;
|
||||||
typename Slice<F>::Name name;
|
typename Slice<F>::Name name;
|
||||||
const std::vector<typename Slice<F>::Type> sliceTypes;
|
const std::vector<typename Slice<F>::Type> sliceTypes;
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user