No need to copy to a separate mpi_data array on the host when the sources are resident on the GPU

2022-12-15 09:02:14 -08:00
parent 4af47a0bb7
commit 9003c218a3
2 changed files with 6 additions and 7 deletions
--- a/include/atrip/Slice.hpp
+++ b/include/atrip/Slice.hpp
@@ -352,7 +352,7 @@ Info info;
 // [[file:~/cuda/atrip/atrip.org::*Attributes][Attributes:2]]
 DataPtr<F> data;
-#if defined(HAVE_CUDA)
+#if defined(HAVE_CUDA) && !defined(ATRIP_SOURCES_IN_GPU)
    F* mpi_data;
 #endif
 // Attributes:2 ends here
@@ -456,7 +456,7 @@ void unwrapAndMarkReady() {
      if (errorCode != MPI_SUCCESS)
        throw "Atrip: Unexpected error MPI ERROR";
-#if defined(HAVE_CUDA)
+#if defined(HAVE_CUDA) && !defined(ATRIP_SOURCES_IN_GPU)
      // copy the retrieved mpi data to the device
      WITH_CHRONO("cuda:memcpy",
                  _CHECK_CUDA_SUCCESS("copying mpi data to device",
@@ -488,7 +488,7 @@ void unwrapAndMarkReady() {
 Slice(size_t size_)
      : info({})
      , data(DataNullPtr)
-#if defined(HAVE_CUDA)
+#if defined(HAVE_CUDA) && !defined(ATRIP_SOURCES_IN_GPU)
      , mpi_data(nullptr)
 #endif
      , size(size_)
--- a/include/atrip/SliceUnion.hpp
+++ b/include/atrip/SliceUnion.hpp
@@ -571,12 +571,11 @@ template <typename F=double>
      if (slice.info.state == Slice<F>::Fetch) { // if-1
        // TODO: do it through the slice class
        slice.info.state = Slice<F>::Dispatched;
-#if defined(HAVE_CUDA)
+#if defined(HAVE_CUDA) && defined(ATRIP_SOURCES_IN_GPU)
-#  if !defined(ATRIP_CUDA_AWARE_MPI) && defined(ATRIP_SOURCES_IN_GPU)
+#  if !defined(ATRIP_CUDA_AWARE_MPI) 
 #    error "You need CUDA aware MPI to have slices on the GPU"
 #  endif
-        slice.mpi_data = (F*)malloc(sizeof(F) * slice.size);
+        MPI_Irecv((void*)slice.data,
-        MPI_Irecv(slice.mpi_data,
 #else
        MPI_Irecv(slice.data,
 #endif