Compare commits

..

No commits in common. "b11b53aca1ff60d9e5eff5bfb1a16d00791ff496" and "399447131c99ee858152933310daf81d803cd4da" have entirely different histories.

4 changed files with 11 additions and 15 deletions

View File

@@ -29,7 +29,4 @@ test_cublas_parallel_atrip_SOURCES = test-cublas-parallel-atrip.cxx
bin_PROGRAMS += test-cuda-sanity bin_PROGRAMS += test-cuda-sanity
test_cuda_sanity_SOURCES = test-cuda-sanity.cxx test_cuda_sanity_SOURCES = test-cuda-sanity.cxx
bin_PROGRAMS += test-cublas-parallel
test_cublas_parallel_SOURCES = test-cublas-parallel.cxx
endif endif

View File

@@ -158,8 +158,8 @@ int main(int argc, char** argv) {
MPI_Barrier(MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD);
timings["memcpy"].start(); timings["memcpy"].start();
for (size_t _s = 0; _s < slices_d.size(); _s++) { for (size_t _s = 0; _s < slices_d.size(); _s++) {
for (size_t _b = 0; _b < slices_h[_s].size(); _b++) { // for (size_t _b = 0; _b < slices_h[_s].size(); _b++) {
// for (size_t _b = 0; _b < 1 ; _b++) { for (size_t _b = 0; _b < 1 ; _b++) {
auto device = (*slices_d[_s])[_b]; auto device = (*slices_d[_s])[_b];
auto host = slices_h[_s][_b].data(); auto host = slices_h[_s][_b].data();
cuMemcpyHtoD(device, host, sizes[_s]); cuMemcpyHtoD(device, host, sizes[_s]);

View File

@@ -208,14 +208,15 @@ template <typename F=double>
auto dataPointer = freePointers.begin(); auto dataPointer = freePointers.begin();
freePointers.erase(dataPointer); freePointers.erase(dataPointer);
blank.data = *dataPointer; blank.data = *dataPointer;
WITH_CHRONO("cuda:memcpy", //
WITH_CHRONO("cuda:memcpy:self-sufficient", //
_CHECK_CUDA_SUCCESS("copying mpi data to device", // TODO [#A]: do cuMemcpy of
cuMemcpyHtoD(blank.data, // sources[from.source].data() ⇒ blank.data
(void*)sources[from.source].data(), // Do this when everything else is working.
sizeof(F) * sources[from.source].size())); // This will probably be a bottleneck of the H-to-D communication,
)) // as most slices are SelfSufficient.
//
//
#else #else
blank.data = sources[from.source].data(); blank.data = sources[from.source].data();
#endif #endif

View File

@@ -52,8 +52,6 @@ namespace atrip {
, origin_.up.data() , origin_.up.data()
, 1.0); , 1.0);
memcpy(v.data(), toSlice.data, sizeof(F) * v.size()); memcpy(v.data(), toSlice.data, sizeof(F) * v.size());
#else
# pragma message("WARNING: COMPILING WITHOUT SLICING THE TENSORS")
#endif #endif
} }