Compare commits
No commits in common. "b11b53aca1ff60d9e5eff5bfb1a16d00791ff496" and "399447131c99ee858152933310daf81d803cd4da" have entirely different histories.
b11b53aca1
...
399447131c
@ -29,7 +29,4 @@ test_cublas_parallel_atrip_SOURCES = test-cublas-parallel-atrip.cxx
|
|||||||
|
|
||||||
bin_PROGRAMS += test-cuda-sanity
|
bin_PROGRAMS += test-cuda-sanity
|
||||||
test_cuda_sanity_SOURCES = test-cuda-sanity.cxx
|
test_cuda_sanity_SOURCES = test-cuda-sanity.cxx
|
||||||
|
|
||||||
bin_PROGRAMS += test-cublas-parallel
|
|
||||||
test_cublas_parallel_SOURCES = test-cublas-parallel.cxx
|
|
||||||
endif
|
endif
|
||||||
|
|||||||
@ -158,8 +158,8 @@ int main(int argc, char** argv) {
|
|||||||
MPI_Barrier(MPI_COMM_WORLD);
|
MPI_Barrier(MPI_COMM_WORLD);
|
||||||
timings["memcpy"].start();
|
timings["memcpy"].start();
|
||||||
for (size_t _s = 0; _s < slices_d.size(); _s++) {
|
for (size_t _s = 0; _s < slices_d.size(); _s++) {
|
||||||
for (size_t _b = 0; _b < slices_h[_s].size(); _b++) {
|
// for (size_t _b = 0; _b < slices_h[_s].size(); _b++) {
|
||||||
// for (size_t _b = 0; _b < 1 ; _b++) {
|
for (size_t _b = 0; _b < 1 ; _b++) {
|
||||||
auto device = (*slices_d[_s])[_b];
|
auto device = (*slices_d[_s])[_b];
|
||||||
auto host = slices_h[_s][_b].data();
|
auto host = slices_h[_s][_b].data();
|
||||||
cuMemcpyHtoD(device, host, sizes[_s]);
|
cuMemcpyHtoD(device, host, sizes[_s]);
|
||||||
|
|||||||
@ -208,14 +208,15 @@ template <typename F=double>
|
|||||||
auto dataPointer = freePointers.begin();
|
auto dataPointer = freePointers.begin();
|
||||||
freePointers.erase(dataPointer);
|
freePointers.erase(dataPointer);
|
||||||
blank.data = *dataPointer;
|
blank.data = *dataPointer;
|
||||||
WITH_CHRONO("cuda:memcpy",
|
//
|
||||||
WITH_CHRONO("cuda:memcpy:self-sufficient",
|
//
|
||||||
_CHECK_CUDA_SUCCESS("copying mpi data to device",
|
// TODO [#A]: do cuMemcpy of
|
||||||
cuMemcpyHtoD(blank.data,
|
// sources[from.source].data() ⇒ blank.data
|
||||||
(void*)sources[from.source].data(),
|
// Do this when everything else is working.
|
||||||
sizeof(F) * sources[from.source].size()));
|
// This will probably be a bottleneck of the H-to-D communication,
|
||||||
))
|
// as most slices are SelfSufficient.
|
||||||
|
//
|
||||||
|
//
|
||||||
#else
|
#else
|
||||||
blank.data = sources[from.source].data();
|
blank.data = sources[from.source].data();
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -52,8 +52,6 @@ namespace atrip {
|
|||||||
, origin_.up.data()
|
, origin_.up.data()
|
||||||
, 1.0);
|
, 1.0);
|
||||||
memcpy(v.data(), toSlice.data, sizeof(F) * v.size());
|
memcpy(v.data(), toSlice.data, sizeof(F) * v.size());
|
||||||
#else
|
|
||||||
# pragma message("WARNING: COMPILING WITHOUT SLICING THE TENSORS")
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user