diff --git a/bench/Makefile.am b/bench/Makefile.am index 7588f08..90b2338 100644 --- a/bench/Makefile.am +++ b/bench/Makefile.am @@ -29,4 +29,7 @@ test_cublas_parallel_atrip_SOURCES = test-cublas-parallel-atrip.cxx bin_PROGRAMS += test-cuda-sanity test_cuda_sanity_SOURCES = test-cuda-sanity.cxx + +bin_PROGRAMS += test-cublas-parallel +test_cublas_parallel_SOURCES = test-cublas-parallel.cxx endif diff --git a/bench/test-cublas-parallel-atrip.cxx b/bench/test-cublas-parallel-atrip.cxx index 93b28f1..d6629df 100644 --- a/bench/test-cublas-parallel-atrip.cxx +++ b/bench/test-cublas-parallel-atrip.cxx @@ -158,8 +158,8 @@ int main(int argc, char** argv) { MPI_Barrier(MPI_COMM_WORLD); timings["memcpy"].start(); for (size_t _s = 0; _s < slices_d.size(); _s++) { - // for (size_t _b = 0; _b < slices_h[_s].size(); _b++) { - for (size_t _b = 0; _b < 1 ; _b++) { + for (size_t _b = 0; _b < slices_h[_s].size(); _b++) { + // for (size_t _b = 0; _b < 1 ; _b++) { auto device = (*slices_d[_s])[_b]; auto host = slices_h[_s][_b].data(); cuMemcpyHtoD(device, host, sizes[_s]);