This commit is contained in:
Alejandro Gallo 2022-08-08 18:26:52 +02:00
parent a30c424781
commit 8c04280a65
3 changed files with 48 additions and 32 deletions

View File

@ -55,28 +55,28 @@ namespace atrip {
const int *ldc const int *ldc
); );
void dcopy_(const int n, void dcopy_(int *n,
const double *x, const double *x,
const int incx, int *incx,
double *y, double *y,
const int incy); int *incy);
void zcopy_(const int n, void zcopy_(int *n,
const void *x, const void *x,
const int incx, int *incx,
void *y, void *y,
const int incy); int *incy);
} }
#endif #endif
template <typename F> template <typename F>
void xcopy(const int n, void xcopy(int* n,
const DataFieldType<F>* x, const DataFieldType<F>* x,
const int incx, int* incx,
DataFieldType<F>* y, DataFieldType<F>* y,
const int incy); int* incy);
template <typename F> template <typename F>
void xgemm(const char *transa, void xgemm(const char *transa,

View File

@ -219,9 +219,11 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
WITH_CHRONO("oneshot-db:comm:allgather", WITH_CHRONO("oneshot-db:comm:allgather",
WITH_CHRONO("db:comm:allgather", WITH_CHRONO("db:comm:allgather",
MPI_Allgather( ldb.data() MPI_Allgather( ldb.data()
// , ldb.size() * sizeof(typename Slice<F>::LocalDatabaseElement)
, ldb.size() , ldb.size()
, MPI_LDB_ELEMENT , MPI_LDB_ELEMENT
, db.data() , db.data()
// , ldb.size() * sizeof(typename Slice<F>::LocalDatabaseElement)
, ldb.size() , ldb.size()
, MPI_LDB_ELEMENT , MPI_LDB_ELEMENT
, c); , c);
@ -423,12 +425,19 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
}); });
} }
const double _doubles_time = Atrip::chrono["doubles"].count(),
_its_time = Atrip::chrono["iterations"].count();
LOG(0,"Atrip") LOG(0,"Atrip")
<< "iteration " << iteration << "iteration " << iteration
<< " [" << 100 * iteration / nIterations << "%]" << " [" << 100 * iteration / nIterations << "%]"
<< " (" << doublesFlops * iteration / Atrip::chrono["doubles"].count() << " (" << (_doubles_time > 0.0
? doublesFlops * iteration / _doubles_time
: -1)
<< "GF)" << "GF)"
<< " (" << doublesFlops * iteration / Atrip::chrono["iterations"].count() << " (" << (_its_time > 0.0
? doublesFlops * iteration / _its_time
: -1)
<< "GF)" << "GF)"
<< "\n"; << "\n";
@ -465,14 +474,21 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
LOG(0, "AtripCUDA") << "first database " << i << "\n"; LOG(0, "AtripCUDA") << "first database " << i << "\n";
WITH_RANK << "__first__:first database ............ \n"; WITH_RANK << "__first__:first database ............ \n";
const auto db = communicateDatabase(abc, universe); const auto db = communicateDatabase(abc, universe);
LOG(0, "AtripCUDA") << "first database communicated" << i << "\n";
WITH_RANK << "__first__:first database communicated \n"; WITH_RANK << "__first__:first database communicated \n";
WITH_RANK << "__first__:first database io phase \n"; WITH_RANK << "__first__:first database io phase \n";
LOG(0, "AtripCUDA") << "doing io " << i << "\n";
doIOPhase(db); doIOPhase(db);
LOG(0, "AtripCUDA") << "io done " << i << "\n";
WITH_RANK << "__first__:first database io phase DONE\n"; WITH_RANK << "__first__:first database io phase DONE\n";
WITH_RANK << "__first__::::Unwrapping all slices for first database\n"; WITH_RANK << "__first__::::Unwrapping all slices for first database\n";
LOG(0, "AtripCUDA") << "unrwapping " << i << "\n";
for (auto& u: unions) u->unwrapAll(abc); for (auto& u: unions) u->unwrapAll(abc);
LOG(0, "AtripCUDA") << "unwrapped " << i << "\n";
WITH_RANK << "__first__::::Unwrapping slices for first database DONE\n"; WITH_RANK << "__first__::::Unwrapping slices for first database DONE\n";
LOG(0, "AtripCUDA") << "barrier " << i << "\n";
MPI_Barrier(universe); MPI_Barrier(universe);
LOG(0, "AtripCUDA") << "barriered " << i << "\n";
} }
LOG(0, "AtripCUDA") << "next database" << i << "\n"; LOG(0, "AtripCUDA") << "next database" << i << "\n";
@ -545,14 +561,14 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
abhh.unwrapAll(abc); abhh.unwrapAll(abc);
))) )))
WITH_CHRONO("reorder", WITH_CHRONO("reorder",
LOG(0, "AtripCUDA") << "reorder singles" << i << "\n"; int ooo = No*No*No, stride = 1;
atrip::xcopy<F>(No*No*No, atrip::xcopy<F>(&ooo,
#if defined(HAVE_CUDA) #if defined(HAVE_CUDA)
(DataFieldType<F>*)Tijk, 1, (DataFieldType<F>*)Tijk, &stride,
(DataFieldType<F>*)Zijk, 1); (DataFieldType<F>*)Zijk, &stride);
#else #else
(DataFieldType<F>*)Tijk.data(), 1, (DataFieldType<F>*)Tijk.data(), &stride,
(DataFieldType<F>*)Zijk.data(), 1); (DataFieldType<F>*)Zijk.data(), &stride);
#endif #endif
) )
WITH_CHRONO("singles", WITH_CHRONO("singles",

View File

@ -105,32 +105,32 @@ namespace atrip {
template <> template <>
void xcopy<double>(const int n, void xcopy<double>(int *n,
const DataFieldType<double>* x, const DataFieldType<double>* x,
const int incx, int *incx,
DataFieldType<double>* y, DataFieldType<double>* y,
const int incy) { int *incy) {
#if defined(HAVE_CUDA) #if defined(HAVE_CUDA)
cublasDcopy(Atrip::cuda.handle, cublasDcopy(Atrip::cuda.handle,
n, *n,
x, incx, x, *incx,
y, incy); y, *incy);
#else #else
dcopy_(n, x, incx, y, incy); dcopy_(n, x, incx, y, incy);
#endif #endif
} }
template <> template <>
void xcopy<Complex>(const int n, void xcopy<Complex>(int* n,
const DataFieldType<Complex>* x, const DataFieldType<Complex>* x,
const int incx, int* incx,
DataFieldType<Complex>* y, DataFieldType<Complex>* y,
const int incy) { int* incy) {
#if defined(HAVE_CUDA) #if defined(HAVE_CUDA)
cublasZcopy(Atrip::cuda.handle, cublasZcopy(Atrip::cuda.handle,
n, *n,
x, incx, x, *incx,
y, incy); y, *incy);
#else #else
zcopy_(n, x, incx, y, incy); zcopy_(n, x, incx, y, incy);
#endif #endif