Fix blas
This commit is contained in:
parent
a30c424781
commit
8c04280a65
@ -55,28 +55,28 @@ namespace atrip {
|
||||
const int *ldc
|
||||
);
|
||||
|
||||
void dcopy_(const int n,
|
||||
void dcopy_(int *n,
|
||||
const double *x,
|
||||
const int incx,
|
||||
int *incx,
|
||||
double *y,
|
||||
const int incy);
|
||||
int *incy);
|
||||
|
||||
void zcopy_(const int n,
|
||||
void zcopy_(int *n,
|
||||
const void *x,
|
||||
const int incx,
|
||||
int *incx,
|
||||
void *y,
|
||||
const int incy);
|
||||
int *incy);
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename F>
|
||||
void xcopy(const int n,
|
||||
void xcopy(int* n,
|
||||
const DataFieldType<F>* x,
|
||||
const int incx,
|
||||
int* incx,
|
||||
DataFieldType<F>* y,
|
||||
const int incy);
|
||||
int* incy);
|
||||
|
||||
template <typename F>
|
||||
void xgemm(const char *transa,
|
||||
|
||||
@ -27,7 +27,7 @@ using namespace atrip;
|
||||
|
||||
namespace atrip {
|
||||
namespace cuda {
|
||||
|
||||
|
||||
};
|
||||
};
|
||||
|
||||
@ -219,9 +219,11 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
||||
WITH_CHRONO("oneshot-db:comm:allgather",
|
||||
WITH_CHRONO("db:comm:allgather",
|
||||
MPI_Allgather( ldb.data()
|
||||
// , ldb.size() * sizeof(typename Slice<F>::LocalDatabaseElement)
|
||||
, ldb.size()
|
||||
, MPI_LDB_ELEMENT
|
||||
, db.data()
|
||||
// , ldb.size() * sizeof(typename Slice<F>::LocalDatabaseElement)
|
||||
, ldb.size()
|
||||
, MPI_LDB_ELEMENT
|
||||
, c);
|
||||
@ -372,7 +374,7 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
||||
}
|
||||
|
||||
LOG(0, "AtripCUDA") << "Starting iterations\n";
|
||||
|
||||
|
||||
|
||||
for ( size_t
|
||||
i = first_iteration,
|
||||
@ -423,12 +425,19 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
||||
});
|
||||
}
|
||||
|
||||
const double _doubles_time = Atrip::chrono["doubles"].count(),
|
||||
_its_time = Atrip::chrono["iterations"].count();
|
||||
|
||||
LOG(0,"Atrip")
|
||||
<< "iteration " << iteration
|
||||
<< " [" << 100 * iteration / nIterations << "%]"
|
||||
<< " (" << doublesFlops * iteration / Atrip::chrono["doubles"].count()
|
||||
<< " (" << (_doubles_time > 0.0
|
||||
? doublesFlops * iteration / _doubles_time
|
||||
: -1)
|
||||
<< "GF)"
|
||||
<< " (" << doublesFlops * iteration / Atrip::chrono["iterations"].count()
|
||||
<< " (" << (_its_time > 0.0
|
||||
? doublesFlops * iteration / _its_time
|
||||
: -1)
|
||||
<< "GF)"
|
||||
<< "\n";
|
||||
|
||||
@ -465,14 +474,21 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
||||
LOG(0, "AtripCUDA") << "first database " << i << "\n";
|
||||
WITH_RANK << "__first__:first database ............ \n";
|
||||
const auto db = communicateDatabase(abc, universe);
|
||||
LOG(0, "AtripCUDA") << "first database communicated" << i << "\n";
|
||||
WITH_RANK << "__first__:first database communicated \n";
|
||||
WITH_RANK << "__first__:first database io phase \n";
|
||||
LOG(0, "AtripCUDA") << "doing io " << i << "\n";
|
||||
doIOPhase(db);
|
||||
LOG(0, "AtripCUDA") << "io done " << i << "\n";
|
||||
WITH_RANK << "__first__:first database io phase DONE\n";
|
||||
WITH_RANK << "__first__::::Unwrapping all slices for first database\n";
|
||||
LOG(0, "AtripCUDA") << "unrwapping " << i << "\n";
|
||||
for (auto& u: unions) u->unwrapAll(abc);
|
||||
LOG(0, "AtripCUDA") << "unwrapped " << i << "\n";
|
||||
WITH_RANK << "__first__::::Unwrapping slices for first database DONE\n";
|
||||
LOG(0, "AtripCUDA") << "barrier " << i << "\n";
|
||||
MPI_Barrier(universe);
|
||||
LOG(0, "AtripCUDA") << "barriered " << i << "\n";
|
||||
}
|
||||
|
||||
LOG(0, "AtripCUDA") << "next database" << i << "\n";
|
||||
@ -545,14 +561,14 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
||||
abhh.unwrapAll(abc);
|
||||
)))
|
||||
WITH_CHRONO("reorder",
|
||||
LOG(0, "AtripCUDA") << "reorder singles" << i << "\n";
|
||||
atrip::xcopy<F>(No*No*No,
|
||||
int ooo = No*No*No, stride = 1;
|
||||
atrip::xcopy<F>(&ooo,
|
||||
#if defined(HAVE_CUDA)
|
||||
(DataFieldType<F>*)Tijk, 1,
|
||||
(DataFieldType<F>*)Zijk, 1);
|
||||
(DataFieldType<F>*)Tijk, &stride,
|
||||
(DataFieldType<F>*)Zijk, &stride);
|
||||
#else
|
||||
(DataFieldType<F>*)Tijk.data(), 1,
|
||||
(DataFieldType<F>*)Zijk.data(), 1);
|
||||
(DataFieldType<F>*)Tijk.data(), &stride,
|
||||
(DataFieldType<F>*)Zijk.data(), &stride);
|
||||
#endif
|
||||
)
|
||||
WITH_CHRONO("singles",
|
||||
|
||||
@ -105,32 +105,32 @@ namespace atrip {
|
||||
|
||||
|
||||
template <>
|
||||
void xcopy<double>(const int n,
|
||||
void xcopy<double>(int *n,
|
||||
const DataFieldType<double>* x,
|
||||
const int incx,
|
||||
int *incx,
|
||||
DataFieldType<double>* y,
|
||||
const int incy) {
|
||||
int *incy) {
|
||||
#if defined(HAVE_CUDA)
|
||||
cublasDcopy(Atrip::cuda.handle,
|
||||
n,
|
||||
x, incx,
|
||||
y, incy);
|
||||
*n,
|
||||
x, *incx,
|
||||
y, *incy);
|
||||
#else
|
||||
dcopy_(n, x, incx, y, incy);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
template <>
|
||||
void xcopy<Complex>(const int n,
|
||||
void xcopy<Complex>(int* n,
|
||||
const DataFieldType<Complex>* x,
|
||||
const int incx,
|
||||
int* incx,
|
||||
DataFieldType<Complex>* y,
|
||||
const int incy) {
|
||||
int* incy) {
|
||||
#if defined(HAVE_CUDA)
|
||||
cublasZcopy(Atrip::cuda.handle,
|
||||
n,
|
||||
x, incx,
|
||||
y, incy);
|
||||
*n,
|
||||
x, *incx,
|
||||
y, *incy);
|
||||
#else
|
||||
zcopy_(n, x, incx, y, incy);
|
||||
#endif
|
||||
|
||||
Loading…
Reference in New Issue
Block a user