Fix blas
This commit is contained in:
parent
a30c424781
commit
8c04280a65
@ -55,28 +55,28 @@ namespace atrip {
|
|||||||
const int *ldc
|
const int *ldc
|
||||||
);
|
);
|
||||||
|
|
||||||
void dcopy_(const int n,
|
void dcopy_(int *n,
|
||||||
const double *x,
|
const double *x,
|
||||||
const int incx,
|
int *incx,
|
||||||
double *y,
|
double *y,
|
||||||
const int incy);
|
int *incy);
|
||||||
|
|
||||||
void zcopy_(const int n,
|
void zcopy_(int *n,
|
||||||
const void *x,
|
const void *x,
|
||||||
const int incx,
|
int *incx,
|
||||||
void *y,
|
void *y,
|
||||||
const int incy);
|
int *incy);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template <typename F>
|
template <typename F>
|
||||||
void xcopy(const int n,
|
void xcopy(int* n,
|
||||||
const DataFieldType<F>* x,
|
const DataFieldType<F>* x,
|
||||||
const int incx,
|
int* incx,
|
||||||
DataFieldType<F>* y,
|
DataFieldType<F>* y,
|
||||||
const int incy);
|
int* incy);
|
||||||
|
|
||||||
template <typename F>
|
template <typename F>
|
||||||
void xgemm(const char *transa,
|
void xgemm(const char *transa,
|
||||||
|
|||||||
@ -27,7 +27,7 @@ using namespace atrip;
|
|||||||
|
|
||||||
namespace atrip {
|
namespace atrip {
|
||||||
namespace cuda {
|
namespace cuda {
|
||||||
|
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -219,9 +219,11 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
|||||||
WITH_CHRONO("oneshot-db:comm:allgather",
|
WITH_CHRONO("oneshot-db:comm:allgather",
|
||||||
WITH_CHRONO("db:comm:allgather",
|
WITH_CHRONO("db:comm:allgather",
|
||||||
MPI_Allgather( ldb.data()
|
MPI_Allgather( ldb.data()
|
||||||
|
// , ldb.size() * sizeof(typename Slice<F>::LocalDatabaseElement)
|
||||||
, ldb.size()
|
, ldb.size()
|
||||||
, MPI_LDB_ELEMENT
|
, MPI_LDB_ELEMENT
|
||||||
, db.data()
|
, db.data()
|
||||||
|
// , ldb.size() * sizeof(typename Slice<F>::LocalDatabaseElement)
|
||||||
, ldb.size()
|
, ldb.size()
|
||||||
, MPI_LDB_ELEMENT
|
, MPI_LDB_ELEMENT
|
||||||
, c);
|
, c);
|
||||||
@ -372,7 +374,7 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
LOG(0, "AtripCUDA") << "Starting iterations\n";
|
LOG(0, "AtripCUDA") << "Starting iterations\n";
|
||||||
|
|
||||||
|
|
||||||
for ( size_t
|
for ( size_t
|
||||||
i = first_iteration,
|
i = first_iteration,
|
||||||
@ -423,12 +425,19 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const double _doubles_time = Atrip::chrono["doubles"].count(),
|
||||||
|
_its_time = Atrip::chrono["iterations"].count();
|
||||||
|
|
||||||
LOG(0,"Atrip")
|
LOG(0,"Atrip")
|
||||||
<< "iteration " << iteration
|
<< "iteration " << iteration
|
||||||
<< " [" << 100 * iteration / nIterations << "%]"
|
<< " [" << 100 * iteration / nIterations << "%]"
|
||||||
<< " (" << doublesFlops * iteration / Atrip::chrono["doubles"].count()
|
<< " (" << (_doubles_time > 0.0
|
||||||
|
? doublesFlops * iteration / _doubles_time
|
||||||
|
: -1)
|
||||||
<< "GF)"
|
<< "GF)"
|
||||||
<< " (" << doublesFlops * iteration / Atrip::chrono["iterations"].count()
|
<< " (" << (_its_time > 0.0
|
||||||
|
? doublesFlops * iteration / _its_time
|
||||||
|
: -1)
|
||||||
<< "GF)"
|
<< "GF)"
|
||||||
<< "\n";
|
<< "\n";
|
||||||
|
|
||||||
@ -465,14 +474,21 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
|||||||
LOG(0, "AtripCUDA") << "first database " << i << "\n";
|
LOG(0, "AtripCUDA") << "first database " << i << "\n";
|
||||||
WITH_RANK << "__first__:first database ............ \n";
|
WITH_RANK << "__first__:first database ............ \n";
|
||||||
const auto db = communicateDatabase(abc, universe);
|
const auto db = communicateDatabase(abc, universe);
|
||||||
|
LOG(0, "AtripCUDA") << "first database communicated" << i << "\n";
|
||||||
WITH_RANK << "__first__:first database communicated \n";
|
WITH_RANK << "__first__:first database communicated \n";
|
||||||
WITH_RANK << "__first__:first database io phase \n";
|
WITH_RANK << "__first__:first database io phase \n";
|
||||||
|
LOG(0, "AtripCUDA") << "doing io " << i << "\n";
|
||||||
doIOPhase(db);
|
doIOPhase(db);
|
||||||
|
LOG(0, "AtripCUDA") << "io done " << i << "\n";
|
||||||
WITH_RANK << "__first__:first database io phase DONE\n";
|
WITH_RANK << "__first__:first database io phase DONE\n";
|
||||||
WITH_RANK << "__first__::::Unwrapping all slices for first database\n";
|
WITH_RANK << "__first__::::Unwrapping all slices for first database\n";
|
||||||
|
LOG(0, "AtripCUDA") << "unrwapping " << i << "\n";
|
||||||
for (auto& u: unions) u->unwrapAll(abc);
|
for (auto& u: unions) u->unwrapAll(abc);
|
||||||
|
LOG(0, "AtripCUDA") << "unwrapped " << i << "\n";
|
||||||
WITH_RANK << "__first__::::Unwrapping slices for first database DONE\n";
|
WITH_RANK << "__first__::::Unwrapping slices for first database DONE\n";
|
||||||
|
LOG(0, "AtripCUDA") << "barrier " << i << "\n";
|
||||||
MPI_Barrier(universe);
|
MPI_Barrier(universe);
|
||||||
|
LOG(0, "AtripCUDA") << "barriered " << i << "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG(0, "AtripCUDA") << "next database" << i << "\n";
|
LOG(0, "AtripCUDA") << "next database" << i << "\n";
|
||||||
@ -545,14 +561,14 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
|||||||
abhh.unwrapAll(abc);
|
abhh.unwrapAll(abc);
|
||||||
)))
|
)))
|
||||||
WITH_CHRONO("reorder",
|
WITH_CHRONO("reorder",
|
||||||
LOG(0, "AtripCUDA") << "reorder singles" << i << "\n";
|
int ooo = No*No*No, stride = 1;
|
||||||
atrip::xcopy<F>(No*No*No,
|
atrip::xcopy<F>(&ooo,
|
||||||
#if defined(HAVE_CUDA)
|
#if defined(HAVE_CUDA)
|
||||||
(DataFieldType<F>*)Tijk, 1,
|
(DataFieldType<F>*)Tijk, &stride,
|
||||||
(DataFieldType<F>*)Zijk, 1);
|
(DataFieldType<F>*)Zijk, &stride);
|
||||||
#else
|
#else
|
||||||
(DataFieldType<F>*)Tijk.data(), 1,
|
(DataFieldType<F>*)Tijk.data(), &stride,
|
||||||
(DataFieldType<F>*)Zijk.data(), 1);
|
(DataFieldType<F>*)Zijk.data(), &stride);
|
||||||
#endif
|
#endif
|
||||||
)
|
)
|
||||||
WITH_CHRONO("singles",
|
WITH_CHRONO("singles",
|
||||||
|
|||||||
@ -105,32 +105,32 @@ namespace atrip {
|
|||||||
|
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
void xcopy<double>(const int n,
|
void xcopy<double>(int *n,
|
||||||
const DataFieldType<double>* x,
|
const DataFieldType<double>* x,
|
||||||
const int incx,
|
int *incx,
|
||||||
DataFieldType<double>* y,
|
DataFieldType<double>* y,
|
||||||
const int incy) {
|
int *incy) {
|
||||||
#if defined(HAVE_CUDA)
|
#if defined(HAVE_CUDA)
|
||||||
cublasDcopy(Atrip::cuda.handle,
|
cublasDcopy(Atrip::cuda.handle,
|
||||||
n,
|
*n,
|
||||||
x, incx,
|
x, *incx,
|
||||||
y, incy);
|
y, *incy);
|
||||||
#else
|
#else
|
||||||
dcopy_(n, x, incx, y, incy);
|
dcopy_(n, x, incx, y, incy);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
void xcopy<Complex>(const int n,
|
void xcopy<Complex>(int* n,
|
||||||
const DataFieldType<Complex>* x,
|
const DataFieldType<Complex>* x,
|
||||||
const int incx,
|
int* incx,
|
||||||
DataFieldType<Complex>* y,
|
DataFieldType<Complex>* y,
|
||||||
const int incy) {
|
int* incy) {
|
||||||
#if defined(HAVE_CUDA)
|
#if defined(HAVE_CUDA)
|
||||||
cublasZcopy(Atrip::cuda.handle,
|
cublasZcopy(Atrip::cuda.handle,
|
||||||
n,
|
*n,
|
||||||
x, incx,
|
x, *incx,
|
||||||
y, incy);
|
y, *incy);
|
||||||
#else
|
#else
|
||||||
zcopy_(n, x, incx, y, incy);
|
zcopy_(n, x, incx, y, incy);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user