Implement zeroing of tensors through memcpy and cuMemcpy

This commit is contained in:
2023-01-26 01:49:55 +01:00
parent 4e2d1143e5
commit 93cba3c3ab
4 changed files with 24 additions and 60 deletions

View File

@@ -24,15 +24,6 @@ namespace acc {
// cuda kernels
// Overwrites the first `n` elements of `a` with an `F` built from `{0}`.
//
//   a : pointer to the elements to overwrite
//   n : number of elements to overwrite
//
// The elements are visited sequentially in ascending index order; no
// thread or block index takes part in the addressing.
// NOTE(review): if __MAYBE_GLOBAL__ expands to __global__, every launched
// thread would walk the whole range on its own -- confirm the intended
// launch configuration with the callers.
template <typename F>
__MAYBE_GLOBAL__
void zeroing(F* a, size_t n) {
  // Fill pattern, brace-initialised from 0 exactly once.
  F filler = {0};
  size_t idx = 0;
  while (idx < n) {
    a[idx] = filler;
    ++idx;
  }
}
////
template <typename F>
__MAYBE_DEVICE__ __MAYBE_HOST__ __INLINE__