Add the slicing into the GPU
This commit is contained in:
parent
658397ebd7
commit
ad542fe856
@ -13,7 +13,9 @@
|
|||||||
(format "%s/include/" root)
|
(format "%s/include/" root)
|
||||||
(format "%s/" root)
|
(format "%s/" root)
|
||||||
(format "%s/bench/" root)
|
(format "%s/bench/" root)
|
||||||
(format "%s/build/main/" root)))))
|
(format "%s/build/main/" root)))
|
||||||
|
(setq-local flycheck-clang-include-path
|
||||||
|
flycheck-gcc-include-path)))
|
||||||
(eval . (flycheck-mode))
|
(eval . (flycheck-mode))
|
||||||
(eval . (outline-minor-mode))
|
(eval . (outline-minor-mode))
|
||||||
(indent-tabs-mode . nil)
|
(indent-tabs-mode . nil)
|
||||||
|
|||||||
@ -19,8 +19,14 @@
|
|||||||
namespace atrip {
|
namespace atrip {
|
||||||
|
|
||||||
template <typename F=double>
|
template <typename F=double>
|
||||||
|
static
|
||||||
void sliceIntoVector
|
void sliceIntoVector
|
||||||
( std::vector<F> &v
|
#if defined(ATRIP_SOURCES_IN_GPU)
|
||||||
|
( DataPtr<F> &source
|
||||||
|
#else
|
||||||
|
( std::vector<F> &source
|
||||||
|
#endif
|
||||||
|
, size_t sliceSize
|
||||||
, CTF::Tensor<F> &toSlice
|
, CTF::Tensor<F> &toSlice
|
||||||
, std::vector<int64_t> const low
|
, std::vector<int64_t> const low
|
||||||
, std::vector<int64_t> const up
|
, std::vector<int64_t> const up
|
||||||
@ -44,18 +50,30 @@ namespace atrip {
|
|||||||
<< "\n";
|
<< "\n";
|
||||||
|
|
||||||
#ifndef ATRIP_DONT_SLICE
|
#ifndef ATRIP_DONT_SLICE
|
||||||
toSlice.slice( toSlice_.low.data()
|
toSlice.slice(toSlice_.low.data(),
|
||||||
, toSlice_.up.data()
|
toSlice_.up.data(),
|
||||||
, 0.0
|
0.0,
|
||||||
, origin
|
origin,
|
||||||
, origin_.low.data()
|
origin_.low.data(),
|
||||||
, origin_.up.data()
|
origin_.up.data(),
|
||||||
, 1.0);
|
1.0);
|
||||||
memcpy(v.data(), toSlice.data, sizeof(F) * v.size());
|
|
||||||
#else
|
#else
|
||||||
# pragma message("WARNING: COMPILING WITHOUT SLICING THE TENSORS")
|
# pragma message("WARNING: COMPILING WITHOUT SLICING THE TENSORS")
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(ATRIP_SOURCES_IN_GPU)
|
||||||
|
WITH_CHRONO("cuda:sources",
|
||||||
|
_CHECK_CUDA_SUCCESS("copying sources data to device",
|
||||||
|
cuMemcpyHtoD(source,
|
||||||
|
toSlice.data,
|
||||||
|
sliceSize));
|
||||||
|
)
|
||||||
|
#else
|
||||||
|
memcpy(source.data(),
|
||||||
|
toSlice.data,
|
||||||
|
sizeof(F) * sliceSize);
|
||||||
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -80,16 +98,15 @@ namespace atrip {
|
|||||||
|
|
||||||
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override
|
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override
|
||||||
{
|
{
|
||||||
const int Nv = this->sliceLength[0]
|
|
||||||
, No = this->sliceLength[1]
|
|
||||||
, a = this->rankMap.find({static_cast<size_t>(Atrip::rank), it});
|
|
||||||
;
|
|
||||||
|
|
||||||
|
const int
|
||||||
|
Nv = this->sliceLength[0],
|
||||||
|
No = this->sliceLength[1],
|
||||||
|
a = this->rankMap.find({static_cast<size_t>(Atrip::rank), it});
|
||||||
|
|
||||||
sliceIntoVector<F>( this->sources[it]
|
sliceIntoVector<F>(this->sources[it], this->sliceSize,
|
||||||
, to, {0, 0, 0}, {Nv, No, No}
|
to, {0, 0, 0}, {Nv, No, No},
|
||||||
, from, {a, 0, 0, 0}, {a+1, Nv, No, No}
|
from, {a, 0, 0, 0}, {a+1, Nv, No, No});
|
||||||
);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -118,14 +135,13 @@ namespace atrip {
|
|||||||
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override
|
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override
|
||||||
{
|
{
|
||||||
|
|
||||||
const int No = this->sliceLength[0]
|
const int
|
||||||
, a = this->rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
No = this->sliceLength[0],
|
||||||
;
|
a = this->rankMap.find({static_cast<size_t>(Atrip::rank), it});
|
||||||
|
|
||||||
sliceIntoVector<F>( this->sources[it]
|
sliceIntoVector<F>(this->sources[it], this->sliceSize,
|
||||||
, to, {0, 0, 0}, {No, No, No}
|
to, {0, 0, 0}, {No, No, No},
|
||||||
, from, {0, 0, 0, a}, {No, No, No, a+1}
|
from, {0, 0, 0, a}, {No, No, No, a+1});
|
||||||
);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -153,18 +169,17 @@ namespace atrip {
|
|||||||
|
|
||||||
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override {
|
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override {
|
||||||
|
|
||||||
const int Nv = this->sliceLength[0]
|
const int
|
||||||
, No = this->sliceLength[1]
|
Nv = this->sliceLength[0],
|
||||||
, el = this->rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
No = this->sliceLength[1],
|
||||||
, a = el % Nv
|
el = this->rankMap.find({static_cast<size_t>(Atrip::rank), it}),
|
||||||
, b = el / Nv
|
a = el % Nv,
|
||||||
;
|
b = el / Nv;
|
||||||
|
|
||||||
|
|
||||||
sliceIntoVector<F>( this->sources[it]
|
sliceIntoVector<F>(this->sources[it], this->sliceSize,
|
||||||
, to, {0, 0}, {Nv, No}
|
to, {0, 0}, {Nv, No},
|
||||||
, from, {a, b, 0, 0}, {a+1, b+1, Nv, No}
|
from, {a, b, 0, 0}, {a+1, b+1, Nv, No});
|
||||||
);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -191,17 +206,17 @@ namespace atrip {
|
|||||||
|
|
||||||
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override {
|
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override {
|
||||||
|
|
||||||
const int Nv = from.lens[0]
|
const int
|
||||||
, No = this->sliceLength[1]
|
Nv = from.lens[0],
|
||||||
, el = this->rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
No = this->sliceLength[1],
|
||||||
, a = el % Nv
|
el = this->rankMap.find({static_cast<size_t>(Atrip::rank), it}),
|
||||||
, b = el / Nv
|
a = el % Nv,
|
||||||
;
|
b = el / Nv;
|
||||||
|
|
||||||
sliceIntoVector<F>( this->sources[it]
|
|
||||||
, to, {0, 0}, {No, No}
|
sliceIntoVector<F>(this->sources[it], this->sliceSize,
|
||||||
, from, {a, b, 0, 0}, {a+1, b+1, No, No}
|
to, {0, 0}, {No, No},
|
||||||
);
|
from, {a, b, 0, 0}, {a+1, b+1, No, No});
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -231,17 +246,16 @@ namespace atrip {
|
|||||||
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override {
|
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override {
|
||||||
// TODO: maybe generalize this with ABHH
|
// TODO: maybe generalize this with ABHH
|
||||||
|
|
||||||
const int Nv = from.lens[0]
|
const int
|
||||||
, No = this->sliceLength[1]
|
Nv = from.lens[0],
|
||||||
, el = this->rankMap.find({static_cast<size_t>(Atrip::rank), it})
|
No = this->sliceLength[1],
|
||||||
, a = el % Nv
|
el = this->rankMap.find({static_cast<size_t>(Atrip::rank), it}),
|
||||||
, b = el / Nv
|
a = el % Nv,
|
||||||
;
|
b = el / Nv;
|
||||||
|
|
||||||
sliceIntoVector<F>( this->sources[it]
|
sliceIntoVector<F>(this->sources[it], this->sliceSize,
|
||||||
, to, {0, 0}, {No, No}
|
to, {0, 0}, {No, No},
|
||||||
, from, {a, b, 0, 0}, {a+1, b+1, No, No}
|
from, {a, b, 0, 0}, {a+1, b+1, No, No});
|
||||||
);
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user