267 lines
8.1 KiB
C++
267 lines
8.1 KiB
C++
// Copyright 2022 Alejandro Gallo
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
// [[file:~/cuda/atrip/atrip.org::*Unions][Unions:1]]
|
|
#pragma once
|
|
#include <atrip/SliceUnion.hpp>
|
|
|
|
namespace atrip {
|
|
|
|
template <typename F=double>
|
|
static
|
|
void sliceIntoVector
|
|
#if defined(ATRIP_SOURCES_IN_GPU)
|
|
( DataPtr<F> &source
|
|
#else
|
|
( std::vector<F> &source
|
|
#endif
|
|
, size_t sliceSize
|
|
, CTF::Tensor<F> &toSlice
|
|
, std::vector<int64_t> const low
|
|
, std::vector<int64_t> const up
|
|
, CTF::Tensor<F> const& origin
|
|
, std::vector<int64_t> const originLow
|
|
, std::vector<int64_t> const originUp
|
|
) {
|
|
// Thank you CTF for forcing me to do this
|
|
struct { std::vector<int> up, low; }
|
|
toSlice_ = { {up.begin(), up.end()}
|
|
, {low.begin(), low.end()} }
|
|
, origin_ = { {originUp.begin(), originUp.end()}
|
|
, {originLow.begin(), originLow.end()} }
|
|
;
|
|
|
|
WITH_OCD
|
|
WITH_RANK << "slicing into " << pretty_print(toSlice_.up)
|
|
<< "," << pretty_print(toSlice_.low)
|
|
<< " from " << pretty_print(origin_.up)
|
|
<< "," << pretty_print(origin_.low)
|
|
<< "\n";
|
|
|
|
#ifndef ATRIP_DONT_SLICE
|
|
toSlice.slice(toSlice_.low.data(),
|
|
toSlice_.up.data(),
|
|
0.0,
|
|
origin,
|
|
origin_.low.data(),
|
|
origin_.up.data(),
|
|
1.0);
|
|
#else
|
|
# pragma message("WARNING: COMPILING WITHOUT SLICING THE TENSORS")
|
|
#endif
|
|
|
|
#if defined(ATRIP_SOURCES_IN_GPU)
|
|
WITH_CHRONO("cuda:sources",
|
|
_CHECK_CUDA_SUCCESS("copying sources data to device",
|
|
cuMemcpyHtoD(source,
|
|
toSlice.data,
|
|
sliceSize));
|
|
)
|
|
#else
|
|
memcpy(source.data(),
|
|
toSlice.data,
|
|
sizeof(F) * sliceSize);
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
template <typename F=double>
|
|
struct TAPHH : public SliceUnion<F> {
|
|
TAPHH( CTF::Tensor<F> const& sourceTensor
|
|
, size_t No
|
|
, size_t Nv
|
|
, size_t np
|
|
, MPI_Comm child_world
|
|
, MPI_Comm global_world
|
|
) : SliceUnion<F>( {Slice<F>::A, Slice<F>::B, Slice<F>::C}
|
|
, {Nv, No, No} // size of the slices
|
|
, {Nv}
|
|
, np
|
|
, child_world
|
|
, global_world
|
|
, Slice<F>::TA
|
|
, 6) {
|
|
this->init(sourceTensor);
|
|
}
|
|
|
|
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override
|
|
{
|
|
|
|
const int
|
|
Nv = this->sliceLength[0],
|
|
No = this->sliceLength[1],
|
|
a = this->rankMap.find({static_cast<size_t>(Atrip::rank), it});
|
|
|
|
sliceIntoVector<F>(this->sources[it], this->sliceSize,
|
|
to, {0, 0, 0}, {Nv, No, No},
|
|
from, {a, 0, 0, 0}, {a+1, Nv, No, No});
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
template <typename F=double>
|
|
struct HHHA : public SliceUnion<F> {
|
|
HHHA( CTF::Tensor<F> const& sourceTensor
|
|
, size_t No
|
|
, size_t Nv
|
|
, size_t np
|
|
, MPI_Comm child_world
|
|
, MPI_Comm global_world
|
|
) : SliceUnion<F>( {Slice<F>::A, Slice<F>::B, Slice<F>::C}
|
|
, {No, No, No} // size of the slices
|
|
, {Nv} // size of the parametrization
|
|
, np
|
|
, child_world
|
|
, global_world
|
|
, Slice<F>::VIJKA
|
|
, 6) {
|
|
this->init(sourceTensor);
|
|
}
|
|
|
|
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override
|
|
{
|
|
|
|
const int
|
|
No = this->sliceLength[0],
|
|
a = this->rankMap.find({static_cast<size_t>(Atrip::rank), it});
|
|
|
|
sliceIntoVector<F>(this->sources[it], this->sliceSize,
|
|
to, {0, 0, 0}, {No, No, No},
|
|
from, {0, 0, 0, a}, {No, No, No, a+1});
|
|
|
|
}
|
|
};
|
|
|
|
template <typename F=double>
|
|
struct ABPH : public SliceUnion<F> {
|
|
ABPH( CTF::Tensor<F> const& sourceTensor
|
|
, size_t No
|
|
, size_t Nv
|
|
, size_t np
|
|
, MPI_Comm child_world
|
|
, MPI_Comm global_world
|
|
) : SliceUnion<F>( { Slice<F>::AB, Slice<F>::BC, Slice<F>::AC
|
|
, Slice<F>::BA, Slice<F>::CB, Slice<F>::CA
|
|
}
|
|
, {Nv, No} // size of the slices
|
|
, {Nv, Nv} // size of the parametrization
|
|
, np
|
|
, child_world
|
|
, global_world
|
|
, Slice<F>::VABCI
|
|
, 2*6) {
|
|
this->init(sourceTensor);
|
|
}
|
|
|
|
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override {
|
|
|
|
const int
|
|
Nv = this->sliceLength[0],
|
|
No = this->sliceLength[1],
|
|
el = this->rankMap.find({static_cast<size_t>(Atrip::rank), it}),
|
|
a = el % Nv,
|
|
b = el / Nv;
|
|
|
|
|
|
sliceIntoVector<F>(this->sources[it], this->sliceSize,
|
|
to, {0, 0}, {Nv, No},
|
|
from, {a, b, 0, 0}, {a+1, b+1, Nv, No});
|
|
|
|
}
|
|
|
|
};
|
|
|
|
template <typename F=double>
|
|
struct ABHH : public SliceUnion<F> {
|
|
ABHH( CTF::Tensor<F> const& sourceTensor
|
|
, size_t No
|
|
, size_t Nv
|
|
, size_t np
|
|
, MPI_Comm child_world
|
|
, MPI_Comm global_world
|
|
) : SliceUnion<F>( {Slice<F>::AB, Slice<F>::BC, Slice<F>::AC}
|
|
, {No, No} // size of the slices
|
|
, {Nv, Nv} // size of the parametrization
|
|
, np
|
|
, child_world
|
|
, global_world
|
|
, Slice<F>::VABIJ
|
|
, 6) {
|
|
this->init(sourceTensor);
|
|
}
|
|
|
|
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override {
|
|
|
|
const int
|
|
Nv = from.lens[0],
|
|
No = this->sliceLength[1],
|
|
el = this->rankMap.find({static_cast<size_t>(Atrip::rank), it}),
|
|
a = el % Nv,
|
|
b = el / Nv;
|
|
|
|
|
|
sliceIntoVector<F>(this->sources[it], this->sliceSize,
|
|
to, {0, 0}, {No, No},
|
|
from, {a, b, 0, 0}, {a+1, b+1, No, No});
|
|
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
template <typename F=double>
|
|
struct TABHH : public SliceUnion<F> {
|
|
TABHH( CTF::Tensor<F> const& sourceTensor
|
|
, size_t No
|
|
, size_t Nv
|
|
, size_t np
|
|
, MPI_Comm child_world
|
|
, MPI_Comm global_world
|
|
) : SliceUnion<F>( {Slice<F>::AB, Slice<F>::BC, Slice<F>::AC}
|
|
, {No, No} // size of the slices
|
|
, {Nv, Nv} // size of the parametrization
|
|
, np
|
|
, child_world
|
|
, global_world
|
|
, Slice<F>::TABIJ
|
|
, 6) {
|
|
this->init(sourceTensor);
|
|
}
|
|
|
|
void sliceIntoBuffer(size_t it, CTF::Tensor<F> &to, CTF::Tensor<F> const& from) override {
|
|
// TODO: maybe generalize this with ABHH
|
|
|
|
const int
|
|
Nv = from.lens[0],
|
|
No = this->sliceLength[1],
|
|
el = this->rankMap.find({static_cast<size_t>(Atrip::rank), it}),
|
|
a = el % Nv,
|
|
b = el / Nv;
|
|
|
|
sliceIntoVector<F>(this->sources[it], this->sliceSize,
|
|
to, {0, 0}, {No, No},
|
|
from, {a, b, 0, 0}, {a+1, b+1, No, No});
|
|
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
// Unions:1 ends here
|