// Listing metadata: 229 lines, 7.3 KiB, C++
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

#include <atrip.hpp>
#include <atrip/Debug.hpp>
#include <atrip/Utils.hpp>
#include <CLI11.hpp>

// Print "<what> => <size in GB>GB" on rank 0 only.
//
//   what  label; stringified verbatim with #what
//   size  number of elements, scaled by elem_to_gb
//
// The macro expands to a complete `if (...) { ... }` statement, so call sites
// work with or without a trailing semicolon. It reads `rank` and `elem_to_gb`
// from the scope in which it is expanded; both must be visible there.
//
// NOTE: `size` is not parenthesised under the cast. For an argument such as
// `a*b*c` the cast therefore applies to `a`, and the product is evaluated in
// double. Parenthesising it would move the multiplication back into the
// argument's own type (int at the call sites in this file).
#define _print_size(what, size)               \
  if (rank == 0) {                            \
    std::cout << #what                        \
              << " => "                       \
              << (double)size * elem_to_gb    \
              << "GB"                         \
              << std::endl;                   \
  }

int main(int argc, char** argv) {
|
|
MPI_Init(&argc, &argv);
|
|
|
|
size_t checkpoint_it;
|
|
int no(10), nv(100), itMod(-1), percentageMod(10);
|
|
float checkpoint_percentage;
|
|
bool
|
|
nochrono(false), barrier(false), rankRoundRobin(false),
|
|
keepVppph(false),
|
|
noCheckpoint = false;
|
|
std::string tuplesDistributionString = "naive",
|
|
checkpoint_path = "checkpoint.yaml";
|
|
|
|
CLI::App app{"Main bench for atrip"};
|
|
app.add_option("--no", no, "Occupied orbitals");
|
|
app.add_option("--nv", nv, "Virtual orbitals");
|
|
app.add_option("--mod", itMod, "Iteration modifier");
|
|
app.add_flag("--keep-vppph", keepVppph, "Do not delete Vppph");
|
|
app.add_flag("--nochrono", nochrono, "Do not print chrono");
|
|
app.add_flag("--rank-round-robin", rankRoundRobin, "Do rank round robin");
|
|
app.add_flag("--barrier", barrier, "Use the first barrier");
|
|
app.add_option("--dist", tuplesDistributionString, "Which distribution");
|
|
app.add_option("-%", percentageMod, "Percentage to be printed");
|
|
// checkpointing
|
|
app.add_flag("--nocheckpoint", noCheckpoint, "Do not use checkpoint");
|
|
app.add_option("--checkpoint-path", checkpoint_path, "Path for checkpoint");
|
|
app.add_option("--checkpoint-it",
|
|
checkpoint_it, "Checkpoint at every iteration");
|
|
app.add_option("--checkpoint-%",
|
|
checkpoint_percentage,
|
|
"Percentage for checkpoints");
|
|
|
|
#if defined(HAVE_CUDA)
|
|
size_t ooo_threads = 0, ooo_blocks = 0;
|
|
app.add_option("--ooo-blocks",
|
|
ooo_blocks,
|
|
"CUDA: Number of blocks per block for kernels going through ooo tensors");
|
|
app.add_option("--ooo-threads",
|
|
ooo_threads,
|
|
"CUDA: Number of threads per block for kernels going through ooo tensors");
|
|
#endif
|
|
|
|
CLI11_PARSE(app, argc, argv);
|
|
|
|
CTF::World world(argc, argv);
|
|
int rank, nranks;
|
|
MPI_Comm_rank(world.comm, &rank);
|
|
MPI_Comm_size(world.comm, &nranks);
|
|
constexpr double elem_to_gb = 8.0 / 1024.0 / 1024.0 / 1024.0;
|
|
|
|
// USER PRINTING TEST BEGIN
|
|
const double doublesFlops
|
|
= no * no * no
|
|
* (no + nv)
|
|
* 2.0
|
|
* 6.0
|
|
/ 1.0e9
|
|
;
|
|
double lastElapsedTime = 0;
|
|
bool firstHeaderPrinted = false;
|
|
atrip::registerIterationDescriptor
|
|
([doublesFlops, &firstHeaderPrinted, rank, &lastElapsedTime]
|
|
(atrip::IterationDescription const& d) {
|
|
const char
|
|
*fmt_header = "%-13s%-10s%-13s",
|
|
*fmt_nums = "%-13.0f%-10.0f%-13.3f";
|
|
char out[256];
|
|
if (!firstHeaderPrinted) {
|
|
sprintf(out, fmt_header, "Progress(%)", "time(s)", "GFLOP/s");
|
|
firstHeaderPrinted = true;
|
|
if (rank == 0) std::cout << out << "\n";
|
|
}
|
|
sprintf(out, fmt_nums,
|
|
double(d.currentIteration) / double(d.totalIterations) * 100,
|
|
(d.currentElapsedTime - lastElapsedTime),
|
|
d.currentIteration * doublesFlops / d.currentElapsedTime);
|
|
lastElapsedTime = d.currentElapsedTime;
|
|
if (rank == 0) std::cout << out << "\n";
|
|
});
|
|
// USER PRINTING TEST END
|
|
|
|
|
|
atrip::Atrip::Input<double>::TuplesDistribution tuplesDistribution;
|
|
{ using atrip::Atrip;
|
|
if (tuplesDistributionString == "naive") {
|
|
tuplesDistribution
|
|
= Atrip::Input<double>::TuplesDistribution::NAIVE;
|
|
} else if (tuplesDistributionString == "group") {
|
|
tuplesDistribution
|
|
= Atrip::Input<double>::TuplesDistribution::GROUP_AND_SORT;
|
|
} else {
|
|
std::cout << "--dist should be either naive or group\n";
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
size_t
|
|
f = sizeof(double)
|
|
, n_tuples = nv * (nv + 1) * (nv + 2) / 6 - nv
|
|
, atrip_memory
|
|
= /* tuples_memory */ 3 * sizeof(size_t) * n_tuples
|
|
//
|
|
// one dimensional slices (all ranks)
|
|
//
|
|
+ /* taphh */ f * nranks * 6 * nv * no * no
|
|
+ /* hhha */ f * nranks * 6 * no * no * no
|
|
//
|
|
// two dimensional slices (all ranks)
|
|
//
|
|
+ /* abph */ f * nranks * 12 * nv * no
|
|
+ /* abhh */ f * nranks * 6 * no * no
|
|
+ /* tabhh */ f * nranks * 6 * no * no
|
|
//
|
|
// distributed sources (all ranks)
|
|
//
|
|
+ /* tpphh */ f * nv * nv * no * no
|
|
+ /* vhhhp */ f * no * no * no * nv
|
|
+ /* vppph */ f * nv * nv * nv * no
|
|
+ /* vpphh */ f * nv * nv * no * no
|
|
+ /* tpphh2 */ f * nv * nv * no * no
|
|
//
|
|
// tensors in every rank
|
|
//
|
|
+ /* tijk */ f * nranks * no * no * no
|
|
+ /* zijk */ f * nranks * no * no * no
|
|
+ /* epsp */ f * nranks * (no + nv)
|
|
+ /* tai */ f * nranks * no * nv
|
|
;
|
|
|
|
if (rank == 0) {
|
|
std::cout << "Tentative MEMORY USAGE (GB): "
|
|
<< double(atrip_memory) / 1024.0 / 1024.0 / 1024.0 << "\n";
|
|
}
|
|
|
|
|
|
std::vector<int> symmetries(4, NS)
|
|
, vo({nv, no})
|
|
, vvoo({nv, nv, no, no})
|
|
, ooov({no, no, no, nv})
|
|
, vvvo({nv, nv, nv, no})
|
|
;
|
|
|
|
CTF::Tensor<double>
|
|
ei(1, ooov.data(), symmetries.data(), world)
|
|
, ea(1, vo.data(), symmetries.data(), world)
|
|
, Tph(2, vo.data(), symmetries.data(), world)
|
|
, Tpphh(4, vvoo.data(), symmetries.data(), world)
|
|
, Vpphh(4, vvoo.data(), symmetries.data(), world)
|
|
, Vhhhp(4, ooov.data(), symmetries.data(), world)
|
|
;
|
|
|
|
// initialize deletable tensors in heap
|
|
auto Vppph
|
|
= new CTF::Tensor<double>(4, vvvo.data(), symmetries.data(), world);
|
|
|
|
_print_size(Vabci, no*nv*nv*nv)
|
|
_print_size(Vabij, no*no*nv*nv)
|
|
_print_size(Vijka, no*no*no*nv)
|
|
|
|
ei.fill_random(-40.0, -2);
|
|
ea.fill_random(2, 50);
|
|
Tpphh.fill_random(0, 1);
|
|
Tph.fill_random(0, 1);
|
|
Vpphh.fill_random(0, 1);
|
|
Vhhhp.fill_random(0, 1);
|
|
Vppph->fill_random(0, 1);
|
|
|
|
atrip::Atrip::init(MPI_COMM_WORLD);
|
|
const auto in
|
|
= atrip::Atrip::Input<double>()
|
|
// Tensors
|
|
.with_epsilon_i(&ei)
|
|
.with_epsilon_a(&ea)
|
|
.with_Tai(&Tph)
|
|
.with_Tabij(&Tpphh)
|
|
.with_Vabij(&Vpphh)
|
|
.with_Vijka(&Vhhhp)
|
|
.with_Vabci(Vppph)
|
|
// some options
|
|
.with_deleteVppph(!keepVppph)
|
|
.with_barrier(barrier)
|
|
.with_chrono(!nochrono)
|
|
.with_rankRoundRobin(rankRoundRobin)
|
|
.with_iterationMod(itMod)
|
|
.with_percentageMod(percentageMod)
|
|
.with_tuplesDistribution(tuplesDistribution)
|
|
// checkpoint options
|
|
.with_checkpointAtEveryIteration(checkpoint_it)
|
|
.with_checkpointAtPercentage(checkpoint_percentage)
|
|
.with_checkpointPath(checkpoint_path)
|
|
.with_readCheckpointIfExists(!noCheckpoint)
|
|
#if defined(HAVE_CUDA)
|
|
.with_oooThreads(ooo_threads)
|
|
.with_oooBlocks(ooo_blocks)
|
|
#endif
|
|
;
|
|
|
|
try {
|
|
auto out = atrip::Atrip::run(in);
|
|
if (atrip::Atrip::rank == 0)
|
|
std::cout << "Energy: " << out.energy << std::endl;
|
|
} catch (const char* msg) {
|
|
if (atrip::Atrip::rank == 0)
|
|
std::cout << "Atrip throwed with msg:\n\t\t " << msg << "\n";
|
|
}
|
|
|
|
if (!in.deleteVppph)
|
|
delete Vppph;
|
|
|
|
|
|
MPI_Finalize();
|
|
return 0;
|
|
}
|