diff --git a/bench/test_main.cxx b/bench/test_main.cxx index 2d32bd8..c02401d 100644 --- a/bench/test_main.cxx +++ b/bench/test_main.cxx @@ -20,8 +20,26 @@ int main(int argc, char** argv) { constexpr double elem_to_gb = 8.0 / 1024.0 / 1024.0 / 1024.0; const int no(hauta::option(argc, argv, "--no")) , nv(hauta::option(argc, argv, "--nv")) + , itMod(hauta::option(argc, argv, "--mod", 100)) ; + const bool nochrono(hauta::option(argc, argv, "--nochrono", false)) + , barrier(hauta::option(argc, argv, "--barrier", false)) + ; + const std::string tuplesDistributionString + = hauta::option(argc, argv, "--dist", "naive"); + + atrip::Atrip::Input::TuplesDistribution tuplesDistribution; + { using atrip::Atrip; + if (tuplesDistributionString == "naive") { + tuplesDistribution = Atrip::Input::TuplesDistribution::NAIVE; + } else if (tuplesDistributionString == "group") { + tuplesDistribution = Atrip::Input::TuplesDistribution::GROUP_AND_SORT; + } else { + std::cout << "--dist should be either naive or group\n"; + exit(1); + } + } std::vector symmetries(4, NS) , vo({nv, no}) @@ -53,9 +71,7 @@ int main(int argc, char** argv) { Vppph.fill_random(0, 1); atrip::Atrip::init(); - atrip::Atrip::Input in; - - in + const auto in = atrip::Atrip::Input() // Tensors .with_epsilon_i(&ei) .with_epsilon_a(&ea) @@ -65,8 +81,10 @@ int main(int argc, char** argv) { .with_Vijka(&Vhhhp) .with_Vabci(&Vppph) // some options - .with_barrier(false) - .with_iterationMod(100) + .with_barrier(barrier) + .with_chrono(!nochrono) + .with_iterationMod(itMod) + .with_tuplesDistribution(tuplesDistribution) ; auto out = atrip::Atrip::run(in); diff --git a/include/atrip/Atrip.hpp b/include/atrip/Atrip.hpp index a0cad96..3c9027e 100644 --- a/include/atrip/Atrip.hpp +++ b/include/atrip/Atrip.hpp @@ -7,6 +7,13 @@ #include +#define ADD_ATTRIBUTE(_type, _name, _default) \ + _type _name = _default; \ + Input& with_ ## _name(_type i) { \ + _name = i; \ + return *this; \ + } + namespace atrip { struct Atrip { @@ -24,9 +31,6 @@ namespace atrip { , *Vhhhp = nullptr , *Vppph = nullptr ; - int maxIterations = 0, iterationMod = -1; - bool barrier = false; - bool chrono = false; Input& with_epsilon_i(CTF::Tensor * t) { ei = t; return *this; } Input& with_epsilon_a(CTF::Tensor * t) { ea = t; return *this; } Input& with_Tai(CTF::Tensor * t) { Tph = t; return *this; } @@ -34,10 +38,19 @@ namespace atrip { Input& with_Vabij(CTF::Tensor * t) { Vpphh = t; return *this; } Input& with_Vijka(CTF::Tensor * t) { Vhhhp = t; return *this; } Input& with_Vabci(CTF::Tensor * t) { Vppph = t; return *this; } - Input& with_maxIterations(int i) { maxIterations = i; return *this; } - Input& with_iterationMod(int i) { iterationMod = i; return *this; } - Input& with_barrier(bool i) { barrier = i; return *this; } - Input& with_chrono(bool i) { chrono = i; return *this; } + + enum TuplesDistribution { + NAIVE, + GROUP_AND_SORT, + }; + + ADD_ATTRIBUTE(bool, chrono, false) + ADD_ATTRIBUTE(bool, barrier, false) + ADD_ATTRIBUTE(int, maxIterations, 0) + ADD_ATTRIBUTE(int, iterationMod, -1) + ADD_ATTRIBUTE(TuplesDistribution, tuplesDistribution, NAIVE) + + }; struct Output { @@ -47,4 +60,6 @@ namespace atrip { }; } + +#undef ADD_ATTRIBUTE // Atrip:1 ends here diff --git a/include/atrip/Debug.hpp b/include/atrip/Debug.hpp index 9153954..ba04314 100644 --- a/include/atrip/Debug.hpp +++ b/include/atrip/Debug.hpp @@ -2,12 +2,17 @@ #pragma once #define ATRIP_BENCHMARK //#define ATRIP_DONT_SLICE -#define ATRIP_DEBUG 1 //#define ATRIP_WORKLOAD_DUMP #define ATRIP_USE_DGEMM //#define ATRIP_PRINT_TUPLES +#ifndef ATRIP_DEBUG +#define ATRIP_DEBUG 1 +#endif + +#ifndef LOG #define LOG(level, name) if (Atrip::rank == 0) std::cout << name << ": " +#endif #if ATRIP_DEBUG == 4 # pragma message("WARNING: You have OCD debugging ABC triples "\ @@ -45,7 +50,7 @@ # define WITH_CRAZY_DEBUG if (false) # define WITH_DBG # define DBG(...) dbg(__VA_ARGS__) -#elif ATRIP_DEBUG == 1 +#else # define OCD_Barrier(com) # define WITH_OCD if (false) # define WITH_ROOT if (false) @@ -54,7 +59,5 @@ # define WITH_DBG if (false) # define WITH_CRAZY_DEBUG if (false) # define DBG(...) -#else -# error("ATRIP_DEBUG is not defined!") #endif // Debug:1 ends here diff --git a/include/atrip/Tuples.hpp b/include/atrip/Tuples.hpp index 77f3a82..30b98f7 100644 --- a/include/atrip/Tuples.hpp +++ b/include/atrip/Tuples.hpp @@ -77,10 +77,16 @@ getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) { auto const& it = n_tuples_per_rank.begin(); - return - { std::accumulate(it, it + rank , 0) - , std::accumulate(it, it + rank + 1, 0) - }; + std::pair const + range = { std::accumulate(it, it + rank , 0) + , std::accumulate(it, it + rank + 1, 0) - 1 + }; + + WITH_RANK << "range = " + << range.first << " -> " << range.second + << std::endl; + + return range; } // Naive list:2 ends here @@ -92,12 +98,23 @@ struct NaiveDistribution : public TuplesDistribution { MPI_Comm_rank(universe, &rank); MPI_Comm_size(universe, &np); auto const all = getTuplesList(Nv); - auto const range = getABCRange((size_t)np, (size_t)rank, all); + const size_t + tuplesPerRank + = all.size() / np + + size_t(all.size() % np != 0) + ; + //auto const range = getABCRange((size_t)np, (size_t)rank, all); + + std::pair const + range = { tuplesPerRank * rank + , tuplesPerRank * (rank + 1) - 1 + }; + std::vector result(range.second - range.first, FAKE_TUPLE); std::copy(all.begin() + range.first, range.second >= all.size() - ? all.end() - : all.begin() + range.first + range.second, + ? all.end() + : all.begin() + range.first + range.second, result.begin()); return result; } @@ -226,7 +243,6 @@ specialDistribution(Info info, std::vector const& allTuples) { size_t nNodes(info.nNodes); size_t np(info.np); size_t N(allTuples.size()); - size_t tuplePerNode( ceil( ((double)N) / nNodes) ); // nodeid tuple list std::map > container1d; diff --git a/include/atrip/Utils.hpp b/include/atrip/Utils.hpp index bf17398..85eec8f 100644 --- a/include/atrip/Utils.hpp +++ b/include/atrip/Utils.hpp @@ -6,6 +6,7 @@ #include #include +#include namespace atrip { // Prolog:1 ends here diff --git a/src/atrip/Atrip.cxx b/src/atrip/Atrip.cxx index 06c4079..e365df9 100644 --- a/src/atrip/Atrip.cxx +++ b/src/atrip/Atrip.cxx @@ -83,36 +83,36 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { // all tensors std::vector< SliceUnion* > unions = {&taphh, &hhha, &abph, &abhh, &tabhh}; - //CONSTRUCT TUPLE LIST ==============================================={{{1 - LOG(0,"Atrip") << "BUILD TUPLE LIST\n"; - const auto tuplesList = std::move(getTuplesList(Nv)); - WITH_RANK << "tupList.size() = " << tuplesList.size() << "\n"; + // get tuples for the current rank + TuplesDistribution *distribution; - // GET ABC INDEX RANGE FOR RANK ======================================{{{1 - auto abcIndex = getABCRange(np, rank, tuplesList); - size_t nIterations = abcIndex.second - abcIndex.first; - -#ifdef ATRIP_BENCHMARK - { const size_t maxIterations = in.maxIterations; - if (maxIterations != 0) { - abcIndex.second = abcIndex.first + maxIterations % (nIterations + 1); - nIterations = maxIterations % (nIterations + 1); - } + if (in.tuplesDistribution == Atrip::Input::TuplesDistribution::NAIVE) { + LOG(0,"Atrip") << "Using the naive distribution\n"; + distribution = new NaiveDistribution(); + } else { + LOG(0,"Atrip") << "Using the group-and-sort distribution\n"; + distribution = new group_and_sort::Distribution(); } -#endif - WITH_RANK << "abcIndex = " << pretty_print(abcIndex) << "\n"; - LOG(0,"Atrip") << "#iterations: " << nIterations << "\n"; - - // first abc - const ABCTuple firstAbc = tuplesList[abcIndex.first]; - - - double energy(0.); + LOG(0,"Atrip") << "BUILDING TUPLE LIST\n"; + WITH_CHRONO(chrono["tuples:build"], + auto const tuplesList = distribution->getTuples(Nv, universe); + ) + size_t nIterations = tuplesList.size(); + { + const size_t _all_tuples = Nv * (Nv + 1) * (Nv + 2) / 6 - Nv; + LOG(0,"Atrip") << "#iterations: " + << nIterations + << "/" + << _all_tuples + << "\n"; + } auto const isFakeTuple - = [&tuplesList](size_t const i) { return i >= tuplesList.size(); }; + = [&tuplesList, distribution](size_t const i) { + return distribution->tupleIsFake(tuplesList[i]); + }; auto communicateDatabase @@ -255,10 +255,10 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { // START MAIN LOOP ======================================================{{{1 - Slice::Database db; + double energy(0.); - for ( size_t i = abcIndex.first, iteration = 1 - ; i < abcIndex.second + for ( size_t i = 0, iteration = 1 + ; i < tuplesList.size() ; i++, iteration++ ) { chrono["iterations"].start(); @@ -267,13 +267,10 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { chrono["start:stop"].start(); chrono["start:stop"].stop(); // check overhead of doing a barrier at the beginning - chrono["oneshot-mpi:barrier"].start(); - chrono["mpi:barrier"].start(); - // TODO: REMOVE - if (in.barrier == 1) - MPI_Barrier(universe); - chrono["mpi:barrier"].stop(); - chrono["oneshot-mpi:barrier"].stop(); + WITH_CHRONO(chrono["oneshot-mpi:barrier"], + WITH_CHRONO(chrono["mpi:barrier"], + if (in.barrier) MPI_Barrier(universe); + )) if (iteration % in.iterationMod == 0) { LOG(0,"Atrip") @@ -297,7 +294,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { const ABCTuple abc = isFakeTuple(i) ? tuplesList[tuplesList.size() - 1] : tuplesList[i] - , *abcNext = i == (abcIndex.second - 1) + , *abcNext = i == (tuplesList.size() - 1) ? nullptr : isFakeTuple(i + 1) ? &tuplesList[tuplesList.size() - 1] @@ -314,12 +311,12 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { // COMM FIRST DATABASE ================================================{{{1 - if (i == abcIndex.first) { + if (i == 0) { WITH_RANK << "__first__:first database ............ \n"; - const auto __db = communicateDatabase(abc, universe); + const auto db = communicateDatabase(abc, universe); WITH_RANK << "__first__:first database communicated \n"; WITH_RANK << "__first__:first database io phase \n"; - doIOPhase(__db); + doIOPhase(db); WITH_RANK << "__first__:first database io phase DONE\n"; WITH_RANK << "__first__::::Unwrapping all slices for first database\n"; for (auto& u: unions) u->unwrapAll(abc); @@ -331,8 +328,7 @@ Atrip::Output Atrip::run(Atrip::Input const& in) { if (abcNext) { WITH_RANK << "__comm__:" << iteration << "th communicating database\n"; chrono["db:comm"].start(); - //const auto db = communicateDatabase(*abcNext, universe); - db = communicateDatabase(*abcNext, universe); + const auto db = communicateDatabase(*abcNext, universe); chrono["db:comm"].stop(); chrono["db:io"].start(); doIOPhase(db);