#+title: ATRIP: An MPI-asynchronous implementation of CCSD(T) #+PROPERTY: header-args+ :noweb yes :comments noweb :mkdirp t * About this document * Implementation The algorithm uses two main data types, the =Slice= and the =SliceUnion= as a container and resource manager of the =Slice=. ** The slice #+begin_src c++ :tangle (atrip-slice-h) #pragma once #include #include #include #include #include namespace atrip { using ABCTuple = std::array; using PartialTuple = std::array; struct Slice { using F = double; #+end_src A slice is the concept of a subset of values of a given tensor. As an example, for the doubles amplitudes \( T^{ab}_{ij} \), one need two kinds of objects: - the object \( \mathsf{T}(a)^b_{ij} \) which for every \( a \) gets assigned the tensor \( T^{ab}_{ij} \) of size \( N_\mathrm{o}^2 \times N_\mathrm{v} \) - the object \( \mathsf{T}(a,b)_{ij} \) which for every pair of \( a, b \) corresponds the \( N_\mathrm{o}^2 \)-sized tensor \( T^{ab}_{ij} \). #+begin_src c++ :tangle (atrip-slice-h) // ASSOCIATED TYPES %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% struct Location { size_t rank; size_t source; }; enum Type { A = 10 , B , C // Two-parameter slices , AB = 20 , BC , AC // for abci and the doubles , CB , BA , CA // The non-typed slice , Blank = 404 }; enum State { // Fetch represents the state where a slice is to be fetched // and has a valid data pointer that can be written to Fetch = 0, // Dispatches represents the state that an MPI call has been // dispatched in order to get the data, but the data has not been // yet unwrapped, the data might be there or we might have to wait. Dispatched = 2, // Ready means that the data pointer can be read from Ready = 1, // Self sufficient is a slice when its contents are located // in the same rank that it lives, so that it does not have to // fetch from no one else. SelfSufficient = 911, // Recycled means that this slice gets its data pointer from another // slice, so it should not be written to Recycled = 123, // Acceptor means that the Slice can accept a new Slice, it is // the counterpart of the Blank type, but for states Acceptor = 405 }; struct Info { // which part of a,b,c the slice holds PartialTuple tuple; // The type of slice for the user to retrieve the correct one Type type; // What is the state of the slice State state; // Where the slice is to be retrieved // NOTE: this can actually be computed from tuple Location from; // If the data are actually to be found in this other slice Type recycling; Info() : tuple{0,0} , type{Blank} , state{Acceptor} , from{0,0} , recycling{Blank} {} }; using Ty_x_Tu = std::pair< Type, PartialTuple >; // Names of the integrals that are considered in CCSD(T) enum Name { TA = 100 , VIJKA = 101 , VABCI = 200 , TABIJ = 201 , VABIJ = 202 }; // DATABASE ==========================================================={{{1 struct LocalDatabaseElement { Slice::Name name; Slice::Info info; }; using LocalDatabase = std::vector; using Database = LocalDatabase; // STATIC METHODS =========================================================== // // They are useful to organize the structure of slices struct mpi { static MPI_Datatype vector(size_t n, MPI_Datatype const& DT) { MPI_Datatype dt; MPI_Type_vector(n, 1, 1, DT, &dt); MPI_Type_commit(&dt); return dt; } static MPI_Datatype sliceLocation () { constexpr int n = 2; // create a sliceLocation to measure in the current architecture // the packing of the struct Slice::Location measure; MPI_Datatype dt; const std::vector lengths(n, 1); const MPI_Datatype types[n] = {usizeDt(), usizeDt()}; // measure the displacements in the struct size_t j = 0; MPI_Aint displacements[n]; MPI_Get_address(&measure.rank, &displacements[j++]); MPI_Get_address(&measure.source, &displacements[j++]); for (size_t i = 1; i < n; i++) displacements[i] -= displacements[0]; displacements[0] = 0; MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt); MPI_Type_commit(&dt); return dt; } static MPI_Datatype enumDt() { return MPI_INT; } static MPI_Datatype usizeDt() { return MPI_UINT64_T; } static MPI_Datatype sliceInfo () { constexpr int n = 5; MPI_Datatype dt; Slice::Info measure; const std::vector lengths(n, 1); const MPI_Datatype types[n] = { vector(2, usizeDt()) , enumDt() , enumDt() , sliceLocation() , enumDt() }; // create the displacements from the info measurement struct size_t j = 0; MPI_Aint displacements[n]; MPI_Get_address(measure.tuple.data(), &displacements[j++]); MPI_Get_address(&measure.type, &displacements[j++]); MPI_Get_address(&measure.state, &displacements[j++]); MPI_Get_address(&measure.from, &displacements[j++]); MPI_Get_address(&measure.recycling, &displacements[j++]); for (size_t i = 1; i < n; i++) displacements[i] -= displacements[0]; displacements[0] = 0; MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt); MPI_Type_commit(&dt); return dt; } static MPI_Datatype localDatabaseElement () { constexpr int n = 2; MPI_Datatype dt; LocalDatabaseElement measure; const std::vector lengths(n, 1); const MPI_Datatype types[n] = { enumDt() , sliceInfo() }; // measure the displacements in the struct size_t j = 0; MPI_Aint displacements[n]; MPI_Get_address(&measure.name, &displacements[j++]); MPI_Get_address(&measure.info, &displacements[j++]); for (size_t i = 1; i < n; i++) displacements[i] -= displacements[0]; displacements[0] = 0; MPI_Type_create_struct(n, lengths.data(), displacements, types, &dt); MPI_Type_commit(&dt); return dt; } }; static PartialTuple subtupleBySlice(ABCTuple abc, Type sliceType) { switch (sliceType) { case AB: return {abc[0], abc[1]}; case BC: return {abc[1], abc[2]}; case AC: return {abc[0], abc[2]}; case CB: return {abc[2], abc[1]}; case BA: return {abc[1], abc[0]}; case CA: return {abc[2], abc[0]}; case A: return {abc[0], 0}; case B: return {abc[1], 0}; case C: return {abc[2], 0}; default: throw "Switch statement not exhaustive!"; } } /** ,* It is important here to return a reference to a Slice ,* not to accidentally copy the associated buffer of the slice. ,*/ static Slice& findOneByType(std::vector &slices, Slice::Type type) { const auto sliceIt = std::find_if(slices.begin(), slices.end(), [&type](Slice const& s) { return type == s.info.type; }); WITH_CRAZY_DEBUG WITH_RANK << "\t__ looking for " << type << "\n"; if (sliceIt == slices.end()) throw std::domain_error("Slice by type not found!"); return *sliceIt; } /* ,* Check if an info has ,* ,*/ static std::vector hasRecycledReferencingToIt ( std::vector &slices , Info const& info ) { std::vector result; for (auto& s: slices) if ( s.info.recycling == info.type && s.info.tuple == info.tuple && s.info.state == Recycled ) result.push_back(&s); return result; } static Slice& findRecycledSource (std::vector &slices, Slice::Info info) { const auto sliceIt = std::find_if(slices.begin(), slices.end(), [&info](Slice const& s) { return info.recycling == s.info.type && info.tuple == s.info.tuple && State::Recycled != s.info.state ; }); WITH_CRAZY_DEBUG WITH_RANK << "__slice__:find: recycling source of " << pretty_print(info) << "\n"; if (sliceIt == slices.end()) throw std::domain_error( "Slice not found: " + pretty_print(info) + " rank: " + pretty_print(cc4s::Cc4s::world->rank) ); WITH_RANK << "__slice__:find: " << pretty_print(sliceIt->info) << "\n"; return *sliceIt; } static Slice& findByTypeAbc ( std::vector &slices , Slice::Type type , ABCTuple const& abc ) { const auto tuple = Slice::subtupleBySlice(abc, type); const auto sliceIt = std::find_if(slices.begin(), slices.end(), [&type, &tuple](Slice const& s) { return type == s.info.type && tuple == s.info.tuple ; }); WITH_CRAZY_DEBUG WITH_RANK << "__slice__:find:" << type << " and tuple " << pretty_print(tuple) << "\n"; if (sliceIt == slices.end()) throw std::domain_error( "Slice not found: " + pretty_print(tuple) + ", " + pretty_print(type) + " rank: " + pretty_print(cc4s::Cc4s::world->rank) ); return *sliceIt; } static Slice& findByInfo(std::vector &slices, Slice::Info const& info) { const auto sliceIt = std::find_if(slices.begin(), slices.end(), [&info](Slice const& s) { // TODO: maybe implement comparison in Info struct return info.type == s.info.type && info.state == s.info.state && info.tuple == s.info.tuple && info.from.rank == s.info.from.rank && info.from.source == s.info.from.source ; }); WITH_CRAZY_DEBUG WITH_RANK << "__slice__:find:looking for " << pretty_print(info) << "\n"; if (sliceIt == slices.end()) throw std::domain_error( "Slice by info not found: " + pretty_print(info)); return *sliceIt; } // SLICE DEFINITION =================================================={{{1 // ATTRIBUTES ============================================================ Info info; F *data; MPI_Request request; const size_t size; void markReady() noexcept { info.state = Ready; info.recycling = Blank; } /* ,* This means that the data is there ,*/ bool isUnwrapped() const noexcept { return info.state == Ready || info.state == SelfSufficient ; } bool isUnwrappable() const noexcept { return isUnwrapped() || info.state == Recycled || info.state == Dispatched ; } inline bool isDirectlyFetchable() const noexcept { return info.state == Ready || info.state == Dispatched; } void free() noexcept { info.tuple = {0, 0}; info.type = Blank; info.state = Acceptor; info.from = {0, 0}; info.recycling = Blank; data = nullptr; } inline bool isFree() const noexcept { return info.tuple == PartialTuple{0, 0} && info.type == Blank && info.state == Acceptor && info.from.rank == 0 && info.from.source == 0 && info.recycling == Blank && data == nullptr ; } /* ,* This function answers the question, which slices can be recycled. ,* ,* A slice can only be recycled if it is Fetch or Ready and has ,* a valid datapointer. ,* ,* In particular, SelfSufficient are not recyclable, since it is easier ,* just to create a SelfSufficient slice than deal with data dependencies. ,* ,* Furthermore, a recycled slice is not recyclable, if this is the case ,* then it is either bad design or a bug. ,*/ inline bool isRecyclable() const noexcept { return ( info.state == Dispatched || info.state == Ready || info.state == Fetch ) && hasValidDataPointer() ; } /* ,* This function describes if a slice has a valid data pointer. ,* ,* This is important to know if the slice has some data to it, also ,* some structural checks are done, so that it should not be Acceptor ,* or Blank, if this is the case then it is a bug. ,*/ inline bool hasValidDataPointer() const noexcept { return data != nullptr && info.state != Acceptor && info.type != Blank ; } void unwrapAndMarkReady() { if (info.state == Ready) return; if (info.state != Dispatched) throw std::domain_error("Can't unwrap a non-ready, non-dispatched slice!"); markReady(); MPI_Status status; #ifdef HAVE_OCD WITH_RANK << "__slice__:mpi: waiting " << "\n"; #endif const int errorCode = MPI_Wait(&request, &status); if (errorCode != MPI_SUCCESS) throw "MPI ERROR HAPPENED...."; #ifdef HAVE_OCD char errorString[MPI_MAX_ERROR_STRING]; int errorSize; MPI_Error_string(errorCode, errorString, &errorSize); WITH_RANK << "__slice__:mpi: status " << "{ .source=" << status.MPI_SOURCE << ", .tag=" << status.MPI_TAG << ", .error=" << status.MPI_ERROR << ", .errCode=" << errorCode << ", .err=" << errorString << " }" << "\n"; #endif } Slice(size_t size_) : info({}) , data(nullptr) , size(size_) {} }; // struct Slice std::ostream& operator<<(std::ostream& out, Slice::Location const& v) { // TODO: remove me out << "{.r(" << v.rank << "), .s(" << v.source << ")};"; return out; } std::ostream& operator<<(std::ostream& out, Slice::Info const& i) { out << "«t" << i.type << ", s" << i.state << "»" << " ⊙ {" << i.from.rank << ", " << i.from.source << "}" << " ∴ {" << i.tuple[0] << ", " << i.tuple[1] << "}" << " ♲t" << i.recycling ; return out; } #+end_src