Tangle sources

This commit is contained in:
Alejandro Gallo 2022-05-06 13:58:26 +02:00
parent d76c33f9e8
commit bea9c7a75e
6 changed files with 286 additions and 131 deletions

View File

@ -17,6 +17,7 @@
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <map> #include <map>
#include <mpi.h>
#include <atrip/Utils.hpp> #include <atrip/Utils.hpp>
@ -34,8 +35,9 @@ namespace atrip {
static int rank; static int rank;
static int np; static int np;
static MPI_Comm communicator;
static Timings chrono; static Timings chrono;
static void init(); static void init(MPI_Comm);
template <typename F=double> template <typename F=double>
struct Input { struct Input {
@ -68,6 +70,11 @@ namespace atrip {
ADD_ATTRIBUTE(int, iterationMod, -1) ADD_ATTRIBUTE(int, iterationMod, -1)
ADD_ATTRIBUTE(int, percentageMod, -1) ADD_ATTRIBUTE(int, percentageMod, -1)
ADD_ATTRIBUTE(TuplesDistribution, tuplesDistribution, NAIVE) ADD_ATTRIBUTE(TuplesDistribution, tuplesDistribution, NAIVE)
ADD_ATTRIBUTE(std::string, checkpointPath, "atrip-checkpoint.yaml")
ADD_ATTRIBUTE(bool, readCheckpointIfExists, true)
ADD_ATTRIBUTE(bool, writeCheckpoint, true)
ADD_ATTRIBUTE(float, checkpointAtPercentage, 10)
ADD_ATTRIBUTE(size_t, checkpointAtEveryIteration, 0)
}; };

View File

@ -0,0 +1,92 @@
// Copyright 2022 Alejandro Gallo
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// [[file:../../atrip.org::*Prolog][Prolog:1]]
#pragma once
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <string>
#include <atrip/Atrip.hpp>
namespace atrip {
// Prolog:1 ends here
// [[file:../../atrip.org::checkpoint-definition][checkpoint-definition]]
// template <typename F>
/**
 * Plain record of the values stored in a checkpoint file.
 *
 * The struct is deliberately kept an aggregate without default member
 * initializers, and the field order must not change: it is filled with
 * positional brace initialization ({no, nv, nranks, nnodes, energy,
 * iteration, rankRoundRobin}).
 */
struct Checkpoint {
  // NOTE(review): presumably the occupied / virtual orbital counts -- confirm
  // against the caller.
  size_t no, nv;
  // Number of ranks recorded in the checkpoint.
  size_t nranks;
  // Number of nodes recorded in the checkpoint.
  size_t nnodes;
  // Energy value recorded in the checkpoint.
  double energy;
  // Iteration recorded in the checkpoint.
  size_t iteration;
  // TODO
  // Input<F>::TuplesDistribution distribution(GROUP_AND_SORT);
  // Whether the rank round robin distribution was in use.
  bool rankRoundRobin;
};
// checkpoint-definition ends here
// [[file:../../atrip.org::*Input and output][Input and output:1]]
/**
 * Write a checkpoint as "Key: value" lines, one field per line.
 *
 * Any existing file at @p filepath is overwritten. If the file cannot be
 * opened nothing is written and no error is reported.
 *
 * The function is `inline` because it is defined in a header; without it
 * every translation unit including this header would emit its own
 * definition and the program would fail to link.
 *
 * @param c         values to store
 * @param filepath  path of the file to (over)write
 */
inline void write_checkpoint(Checkpoint const& c, std::string const& filepath) {
  std::ofstream out(filepath);
  out << "No: " << c.no
      << "\n"
      << "Nv: " << c.nv
      << "\n"
      << "Nranks: " << c.nranks
      << "\n"
      << "Nnodes: " << c.nnodes
      << "\n"
      << "Energy: " << std::setprecision(19) << c.energy
      << "\n"
      << "Iteration: " << c.iteration
      << "\n"
      << "RankRoundRobin: " << (c.rankRoundRobin ? "true" : "false")
      << "\n";
}

/**
 * Parse a checkpoint from a stream of "Key: value" lines.
 *
 * The input is processed line by line. Lines without a ':' separator,
 * blank lines and unknown keys are ignored; surrounding blanks, tabs and
 * carriage returns are stripped from both key and value.
 *
 * @param in  stream to read from; a stream that is not open yields a
 *            zero-initialized Checkpoint
 * @return the parsed values; every field that does not appear in the input
 *         is zero / false instead of being left uninitialized
 */
inline Checkpoint read_checkpoint(std::ifstream& in) {
  // Value-initialize so that keys missing from the file give zeros.
  Checkpoint c{};

  // Strip leading and trailing blanks, to be more sure and not use regexes.
  const auto trimmed = [](std::string const& s) -> std::string {
    const char* const blanks = " \t\r";
    const std::string::size_type first = s.find_first_not_of(blanks);
    if (first == std::string::npos) return std::string();
    const std::string::size_type last = s.find_last_not_of(blanks);
    return s.substr(first, last - first + 1);
  };

  // Parse as unsigned 64 bit: std::atoi only covers the range of int and
  // is undefined for larger values, which iteration counts can reach.
  const auto to_size = [](std::string const& v) -> size_t {
    return static_cast<size_t>(std::strtoull(v.c_str(), nullptr, 10));
  };

  for (std::string line; std::getline(in, line);) {
    const std::string::size_type colon = line.find(':');
    if (colon == std::string::npos) continue;  // not a "Key: value" line

    const std::string header = trimmed(line.substr(0, colon));
    const std::string value = trimmed(line.substr(colon + 1));

    /**/ if (header == "No") c.no = to_size(value);
    else if (header == "Nv") c.nv = to_size(value);
    else if (header == "Nranks") c.nranks = to_size(value);
    else if (header == "Nnodes") c.nnodes = to_size(value);
    else if (header == "Energy") c.energy = std::strtod(value.c_str(), nullptr);
    else if (header == "Iteration") c.iteration = to_size(value);
    else if (header == "RankRoundRobin")
      c.rankRoundRobin = !value.empty() && value[0] == 't';
  }
  return c;
}

/**
 * Open @p filepath and parse it as a checkpoint.
 *
 * @param filepath  path of the checkpoint file
 * @return the parsed checkpoint; a zero-initialized Checkpoint if the file
 *         cannot be opened
 */
inline Checkpoint read_checkpoint(std::string const& filepath) {
  std::ifstream in(filepath);
  return read_checkpoint(in);
}
// Input and output:1 ends here
// [[file:../../atrip.org::*Epilog][Epilog:1]]
}
// Epilog:1 ends here

View File

@ -44,36 +44,36 @@ struct Slice {
// Prolog:1 ends here // Prolog:1 ends here
// [[file:../../atrip.org::*Location][Location:1]] // [[file:../../atrip.org::*Location][Location:1]]
struct Location { size_t rank; size_t source; }; struct Location { size_t rank; size_t source; };
// Location:1 ends here // Location:1 ends here
// [[file:../../atrip.org::*Type][Type:1]] // [[file:../../atrip.org::*Type][Type:1]]
enum Type enum Type
{ A = 10 { A = 10
, B , B
, C , C
// Two-parameter slices // Two-parameter slices
, AB = 20 , AB = 20
, BC , BC
, AC , AC
// for abci and the doubles // for abci and the doubles
, CB , CB
, BA , BA
, CA , CA
// The non-typed slice // The non-typed slice
, Blank = 404 , Blank = 404
}; };
// Type:1 ends here // Type:1 ends here
// [[file:../../atrip.org::*State][State:1]] // [[file:../../atrip.org::*State][State:1]]
enum State { enum State {
Fetch = 0, Fetch = 0,
Dispatched = 2, Dispatched = 2,
Ready = 1, Ready = 1,
SelfSufficient = 911, SelfSufficient = 911,
Recycled = 123, Recycled = 123,
Acceptor = 405 Acceptor = 405
}; };
// State:1 ends here // State:1 ends here
// [[file:../../atrip.org::*The Info structure][The Info structure:1]] // [[file:../../atrip.org::*The Info structure][The Info structure:1]]
@ -101,25 +101,25 @@ using Ty_x_Tu = std::pair< Type, PartialTuple >;
// The Info structure:1 ends here // The Info structure:1 ends here
// [[file:../../atrip.org::*Name][Name:1]] // [[file:../../atrip.org::*Name][Name:1]]
enum Name enum Name
{ TA = 100 { TA = 100
, VIJKA = 101 , VIJKA = 101
, VABCI = 200 , VABCI = 200
, TABIJ = 201 , TABIJ = 201
, VABIJ = 202 , VABIJ = 202
}; };
// Name:1 ends here // Name:1 ends here
// [[file:../../atrip.org::*Database][Database:1]] // [[file:../../atrip.org::*Database][Database:1]]
struct LocalDatabaseElement { struct LocalDatabaseElement {
Slice<F>::Name name; Slice<F>::Name name;
Slice<F>::Info info; Slice<F>::Info info;
}; };
// Database:1 ends here // Database:1 ends here
// [[file:../../atrip.org::*Database][Database:2]] // [[file:../../atrip.org::*Database][Database:2]]
using LocalDatabase = std::vector<LocalDatabaseElement>; using LocalDatabase = std::vector<LocalDatabaseElement>;
using Database = LocalDatabase; using Database = LocalDatabase;
// Database:2 ends here // Database:2 ends here
// [[file:../../atrip.org::*MPI Types][MPI Types:1]] // [[file:../../atrip.org::*MPI Types][MPI Types:1]]
@ -359,91 +359,91 @@ static Slice<F>& findByInfo(std::vector<Slice<F>> &slices,
// Static utilities:6 ends here // Static utilities:6 ends here
// [[file:../../atrip.org::*Attributes][Attributes:1]] // [[file:../../atrip.org::*Attributes][Attributes:1]]
Info info; Info info;
// Attributes:1 ends here // Attributes:1 ends here
// [[file:../../atrip.org::*Attributes][Attributes:2]] // [[file:../../atrip.org::*Attributes][Attributes:2]]
F *data; F *data;
// Attributes:2 ends here // Attributes:2 ends here
// [[file:../../atrip.org::*Attributes][Attributes:3]] // [[file:../../atrip.org::*Attributes][Attributes:3]]
MPI_Request request; MPI_Request request;
// Attributes:3 ends here // Attributes:3 ends here
// [[file:../../atrip.org::*Attributes][Attributes:4]] // [[file:../../atrip.org::*Attributes][Attributes:4]]
const size_t size; const size_t size;
// Attributes:4 ends here // Attributes:4 ends here
// [[file:../../atrip.org::*Member functions][Member functions:1]] // [[file:../../atrip.org::*Member functions][Member functions:1]]
void markReady() noexcept { void markReady() noexcept {
info.state = Ready; info.state = Ready;
info.recycling = Blank; info.recycling = Blank;
} }
// Member functions:1 ends here // Member functions:1 ends here
// [[file:../../atrip.org::*Member functions][Member functions:2]] // [[file:../../atrip.org::*Member functions][Member functions:2]]
bool isUnwrapped() const noexcept { bool isUnwrapped() const noexcept {
return info.state == Ready return info.state == Ready
|| info.state == SelfSufficient || info.state == SelfSufficient
; ;
} }
// Member functions:2 ends here // Member functions:2 ends here
// [[file:../../atrip.org::*Member functions][Member functions:3]] // [[file:../../atrip.org::*Member functions][Member functions:3]]
bool isUnwrappable() const noexcept { bool isUnwrappable() const noexcept {
return isUnwrapped() return isUnwrapped()
|| info.state == Recycled || info.state == Recycled
|| info.state == Dispatched || info.state == Dispatched
; ;
} }
inline bool isDirectlyFetchable() const noexcept { inline bool isDirectlyFetchable() const noexcept {
return info.state == Ready || info.state == Dispatched; return info.state == Ready || info.state == Dispatched;
} }
void free() noexcept { void free() noexcept {
info.tuple = {0, 0}; info.tuple = {0, 0};
info.type = Blank; info.type = Blank;
info.state = Acceptor; info.state = Acceptor;
info.from = {0, 0}; info.from = {0, 0};
info.recycling = Blank; info.recycling = Blank;
data = nullptr; data = nullptr;
} }
inline bool isFree() const noexcept { inline bool isFree() const noexcept {
return info.tuple == PartialTuple{0, 0} return info.tuple == PartialTuple{0, 0}
&& info.type == Blank && info.type == Blank
&& info.state == Acceptor && info.state == Acceptor
&& info.from.rank == 0 && info.from.rank == 0
&& info.from.source == 0 && info.from.source == 0
&& info.recycling == Blank && info.recycling == Blank
&& data == nullptr && data == nullptr
; ;
} }
// Member functions:3 ends here // Member functions:3 ends here
// [[file:../../atrip.org::*Member functions][Member functions:4]] // [[file:../../atrip.org::*Member functions][Member functions:4]]
inline bool isRecyclable() const noexcept { inline bool isRecyclable() const noexcept {
return ( info.state == Dispatched return ( info.state == Dispatched
|| info.state == Ready || info.state == Ready
|| info.state == Fetch || info.state == Fetch
) )
&& hasValidDataPointer() && hasValidDataPointer()
; ;
} }
// Member functions:4 ends here // Member functions:4 ends here
// [[file:../../atrip.org::*Member functions][Member functions:5]] // [[file:../../atrip.org::*Member functions][Member functions:5]]
inline bool hasValidDataPointer() const noexcept { inline bool hasValidDataPointer() const noexcept {
return data != nullptr return data != nullptr
&& info.state != Acceptor && info.state != Acceptor
&& info.type != Blank && info.type != Blank
; ;
} }
// Member functions:5 ends here // Member functions:5 ends here
// [[file:../../atrip.org::*Member functions][Member functions:6]] // [[file:../../atrip.org::*Member functions][Member functions:6]]
void unwrapAndMarkReady() { void unwrapAndMarkReady() {
if (info.state == Ready) return; if (info.state == Ready) return;
if (info.state != Dispatched) if (info.state != Dispatched)
throw throw
@ -475,14 +475,14 @@ static Slice<F>& findByInfo(std::vector<Slice<F>> &slices,
// Member functions:6 ends here // Member functions:6 ends here
// [[file:../../atrip.org::*Epilog][Epilog:1]] // [[file:../../atrip.org::*Epilog][Epilog:1]]
Slice(size_t size_) Slice(size_t size_)
: info({}) : info({})
, data(nullptr) , data(nullptr)
, size(size_) , size(size_)
{} {}
}; // struct Slice }; // struct Slice
// Epilog:1 ends here // Epilog:1 ends here
// [[file:../../atrip.org::*Debug][Debug:1]] // [[file:../../atrip.org::*Debug][Debug:1]]

View File

@ -467,31 +467,31 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
// Main:1 ends here // Main:1 ends here
// [[file:../../atrip.org::*Main][Main:2]] // [[file:../../atrip.org::*Main][Main:2]]
size_t const size_t const
tuplesPerRankLocal tuplesPerRankLocal
= nodeTuples.size() / nodeInfos[rank].ranksPerNode = nodeTuples.size() / nodeInfos[rank].ranksPerNode
+ size_t(nodeTuples.size() % nodeInfos[rank].ranksPerNode != 0) + size_t(nodeTuples.size() % nodeInfos[rank].ranksPerNode != 0)
; ;
size_t tuplesPerRankGlobal; size_t tuplesPerRankGlobal;
MPI_Reduce(&tuplesPerRankLocal, MPI_Reduce(&tuplesPerRankLocal,
&tuplesPerRankGlobal, &tuplesPerRankGlobal,
1, 1,
MPI_UINT64_T, MPI_UINT64_T,
MPI_MAX, MPI_MAX,
0, 0,
universe); universe);
MPI_Bcast(&tuplesPerRankGlobal, MPI_Bcast(&tuplesPerRankGlobal,
1, 1,
MPI_UINT64_T, MPI_UINT64_T,
0, 0,
universe); universe);
LOG(1,"Atrip") << "Tuples per rank: " << tuplesPerRankGlobal << "\n"; LOG(1,"Atrip") << "Tuples per rank: " << tuplesPerRankGlobal << "\n";
LOG(1,"Atrip") << "ranks per node " << nodeInfos[rank].ranksPerNode << "\n"; LOG(1,"Atrip") << "ranks per node " << nodeInfos[rank].ranksPerNode << "\n";
LOG(1,"Atrip") << "#nodes " << nNodes << "\n"; LOG(1,"Atrip") << "#nodes " << nNodes << "\n";
// Main:2 ends here // Main:2 ends here
// [[file:../../atrip.org::*Main][Main:3]] // [[file:../../atrip.org::*Main][Main:3]]
@ -531,7 +531,7 @@ if (computeDistribution) {
// Main:4 ends here // Main:4 ends here
// [[file:../../atrip.org::*Main][Main:5]] // [[file:../../atrip.org::*Main][Main:5]]
return result; return result;
} }
// Main:5 ends here // Main:5 ends here

View File

@ -33,7 +33,7 @@ namespace atrip {
// Prolog:1 ends here // Prolog:1 ends here
// [[file:../../atrip.org::*Pretty printing][Pretty printing:1]] // [[file:../../atrip.org::*Pretty printing][Pretty printing:1]]
template <typename T> template <typename T>
std::string pretty_print(T&& value) { std::string pretty_print(T&& value) {
std::stringstream stream; std::stringstream stream;
#if ATRIP_DEBUG > 2 #if ATRIP_DEBUG > 2

View File

@ -20,6 +20,7 @@
#include <atrip/Equations.hpp> #include <atrip/Equations.hpp>
#include <atrip/SliceUnion.hpp> #include <atrip/SliceUnion.hpp>
#include <atrip/Unions.hpp> #include <atrip/Unions.hpp>
#include <atrip/Checkpoint.hpp>
using namespace atrip; using namespace atrip;
@ -28,6 +29,7 @@ template bool RankMap<double>::RANK_ROUND_ROBIN;
template bool RankMap<Complex>::RANK_ROUND_ROBIN; template bool RankMap<Complex>::RANK_ROUND_ROBIN;
int Atrip::rank; int Atrip::rank;
int Atrip::np; int Atrip::np;
MPI_Comm Atrip::communicator;
Timings Atrip::chrono; Timings Atrip::chrono;
// user printing block // user printing block
@ -36,9 +38,10 @@ void atrip::registerIterationDescriptor(IterationDescriptor d) {
IterationDescription::descriptor = d; IterationDescription::descriptor = d;
} }
void Atrip::init() { void Atrip::init(MPI_Comm world) {
MPI_Comm_rank(MPI_COMM_WORLD, &Atrip::rank); Atrip::communicator = world;
MPI_Comm_size(MPI_COMM_WORLD, &Atrip::np); MPI_Comm_rank(world, &Atrip::rank);
MPI_Comm_size(world, &Atrip::np);
} }
template <typename F> template <typename F>
@ -46,7 +49,7 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
const int np = Atrip::np; const int np = Atrip::np;
const int rank = Atrip::rank; const int rank = Atrip::rank;
MPI_Comm universe = in.ei->wrld->comm; MPI_Comm universe = Atrip::communicator;
const size_t No = in.ei->lens[0]; const size_t No = in.ei->lens[0];
const size_t Nv = in.ea->lens[0]; const size_t Nv = in.ea->lens[0];
@ -70,10 +73,10 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
RankMap<F>::RANK_ROUND_ROBIN = in.rankRoundRobin; RankMap<F>::RANK_ROUND_ROBIN = in.rankRoundRobin;
if (RankMap<F>::RANK_ROUND_ROBIN) { if (RankMap<F>::RANK_ROUND_ROBIN) {
LOG(0,"Atrip") << "Doing rank round robin slices distribution" << "\n"; LOG(0,"Atrip") << "Doing rank round robin slices distribution\n";
} else { } else {
LOG(0,"Atrip") LOG(0,"Atrip")
<< "Doing node > local rank round robin slices distribution" << "\n"; << "Doing node > local rank round robin slices distribution\n";
} }
@ -146,7 +149,7 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
const size_t const size_t
iterationMod = (in.percentageMod > 0) iterationMod = (in.percentageMod > 0)
? nIterations * in.percentageMod / 100 ? nIterations * in.percentageMod / 100.0
: in.iterationMod : in.iterationMod
, iteration1Percent = nIterations * 0.01 , iteration1Percent = nIterations * 0.01
@ -300,8 +303,44 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
// START MAIN LOOP ======================================================{{{1 // START MAIN LOOP ======================================================{{{1
double energy(0.); double energy(0.);
size_t first_iteration = 0;
Checkpoint c;
const size_t checkpoint_mod
= in.checkpointAtEveryIteration != 0
? in.checkpointAtEveryIteration
: nIterations * in.checkpointAtPercentage / 100;
if (in.readCheckpointIfExists) {
std::ifstream fin(in.checkpointPath);
if (fin.is_open()) {
LOG(0, "Atrip") << "Reading checkpoint from "
<< in.checkpointPath << "\n";
c = read_checkpoint(fin);
first_iteration = (size_t)c.iteration;
if (first_iteration > nIterations) {
// TODO: throw an error here
// first_iteration is bigger than nIterations,
// you probably started the program with a different number
// of cores
}
if (No != c.no) {/* TODO: write warning */}
if (Nv != c.nv) {/* TODO: write warning */}
// TODO write warnings for nrank and so on
if (Atrip::rank == 0) {
// take the negative of the energy to correct for the
// negativity of the equations, the energy in the checkpoint
// should always be the correct physical one.
energy = - (double)c.energy;
}
LOG(0, "Atrip") << "energy from checkpoint "
<< energy << "\n";
LOG(0, "Atrip") << "iteration from checkpoint "
<< first_iteration << "\n";
}
}
for ( size_t i = 0, iteration = 1 for ( size_t
i = first_iteration,
iteration = first_iteration + 1
; i < tuplesList.size() ; i < tuplesList.size()
; i++, iteration++ ; i++, iteration++
) { ) {
@ -316,6 +355,23 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
if (in.barrier) MPI_Barrier(universe); if (in.barrier) MPI_Barrier(universe);
)) ))
// write checkpoints
if (iteration % checkpoint_mod == 0) {
double globalEnergy = 0;
MPI_Reduce(&energy, &globalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, universe);
Checkpoint out
= {No,
Nv,
0, // TODO
0, // TODO
- globalEnergy,
iteration - 1,
in.rankRoundRobin};
LOG(0, "Atrip") << "Writing checkpoint\n";
if (Atrip::rank == 0) write_checkpoint(out, in.checkpointPath);
}
// write reporting
if (iteration % iterationMod == 0 || iteration == iteration1Percent) { if (iteration % iterationMod == 0 || iteration == iteration1Percent) {
if (IterationDescription::descriptor) { if (IterationDescription::descriptor) {
@ -363,7 +419,7 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
// COMM FIRST DATABASE ================================================{{{1 // COMM FIRST DATABASE ================================================{{{1
if (i == 0) { if (i == first_iteration) {
WITH_RANK << "__first__:first database ............ \n"; WITH_RANK << "__first__:first database ............ \n";
const auto db = communicateDatabase(abc, universe); const auto db = communicateDatabase(abc, universe);
WITH_RANK << "__first__:first database communicated \n"; WITH_RANK << "__first__:first database communicated \n";