Tangle sources
This commit is contained in:
parent
d76c33f9e8
commit
bea9c7a75e
@ -17,6 +17,7 @@
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <mpi.h>
|
||||
|
||||
|
||||
#include <atrip/Utils.hpp>
|
||||
@ -34,8 +35,9 @@ namespace atrip {
|
||||
|
||||
static int rank;
|
||||
static int np;
|
||||
static MPI_Comm communicator;
|
||||
static Timings chrono;
|
||||
static void init();
|
||||
static void init(MPI_Comm);
|
||||
|
||||
template <typename F=double>
|
||||
struct Input {
|
||||
@ -68,6 +70,11 @@ namespace atrip {
|
||||
ADD_ATTRIBUTE(int, iterationMod, -1)
|
||||
ADD_ATTRIBUTE(int, percentageMod, -1)
|
||||
ADD_ATTRIBUTE(TuplesDistribution, tuplesDistribution, NAIVE)
|
||||
ADD_ATTRIBUTE(std::string, checkpointPath, "atrip-checkpoint.yaml")
|
||||
ADD_ATTRIBUTE(bool, readCheckpointIfExists, true)
|
||||
ADD_ATTRIBUTE(bool, writeCheckpoint, true)
|
||||
ADD_ATTRIBUTE(float, checkpointAtPercentage, 10)
|
||||
ADD_ATTRIBUTE(size_t, checkpointAtEveryIteration, 0)
|
||||
|
||||
};
|
||||
|
||||
|
||||
92
include/atrip/Checkpoint.hpp
Normal file
92
include/atrip/Checkpoint.hpp
Normal file
@ -0,0 +1,92 @@
|
||||
// Copyright 2022 Alejandro Gallo
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// [[file:../../atrip.org::*Prolog][Prolog:1]]
|
||||
#pragma once
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
|
||||
#include <atrip/Atrip.hpp>
|
||||
|
||||
namespace atrip {
|
||||
// Prolog:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::checkpoint-definition][checkpoint-definition]]
|
||||
// template <typename F>
|
||||
struct Checkpoint {
|
||||
size_t no, nv;
|
||||
size_t nranks;
|
||||
size_t nnodes;
|
||||
double energy;
|
||||
size_t iteration;
|
||||
// TODO
|
||||
// Input<F>::TuplesDistribution distribution(GROUP_AND_SORT);
|
||||
bool rankRoundRobin;
|
||||
};
|
||||
// checkpoint-definition ends here
|
||||
|
||||
// [[file:../../atrip.org::*Input and output][Input and output:1]]
|
||||
void write_checkpoint(Checkpoint const& c, std::string const& filepath) {
|
||||
std::ofstream out(filepath);
|
||||
out << "No: " << c.no
|
||||
<< "\n"
|
||||
<< "Nv: " << c.nv
|
||||
<< "\n"
|
||||
<< "Nranks: " << c.nranks
|
||||
<< "\n"
|
||||
<< "Nnodes: " << c.nnodes
|
||||
<< "\n"
|
||||
<< "Energy: " << std::setprecision(19) << c.energy
|
||||
<< "\n"
|
||||
<< "Iteration: " << c.iteration
|
||||
<< "\n"
|
||||
<< "RankRoundRobin: " << (c.rankRoundRobin ? "true" : "false")
|
||||
<< "\n";
|
||||
}
|
||||
|
||||
|
||||
Checkpoint read_checkpoint(std::ifstream& in) {
|
||||
Checkpoint c;
|
||||
// trim chars from the string, to be more sure and not use regexes
|
||||
auto trim = [](std::string& s, std::string const& chars) {
|
||||
s.erase(0, s.find_first_not_of(chars));
|
||||
s.erase(s.find_last_not_of(chars) + 1);
|
||||
return s;
|
||||
};
|
||||
for (std::string header, value; std::getline(in, header, ':');) {
|
||||
std::getline(in, value, '\n');
|
||||
trim(value, " \t"); // trim all whitespaces
|
||||
trim(header, " \t");
|
||||
|
||||
/**/ if (header == "No") c.no = std::atoi(value.c_str());
|
||||
else if (header == "Nv") c.nv = std::atoi(value.c_str());
|
||||
else if (header == "Nranks") c.nranks = std::atoi(value.c_str());
|
||||
else if (header == "Nnodes") c.nnodes = std::atoi(value.c_str());
|
||||
else if (header == "Energy") c.energy = std::atof(value.c_str());
|
||||
else if (header == "Iteration") c.iteration = std::atoi(value.c_str());
|
||||
else if (header == "RankRoundRobin") c.rankRoundRobin = (value[0] == 't');
|
||||
}
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
Checkpoint read_checkpoint(std::string const& filepath) {
|
||||
std::ifstream in(filepath);
|
||||
return read_checkpoint(in);
|
||||
}
|
||||
// Input and output:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Epilog][Epilog:1]]
|
||||
}
|
||||
// Epilog:1 ends here
|
||||
@ -44,11 +44,11 @@ struct Slice {
|
||||
// Prolog:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Location][Location:1]]
|
||||
struct Location { size_t rank; size_t source; };
|
||||
struct Location { size_t rank; size_t source; };
|
||||
// Location:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Type][Type:1]]
|
||||
enum Type
|
||||
enum Type
|
||||
{ A = 10
|
||||
, B
|
||||
, C
|
||||
@ -66,14 +66,14 @@ struct Slice {
|
||||
// Type:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*State][State:1]]
|
||||
enum State {
|
||||
enum State {
|
||||
Fetch = 0,
|
||||
Dispatched = 2,
|
||||
Ready = 1,
|
||||
SelfSufficient = 911,
|
||||
Recycled = 123,
|
||||
Acceptor = 405
|
||||
};
|
||||
};
|
||||
// State:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*The Info structure][The Info structure:1]]
|
||||
@ -101,7 +101,7 @@ using Ty_x_Tu = std::pair< Type, PartialTuple >;
|
||||
// The Info structure:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Name][Name:1]]
|
||||
enum Name
|
||||
enum Name
|
||||
{ TA = 100
|
||||
, VIJKA = 101
|
||||
, VABCI = 200
|
||||
@ -111,15 +111,15 @@ using Ty_x_Tu = std::pair< Type, PartialTuple >;
|
||||
// Name:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Database][Database:1]]
|
||||
struct LocalDatabaseElement {
|
||||
struct LocalDatabaseElement {
|
||||
Slice<F>::Name name;
|
||||
Slice<F>::Info info;
|
||||
};
|
||||
};
|
||||
// Database:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Database][Database:2]]
|
||||
using LocalDatabase = std::vector<LocalDatabaseElement>;
|
||||
using Database = LocalDatabase;
|
||||
using LocalDatabase = std::vector<LocalDatabaseElement>;
|
||||
using Database = LocalDatabase;
|
||||
// Database:2 ends here
|
||||
|
||||
// [[file:../../atrip.org::*MPI Types][MPI Types:1]]
|
||||
@ -359,58 +359,58 @@ static Slice<F>& findByInfo(std::vector<Slice<F>> &slices,
|
||||
// Static utilities:6 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Attributes][Attributes:1]]
|
||||
Info info;
|
||||
Info info;
|
||||
// Attributes:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Attributes][Attributes:2]]
|
||||
F *data;
|
||||
F *data;
|
||||
// Attributes:2 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Attributes][Attributes:3]]
|
||||
MPI_Request request;
|
||||
MPI_Request request;
|
||||
// Attributes:3 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Attributes][Attributes:4]]
|
||||
const size_t size;
|
||||
const size_t size;
|
||||
// Attributes:4 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Member functions][Member functions:1]]
|
||||
void markReady() noexcept {
|
||||
void markReady() noexcept {
|
||||
info.state = Ready;
|
||||
info.recycling = Blank;
|
||||
}
|
||||
}
|
||||
// Member functions:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Member functions][Member functions:2]]
|
||||
bool isUnwrapped() const noexcept {
|
||||
bool isUnwrapped() const noexcept {
|
||||
return info.state == Ready
|
||||
|| info.state == SelfSufficient
|
||||
;
|
||||
}
|
||||
}
|
||||
// Member functions:2 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Member functions][Member functions:3]]
|
||||
bool isUnwrappable() const noexcept {
|
||||
bool isUnwrappable() const noexcept {
|
||||
return isUnwrapped()
|
||||
|| info.state == Recycled
|
||||
|| info.state == Dispatched
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool isDirectlyFetchable() const noexcept {
|
||||
inline bool isDirectlyFetchable() const noexcept {
|
||||
return info.state == Ready || info.state == Dispatched;
|
||||
}
|
||||
}
|
||||
|
||||
void free() noexcept {
|
||||
void free() noexcept {
|
||||
info.tuple = {0, 0};
|
||||
info.type = Blank;
|
||||
info.state = Acceptor;
|
||||
info.from = {0, 0};
|
||||
info.recycling = Blank;
|
||||
data = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool isFree() const noexcept {
|
||||
inline bool isFree() const noexcept {
|
||||
return info.tuple == PartialTuple{0, 0}
|
||||
&& info.type == Blank
|
||||
&& info.state == Acceptor
|
||||
@ -419,31 +419,31 @@ static Slice<F>& findByInfo(std::vector<Slice<F>> &slices,
|
||||
&& info.recycling == Blank
|
||||
&& data == nullptr
|
||||
;
|
||||
}
|
||||
}
|
||||
// Member functions:3 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Member functions][Member functions:4]]
|
||||
inline bool isRecyclable() const noexcept {
|
||||
inline bool isRecyclable() const noexcept {
|
||||
return ( info.state == Dispatched
|
||||
|| info.state == Ready
|
||||
|| info.state == Fetch
|
||||
)
|
||||
&& hasValidDataPointer()
|
||||
;
|
||||
}
|
||||
}
|
||||
// Member functions:4 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Member functions][Member functions:5]]
|
||||
inline bool hasValidDataPointer() const noexcept {
|
||||
inline bool hasValidDataPointer() const noexcept {
|
||||
return data != nullptr
|
||||
&& info.state != Acceptor
|
||||
&& info.type != Blank
|
||||
;
|
||||
}
|
||||
}
|
||||
// Member functions:5 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Member functions][Member functions:6]]
|
||||
void unwrapAndMarkReady() {
|
||||
void unwrapAndMarkReady() {
|
||||
if (info.state == Ready) return;
|
||||
if (info.state != Dispatched)
|
||||
throw
|
||||
@ -475,14 +475,14 @@ static Slice<F>& findByInfo(std::vector<Slice<F>> &slices,
|
||||
// Member functions:6 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Epilog][Epilog:1]]
|
||||
Slice(size_t size_)
|
||||
Slice(size_t size_)
|
||||
: info({})
|
||||
, data(nullptr)
|
||||
, size(size_)
|
||||
{}
|
||||
|
||||
|
||||
}; // struct Slice
|
||||
}; // struct Slice
|
||||
// Epilog:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Debug][Debug:1]]
|
||||
|
||||
@ -467,15 +467,15 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
|
||||
// Main:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Main][Main:2]]
|
||||
size_t const
|
||||
size_t const
|
||||
tuplesPerRankLocal
|
||||
= nodeTuples.size() / nodeInfos[rank].ranksPerNode
|
||||
+ size_t(nodeTuples.size() % nodeInfos[rank].ranksPerNode != 0)
|
||||
;
|
||||
|
||||
size_t tuplesPerRankGlobal;
|
||||
size_t tuplesPerRankGlobal;
|
||||
|
||||
MPI_Reduce(&tuplesPerRankLocal,
|
||||
MPI_Reduce(&tuplesPerRankLocal,
|
||||
&tuplesPerRankGlobal,
|
||||
1,
|
||||
MPI_UINT64_T,
|
||||
@ -483,15 +483,15 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
|
||||
0,
|
||||
universe);
|
||||
|
||||
MPI_Bcast(&tuplesPerRankGlobal,
|
||||
MPI_Bcast(&tuplesPerRankGlobal,
|
||||
1,
|
||||
MPI_UINT64_T,
|
||||
0,
|
||||
universe);
|
||||
|
||||
LOG(1,"Atrip") << "Tuples per rank: " << tuplesPerRankGlobal << "\n";
|
||||
LOG(1,"Atrip") << "ranks per node " << nodeInfos[rank].ranksPerNode << "\n";
|
||||
LOG(1,"Atrip") << "#nodes " << nNodes << "\n";
|
||||
LOG(1,"Atrip") << "Tuples per rank: " << tuplesPerRankGlobal << "\n";
|
||||
LOG(1,"Atrip") << "ranks per node " << nodeInfos[rank].ranksPerNode << "\n";
|
||||
LOG(1,"Atrip") << "#nodes " << nNodes << "\n";
|
||||
// Main:2 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Main][Main:3]]
|
||||
@ -531,7 +531,7 @@ if (computeDistribution) {
|
||||
// Main:4 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Main][Main:5]]
|
||||
return result;
|
||||
return result;
|
||||
|
||||
}
|
||||
// Main:5 ends here
|
||||
|
||||
@ -33,7 +33,7 @@ namespace atrip {
|
||||
// Prolog:1 ends here
|
||||
|
||||
// [[file:../../atrip.org::*Pretty printing][Pretty printing:1]]
|
||||
template <typename T>
|
||||
template <typename T>
|
||||
std::string pretty_print(T&& value) {
|
||||
std::stringstream stream;
|
||||
#if ATRIP_DEBUG > 2
|
||||
|
||||
@ -20,6 +20,7 @@
|
||||
#include <atrip/Equations.hpp>
|
||||
#include <atrip/SliceUnion.hpp>
|
||||
#include <atrip/Unions.hpp>
|
||||
#include <atrip/Checkpoint.hpp>
|
||||
|
||||
using namespace atrip;
|
||||
|
||||
@ -28,6 +29,7 @@ template bool RankMap<double>::RANK_ROUND_ROBIN;
|
||||
template bool RankMap<Complex>::RANK_ROUND_ROBIN;
|
||||
int Atrip::rank;
|
||||
int Atrip::np;
|
||||
MPI_Comm Atrip::communicator;
|
||||
Timings Atrip::chrono;
|
||||
|
||||
// user printing block
|
||||
@ -36,9 +38,10 @@ void atrip::registerIterationDescriptor(IterationDescriptor d) {
|
||||
IterationDescription::descriptor = d;
|
||||
}
|
||||
|
||||
void Atrip::init() {
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &Atrip::rank);
|
||||
MPI_Comm_size(MPI_COMM_WORLD, &Atrip::np);
|
||||
void Atrip::init(MPI_Comm world) {
|
||||
Atrip::communicator = world;
|
||||
MPI_Comm_rank(world, &Atrip::rank);
|
||||
MPI_Comm_size(world, &Atrip::np);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
@ -46,7 +49,7 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
||||
|
||||
const int np = Atrip::np;
|
||||
const int rank = Atrip::rank;
|
||||
MPI_Comm universe = in.ei->wrld->comm;
|
||||
MPI_Comm universe = Atrip::communicator;
|
||||
|
||||
const size_t No = in.ei->lens[0];
|
||||
const size_t Nv = in.ea->lens[0];
|
||||
@ -70,10 +73,10 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
||||
|
||||
RankMap<F>::RANK_ROUND_ROBIN = in.rankRoundRobin;
|
||||
if (RankMap<F>::RANK_ROUND_ROBIN) {
|
||||
LOG(0,"Atrip") << "Doing rank round robin slices distribution" << "\n";
|
||||
LOG(0,"Atrip") << "Doing rank round robin slices distribution\n";
|
||||
} else {
|
||||
LOG(0,"Atrip")
|
||||
<< "Doing node > local rank round robin slices distribution" << "\n";
|
||||
<< "Doing node > local rank round robin slices distribution\n";
|
||||
}
|
||||
|
||||
|
||||
@ -146,7 +149,7 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
||||
|
||||
const size_t
|
||||
iterationMod = (in.percentageMod > 0)
|
||||
? nIterations * in.percentageMod / 100
|
||||
? nIterations * in.percentageMod / 100.0
|
||||
: in.iterationMod
|
||||
|
||||
, iteration1Percent = nIterations * 0.01
|
||||
@ -300,8 +303,44 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
||||
// START MAIN LOOP ======================================================{{{1
|
||||
|
||||
double energy(0.);
|
||||
size_t first_iteration = 0;
|
||||
Checkpoint c;
|
||||
const size_t checkpoint_mod
|
||||
= in.checkpointAtEveryIteration != 0
|
||||
? in.checkpointAtEveryIteration
|
||||
: nIterations * in.checkpointAtPercentage / 100;
|
||||
if (in.readCheckpointIfExists) {
|
||||
std::ifstream fin(in.checkpointPath);
|
||||
if (fin.is_open()) {
|
||||
LOG(0, "Atrip") << "Reading checkpoint from "
|
||||
<< in.checkpointPath << "\n";
|
||||
c = read_checkpoint(fin);
|
||||
first_iteration = (size_t)c.iteration;
|
||||
if (first_iteration > nIterations) {
|
||||
// TODO: throw an error here
|
||||
// first_iteration is bigger than nIterations,
|
||||
// you probably started the program with a different number
|
||||
// of cores
|
||||
}
|
||||
if (No != c.no) {/* TODO: write warning */}
|
||||
if (Nv != c.nv) {/* TODO: write warning */}
|
||||
// TODO write warnings for nrank and so on
|
||||
if (Atrip::rank == 0) {
|
||||
// take the negative of the energy to correct for the
|
||||
// negativity of the equations, the energy in the checkpoint
|
||||
// should always be the correct physical one.
|
||||
energy = - (double)c.energy;
|
||||
}
|
||||
LOG(0, "Atrip") << "energy from checkpoint "
|
||||
<< energy << "\n";
|
||||
LOG(0, "Atrip") << "iteration from checkpoint "
|
||||
<< first_iteration << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
for ( size_t i = 0, iteration = 1
|
||||
for ( size_t
|
||||
i = first_iteration,
|
||||
iteration = first_iteration + 1
|
||||
; i < tuplesList.size()
|
||||
; i++, iteration++
|
||||
) {
|
||||
@ -316,6 +355,23 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
||||
if (in.barrier) MPI_Barrier(universe);
|
||||
))
|
||||
|
||||
// write checkpoints
|
||||
if (iteration % checkpoint_mod == 0) {
|
||||
double globalEnergy = 0;
|
||||
MPI_Reduce(&energy, &globalEnergy, 1, MPI_DOUBLE, MPI_SUM, 0, universe);
|
||||
Checkpoint out
|
||||
= {No,
|
||||
Nv,
|
||||
0, // TODO
|
||||
0, // TODO
|
||||
- globalEnergy,
|
||||
iteration - 1,
|
||||
in.rankRoundRobin};
|
||||
LOG(0, "Atrip") << "Writing checkpoint\n";
|
||||
if (Atrip::rank == 0) write_checkpoint(out, in.checkpointPath);
|
||||
}
|
||||
|
||||
// write reporting
|
||||
if (iteration % iterationMod == 0 || iteration == iteration1Percent) {
|
||||
|
||||
if (IterationDescription::descriptor) {
|
||||
@ -363,7 +419,7 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
|
||||
|
||||
|
||||
// COMM FIRST DATABASE ================================================{{{1
|
||||
if (i == 0) {
|
||||
if (i == first_iteration) {
|
||||
WITH_RANK << "__first__:first database ............ \n";
|
||||
const auto db = communicateDatabase(abc, universe);
|
||||
WITH_RANK << "__first__:first database communicated \n";
|
||||
|
||||
Loading…
Reference in New Issue
Block a user