Update all chronos to use the static chrono

This commit is contained in:
2021-10-21 15:25:01 +02:00
parent 2823fa3699
commit 79a3f99cb3
13 changed files with 553 additions and 571 deletions

View File

@@ -1,4 +1,4 @@
// [[file:../atrip.org::*Include header][Include header:1]]
// [[file:~/atrip/atrip.org::*Include%20header][Include header:1]]
#pragma once
#include <atrip/Atrip.hpp>

View File

@@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Atrip][Atrip:1]]
// [[file:~/atrip/atrip.org::*Atrip][Atrip:1]]
#pragma once
#include <sstream>
#include <string>

View File

@@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Blas][Blas:1]]
// [[file:~/atrip/atrip.org::*Blas][Blas:1]]
#pragma once
namespace atrip {
extern "C" {

View File

@@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Debug][Debug:1]]
// [[file:~/atrip/atrip.org::*Debug][Debug:1]]
#pragma once
#define ATRIP_BENCHMARK
//#define ATRIP_DONT_SLICE

View File

@@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Equations][Equations:1]]
// [[file:~/atrip/atrip.org::*Equations][Equations:1]]
#pragma once
#include<atrip/Slice.hpp>
@@ -150,54 +150,51 @@ namespace atrip {
, double const* TBChh
// -- TIJK
, double *Tijk
, atrip::Timings& chrono
) {
auto& t_reorder = chrono["doubles:reorder"];
const size_t a = abc[0], b = abc[1], c = abc[2]
, NoNo = No*No, NoNv = No*Nv
;
#if defined(ATRIP_USE_DGEMM)
#define _IJK_(i, j, k) i + j*No + k*NoNo
#define REORDER(__II, __JJ, __KK) \
t_reorder.start(); \
for (size_t k = 0; k < No; k++) \
for (size_t j = 0; j < No; j++) \
for (size_t i = 0; i < No; i++) { \
Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \
} \
t_reorder.stop();
#define DGEMM_PARTICLES(__A, __B) \
atrip::dgemm_( "T" \
, "N" \
, (int const*)&NoNo \
, (int const*)&No \
, (int const*)&Nv \
, &one \
, __A \
, (int const*)&Nv \
, __B \
, (int const*)&Nv \
, &zero \
, _t_buffer.data() \
, (int const*)&NoNo \
);
#define DGEMM_HOLES(__A, __B, __TRANSB) \
atrip::dgemm_( "N" \
, __TRANSB \
, (int const*)&NoNo \
, (int const*)&No \
, (int const*)&No \
, &m_one \
, __A \
, (int const*)&NoNo \
, __B \
, (int const*)&No \
, &zero \
, _t_buffer.data() \
, (int const*)&NoNo \
);
#if defined(ATRIP_USE_DGEMM)
#define _IJK_(i, j, k) i + j*No + k*NoNo
// REORDER(II, JJ, KK): accumulate the scratch buffer into Tijk with permuted
// indices, i.e. Tijk[i,j,k] += _t_buffer[II,JJ,KK] for every i, j, k in
// [0, No). It relies on _IJK_, No, Tijk and _t_buffer being visible at the
// expansion site, and on WITH_CHRONO to time the loop nest.
// The timer key is "doubles:reorder" so that it keeps the "doubles:" prefix
// shared by the other timers of this routine ("doubles:holes", ...).
// NOTE: do not put `//` comments inside the macro body; together with the
// line continuations they would swallow the lines that follow.
#define REORDER(__II, __JJ, __KK) \
  WITH_CHRONO("doubles:reorder", \
    for (size_t k = 0; k < No; k++) \
    for (size_t j = 0; j < No; j++) \
    for (size_t i = 0; i < No; i++) { \
      Tijk[_IJK_(i, j, k)] \
        += _t_buffer[_IJK_(__II, __JJ, __KK)]; \
    } \
  )
#define DGEMM_PARTICLES(__A, __B) \
atrip::dgemm_("T", \
"N", \
(int const*)&NoNo, \
(int const*)&No, \
(int const*)&Nv, \
&one, \
__A, \
(int const*)&Nv, \
__B, \
(int const*)&Nv, \
&zero, \
_t_buffer.data(), \
(int const*)&NoNo);
#define DGEMM_HOLES(__A, __B, __TRANSB) \
atrip::dgemm_("N", \
__TRANSB, \
(int const*)&NoNo, \
(int const*)&No, \
(int const*)&No, \
&m_one, \
__A, \
(int const*)&NoNo, \
__B, \
(int const*)&No, \
&zero, \
_t_buffer.data(), \
(int const*)&NoNo);
using F = double;
const size_t NoNoNo = No*NoNo;
@@ -205,88 +202,86 @@ namespace atrip {
_t_buffer.reserve(NoNoNo);
F one{1.0}, m_one{-1.0}, zero{0.0};
t_reorder.start();
for (size_t k = 0; k < NoNoNo; k++) {
// zero the Tijk
Tijk[k] = 0.0;
}
t_reorder.stop();
// Zero the Tijk accumulator before any contribution is added to it.
// Timed under "doubles:reorder", the key the pre-refactoring code used for
// this loop, so it keeps the "doubles:" prefix of the neighbouring timers.
WITH_CHRONO("doubles:reorder",
  for (size_t k = 0; k < NoNoNo; k++) {
    Tijk[k] = 0.0;
  })
chrono["doubles:holes"].start();
{ // Holes part ============================================================
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
chrono["doubles:holes:1"].start();
DGEMM_HOLES(VhhhC, TABhh, "N")
REORDER(i, k, j)
chrono["doubles:holes:1"].stop();
// VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0
chrono["doubles:holes:2"].start();
DGEMM_HOLES(VhhhC, TABhh, "T")
REORDER(j, k, i)
chrono["doubles:holes:2"].stop();
// VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5
chrono["doubles:holes:3"].start();
DGEMM_HOLES(VhhhB, TAChh, "N")
REORDER(i, j, k)
chrono["doubles:holes:3"].stop();
// VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3
chrono["doubles:holes:4"].start();
DGEMM_HOLES(VhhhB, TAChh, "T")
REORDER(k, j, i)
chrono["doubles:holes:4"].stop();
// VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1
chrono["doubles:holes:5"].start();
DGEMM_HOLES(VhhhA, TBChh, "N")
REORDER(j, i, k)
chrono["doubles:holes:5"].stop();
// VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4
chrono["doubles:holes:6"].start();
DGEMM_HOLES(VhhhA, TBChh, "T")
REORDER(k, i, j)
chrono["doubles:holes:6"].stop();
}
chrono["doubles:holes"].stop();
WITH_CHRONO("doubles:holes",
{ // Holes part ================================================
// VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1
WITH_CHRONO("doubles:holes:1",
DGEMM_HOLES(VhhhC, TABhh, "N")
REORDER(i, k, j)
)
// VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0
WITH_CHRONO("doubles:holes:2",
DGEMM_HOLES(VhhhC, TABhh, "T")
REORDER(j, k, i)
)
// VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5
WITH_CHRONO("doubles:holes:3",
DGEMM_HOLES(VhhhB, TAChh, "N")
REORDER(i, j, k)
)
// VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3
WITH_CHRONO("doubles:holes:4",
DGEMM_HOLES(VhhhB, TAChh, "T")
REORDER(k, j, i)
)
// VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1
WITH_CHRONO("doubles:holes:5",
DGEMM_HOLES(VhhhA, TBChh, "N")
REORDER(j, i, k)
)
// VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4
WITH_CHRONO("doubles:holes:6",
DGEMM_HOLES(VhhhA, TBChh, "T")
REORDER(k, i, j)
)
}
)
chrono["doubles:particles"].start();
{ // Particle part =========================================================
// TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0
chrono["doubles:particles:1"].start();
DGEMM_PARTICLES(TAphh, VBCph)
REORDER(i, j, k)
chrono["doubles:particles:1"].stop();
// TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3
chrono["doubles:particles:2"].start();
DGEMM_PARTICLES(TAphh, VCBph)
REORDER(i, k, j)
chrono["doubles:particles:2"].stop();
// TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5
chrono["doubles:particles:3"].start();
DGEMM_PARTICLES(TCphh, VABph)
REORDER(k, i, j)
chrono["doubles:particles:3"].stop();
// TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2
chrono["doubles:particles:4"].start();
DGEMM_PARTICLES(TCphh, VBAph)
REORDER(k, j, i)
chrono["doubles:particles:4"].stop();
// TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1
chrono["doubles:particles:5"].start();
DGEMM_PARTICLES(TBphh, VACph)
REORDER(j, i, k)
chrono["doubles:particles:5"].stop();
// TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4
chrono["doubles:particles:6"].start();
DGEMM_PARTICLES(TBphh, VCAph)
REORDER(j, k, i)
chrono["doubles:particles:6"].stop();
}
chrono["doubles:particles"].stop();
WITH_CHRONO("doubles:particles",
{ // Particle part ===========================================
// TAphh[E + i*Nv + j*NoNv] * VBCph[E + k*Nv]; P0
WITH_CHRONO("doubles:particles:1",
DGEMM_PARTICLES(TAphh, VBCph)
REORDER(i, j, k)
)
// TAphh[E + i*Nv + k*NoNv] * VCBph[E + j*Nv]; P3
WITH_CHRONO("doubles:particles:2",
DGEMM_PARTICLES(TAphh, VCBph)
REORDER(i, k, j)
)
// TCphh[E + k*Nv + i*NoNv] * VABph[E + j*Nv]; P5
WITH_CHRONO("doubles:particles:3",
DGEMM_PARTICLES(TCphh, VABph)
REORDER(k, i, j)
)
// TCphh[E + k*Nv + j*NoNv] * VBAph[E + i*Nv]; P2
WITH_CHRONO("doubles:particles:4",
DGEMM_PARTICLES(TCphh, VBAph)
REORDER(k, j, i)
)
// TBphh[E + j*Nv + i*NoNv] * VACph[E + k*Nv]; P1
WITH_CHRONO("doubles:particles:5",
DGEMM_PARTICLES(TBphh, VACph)
REORDER(j, i, k)
)
// TBphh[E + j*Nv + k*NoNv] * VCAph[E + i*Nv]; P4
WITH_CHRONO("doubles:particles:6",
DGEMM_PARTICLES(TBphh, VCAph)
REORDER(j, k, i)
)
}
)
#undef REORDER
#undef DGEMM_HOLES
#undef DGEMM_PARTICLES
#undef _IJK_
#else
#undef REORDER
#undef DGEMM_HOLES
#undef DGEMM_PARTICLES
#undef _IJK_
#else
for (size_t k = 0; k < No; k++)
for (size_t j = 0; j < No; j++)
for (size_t i = 0; i < No; i++){
@@ -330,7 +325,7 @@ namespace atrip {
}
}
#endif
#endif
}
}

View File

@@ -1,4 +1,4 @@
// [[file:../../atrip.org::*The rank mapping][The rank mapping:1]]
// [[file:~/atrip/atrip.org::*The%20rank%20mapping][The rank mapping:1]]
#pragma once
#include <vector>

View File

@@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Prolog][Prolog:1]]
// [[file:~/atrip/atrip.org::*Prolog][Prolog:1]]
#pragma once
#include <iostream>
#include <algorithm>
@@ -16,11 +16,11 @@ struct Slice {
using F = double;
// Prolog:1 ends here
// [[file:../../atrip.org::*Location][Location:1]]
// [[file:~/atrip/atrip.org::*Location][Location:1]]
// Plain aggregate holding two indices, `rank` and `source`.
// NOTE(review): presumably the MPI rank and the index of the source buffer
// on that rank -- confirm against the code that fills it in.
struct Location {
  size_t rank;
  size_t source;
};
// Location:1 ends here
// [[file:../../atrip.org::*Type][Type:1]]
// [[file:~/atrip/atrip.org::*Type][Type:1]]
enum Type
{ A = 10
, B
@@ -38,7 +38,7 @@ enum Type
};
// Type:1 ends here
// [[file:../../atrip.org::*State][State:1]]
// [[file:~/atrip/atrip.org::*State][State:1]]
enum State {
Fetch = 0,
Dispatched = 2,
@@ -49,7 +49,7 @@ enum State {
};
// State:1 ends here
// [[file:../../atrip.org::*The Info structure][The Info structure:1]]
// [[file:~/atrip/atrip.org::*The%20Info%20structure][The Info structure:1]]
struct Info {
// which part of a,b,c the slice holds
PartialTuple tuple;
@@ -73,7 +73,7 @@ struct Info {
using Ty_x_Tu = std::pair< Type, PartialTuple >;
// The Info structure:1 ends here
// [[file:../../atrip.org::*Name][Name:1]]
// [[file:~/atrip/atrip.org::*Name][Name:1]]
enum Name
{ TA = 100
, VIJKA = 101
@@ -83,19 +83,19 @@ enum Name
};
// Name:1 ends here
// [[file:../../atrip.org::*Database][Database:1]]
// [[file:~/atrip/atrip.org::*Database][Database:1]]
// One record of the slice database: a slice name paired with the Info
// describing that slice.
struct LocalDatabaseElement {
// Which named quantity the slice belongs to (a Slice::Name enumerator).
Slice::Name name;
// The Slice::Info record stored alongside the name.
Slice::Info info;
};
// Database:1 ends here
// [[file:../../atrip.org::*Database][Database:2]]
// [[file:~/atrip/atrip.org::*Database][Database:2]]
using LocalDatabase = std::vector<LocalDatabaseElement>;
using Database = LocalDatabase;
// Database:2 ends here
// [[file:../../atrip.org::*MPI Types][MPI Types:1]]
// [[file:~/atrip/atrip.org::*MPI%20Types][MPI Types:1]]
struct mpi {
static MPI_Datatype vector(size_t n, MPI_Datatype const& DT) {
@@ -185,7 +185,7 @@ struct mpi {
};
// MPI Types:1 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:1]]
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:1]]
static
PartialTuple subtupleBySlice(ABCTuple abc, Type sliceType) {
switch (sliceType) {
@@ -203,7 +203,7 @@ PartialTuple subtupleBySlice(ABCTuple abc, Type sliceType) {
}
// Static utilities:1 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:2]]
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:2]]
static std::vector<Slice*> hasRecycledReferencingToIt
( std::vector<Slice> &slices
, Info const& info
@@ -220,7 +220,7 @@ static std::vector<Slice*> hasRecycledReferencingToIt
}
// Static utilities:2 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:3]]
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:3]]
static Slice& findOneByType(std::vector<Slice> &slices, Slice::Type type) {
const auto sliceIt
= std::find_if(slices.begin(), slices.end(),
@@ -236,7 +236,7 @@ static Slice& findOneByType(std::vector<Slice> &slices, Slice::Type type) {
}
// Static utilities:3 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:4]]
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:4]]
static Slice&
findRecycledSource (std::vector<Slice> &slices, Slice::Info info) {
const auto sliceIt
@@ -262,7 +262,7 @@ findRecycledSource (std::vector<Slice> &slices, Slice::Info info) {
}
// Static utilities:4 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:5]]
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:5]]
static Slice& findByTypeAbc
( std::vector<Slice> &slices
, Slice::Type type
@@ -292,7 +292,7 @@ static Slice& findByTypeAbc
}
// Static utilities:5 ends here
// [[file:../../atrip.org::*Static utilities][Static utilities:6]]
// [[file:~/atrip/atrip.org::*Static%20utilities][Static utilities:6]]
static Slice& findByInfo(std::vector<Slice> &slices,
Slice::Info const& info) {
const auto sliceIt
@@ -315,30 +315,30 @@ static Slice& findByInfo(std::vector<Slice> &slices,
}
// Static utilities:6 ends here
// [[file:../../atrip.org::*Attributes][Attributes:1]]
// [[file:~/atrip/atrip.org::*Attributes][Attributes:1]]
Info info;
// Attributes:1 ends here
// [[file:../../atrip.org::*Attributes][Attributes:2]]
// [[file:~/atrip/atrip.org::*Attributes][Attributes:2]]
F *data;
// Attributes:2 ends here
// [[file:../../atrip.org::*Attributes][Attributes:3]]
// [[file:~/atrip/atrip.org::*Attributes][Attributes:3]]
MPI_Request request;
// Attributes:3 ends here
// [[file:../../atrip.org::*Attributes][Attributes:4]]
// [[file:~/atrip/atrip.org::*Attributes][Attributes:4]]
const size_t size;
// Attributes:4 ends here
// [[file:../../atrip.org::*Member functions][Member functions:1]]
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:1]]
// Flag this slice as ready: set its state to Ready and reset its recycling
// marker to Blank. Touches only `info`.
void markReady() noexcept {
info.state = Ready;
info.recycling = Blank;
}
// Member functions:1 ends here
// [[file:../../atrip.org::*Member functions][Member functions:2]]
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:2]]
bool isUnwrapped() const noexcept {
return info.state == Ready
|| info.state == SelfSufficient
@@ -346,7 +346,7 @@ bool isUnwrapped() const noexcept {
}
// Member functions:2 ends here
// [[file:../../atrip.org::*Member functions][Member functions:3]]
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:3]]
bool isUnwrappable() const noexcept {
return isUnwrapped()
|| info.state == Recycled
@@ -379,7 +379,7 @@ inline bool isFree() const noexcept {
}
// Member functions:3 ends here
// [[file:../../atrip.org::*Member functions][Member functions:4]]
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:4]]
inline bool isRecyclable() const noexcept {
return ( info.state == Dispatched
|| info.state == Ready
@@ -390,7 +390,7 @@ inline bool isRecyclable() const noexcept {
}
// Member functions:4 ends here
// [[file:../../atrip.org::*Member functions][Member functions:5]]
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:5]]
inline bool hasValidDataPointer() const noexcept {
return data != nullptr
&& info.state != Acceptor
@@ -399,7 +399,7 @@ inline bool hasValidDataPointer() const noexcept {
}
// Member functions:5 ends here
// [[file:../../atrip.org::*Member functions][Member functions:6]]
// [[file:~/atrip/atrip.org::*Member%20functions][Member functions:6]]
void unwrapAndMarkReady() {
if (info.state == Ready) return;
if (info.state != Dispatched)
@@ -431,7 +431,7 @@ void unwrapAndMarkReady() {
}
// Member functions:6 ends here
// [[file:../../atrip.org::*Epilog][Epilog:1]]
// [[file:~/atrip/atrip.org::*Epilog][Epilog:1]]
Slice(size_t size_)
: info({})
, data(nullptr)
@@ -442,7 +442,7 @@ Slice(size_t size_)
}; // struct Slice
// Epilog:1 ends here
// [[file:../../atrip.org::*Debug][Debug:1]]
// [[file:~/atrip/atrip.org::*Debug][Debug:1]]
std::ostream& operator<<(std::ostream& out, Slice::Location const& v) {
// TODO: remove me
out << "{.r(" << v.rank << "), .s(" << v.source << ")};";

View File

@@ -1,4 +1,4 @@
// [[file:../../atrip.org::*The slice union][The slice union:1]]
// [[file:~/atrip/atrip.org::*The%20slice%20union][The slice union:1]]
#pragma once
#include <atrip/Debug.hpp>
#include <atrip/Slice.hpp>

View File

@@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Prolog][Prolog:1]]
// [[file:~/atrip/atrip.org::*Prolog][Prolog:1]]
#pragma once
#include <vector>
@@ -21,7 +21,7 @@
namespace atrip {
// Prolog:1 ends here
// [[file:../../atrip.org::*Tuples types][Tuples types:1]]
// [[file:~/atrip/atrip.org::*Tuples%20types][Tuples types:1]]
using ABCTuple = std::array<size_t, 3>;
using PartialTuple = std::array<size_t, 2>;
using ABCTuples = std::vector<ABCTuple>;
@@ -29,14 +29,14 @@ using ABCTuples = std::vector<ABCTuple>;
constexpr ABCTuple FAKE_TUPLE = {0, 0, 0};
// Tuples types:1 ends here
// [[file:../../atrip.org::*Distributing the tuples][Distributing the tuples:1]]
// [[file:~/atrip/atrip.org::*Distributing%20the%20tuples][Distributing the tuples:1]]
// Abstract interface for strategies that produce the list of ABC tuples.
// Concrete strategies override getTuples(); tupleIsFake() may be overridden
// when a strategy marks padding tuples differently from FAKE_TUPLE.
struct TuplesDistribution {
  // Virtual destructor so that a derived distribution can be destroyed
  // safely through a pointer or reference to this base type.
  virtual ~TuplesDistribution() = default;

  // Return the tuples for the given Nv, using `universe` as the MPI
  // communicator. NOTE(review): presumably the tuples assigned to the
  // calling rank -- confirm against the implementations.
  virtual ABCTuples getTuples(size_t Nv, MPI_Comm universe) = 0;

  // True when `t` equals the FAKE_TUPLE placeholder.
  virtual bool tupleIsFake(ABCTuple const& t) { return t == FAKE_TUPLE; }
};
// Distributing the tuples:1 ends here
// [[file:../../atrip.org::*Naive list][Naive list:1]]
// [[file:~/atrip/atrip.org::*Naive%20list][Naive list:1]]
ABCTuples getTuplesList(size_t Nv) {
const size_t n = Nv * (Nv + 1) * (Nv + 2) / 6 - Nv;
ABCTuples result(n);
@@ -54,7 +54,7 @@ ABCTuples getTuplesList(size_t Nv) {
}
// Naive list:1 ends here
// [[file:../../atrip.org::*Naive list][Naive list:2]]
// [[file:~/atrip/atrip.org::*Naive%20list][Naive list:2]]
std::pair<size_t, size_t>
getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) {
@@ -91,7 +91,7 @@ getABCRange(size_t np, size_t rank, ABCTuples const& tuplesList) {
}
// Naive list:2 ends here
// [[file:../../atrip.org::*Naive list][Naive list:3]]
// [[file:~/atrip/atrip.org::*Naive%20list][Naive list:3]]
struct NaiveDistribution : public TuplesDistribution {
ABCTuples getTuples(size_t Nv, MPI_Comm universe) override {
int rank, np;
@@ -113,23 +113,29 @@ struct NaiveDistribution : public TuplesDistribution {
WITH_RANK << "range = "
<< range.first << " -> " << range.second
<< std::endl;
std::vector<ABCTuple> result(range.second - range.first + 1, FAKE_TUPLE);
WITH_RANK << "number of global tuples = " << all.size() << std::endl;
WITH_RANK << "number of local tuples = " << result.size() << std::endl;
std::vector<ABCTuple> result(range.second - range.first, FAKE_TUPLE);
std::copy(all.begin() + range.first,
std::copy(range.first >= all.size()
? all.end()
: all.begin() + range.first,
// --
range.second >= all.size()
? all.end()
: all.begin() + range.first + range.second,
? all.end()
: all.begin() + range.first + range.second,
// --
result.begin());
return result;
}
};
// Naive list:3 ends here
// [[file:../../atrip.org::*Prolog][Prolog:1]]
// [[file:~/atrip/atrip.org::*Prolog][Prolog:1]]
namespace group_and_sort {
// Prolog:1 ends here
// [[file:../../atrip.org::*Node information][Node information:1]]
// [[file:~/atrip/atrip.org::*Node%20information][Node information:1]]
std::vector<std::string> getNodeNames(MPI_Comm comm){
int rank, np;
MPI_Comm_rank(comm, &rank);
@@ -169,7 +175,7 @@ std::vector<std::string> getNodeNames(MPI_Comm comm){
}
// Node information:1 ends here
// [[file:../../atrip.org::*Node information][Node information:2]]
// [[file:~/atrip/atrip.org::*Node%20information][Node information:2]]
struct RankInfo {
const std::string name;
const size_t nodeId;
@@ -208,7 +214,7 @@ getNodeInfos(std::vector<string> const& nodeNames) {
}
// Node information:2 ends here
// [[file:../../atrip.org::*Utils][Utils:1]]
// [[file:~/atrip/atrip.org::*Utils][Utils:1]]
// Provides the node on which the slice-element is found
// Right now we distribute the slices in a round robin fashion
// over the different nodes (NOTE: not mpi ranks but nodes)
@@ -239,7 +245,7 @@ std::vector<size_t> getTupleNodes(ABCTuple t, size_t nNodes) {
}
// Utils:1 ends here
// [[file:../../atrip.org::*Distribution][Distribution:1]]
// [[file:~/atrip/atrip.org::*Distribution][Distribution:1]]
std::vector<ABCTuple>
specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
@@ -380,7 +386,7 @@ std::vector<size_t> fetchElement(ABCTuple cur, ABCTuple suc){
}
// Distribution:1 ends here
// [[file:../../atrip.org::*Main][Main:1]]
// [[file:~/atrip/atrip.org::*Main][Main:1]]
std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
int rank, np;
@@ -432,7 +438,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
MPI_Comm_split(universe, color, key, &INTRA_COMM);
// Main:1 ends here
// [[file:../../atrip.org::*Main][Main:2]]
// [[file:~/atrip/atrip.org::*Main][Main:2]]
const size_t
tuplesPerRankLocal
= nodeTuples.size() / nodeInfos[rank].ranksPerNode
@@ -456,7 +462,7 @@ MPI_Bcast(&tuplesPerRankGlobal,
universe);
// Main:2 ends here
// [[file:../../atrip.org::*Main][Main:3]]
// [[file:~/atrip/atrip.org::*Main][Main:3]]
size_t const totalTuplesLocal
= tuplesPerRankLocal
* nodeInfos[rank].ranksPerNode;
@@ -467,7 +473,7 @@ if (makeDistribution)
FAKE_TUPLE);
// Main:3 ends here
// [[file:../../atrip.org::*Main][Main:4]]
// [[file:~/atrip/atrip.org::*Main][Main:4]]
{
std::vector<int> const
sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal);
@@ -503,7 +509,7 @@ if (makeDistribution)
}
// Main:4 ends here
// [[file:../../atrip.org::*Main][Main:5]]
// [[file:~/atrip/atrip.org::*Main][Main:5]]
result.insert(result.end(),
tuplesPerRankGlobal - result.size(),
FAKE_TUPLE);
@@ -513,7 +519,7 @@ result.insert(result.end(),
}
// Main:5 ends here
// [[file:../../atrip.org::*Interface][Interface:1]]
// [[file:~/atrip/atrip.org::*Interface][Interface:1]]
struct Distribution : public TuplesDistribution {
ABCTuples getTuples(size_t Nv, MPI_Comm universe) override {
return main(universe, Nv);
@@ -521,10 +527,10 @@ struct Distribution : public TuplesDistribution {
};
// Interface:1 ends here
// [[file:../../atrip.org::*Epilog][Epilog:1]]
// [[file:~/atrip/atrip.org::*Epilog][Epilog:1]]
} // namespace group_and_sort
// Epilog:1 ends here
// [[file:../../atrip.org::*Epilog][Epilog:1]]
// [[file:~/atrip/atrip.org::*Epilog][Epilog:1]]
}
// Epilog:1 ends here

View File

@@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Unions][Unions:1]]
// [[file:~/atrip/atrip.org::*Unions][Unions:1]]
#pragma once
#include <atrip/SliceUnion.hpp>

View File

@@ -1,4 +1,4 @@
// [[file:../../atrip.org::*Prolog][Prolog:1]]
// [[file:~/atrip/atrip.org::*Prolog][Prolog:1]]
#pragma once
#include <sstream>
#include <string>
@@ -11,7 +11,7 @@
namespace atrip {
// Prolog:1 ends here
// [[file:../../atrip.org::*Pretty printing][Pretty printing:1]]
// [[file:~/atrip/atrip.org::*Pretty%20printing][Pretty printing:1]]
template <typename T>
std::string pretty_print(T&& value) {
std::stringstream stream;
@@ -22,11 +22,11 @@ template <typename T>
}
// Pretty printing:1 ends here
// [[file:../../atrip.org::*Chrono][Chrono:1]]
#define WITH_CHRONO(__chrono, ...) \
__chrono.start(); \
// [[file:~/atrip/atrip.org::*Chrono][Chrono:1]]
#define WITH_CHRONO(__chrono_name, ...) \
Atrip::chrono[__chrono_name].start(); \
__VA_ARGS__ \
__chrono.stop();
Atrip::chrono[__chrono_name].stop();
struct Timer {
using Clock = std::chrono::high_resolution_clock;
@@ -41,6 +41,6 @@ struct Timer {
using Timings = std::map<std::string, Timer>;
// Chrono:1 ends here
// [[file:../../atrip.org::*Epilog][Epilog:1]]
// [[file:~/atrip/atrip.org::*Epilog][Epilog:1]]
}
// Epilog:1 ends here