Compare commits

...

6 Commits

4 changed files with 477 additions and 21 deletions

View File

@ -4,25 +4,39 @@ include $(top_srcdir)/atrip.mk
AM_CPPFLAGS = -I$(top_srcdir)/include/ -I$(top_srcdir) $(CTF_CPPFLAGS)
AM_LDFLAGS = @LAPACK_LIBS@ @BLAS_LIBS@
bin_PROGRAMS = test_main tuples-distribution
test_main_SOURCES = test_main.cxx
tuples_distribution_SOURCES = tuples-distribution.cxx
test_main_LDADD = $(top_builddir)/src/libatrip.a
tuples_distribution_LDADD = $(top_builddir)/src/libatrip.a
if WITH_BUILD_CTF
test_main_LDADD += $(CTF_BUILD_PATH)/lib/libctf.a
tuples_distribution_LDADD += $(CTF_BUILD_PATH)/lib/libctf.a
ATRIP_CTF = $(CTF_BUILD_PATH)/lib/libctf.a
else
test_main_LDADD += @LIBCTF_LD_LIBRARY_PATH@/libctf.a
tuples_distribution_LDADD += @LIBCTF_LD_LIBRARY_PATH@/libctf.a
ATRIP_CTF = @LIBCTF_LD_LIBRARY_PATH@/libctf.a
endif
ATRIP_LIB = $(top_builddir)/src/libatrip.a $(ATRIP_CTF)
bin_PROGRAMS =
BENCHES_LDADD = $(ATRIP_LIB) $(ATRIP_CTF)
##
## main entry point and bench
##
bin_PROGRAMS += atrip
atrip_SOURCES = test_main.cxx
atrip_CPPFLAGS = $(AM_CPPFLAGS)
atrip_LDADD = $(BENCHES_LDADD)
if !WITH_CUDA
##
## tuples distribution
##
bin_PROGRAMS += tuples-distribution
tuples_distribution_LDADD = $(BENCHES_LDADD)
tuples_distribution_SOURCES = tuples-distribution.cxx
endif
if WITH_CUDA
test_main_CXXFLAGS = $(CUDA_CXXFLAGS)
test_main_LDADD += $(CUDA_LDFLAGS)
AM_CPPFLAGS += $(CUDA_CXXFLAGS)
BENCHES_LDADD += $(CUDA_LDFLAGS)
AM_CXXFLAGS = $(CUDA_CXXFLAGS)
AM_LDFLAGS += $(CUDA_LDFLAGS)

275
misc/naive-tuples.lisp Normal file
View File

@ -0,0 +1,275 @@
(defun tuples-atrip (nv)
(declare (optimize (speed 3) (safety 0) (debug 0)))
(loop :for a :below nv
:append
(loop :for b :from a :below nv
:append
(loop :for c :from b :below nv
:unless (= a b c)
:collect (list a b c)))))
(defun tuples-half (nv)
(declare (optimize (speed 3) (safety 0) (debug 0)))
(loop :for a :below nv
:append
(loop :for b :from a :below nv
:append
(loop :for c :from b :below nv
:collect (list a b c)))))
(defun tuples-all (nv)
(declare (optimize (speed 3) (safety 0) (debug 0)))
(loop :for a :below nv
:append
(loop :for b :below nv
:append
(loop :for c :below nv
:collect (list a b c)))))
(defun tuples-all-nth (i nv)
(declare (optimize (speed 3) (safety 0) (debug 0)))
(list (floor i (* nv nv))
(mod (floor i nv) nv)
(mod i nv)))
(defparameter tups (tuples-all 10))
(defun compare-all (l)
(declare (optimize (speed 3) (safety 0) (debug 0)))
(let* ((tups (tuples-all l)))
(loop for i below (length tups)
do (let* ((good (nth i tups))
(bad (tuples-all-nth i l))
(eq? (equal good bad)))
(unless eq?
(print (list :|i| i
:good good
:bad bad)))))))
;; (defun a-half (i nv)
;; (let ((divisor t)
;; (j i)
;; (total-blk 0))
;; (loop :for a :below nv
;; :unless (eq divisor 0)
;; :do (let ((blk (a-block a nv)))
;; (multiple-value-bind (d r) (floor j blk)
;; (declare (ignore r))
;; (when (> d 0)
;; (incf total-blk blk))
;; (setq j (- j blk)
;; divisor d)))
;; :else
;; :return (values (- a 1)
;; i
;; total-blk))))
;; (defun b-half (i a nv a-block-sum)
;; "we have
;; \begin{equation}
;; i = \underbrace{B(a_0) +
;; \cdots +
;; B(a_{i-1})}_{\texttt{a-block-sum}}
;; + idx
;; \end{equation}
;; and with this we just have to divide.
;; "
;; (let ((bj (if (> a-block-sum 0)
;; (mod i a-block-sum)
;; i))
;; (total-blk 0))
;; (loop :for b :from a :below Nv
;; :with divisor = 1
;; :unless (eq divisor 0)
;; :do (let ((blk (+ (- nv a)
;; #|because|# 1)))
;; (incf total-blk blk)
;; (if (> blk 0)
;; (multiple-value-bind (d r) (floor bj blk)
;; (declare (ignore r))
;; (setq bj (- bj blk)
;; divisor d))
;; (setq divisor 0)))
;; :else
;; :return (values (- b 1)
;; bj
;; total-blk))))
(defun a-block (a nv)
(declare (optimize (speed 3) (safety 0) (debug 0)))
(- (* (- nv 1) (- nv (- a 1)))
(- (floor (* (- nv 1) nv)
2)
(floor (* (- a 1) (- a 2))
2))))
(defun a-block-sum (|t| nv)
(macrolet ((ssum (n) `(floor (* ,n (+ ,n 1))
2))
(qsum (n) `(floor (* ,n
(+ ,n 1)
(+ 1 (* 2 ,n)))
6)))
(let ((nv-1 (- nv 1))
(t+1 (+ |t| 1)))
(+ (* t+1 nv-1 nv)
(* nv-1 t+1)
(- (* nv-1
(ssum |t|)))
(- (* t+1
(ssum nv-1)))
(floor (- (qsum |t|)
(* 3 (ssum |t|)))
2)
t+1))))
(defun get-half (i nv &key from block)
(let ((divisor 1)
(j i)
(total-blk 0))
(loop :for α :from from :below nv
:unless (eq divisor 0)
:do (let ((blk (funcall block α nv)))
(multiple-value-bind (d r) (floor j blk)
(declare (ignore r))
(when (> d 0)
(incf total-blk blk)
(setq j (- j blk)))
(setq divisor d)))
:else
:return (values (- α 1)
j
total-blk))))
(defun tuples-half-nth (i nv)
(declare (optimize (speed 3) (safety 0) (debug 0)))
(flet ((bc-block (x %nv)
(+ 1 (- %nv x))))
(multiple-value-bind (a aj blks) (get-half i nv :from 0 :block #'a-block)
(declare (ignore blks))
(multiple-value-bind (b bj blks) (get-half aj nv
:from a
:block #'bc-block)
(declare (ignore blks))
(multiple-value-bind (c cj blks) (get-half bj nv
:from b
:block #'bc-block)
(declare (ignore cj blks))
(print (list :idxs aj bj cj))
(list a b c))))))
(defun a-block-atrip (a nv)
(declare (optimize (speed 3) (safety 0) (debug 0)))
(- (a-block a nv) 1))
(defun a-block-sum-atrip (|t| nv)
(declare (optimize (speed 3) (safety 0) (debug 0)))
(- (a-block-sum |t| nv) (+ |t| 1)))
(defun b-block-sum-atrip (a |t| nv)
(- (* nv
(1+ (- |t| a)))
(floor (- (* |t| (1+ |t|))
(* a (- a 1)))
2)
1))
(defun nth-atrip (i nv)
(let ((sums (mapcar (lambda (s) (a-block-sum-atrip s nv))
(loop :for j :below nv :collect j))))
(multiple-value-bind (a ablk)
(loop :for sum :in sums
:with a = -1
:with base = 0
:do (incf a)
:if (eq (floor i sum) 0)
:return (values a base)
:else
:do (setq base sum))
(multiple-value-bind (b bblk)
(let ((sums (mapcar (lambda (s)
(+ ablk
#+nil(- nv s 1)
(b-block-sum-atrip a s nv)))
(loop :for b :from a :below nv
:collect b))))
(loop :for sum :in sums
:with b = (- a 1)
:with base = ablk
:do (incf b)
:if (< i sum)
:return (values b base)
:else
:do (progn
;; (print sums)
(setq base sum))))
(list a b (+ b
(- i bblk)
(if (eq a b)
1
0)))))))
(defun atrip-test (i nv)
(let ((tuples (tuples-atrip nv))
(cheaper (nth-atrip i nv)))
(values (nth i tuples)
cheaper
(print (equal (nth i tuples)
cheaper)))))
(let* ((l 101)
(tuples (tuples-atrip l)))
(loop :for a below l
:do (print (let ((s (a-block-atrip a l))
(c (count-if (lambda (x) (eq (car x) a))
tuples)))
(list :a a
:size s
:real c
:? (eq c s))))))
(ql:quickload 'vgplot)
(import 'vgplot:plot)
(import 'vgplot:replot)
(let ((l 10))
(plot (mapcar (lambda (x) (getf x :size))
(loop :for a upto l
collect (list :a a :size (a-block a l))))
"penis"))
(let* ((l 50)
(tuples (tuples-half l)))
(loop :for a below l
:do (print (let ((s (a-block a l))
(c (count-if (lambda (x) (eq (car x) a))
tuples)))
(list :a a
:size s
:real c
:? (eq c s))))))
(defun range (from to) (loop for i :from from :to to collect i))
(defun half-again (i nv)
(let ((a-block-list (let ((ll (mapcar (lambda (i) (a-block i nv))
(range 0 (- nv 1)))))
(loop :for i :from 1 :to (length ll)
:collect
(reduce #'+
ll
:end i)))))
(loop :for blk :in a-block-list
:with a = 0
:with total-blk = 0
:if (eq 0 (floor i blk))
:do
(let ((i (mod i blk)))
(print (list i (- i total-blk) blk a))
(return))
:else
:do (progn
(incf a)
(setq total-blk blk)))))

View File

@ -307,7 +307,14 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
if (in.tuplesDistribution == Atrip::Input<F>::TuplesDistribution::NAIVE) {
return naiveDatabase<F>(unions, Nv, np, iteration, c);
WITH_CHRONO("db:comm:naive",
auto const& db = naiveDatabase<F>(unions,
Nv,
np,
iteration,
c);
)
return db;
} else {
WITH_CHRONO("db:comm:type:do",
@ -447,6 +454,7 @@ Atrip::Output Atrip::run(Atrip::Input<F> const& in) {
// START MAIN LOOP ======================================================{{{1
MPI_Barrier(universe);
double energy(0.);
size_t first_iteration = 0;
Checkpoint c;

View File

@ -4,8 +4,11 @@
namespace atrip {
/* This function is really too slow, below are more performant
functions to get tuples.
*/
static
ABCTuples get_nth_naive_tuples(size_t Nv, size_t np) {
ABCTuples get_nth_naive_tuples(size_t Nv, size_t np, int64_t i) {
const size_t
// total number of tuples for the problem
@ -17,6 +20,9 @@ namespace atrip {
ABCTuples result(np);
if (i < 0) return result;
std::vector<size_t>
rank_indices(np, 0);
for (size_t a(0), g(0); a < Nv; a++)
for (size_t b(a); b < Nv; b++)
@ -32,7 +38,12 @@ namespace atrip {
, end = tuples_per_rank * (rank + 1)
;
if ( start <= g && g < end) result[rank] = {a, b, c};
if ( start <= g && g < end) {
if (rank_indices[rank] == i) {
result[rank] = {a, b, c};
}
rank_indices[rank] += 1;
}
}
g++;
@ -42,6 +53,122 @@ namespace atrip {
}
static
inline
size_t a_block_atrip(size_t a, size_t nv) {
return (nv - 1) * (nv - (a - 1))
- ((nv - 1) * nv) / 2
+ ((a - 1) * (a - 2)) / 2
- 1;
}
static
inline
size_t a_block_sum_atrip(int64_t T, int64_t nv) {
int64_t nv1 = nv - 1, tplus1 = T + 1;
return tplus1 * nv1 * nv
+ nv1 * tplus1
- (nv1 * (T * (T + 1)) / 2)
- (tplus1 * (nv1 * nv) / 2)
+ (((T * (T + 1) * (1 + 2 * T)) / 6) - 3 * ((T * (T + 1)) / 2)) / 2
;
// + tplus1;
}
static
inline
int64_t b_block_sum_atrip (int64_t a, int64_t T, int64_t nv) {
return nv * ((T - a) + 1)
- (T * (T + 1) - a * (a - 1)) / 2
- 1;
}
static std::vector<size_t> a_sums;
static
inline
ABCTuple nth_atrip(size_t it, size_t nv) {
// build the sums if necessary
if (!a_sums.size()) {
a_sums.resize(nv);
for (size_t _i = 0; _i < nv; _i++) {
a_sums[_i] = a_block_sum_atrip(_i, nv);
/*
std::cout << Atrip::rank << ": " << _i << " " << a_sums[_i] << std::endl;
*/
}
}
int64_t a = -1, block_a = 0;
for (const auto& sum: a_sums) {
++a;
if (sum > it) {
break;
} else {
block_a = sum;
}
}
// build the b_sums
std::vector<int64_t> b_sums(nv - a);
for (size_t t = a, i=0; t < nv; t++) {
b_sums[i++] = b_block_sum_atrip(a, t, nv);
/*
std::cout << Atrip::rank << ": b-sum " << i-1 << " "
<< ":a " << a << " :t " << t << " = " << b_sums[i-1] << std::endl;
*/
}
int64_t b = a - 1, block_b = block_a;
for (const auto& sum: b_sums) {
++b;
if (sum + block_a > it) {
break;
} else {
block_b = sum + block_a;
}
}
const int64_t
c = b + it - block_b + (a == b);
return {(size_t)a, (size_t)b, (size_t)c};
}
static
inline
ABCTuples nth_atrip_distributed(int64_t it, size_t nv, size_t np) {
if (it < 0) {
ABCTuples result(np, {nv, nv, nv});
return result;
}
ABCTuples result(np);
const size_t
// total number of tuples for the problem
n = nv * (nv + 1) * (nv + 2) / 6 - nv
// all ranks should have the same number of tuples_per_rank
, tuples_per_rank = n / np + size_t(n % np != 0)
;
for (size_t rank = 0; rank < np; rank++) {
const size_t
global_iteration = tuples_per_rank * rank + it;
/*
std::cout << Atrip::rank << ":" << "global_bit " << global_iteration << "\n";
*/
result[rank] = nth_atrip(global_iteration, nv);
}
return result;
}
template <typename F>
static
@ -120,8 +247,40 @@ namespace atrip {
using Database = typename Slice<F>::Database;
Database db;
const auto tuples = get_nth_naive_tuples(nv, np);
const auto prev_tuples = get_nth_naive_tuples(nv, np);
#ifdef NAIVE_SLOW
WITH_CHRONO("db:comm:naive:tuples",
const auto tuples = get_nth_naive_tuples(nv,
np,
iteration);
const auto prev_tuples = get_nth_naive_tuples(nv,
np,
(int64_t)iteration - 1);
)
#else
WITH_CHRONO("db:comm:naive:tuples",
const auto tuples = nth_atrip_distributed((int64_t)iteration,
nv,
np);
const auto prev_tuples = nth_atrip_distributed((int64_t)iteration - 1,
nv,
np);
)
if (false)
for (size_t rank = 0; rank < np; rank++) {
std::cout << Atrip::rank << ":"
<< " :tuples< " << rank << ">" << iteration
<< " :abc " << tuples[rank][0]
<< ", " << tuples[rank][1]
<< ", " << tuples[rank][2] << "\n";
std::cout << Atrip::rank << ":"
<< " :prev-tuples< " << rank << ">" << iteration
<< " :abc-prev " << prev_tuples[rank][0]
<< ", " << prev_tuples[rank][1]
<< ", " << prev_tuples[rank][2] << "\n";
}
#endif
for (size_t rank = 0; rank < np; rank++) {
auto abc = tuples[rank];
@ -148,7 +307,7 @@ namespace atrip {
return db;
}
template
template
typename Slice<double>::Database
naiveDatabase<double>(Unions<double> &unions,
size_t nv,
@ -156,7 +315,7 @@ namespace atrip {
size_t iteration,
MPI_Comm const& c);
template
template
typename Slice<Complex>::Database
naiveDatabase<Complex>(Unions<Complex> &unions,
size_t nv,