From c7c6db77dce8003513bec54e595d34cb5426ee29 Mon Sep 17 00:00:00 2001 From: Alejandro Gallo Date: Thu, 27 Jan 2022 20:45:38 +0100 Subject: [PATCH] Templatize doubles --- atrip.org | 136 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 77 insertions(+), 59 deletions(-) diff --git a/atrip.org b/atrip.org index 2d218b7..1fd7d91 100644 --- a/atrip.org +++ b/atrip.org @@ -1608,11 +1608,11 @@ namespace atrip { ( size_t No , size_t Nv , const ABCTuple &abc - , double const* Tph - , double const* VABij - , double const* VACij - , double const* VBCij - , double *Zijk + , F const* Tph + , F const* VABij + , F const* VACij + , F const* VBCij + , F *Zijk ) { const size_t a(abc[0]), b(abc[1]), c(abc[2]); for (size_t k=0; k < No; k++) @@ -1627,31 +1627,32 @@ namespace atrip { } } + template void doublesContribution ( const ABCTuple &abc , size_t const No , size_t const Nv // -- VABCI - , double const* VABph - , double const* VACph - , double const* VBCph - , double const* VBAph - , double const* VCAph - , double const* VCBph + , F const* VABph + , F const* VACph + , F const* VBCph + , F const* VBAph + , F const* VCAph + , F const* VCBph // -- VHHHA - , double const* VhhhA - , double const* VhhhB - , double const* VhhhC + , F const* VhhhA + , F const* VhhhB + , F const* VhhhC // -- TA - , double const* TAphh - , double const* TBphh - , double const* TCphh + , F const* TAphh + , F const* TBphh + , F const* TCphh // -- TABIJ - , double const* TABhh - , double const* TAChh - , double const* TBChh + , F const* TABhh + , F const* TAChh + , F const* TBChh // -- TIJK - , double *Tijk + , F *Tijk , atrip::Timings& chrono ) { @@ -1670,40 +1671,47 @@ namespace atrip { Tijk[_IJK_(i, j, k)] += _t_buffer[_IJK_(__II, __JJ, __KK)]; \ } \ t_reorder.stop(); - #define DGEMM_PARTICLES(__A, __B) \ - atrip::dgemm_( "T" \ - , "N" \ - , (int const*)&NoNo \ - , (int const*)&No \ - , (int const*)&Nv \ - , &one \ - , __A \ - , (int const*)&Nv \ - , __B \ - , (int const*)&Nv \ - , &zero \ - , _t_buffer.data() \ - , (int const*)&NoNo \ - ); - #define DGEMM_HOLES(__A, __B, __TRANSB) \ - atrip::dgemm_( "N" \ - , __TRANSB \ - , (int const*)&NoNo \ - , (int const*)&No \ - , (int const*)&No \ - , &m_one \ - , __A \ - , (int const*)&NoNo \ - , __B \ - , (int const*)&No \ - , &zero \ - , _t_buffer.data() \ - , (int const*)&NoNo \ - ); + #define DGEMM_PARTICLES(__A, __B) \ + atrip::xgemm( "T" \ + , "N" \ + , (int const*)&NoNo \ + , (int const*)&No \ + , (int const*)&Nv \ + , &one \ + , __A \ + , (int const*)&Nv \ + , __B \ + , (int const*)&Nv \ + , &zero \ + , _t_buffer.data() \ + , (int const*)&NoNo \ + ); + #define DGEMM_HOLES(__A, __B, __TRANSB) \ + atrip::xgemm( "N" \ + , __TRANSB \ + , (int const*)&NoNo \ + , (int const*)&No \ + , (int const*)&No \ + , &m_one \ + , __A \ + , (int const*)&NoNo \ + , __B \ + , (int const*)&No \ + , &zero \ + , _t_buffer.data() \ + , (int const*)&NoNo \ + ); + #define MAYBE_CONJ(_conj, _buffer) \ + if (traits::isComplex()) { \ + for (size_t __i = 0; __i < NoNoNo; ++__i) \ + _conj[__i] = std::conj(_buffer[__i]); \ + } else { \ + for (size_t __i = 0; __i < NoNoNo; ++__i) \ + _conj[__i] = _buffer[__i]; \ + } - using F = double; const size_t NoNoNo = No*NoNo; - std::vector _t_buffer; + std::vector _t_buffer; _t_buffer.reserve(NoNoNo); F one{1.0}, m_one{-1.0}, zero{0.0}; @@ -1716,38 +1724,48 @@ namespace atrip { chrono["doubles:holes"].start(); { // Holes part ============================================================ + + std::vector _vhhh(NoNoNo); + // VhhhC[i + k*No + L*NoNo] * TABhh[L + j*No]; H1 + MAYBE_CONJ(_vhhh, VhhhC) chrono["doubles:holes:1"].start(); - DGEMM_HOLES(VhhhC, TABhh, "N") + DGEMM_HOLES(_vhhh.data(), TABhh, "N") REORDER(i, k, j) chrono["doubles:holes:1"].stop(); // VhhhC[j + k*No + L*NoNo] * TABhh[i + L*No]; H0 chrono["doubles:holes:2"].start(); - DGEMM_HOLES(VhhhC, TABhh, "T") + DGEMM_HOLES(_vhhh.data(), TABhh, "T") REORDER(j, k, i) chrono["doubles:holes:2"].stop(); + // VhhhB[i + j*No + L*NoNo] * TAChh[L + k*No]; H5 + MAYBE_CONJ(_vhhh, VhhhB) chrono["doubles:holes:3"].start(); - DGEMM_HOLES(VhhhB, TAChh, "N") + DGEMM_HOLES(_vhhh.data(), TAChh, "N") REORDER(i, j, k) chrono["doubles:holes:3"].stop(); // VhhhB[k + j*No + L*NoNo] * TAChh[i + L*No]; H3 chrono["doubles:holes:4"].start(); - DGEMM_HOLES(VhhhB, TAChh, "T") + DGEMM_HOLES(_vhhh.data(), TAChh, "T") REORDER(k, j, i) chrono["doubles:holes:4"].stop(); + // VhhhA[j + i*No + L*NoNo] * TBChh[L + k*No]; H1 + MAYBE_CONJ(_vhhh, VhhhA) chrono["doubles:holes:5"].start(); - DGEMM_HOLES(VhhhA, TBChh, "N") + DGEMM_HOLES(_vhhh.data(), TBChh, "N") REORDER(j, i, k) chrono["doubles:holes:5"].stop(); // VhhhA[k + i*No + L*NoNo] * TBChh[j + L*No]; H4 chrono["doubles:holes:6"].start(); - DGEMM_HOLES(VhhhA, TBChh, "T") + DGEMM_HOLES(_vhhh.data(), TBChh, "T") REORDER(k, i, j) chrono["doubles:holes:6"].stop(); + } chrono["doubles:holes"].stop(); + #undef MAYBE_CONJ chrono["doubles:particles"].start(); { // Particle part =========================================================