diff --git a/atrip.org b/atrip.org index 15c06fa..d98c068 100644 --- a/atrip.org +++ b/atrip.org @@ -1694,6 +1694,8 @@ specialDistribution(Info info, std::vector const& allTuples) { container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t); } + if (info.nodeId == 0) + std::cout << "\tBuilding 1-d containers\n"; // DISTRIBUTE 1-d containers // every tuple which is only located at one node belongs to this node { @@ -1702,6 +1704,8 @@ specialDistribution(Info info, std::vector const& allTuples) { std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin()); } + if (info.nodeId == 0) + std::cout << "\tBuilding 2-d containers\n"; // DISTRIBUTE 2-d containers //the tuples which are located at two nodes are half/half given to these nodes for (auto &m: container2d) { @@ -1731,6 +1735,8 @@ specialDistribution(Info info, std::vector const& allTuples) { } + if (info.nodeId == 0) + std::cout << "\tBuilding 3-d containers\n"; // DISTRIBUTE 3-d containers // similar game for the tuples which belong to three different nodes for (auto m: container3d){ @@ -1765,6 +1771,8 @@ specialDistribution(Info info, std::vector const& allTuples) { } + if (info.nodeId == 0) + std::cout << "\tsorting...\n"; // sort part of group-and-sort algorithm // every tuple on a given node is sorted in a way that // the 'home elements' are the fastest index. @@ -1785,15 +1793,23 @@ specialDistribution(Info info, std::vector const& allTuples) { } } } + + if (info.nodeId == 0) std::cout << "\tsorting list of tuples...\n"; //now we sort the list of tuples std::sort(nodeTuples.begin(), nodeTuples.end()); + + if (info.nodeId == 0) std::cout << "\trestoring tuples...\n"; // we bring the tuples abc back in the order a fetchElement(ABCTuple cur, ABCTuple suc){ std::vector result; @@ -1821,10 +1837,10 @@ Then we have the following #+begin_example Global rank | 0 1 2 3 4 5 6 7 8 +key | global rank nodeId | 0 1 0 1 1 0 2 2 2 Local rank | 0 0 1 1 2 2 0 1 2 intra color | 0 1 0 1 1 0 2 2 2 -key | global rank #+end_example @@ -1853,11 +1869,8 @@ std::vector main(MPI_Comm universe, size_t Nv) { // We want to construct a communicator which only contains of one // element per node - bool makeDistribution - = nodeInfos[rank].localRank == 0 - ? true - : false - ; + bool const makeDistribution + = nodeInfos[rank].localRank == 0; std::vector nodeTuples = makeDistribution @@ -1870,6 +1883,7 @@ std::vector main(MPI_Comm universe, size_t Nv) { : std::vector() ; + LOG(1,"Atrip") << "got nodeTuples\n"; // now we have to send the data from **one** rank on each node // to all others ranks of this node @@ -1884,7 +1898,7 @@ std::vector main(MPI_Comm universe, size_t Nv) { #+end_src Every node has to distribute **at least** -nodeTuples.size() / nodeInfos[rank].ranksPerNode +=nodeTuples.size() / nodeInfos[rank].ranksPerNode= tuples among the ranks. We have to communicate this quantity among all nodes. @@ -1912,6 +1926,10 @@ We have to communicate this quantity among all nodes. MPI_UINT64_T, 0, universe); + + LOG(1,"Atrip") << "Tuples per rank: " << tuplesPerRankGlobal << "\n"; + LOG(1,"Atrip") << "ranks per node " << nodeInfos[rank].ranksPerNode << "\n"; + LOG(1,"Atrip") << "#nodes " << nNodes << "\n"; #+end_src Now we have the tuples that every rank has to have, i.e., @@ -1931,16 +1949,44 @@ resizing that the condition above is met, i.e., so we can resize and add some fake tuples at the end as padding. #+begin_src c++ :tangle (atrip-tuples-h) -size_t const totalTuplesLocal - = tuplesPerRankLocal - ,* nodeInfos[rank].ranksPerNode; +size_t const totalTuples + = tuplesPerRankGlobal * nodeInfos[rank].ranksPerNode; -if (makeDistribution) +if (makeDistribution) { nodeTuples.insert(nodeTuples.end(), - totalTuplesLocal - nodeTuples.size(), + totalTuples - nodeTuples.size(), FAKE_TUPLE); +} #+end_src +And now we can simply scatter the tuples in nodeTuples and send +=tuplesPerRankGlobal= to the different ranks in the node, + +#+begin_src c++ :tangle (atrip-tuples-h) +{ + // construct mpi type for abctuple + MPI_Datatype MPI_ABCTUPLE; + MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE); + MPI_Type_commit(&MPI_ABCTUPLE); + + LOG(1,"Atrip") << "scattering tuples \n"; + + result.resize(tuplesPerRankGlobal); + MPI_Scatter(nodeTuples.data(), + tuplesPerRankGlobal, + MPI_ABCTUPLE, + result.data(), + tuplesPerRankGlobal, + MPI_ABCTUPLE, + 0, + INTRA_COMM); + + MPI_Type_free(&MPI_ABCTUPLE); + +} +#+end_src + + The next step is sending the tuples in the local root rank to the other ranks in the node, this we do with the MPI function =MPI_Scatterv=. @@ -1963,7 +2009,8 @@ Therefore, the =displacements= are simply the vector and the =sendCounts= vector is simply the constant vector =tuplesPerRankLocal= of size =ranksPerNode=. -#+begin_src c++ :tangle (atrip-tuples-h) +TODO: Remove +#+begin_src c++ { std::vector const sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal); @@ -1983,6 +2030,7 @@ and the =sendCounts= vector is simply the constant vector MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE); MPI_Type_commit(&MPI_ABCTUPLE); + LOG(1,"Atrip") << "scattering tuples \n"; MPI_Scatterv(nodeTuples.data(), sendCounts.data(), displacements.data(), @@ -2005,9 +2053,7 @@ inserting fake tuples where needed #+begin_src c++ :tangle (atrip-tuples-h) - result.insert(result.end(), - tuplesPerRankGlobal - result.size(), - FAKE_TUPLE); + LOG(1,"Atrip") << "scattering tuples \n"; return result; diff --git a/include/atrip/Tuples.hpp b/include/atrip/Tuples.hpp index 685af36..474b360 100644 --- a/include/atrip/Tuples.hpp +++ b/include/atrip/Tuples.hpp @@ -233,6 +233,8 @@ specialDistribution(Info info, std::vector const& allTuples) { container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t); } + if (info.nodeId == 0) + std::cout << "\tBuilding 1-d containers\n"; // DISTRIBUTE 1-d containers // every tuple which is only located at one node belongs to this node { @@ -241,6 +243,8 @@ specialDistribution(Info info, std::vector const& allTuples) { std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin()); } + if (info.nodeId == 0) + std::cout << "\tBuilding 2-d containers\n"; // DISTRIBUTE 2-d containers //the tuples which are located at two nodes are half/half given to these nodes for (auto &m: container2d) { @@ -270,6 +274,8 @@ specialDistribution(Info info, std::vector const& allTuples) { } + if (info.nodeId == 0) + std::cout << "\tBuilding 3-d containers\n"; // DISTRIBUTE 3-d containers // similar game for the tuples which belong to three different nodes for (auto m: container3d){ @@ -304,6 +310,8 @@ specialDistribution(Info info, std::vector const& allTuples) { } + if (info.nodeId == 0) + std::cout << "\tsorting...\n"; // sort part of group-and-sort algorithm // every tuple on a given node is sorted in a way that // the 'home elements' are the fastest index. @@ -324,29 +332,18 @@ specialDistribution(Info info, std::vector const& allTuples) { } } } + + if (info.nodeId == 0) std::cout << "\tsorting list of tuples...\n"; //now we sort the list of tuples std::sort(nodeTuples.begin(), nodeTuples.end()); + + if (info.nodeId == 0) std::cout << "\trestoring tuples...\n"; // we bring the tuples abc back in the order a fetchElement(ABCTuple cur, ABCTuple suc){ - std::vector result; - ABCTuple inter; - std::sort(cur.begin(), cur.end()); - std::sort(suc.begin(), suc.end()); - std::array::iterator rit, cit, sit; - cit = std::unique(cur.begin(), cur.end()); - sit = std::unique(suc.begin(), suc.end()); - rit = std::set_difference(suc.begin(), sit, cur.begin(), cit, inter.begin()); - result.resize(rit - inter.begin()); - std::copy(inter.begin(), rit, result.begin()); - return result; -} // Distribution:1 ends here // [[file:~/atrip/atrip.org::*Main][Main:1]] @@ -371,11 +368,8 @@ std::vector main(MPI_Comm universe, size_t Nv) { // We want to construct a communicator which only contains of one // element per node - bool makeDistribution - = nodeInfos[rank].localRank == 0 - ? true - : false - ; + bool const makeDistribution + = nodeInfos[rank].localRank == 0; std::vector nodeTuples = makeDistribution @@ -388,6 +382,7 @@ std::vector main(MPI_Comm universe, size_t Nv) { : std::vector() ; + LOG(1,"Atrip") << "got nodeTuples\n"; // now we have to send the data from **one** rank on each node // to all others ranks of this node @@ -423,64 +418,60 @@ MPI_Bcast(&tuplesPerRankGlobal, MPI_UINT64_T, 0, universe); + +LOG(1,"Atrip") << "Tuples per rank: " << tuplesPerRankGlobal << "\n"; +LOG(1,"Atrip") << "ranks per node " << nodeInfos[rank].ranksPerNode << "\n"; +LOG(1,"Atrip") << "#nodes " << nNodes << "\n"; // Main:2 ends here // [[file:~/atrip/atrip.org::*Main][Main:3]] -size_t const totalTuplesLocal - = tuplesPerRankLocal - * nodeInfos[rank].ranksPerNode; +size_t const totalTuples + = tuplesPerRankGlobal * nodeInfos[rank].ranksPerNode; -if (makeDistribution) +if (makeDistribution) { nodeTuples.insert(nodeTuples.end(), - totalTuplesLocal - nodeTuples.size(), + totalTuples - nodeTuples.size(), FAKE_TUPLE); +} // Main:3 ends here // [[file:~/atrip/atrip.org::*Main][Main:4]] { - std::vector const - sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal); - - std::vector - displacements(nodeInfos[rank].ranksPerNode); - - std::iota(displacements.begin(), - displacements.end(), - tuplesPerRankLocal); - - // important! - result.resize(tuplesPerRankLocal); - // construct mpi type for abctuple MPI_Datatype MPI_ABCTUPLE; MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE); MPI_Type_commit(&MPI_ABCTUPLE); - MPI_Scatterv(nodeTuples.data(), - sendCounts.data(), - displacements.data(), + LOG(1,"Atrip") << "scattering tuples \n"; + + result.resize(tuplesPerRankGlobal); + MPI_Scatter(nodeTuples.data(), + tuplesPerRankGlobal, MPI_ABCTUPLE, result.data(), - tuplesPerRankLocal, + tuplesPerRankGlobal, MPI_ABCTUPLE, 0, INTRA_COMM); - // free type MPI_Type_free(&MPI_ABCTUPLE); } // Main:4 ends here -// [[file:~/atrip/atrip.org::*Main][Main:5]] -result.insert(result.end(), +// [[file:~/atrip/atrip.org::*Main][Main:6]] +/* + result.insert(result.end(), tuplesPerRankGlobal - result.size(), FAKE_TUPLE); +*/ + + LOG(1,"Atrip") << "scattering tuples \n"; return result; } -// Main:5 ends here +// Main:6 ends here // [[file:~/atrip/atrip.org::*Interface][Interface:1]] struct Distribution : public TuplesDistribution {