diff --git a/atrip.org b/atrip.org
index 15c06fa..d98c068 100644
--- a/atrip.org
+++ b/atrip.org
@@ -1694,6 +1694,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
       container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t);
   }
 
+  if (info.nodeId == 0)
+    std::cout << "\tBuilding 1-d containers\n";
   // DISTRIBUTE 1-d containers
   // every tuple which is only located at one node belongs to this node
   {
@@ -1702,6 +1704,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
     std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin());
   }
 
+  if (info.nodeId == 0)
+    std::cout << "\tBuilding 2-d containers\n";
   // DISTRIBUTE 2-d containers
   //the tuples which are located at two nodes are half/half given to these nodes
   for (auto &m: container2d) {
@@ -1731,6 +1735,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
 
   }
 
+  if (info.nodeId == 0)
+    std::cout << "\tBuilding 3-d containers\n";
   // DISTRIBUTE 3-d containers
   // similar game for the tuples which belong to three different nodes
   for (auto m: container3d){
@@ -1765,6 +1771,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
   }
 
 
+  if (info.nodeId == 0)
+    std::cout << "\tsorting...\n";
   // sort part of group-and-sort algorithm
   // every tuple on a given node is sorted in a way that
   // the 'home elements' are the fastest index.
@@ -1785,15 +1793,23 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
       }
     }
   }
+
+  if (info.nodeId == 0) std::cout << "\tsorting list of tuples...\n";
   //now we sort the list of tuples
   std::sort(nodeTuples.begin(), nodeTuples.end());
+
+  if (info.nodeId == 0) std::cout << "\trestoring tuples...\n";
   // we bring the tuples abc back in the order a<b<c
   for (auto &t: nodeTuples)  std::sort(t.begin(), t.end());
 
   return nodeTuples;
 
 }
+#+end_src
 
+
+TODO: ERASE THIS CODE BLOCK
+#+begin_src c++
 //determine which element has to be fetched from sources for the next iteration
 std::vector<size_t> fetchElement(ABCTuple cur, ABCTuple suc){
   std::vector<size_t> result;
@@ -1821,10 +1837,10 @@ Then we have the following
 
 #+begin_example
 Global rank | 0 1 2 3 4 5 6 7 8
+key         | global rank
 nodeId      | 0 1 0 1 1 0 2 2 2
 Local rank  | 0 0 1 1 2 2 0 1 2
 intra color | 0 1 0 1 1 0 2 2 2
-key         | global rank
 #+end_example
 
 
@@ -1853,11 +1869,8 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
 
   // We want to construct a communicator which only contains of one
   // element per node
-  bool makeDistribution
-    = nodeInfos[rank].localRank == 0
-    ? true
-    : false
-    ;
+  bool const makeDistribution
+    = nodeInfos[rank].localRank == 0;
 
   std::vector<ABCTuple>
     nodeTuples = makeDistribution
@@ -1870,6 +1883,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
                : std::vector<ABCTuple>()
                ;
 
+  LOG(1,"Atrip") << "got nodeTuples\n";
 
   // now we have to send the data from **one** rank on each node
   // to all others ranks of this node
@@ -1884,7 +1898,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
 #+end_src
 
 Every node has to distribute **at least**
-nodeTuples.size() / nodeInfos[rank].ranksPerNode
+=nodeTuples.size() / nodeInfos[rank].ranksPerNode=
 tuples among the ranks.
 
 We have to communicate this quantity among all nodes.
@@ -1912,6 +1926,10 @@ We have to communicate this quantity among all nodes.
             MPI_UINT64_T,
             0,
             universe);
+
+  LOG(1,"Atrip") << "Tuples per rank: " << tuplesPerRankGlobal << "\n";
+  LOG(1,"Atrip") << "ranks per node " << nodeInfos[rank].ranksPerNode << "\n";
+  LOG(1,"Atrip") << "#nodes " << nNodes << "\n";
 #+end_src
 
 Now we have the tuples that every rank has to have, i.e.,
@@ -1931,16 +1949,44 @@ resizing that the condition above is met, i.e., so we can resize
 and add some fake tuples at the end as padding.
 
 #+begin_src c++ :tangle (atrip-tuples-h)
-size_t const totalTuplesLocal
-  = tuplesPerRankLocal
-  ,* nodeInfos[rank].ranksPerNode;
+size_t const totalTuples
+  = tuplesPerRankGlobal * nodeInfos[rank].ranksPerNode;
 
-if (makeDistribution)
+if (makeDistribution) {
   nodeTuples.insert(nodeTuples.end(),
-                    totalTuplesLocal - nodeTuples.size(),
+                    totalTuples - nodeTuples.size(),
                     FAKE_TUPLE);
+}
 #+end_src
 
+And now we can simply scatter the tuples in =nodeTuples=, sending
+=tuplesPerRankGlobal= tuples to each rank in the node:
+
+#+begin_src c++ :tangle (atrip-tuples-h)
+{
+  // construct mpi type for abctuple
+  MPI_Datatype MPI_ABCTUPLE;
+  MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE);
+  MPI_Type_commit(&MPI_ABCTUPLE);
+
+  LOG(1,"Atrip") << "scattering tuples \n";
+
+  result.resize(tuplesPerRankGlobal);
+  MPI_Scatter(nodeTuples.data(),
+              tuplesPerRankGlobal,
+              MPI_ABCTUPLE,
+              result.data(),
+              tuplesPerRankGlobal,
+              MPI_ABCTUPLE,
+              0,
+              INTRA_COMM);
+
+  MPI_Type_free(&MPI_ABCTUPLE);
+
+}
+#+end_src
+
+
 The next step is sending the tuples in the local root rank
 to the other ranks in the node, this we do with the MPI function
 =MPI_Scatterv=.
@@ -1963,7 +2009,8 @@ Therefore, the =displacements= are simply the vector
 and the =sendCounts= vector is simply the constant vector
 =tuplesPerRankLocal= of size =ranksPerNode=.
 
-#+begin_src c++ :tangle (atrip-tuples-h)
+TODO: Remove
+#+begin_src c++
 {
   std::vector<int> const
     sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal);
@@ -1983,6 +2030,7 @@ and the =sendCounts= vector is simply the constant vector
   MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE);
   MPI_Type_commit(&MPI_ABCTUPLE);
 
+  LOG(1,"Atrip") << "scattering tuples \n";
   MPI_Scatterv(nodeTuples.data(),
               sendCounts.data(),
               displacements.data(),
@@ -2005,9 +2053,7 @@ inserting fake tuples where needed
 
 #+begin_src c++ :tangle (atrip-tuples-h)
 
-  result.insert(result.end(),
-                tuplesPerRankGlobal - result.size(),
-                FAKE_TUPLE);
+  LOG(1,"Atrip") << "scattering tuples \n";
 
   return result;
 
diff --git a/include/atrip/Tuples.hpp b/include/atrip/Tuples.hpp
index 685af36..474b360 100644
--- a/include/atrip/Tuples.hpp
+++ b/include/atrip/Tuples.hpp
@@ -233,6 +233,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
       container3d[nt[0] + nNodes*nt[1] + nNodes*nNodes*nt[2]].push_back(t);
   }
 
+  if (info.nodeId == 0)
+    std::cout << "\tBuilding 1-d containers\n";
   // DISTRIBUTE 1-d containers
   // every tuple which is only located at one node belongs to this node
   {
@@ -241,6 +243,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
     std::copy(tuplesVec.begin(), tuplesVec.end(), nodeTuples.begin());
   }
 
+  if (info.nodeId == 0)
+    std::cout << "\tBuilding 2-d containers\n";
   // DISTRIBUTE 2-d containers
   //the tuples which are located at two nodes are half/half given to these nodes
   for (auto &m: container2d) {
@@ -270,6 +274,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
 
   }
 
+  if (info.nodeId == 0)
+    std::cout << "\tBuilding 3-d containers\n";
   // DISTRIBUTE 3-d containers
   // similar game for the tuples which belong to three different nodes
   for (auto m: container3d){
@@ -304,6 +310,8 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
   }
 
 
+  if (info.nodeId == 0)
+    std::cout << "\tsorting...\n";
   // sort part of group-and-sort algorithm
   // every tuple on a given node is sorted in a way that
   // the 'home elements' are the fastest index.
@@ -324,29 +332,18 @@ specialDistribution(Info info, std::vector<ABCTuple> const& allTuples) {
       }
     }
   }
+
+  if (info.nodeId == 0) std::cout << "\tsorting list of tuples...\n";
   //now we sort the list of tuples
   std::sort(nodeTuples.begin(), nodeTuples.end());
+
+  if (info.nodeId == 0) std::cout << "\trestoring tuples...\n";
   // we bring the tuples abc back in the order a<b<c
   for (auto &t: nodeTuples)  std::sort(t.begin(), t.end());
 
   return nodeTuples;
 
 }
-
-//determine which element has to be fetched from sources for the next iteration
-std::vector<size_t> fetchElement(ABCTuple cur, ABCTuple suc){
-  std::vector<size_t> result;
-  ABCTuple inter;
-  std::sort(cur.begin(), cur.end());
-  std::sort(suc.begin(), suc.end());
-  std::array<size_t,3>::iterator rit, cit, sit;
-  cit = std::unique(cur.begin(), cur.end());
-  sit = std::unique(suc.begin(), suc.end());
-  rit = std::set_difference(suc.begin(), sit, cur.begin(), cit, inter.begin());
-  result.resize(rit - inter.begin());
-  std::copy(inter.begin(), rit, result.begin());
-  return result;
-}
 // Distribution:1 ends here
 
 // [[file:~/atrip/atrip.org::*Main][Main:1]]
@@ -371,11 +368,8 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
 
   // We want to construct a communicator which only contains of one
   // element per node
-  bool makeDistribution
-    = nodeInfos[rank].localRank == 0
-    ? true
-    : false
-    ;
+  bool const makeDistribution
+    = nodeInfos[rank].localRank == 0;
 
   std::vector<ABCTuple>
     nodeTuples = makeDistribution
@@ -388,6 +382,7 @@ std::vector<ABCTuple> main(MPI_Comm universe, size_t Nv) {
                : std::vector<ABCTuple>()
                ;
 
+  LOG(1,"Atrip") << "got nodeTuples\n";
 
   // now we have to send the data from **one** rank on each node
   // to all others ranks of this node
@@ -423,64 +418,60 @@ MPI_Bcast(&tuplesPerRankGlobal,
           MPI_UINT64_T,
           0,
           universe);
+
+LOG(1,"Atrip") << "Tuples per rank: " << tuplesPerRankGlobal << "\n";
+LOG(1,"Atrip") << "ranks per node " << nodeInfos[rank].ranksPerNode << "\n";
+LOG(1,"Atrip") << "#nodes " << nNodes << "\n";
 // Main:2 ends here
 
 // [[file:~/atrip/atrip.org::*Main][Main:3]]
-size_t const totalTuplesLocal
-  = tuplesPerRankLocal
-  * nodeInfos[rank].ranksPerNode;
+size_t const totalTuples
+  = tuplesPerRankGlobal * nodeInfos[rank].ranksPerNode;
 
-if (makeDistribution)
+if (makeDistribution) {
   nodeTuples.insert(nodeTuples.end(),
-                    totalTuplesLocal - nodeTuples.size(),
+                    totalTuples - nodeTuples.size(),
                     FAKE_TUPLE);
+}
 // Main:3 ends here
 
 // [[file:~/atrip/atrip.org::*Main][Main:4]]
 {
-  std::vector<int> const
-    sendCounts(nodeInfos[rank].ranksPerNode, tuplesPerRankLocal);
-
-  std::vector<int>
-    displacements(nodeInfos[rank].ranksPerNode);
-
-  std::iota(displacements.begin(),
-            displacements.end(),
-            tuplesPerRankLocal);
-
-  // important!
-  result.resize(tuplesPerRankLocal);
-
   // construct mpi type for abctuple
   MPI_Datatype MPI_ABCTUPLE;
   MPI_Type_vector(nodeTuples[0].size(), 1, 1, MPI_UINT64_T, &MPI_ABCTUPLE);
   MPI_Type_commit(&MPI_ABCTUPLE);
 
-  MPI_Scatterv(nodeTuples.data(),
-              sendCounts.data(),
-              displacements.data(),
+  LOG(1,"Atrip") << "scattering tuples \n";
+
+  result.resize(tuplesPerRankGlobal);
+  MPI_Scatter(nodeTuples.data(),
+              tuplesPerRankGlobal,
               MPI_ABCTUPLE,
               result.data(),
-              tuplesPerRankLocal,
+              tuplesPerRankGlobal,
               MPI_ABCTUPLE,
               0,
               INTRA_COMM);
 
-  // free type
   MPI_Type_free(&MPI_ABCTUPLE);
 
 }
 // Main:4 ends here
 
-// [[file:~/atrip/atrip.org::*Main][Main:5]]
-result.insert(result.end(),
+// [[file:~/atrip/atrip.org::*Main][Main:6]]
+/*
+  result.insert(result.end(),
                 tuplesPerRankGlobal - result.size(),
                 FAKE_TUPLE);
+*/
+
+  LOG(1,"Atrip") << "scattering tuples \n";
 
   return result;
 
 }
-// Main:5 ends here
+// Main:6 ends here
 
 // [[file:~/atrip/atrip.org::*Interface][Interface:1]]
 struct Distribution : public TuplesDistribution {