From 0b53b8817485a47024b19cf054aa9bc9fff0838a Mon Sep 17 00:00:00 2001
From: Ryan Aronson
Date: Mon, 24 Jun 2024 15:44:59 +0000
Subject: [PATCH 1/9] added comments for global numbering (Ryan's version)

---
 .../mesh/generators/BuildPods.cpp          |   2 +-
 .../mesh/generators/NewGhosting.cpp        |   5 +
 .../mesh/generators/NewGlobalNumbering.cpp | 979 +++++++++---------
 3 files changed, 520 insertions(+), 466 deletions(-)

diff --git a/src/coreComponents/mesh/generators/BuildPods.cpp b/src/coreComponents/mesh/generators/BuildPods.cpp
index 8edfbcd8b1d..0e69f452e55 100644
--- a/src/coreComponents/mesh/generators/BuildPods.cpp
+++ b/src/coreComponents/mesh/generators/BuildPods.cpp
@@ -216,7 +216,7 @@ std::map< integer, array1d< localIndex > > toFlavorlessMapping( std::map< MpiRan
   }

   return output;
-};
+}

 EdgeMgrImpl makeFlavorlessEdgeMgrImpl( std::size_t const & numEdges,
                                        std::vector< integer > const & ghostRank,
diff --git a/src/coreComponents/mesh/generators/NewGhosting.cpp b/src/coreComponents/mesh/generators/NewGhosting.cpp
index d5a202a164c..da484f44b93 100644
--- a/src/coreComponents/mesh/generators/NewGhosting.cpp
+++ b/src/coreComponents/mesh/generators/NewGhosting.cpp
@@ -1119,6 +1119,11 @@ void doTheNewGhosting( vtkSmartPointer< vtkDataSet > mesh,
                        std::set< MpiRank > const & neighbors,
                        MeshMappingImpl & meshMappings )
 {
+
+  // The first step in ghosting is to have each rank create the global IDs for each entry appearing on it.
+  // `buckets` is a group of maps from sets of MPI ranks to the shared (nodes, edges, faces).
+  // `offsets` has the same keys, but the values are now the global index of the first entry in each bucket;
+  // the rest of the items in a bucket are numbered sequentially following the value of the offset.
   auto const [buckets, offsets] = doTheNewGlobalNumbering( mesh, neighbors );

   // Now we exchange the data with our neighbors.
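To make the bucket/offset numbering concrete, here is a minimal stand-alone sketch; the rank sets and bucket sizes are invented for illustration, and plain int/long stand in for GEOS's strong MpiRank/EdgeGlbIdx index types:

#include <cstddef>
#include <cstdio>
#include <map>
#include <set>

int main()
{
  using RankSet = std::set< int >; // stand-in for std::set< MpiRank >
  // two buckets: 3 edges seen only by rank 0, 2 edges shared by ranks 0 and 1
  std::map< RankSet, std::size_t > const bucketSizes = { { { 0 }, 3 }, { { 0, 1 }, 2 } };
  // the offset is the global id of the first item of each bucket
  std::map< RankSet, long > const offsets = { { { 0 }, 0 }, { { 0, 1 }, 3 } };
  for( auto const & [ranks, size] : bucketSizes )
  {
    for( std::size_t k = 0; k < size; ++k )
    {
      // item k of a bucket is numbered sequentially from the bucket's offset
      std::printf( "item %zu -> global id %ld\n", k, offsets.at( ranks ) + static_cast< long >( k ) );
    }
  }
  return 0;
}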
diff --git a/src/coreComponents/mesh/generators/NewGlobalNumbering.cpp b/src/coreComponents/mesh/generators/NewGlobalNumbering.cpp
index 62d6c34e61d..9b28694099b 100644
--- a/src/coreComponents/mesh/generators/NewGlobalNumbering.cpp
+++ b/src/coreComponents/mesh/generators/NewGlobalNumbering.cpp
@@ -31,583 +31,632 @@ namespace geos::ghosting
 {

-struct Exchange
-{
-  std::set< NodeGlbIdx > nodes;
-  std::set< Edge > edges;
-  std::set< Face > faces;
-};
+  struct Exchange
+  {
+    std::set<NodeGlbIdx> nodes;
+    std::set<Edge> edges;
+    std::set<Face> faces;
+  };

-void to_json( json & j,
-              const Exchange & v )
-{
-  j = json{ { "nodes", v.nodes },
-            { "edges", v.edges },
-            { "faces", v.faces } };
-}
+  void to_json(json &j,
+               const Exchange &v)
+  {
+    j = json{{"nodes", v.nodes},
+             {"edges", v.edges},
+             {"faces", v.faces}};
+  }

-void from_json( const json & j,
-                Exchange & v )
-{
-  v.nodes = j.at( "nodes" ).get< std::set< NodeGlbIdx > >();
-  v.edges = j.at( "edges" ).get< std::set< Edge > >();
-  v.faces = j.at( "faces" ).get< std::set< Face > >();
-}
+  void from_json(const json &j,
+                 Exchange &v)
+  {
+    v.nodes = j.at("nodes").get<std::set<NodeGlbIdx>>();
+    v.edges = j.at("edges").get<std::set<Edge>>();
+    v.faces = j.at("faces").get<std::set<Face>>();
+  }

-array1d< std::uint8_t > convertExchange( Exchange const & exchange )
-{
-  std::vector< std::uint8_t > const tmp = json::to_cbor( json( exchange ) );
-  array1d< std::uint8_t > result;
-  result.reserve( tmp.size() );
-  for( std::uint8_t const & t: tmp )
+  array1d<std::uint8_t> convertExchange(Exchange const &exchange)
   {
-    result.emplace_back( t );
+    std::vector<std::uint8_t> const tmp = json::to_cbor(json(exchange));
+    array1d<std::uint8_t> result;
+    result.reserve(tmp.size());
+    for (std::uint8_t const &t : tmp)
+    {
+      result.emplace_back(t);
+    }
+    return result;
   }
-  return result;
-}

-Exchange convertExchange( array1d< std::uint8_t > const & input )
-{
-  std::vector< std::uint8_t > const tmp( std::cbegin( input ), std::cend( input ) );
-  return json::from_cbor( tmp, false ).template get< Exchange >();
-}
-
-/**
- * @brief Extract the cells at the boundary of the mesh.
- * @param mesh The vtk mesh.
- * @return The vtk cell ids.
- */
-std::set< vtkIdType > extractBoundaryCells( vtkSmartPointer< vtkDataSet > mesh )
-{
-  // TODO Better handle the boundary information, forgetting about the 3d cells and simply handling the outside shell.
-  auto f = vtkDataSetSurfaceFilter::New();
-  f->PassThroughCellIdsOn();
-  f->PassThroughPointIdsOff();
-  f->FastModeOff();
-
-  string const originalCellsKey = "ORIGINAL_CELLS";
-  f->SetOriginalCellIdsName( originalCellsKey.c_str() );
-  auto boundaryMesh = vtkPolyData::New();
-  f->UnstructuredGridExecute( mesh, boundaryMesh );
-  vtkIdTypeArray const * originalCells = vtkIdTypeArray::FastDownCast( boundaryMesh->GetCellData()->GetArray( originalCellsKey.c_str() ) );
-
-  std::set< vtkIdType > boundaryCellIdxs;
-  for( auto i = 0; i < originalCells->GetNumberOfTuples(); ++i )
+  Exchange convertExchange(array1d<std::uint8_t> const &input)
   {
-    boundaryCellIdxs.insert( originalCells->GetValue( i ) );
+    std::vector<std::uint8_t> const tmp(std::cbegin(input), std::cend(input));
+    return json::from_cbor(tmp, false).template get<Exchange>();
   }

-  return boundaryCellIdxs;
-}
+  /**
+   * @brief Extract the cells at the boundary of the mesh.
+   * @param mesh The vtk mesh.
+   * @return The vtk cell ids.
+   */
+  std::set<vtkIdType> extractBoundaryCells(vtkSmartPointer<vtkDataSet> mesh)
+  {
+    // TODO Better handle the boundary information, forgetting about the 3d cells and simply handling the outside shell.
+    auto f = vtkDataSetSurfaceFilter::New();
+    f->PassThroughCellIdsOn();
+    f->PassThroughPointIdsOff();
+    f->FastModeOff();
+
+    string const originalCellsKey = "ORIGINAL_CELLS";
+    f->SetOriginalCellIdsName(originalCellsKey.c_str());
+    auto boundaryMesh = vtkPolyData::New();
+    f->UnstructuredGridExecute(mesh, boundaryMesh);
+    vtkIdTypeArray const *originalCells = vtkIdTypeArray::FastDownCast(boundaryMesh->GetCellData()->GetArray(originalCellsKey.c_str()));
+
+    std::set<vtkIdType> boundaryCellIdxs;
+    for (auto i = 0; i < originalCells->GetNumberOfTuples(); ++i)
+    {
+      boundaryCellIdxs.insert(originalCells->GetValue(i));
+    }

-Face reorderFaceNodes( std::vector< NodeGlbIdx > const & nodes, bool & isFlipped, std::uint8_t & start ) // TODO unit test
-{
-  std::size_t const n = nodes.size();
+    return boundaryCellIdxs;
+  }

-  std::vector< NodeGlbIdx > f( nodes );
+  Face reorderFaceNodes(std::vector<NodeGlbIdx> const &nodes, bool &isFlipped, std::uint8_t &start) // TODO unit test
+  {
+    std::size_t const n = nodes.size();

-  auto const cit = std::min_element( std::cbegin( nodes ), std::cend( nodes ) );
-  auto const minIdx = std::distance( std::cbegin( nodes ), cit );
+    std::vector<NodeGlbIdx> f(nodes);

-  std::size_t const prevIdx = minIdx == 0 ? n - 1 : minIdx - 1;
-  std::size_t const nextIdx = minIdx == intConv< int >( n ) - 1 ? 0 : minIdx + 1;
+    auto const cit = std::min_element(std::cbegin(nodes), std::cend(nodes));
+    auto const minIdx = std::distance(std::cbegin(nodes), cit);

-  start = intConv< std::uint8_t >( minIdx );
-  isFlipped = nodes[prevIdx] < nodes[nextIdx];
+    std::size_t const prevIdx = minIdx == 0 ? n - 1 : minIdx - 1;
+    std::size_t const nextIdx = minIdx == intConv<int>(n) - 1 ? 0 : minIdx + 1;

-  std::size_t const pivot = isFlipped ? n - 1 - minIdx : minIdx;
-  if( isFlipped )
-  {
-    std::reverse( std::begin( f ), std::end( f ) );
-  }
-  std::rotate( std::begin( f ), std::begin( f ) + pivot, std::end( f ) );
+    start = intConv<std::uint8_t>(minIdx);
+    isFlipped = nodes[prevIdx] < nodes[nextIdx];

+    std::size_t const pivot = isFlipped ? n - 1 - minIdx : minIdx;
+    if (isFlipped)
+    {
+      std::reverse(std::begin(f), std::end(f));
+    }
+    std::rotate(std::begin(f), std::begin(f) + pivot, std::end(f));

-  return f;
-}
+    return f;
+  }

-std::vector< NodeLocIdx > resetFaceNodes( std::vector< NodeLocIdx > const & nodes,
-                                          bool const & isFlipped,
-                                          std::uint8_t const & start )
-{
-  std::vector< NodeLocIdx > result( nodes );
-  if( isFlipped )
-  {
-    std::reverse( std::begin( result ), std::end( result ) );
-    std::uint8_t const pivot = std::size( result ) - 1 - start;
-    std::rotate( std::begin( result ), std::begin( result ) + pivot, std::end( result ) );
-  }
-  else
-  {
-    std::rotate( std::rbegin( result ), std::rbegin( result ) + start, std::rend( result ) );
-  }
-  return result;
-}
-
-Exchange buildSpecificData( vtkSmartPointer< vtkDataSet > mesh,
-                            std::set< vtkIdType > const & cellIds )
-{
-  vtkIdTypeArray * gids = vtkIdTypeArray::FastDownCast( mesh->GetPointData()->GetGlobalIds() );
-  Span< vtkIdType > const globalPtIds( (vtkIdType *) gids->GetPointer( 0 ), gids->GetNumberOfTuples() );
-  std::vector< Edge > tmpEdges;
-  std::vector< Face > tmpFaces;
-
-  // Pre-allocation of the temporary vectors.
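+  // A hand-worked example (reviewer sketch, node values invented for illustration) of
+  // what reorderFaceNodes above and resetFaceNodes just below do:
+  //   nodes = { 7, 2, 9, 4 }: the minimum 2 sits at index 1, so start = 1;
+  //   its neighbours are prev = 7 and next = 9, so isFlipped = (7 < 9) = true;
+  //   reversing gives { 4, 9, 2, 7 }, pivot = 4 - 1 - 1 = 2, and rotating by the pivot
+  //   yields { 2, 7, 4, 9 }: the canonical order starts at the smallest node id and
+  //   walks toward the smaller of its two neighbours.
+  //   resetFaceNodes( { 2, 7, 4, 9 }, true, 1 ) applies the inverse permutation and
+  //   recovers the original { 7, 2, 9, 4 }.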
+  std::vector<NodeLocIdx> resetFaceNodes(std::vector<NodeLocIdx> const &nodes,
+                                         bool const &isFlipped,
+                                         std::uint8_t const &start)
   {
-  std::size_t numEdges = 0;
-  std::size_t numFaces = 0;
-  for( vtkIdType const & c: cellIds )
+    std::vector<NodeLocIdx> result(nodes);
+    if (isFlipped)
     {
-      vtkCell * cell = mesh->GetCell( c );
-      numEdges += cell->GetNumberOfEdges();
-      numFaces += cell->GetNumberOfFaces();
+      std::reverse(std::begin(result), std::end(result));
+      std::uint8_t const pivot = std::size(result) - 1 - start;
+      std::rotate(std::begin(result), std::begin(result) + pivot, std::end(result));
     }
-    tmpEdges.reserve( numEdges );
-    tmpFaces.reserve( numFaces );
+    else
+    {
+      std::rotate(std::rbegin(result), std::rbegin(result) + start, std::rend(result));
+    }
+    return result;
   }

-  // Filling the temporary.
-  for( auto c = 0; c < mesh->GetNumberOfCells(); ++c )
+  Exchange buildSpecificData(vtkSmartPointer<vtkDataSet> mesh,
+                             std::set<vtkIdType> const &cellIds)
   {
-    vtkCell * cell = mesh->GetCell( c );
+    vtkIdTypeArray *gids = vtkIdTypeArray::FastDownCast(mesh->GetPointData()->GetGlobalIds());
+    Span<vtkIdType> const globalPtIds((vtkIdType *)gids->GetPointer(0), gids->GetNumberOfTuples());

-    for( auto e = 0; e < cell->GetNumberOfEdges(); ++e )
-    {
-      vtkCell * edge = cell->GetEdge( e );
-      vtkIdType const nli0 = edge->GetPointId( 0 );
-      vtkIdType const nli1 = edge->GetPointId( 1 );
-      vtkIdType const ngi0 = globalPtIds[nli0];
-      vtkIdType const ngi1 = globalPtIds[nli1];
-      tmpEdges.emplace_back( std::minmax( { NodeGlbIdx{ ngi0 }, NodeGlbIdx{ ngi1 } } ) );
-    }
+    std::vector<Edge> tmpEdges;
+    std::vector<Face> tmpFaces;

-    for( auto f = 0; f < cell->GetNumberOfFaces(); ++f )
+    // Pre-allocation of the temporary vectors.
     {
-      vtkCell * face = cell->GetFace( f );
-      vtkIdList * pids = face->GetPointIds();
-      std::vector< NodeGlbIdx > nodes( pids->GetNumberOfIds() );
-      for( std::size_t i = 0; i < nodes.size(); ++i )
+      std::size_t numEdges = 0;
+      std::size_t numFaces = 0;
+      for (vtkIdType const &c : cellIds)
       {
-        vtkIdType const nli = face->GetPointId( i );
-        vtkIdType const ngi = globalPtIds[nli];
-        nodes[i] = NodeGlbIdx{ ngi };
+        vtkCell *cell = mesh->GetCell(c);
+        numEdges += cell->GetNumberOfEdges();
+        numFaces += cell->GetNumberOfFaces();
       }
-      bool isFlipped;
-      std::uint8_t start;
-      tmpFaces.emplace_back( reorderFaceNodes( nodes, isFlipped, start ) );
+      tmpEdges.reserve(numEdges);
+      tmpFaces.reserve(numFaces);
     }
-  }

-  std::set< NodeGlbIdx > nodes;
-  std::transform( std::cbegin( globalPtIds ), std::cend( globalPtIds ),
-                  std::inserter( nodes, std::end( nodes ) ), []( vtkIdType const & id )
-                  { return NodeGlbIdx{ id }; } );
-
-  // Removing the duplicates by copying into a `std::set`.
-  std::set< Edge > edges{ tmpEdges.cbegin(), tmpEdges.cend() }; // SortedArray requires the input to be sorted already.
-  std::set< Face > faces{ tmpFaces.cbegin(), tmpFaces.cend() };
+    // Filling the temporary.
+    for (auto c = 0; c < mesh->GetNumberOfCells(); ++c)
+    {
+      vtkCell *cell = mesh->GetCell(c);

+      for (auto e = 0; e < cell->GetNumberOfEdges(); ++e)
+      {
+        vtkCell *edge = cell->GetEdge(e);
+        vtkIdType const nli0 = edge->GetPointId(0);
+        vtkIdType const nli1 = edge->GetPointId(1);
+        vtkIdType const ngi0 = globalPtIds[nli0];
+        vtkIdType const ngi1 = globalPtIds[nli1];
+        tmpEdges.emplace_back(std::minmax({NodeGlbIdx{ngi0}, NodeGlbIdx{ngi1}}));
+      }

+      for (auto f = 0; f < cell->GetNumberOfFaces(); ++f)
+      {
+        vtkCell *face = cell->GetFace(f);
+        vtkIdList *pids = face->GetPointIds();
+        std::vector<NodeGlbIdx> nodes(pids->GetNumberOfIds());
+        for (std::size_t i = 0; i < nodes.size(); ++i)
+        {
+          vtkIdType const nli = face->GetPointId(i);
+          vtkIdType const ngi = globalPtIds[nli];
+          nodes[i] = NodeGlbIdx{ngi};
+        }
+        bool isFlipped;
+        std::uint8_t start;
+        tmpFaces.emplace_back(reorderFaceNodes(nodes, isFlipped, start));
+      }
+    }

+    std::set<NodeGlbIdx> nodes;
+    std::transform(std::cbegin(globalPtIds), std::cend(globalPtIds),
+                   std::inserter(nodes, std::end(nodes)), [](vtkIdType const &id)
+                   { return NodeGlbIdx{id}; });

+    // Removing the duplicates by copying into a `std::set`.
+    std::set<Edge> edges{tmpEdges.cbegin(), tmpEdges.cend()}; // SortedArray requires the input to be sorted already.
+    std::set<Face> faces{tmpFaces.cbegin(), tmpFaces.cend()};

-  return { std::move( nodes ), std::move( edges ), std::move( faces ) };
-}
+    return {std::move(nodes), std::move(edges), std::move(faces)};
+  }

-Exchange buildFullData( vtkSmartPointer< vtkDataSet > mesh )
-{
-  std::set< vtkIdType > cellIds;
-  for( vtkIdType i = 0; i < mesh->GetNumberOfCells(); ++i )
-  {
-    cellIds.insert( cellIds.end(), i );
-  }
-
-  return buildSpecificData( mesh, cellIds );
-}
-
-Exchange buildExchangeData( vtkSmartPointer< vtkDataSet > mesh )
-{
-  return buildSpecificData( mesh, extractBoundaryCells( mesh ) );
-}
-
-/**
- * @brief
- * @tparam GLB_IDX The global index type of the geometrical quantity considered (typically @c EdgeGlbIdx or @c FaceGlbIdx).
- * @param sizes The sizes of the intersection bucket (from the current MPI rank).
- * @param offsets The offsets for each intersection bucket (from the MPI_Scan process, i.e. the previous ranks).
- * @param curRank Current MPI rank.
- * @return The offset buckets, updated with the sizes of the current MPI rank.
- */
-template< typename GLB_IDX >
-std::map< std::set< MpiRank >, GLB_IDX > updateBucketOffsets( std::map< std::set< MpiRank >, localIndex > const & sizes,
-                                                              std::map< std::set< MpiRank >, GLB_IDX > const & offsets,
-                                                              MpiRank curRank )
-{
-  std::map< std::set< MpiRank >, GLB_IDX > reducedOffsets;

-  // We only keep the offsets that are still relevant to the current and higher ranks.
-  // Note that `reducedOffsets` will be used by the _current_ rank too.
-  // Therefore, we'll send information that may not be useful to higher ranks.
-  // This is surely acceptable because the information is tiny.
-  for( auto const & [ranks, offset]: offsets )
-  {
-    MpiRank const maxConcerned = *std::max_element( ranks.cbegin(), ranks.cend() );
-    // TODO invert
-//    if( maxConcerned >= curRank )
-//    {
-//      reducedOffsets.emplace_hint( reducedOffsets.end(), ranks, offset );
-//    }
-    if( maxConcerned < curRank )
+  Exchange buildFullData(vtkSmartPointer<vtkDataSet> mesh)
+  {
+    std::set<vtkIdType> cellIds;
+    for (vtkIdType i = 0; i < mesh->GetNumberOfCells(); ++i)
     {
-      continue;
+      cellIds.insert(cellIds.end(), i);
     }
-    reducedOffsets.emplace_hint( reducedOffsets.end(), ranks, offset );
+
+    return buildSpecificData(mesh, cellIds);
+  }
+
+  Exchange buildExchangeData(vtkSmartPointer<vtkDataSet> mesh)
+  {
+    return buildSpecificData(mesh, extractBoundaryCells(mesh));
   }

-  // Add the offsets associated to the new size buckets from the current rank.
-  GLB_IDX nextOffset{ 0 };
-  for( auto const & [ranks, size]: sizes )
+  /**
+   * @brief
+   * @tparam GLB_IDX The global index type of the geometrical quantity considered (typically @c EdgeGlbIdx or @c FaceGlbIdx).
+   * @param sizes The sizes of the intersection bucket (from the current MPI rank).
+   * @param offsets The offsets for each intersection bucket (from the MPI_Scan process, i.e. the previous ranks).
+   * @param curRank Current MPI rank.
+   * @return The offset buckets, updated with the sizes of the current MPI rank.
+   */
+  template<typename GLB_IDX>
+  std::map<std::set<MpiRank>, GLB_IDX> updateBucketOffsets(std::map<std::set<MpiRank>, localIndex> const &sizes,
+                                                           std::map<std::set<MpiRank>, GLB_IDX> const &offsets,
+                                                           MpiRank curRank)
   {
-    auto const it = reducedOffsets.find( ranks );
-    if( it == reducedOffsets.end() )
+    std::map<std::set<MpiRank>, GLB_IDX> reducedOffsets;
+
+    // We only keep the offsets that are still relevant to the current and higher ranks.
+    // Note that `reducedOffsets` will be used by the _current_ rank too.
+    // Therefore, we'll send information that may not be useful to higher ranks.
+    // This is surely acceptable because the information is tiny.
+    // We do this by looping through the buckets, finding the biggest rank, and then only keeping the bucket and its offset if that biggest rank is >= the current rank
    for (auto const &[ranks, offset] : offsets)
    {
-      reducedOffsets.emplace_hint( reducedOffsets.end(), ranks, nextOffset );
-      nextOffset += GLB_IDX{ size };
+      MpiRank const maxConcerned = *std::max_element(ranks.cbegin(), ranks.cend());
+      // TODO invert
+      //      if( maxConcerned >= curRank )
+      //      {
+      //        reducedOffsets.emplace_hint( reducedOffsets.end(), ranks, offset );
+      //      }
+      if (maxConcerned < curRank)
+      {
+        continue;
+      }
+      reducedOffsets.emplace_hint(reducedOffsets.end(), ranks, offset);
    }
-    else
+
+    // Add the offsets associated to the new size buckets from the current rank.
+    GLB_IDX nextOffset{0};
+    // Loop through the buckets in either (edges, faces)
+    for (auto const &[ranks, size] : sizes)
     {
-      nextOffset = it->second + GLB_IDX{ size }; // Define the new offset from the last
+      // See if this bucket was in the previously indexed data
+      // if so, then we just need to update the nextOffset, which will be used for the next new data
+      // if the data is new, then we add its offset to the set of indexed data in reducedOffsets
+      auto const it = reducedOffsets.find(ranks);
+      if (it == reducedOffsets.end())
+      {
+        reducedOffsets.emplace_hint(reducedOffsets.end(), ranks, nextOffset);
+        nextOffset += GLB_IDX{size};
+      }
+      else
+      {
+        nextOffset = it->second + GLB_IDX{size}; // Define the new offset from the last
+      }
     }
-  }

-  // Add an extra entry based for the following rank
-  reducedOffsets.emplace_hint( reducedOffsets.end(), std::set< MpiRank >{ curRank + 1_mpi }, nextOffset );
+    // Add an extra entry for the following rank
+    // There are edge cases where a rank cannot start counting without this
+    reducedOffsets.emplace_hint(reducedOffsets.end(), std::set<MpiRank>{curRank + 1_mpi}, nextOffset);

-  return reducedOffsets;
-}
+    return reducedOffsets;
+  }

-// TODO Duplicated
-std::map< MpiRank, Exchange > exchange( int commId,
-                                        std::vector< NeighborCommunicator > & neighbors,
-                                        Exchange const & data )
-{
-  MPI_iCommData commData( commId );
-  integer const numNeighbors = LvArray::integerConversion< integer >( neighbors.size() );
-  commData.resize( numNeighbors );
+  // TODO Duplicated
+  std::map<MpiRank, Exchange> exchange(int commId,
+                                       std::vector<NeighborCommunicator> &neighbors,
+                                       Exchange const &data)
+  {
+    MPI_iCommData commData(commId);
+    integer const numNeighbors = LvArray::integerConversion<integer>(neighbors.size());
+    commData.resize(numNeighbors);
+    array1d<std::uint8_t> const cv = convertExchange(data);

-  array1d< std::uint8_t > const cv = convertExchange( data );
+    for (integer i = 0; i < numNeighbors; ++i)
+    {
+      neighbors[i].mpiISendReceiveSizes(cv,
+                                        commData.mpiSendBufferSizeRequest(i),
+                                        commData.mpiRecvBufferSizeRequest(i),
+                                        commId,
+                                        MPI_COMM_GEOSX);
+    }

-  for( integer i = 0; i < numNeighbors; ++i )
-  {
-    neighbors[i].mpiISendReceiveSizes( cv,
-                                       commData.mpiSendBufferSizeRequest( i ),
-                                       commData.mpiRecvBufferSizeRequest( i ),
-                                       commId,
-                                       MPI_COMM_GEOSX );
-  }
+    MpiWrapper::waitAll(numNeighbors, commData.mpiSendBufferSizeRequest(), commData.mpiSendBufferSizeStatus());
+    MpiWrapper::waitAll(numNeighbors, commData.mpiRecvBufferSizeRequest(), commData.mpiRecvBufferSizeStatus());

-  MpiWrapper::waitAll( numNeighbors, commData.mpiSendBufferSizeRequest(), commData.mpiSendBufferSizeStatus() );
-  MpiWrapper::waitAll( numNeighbors, commData.mpiRecvBufferSizeRequest(), commData.mpiRecvBufferSizeStatus() );
+    array1d<array1d<std::uint8_t>> rawExchanged(neighbors.size());

-  array1d< array1d< std::uint8_t > > rawExchanged( neighbors.size() );
+    for (integer i = 0; i < numNeighbors; ++i)
+    {
+      neighbors[i].mpiISendReceiveData(cv,
+                                       commData.mpiSendBufferRequest(i),
+                                       rawExchanged[i],
+                                       commData.mpiRecvBufferRequest(i),
+                                       commId,
+                                       MPI_COMM_GEOSX);
+    }
+    MpiWrapper::waitAll(numNeighbors, commData.mpiSendBufferRequest(), commData.mpiSendBufferStatus());
+    MpiWrapper::waitAll(numNeighbors, commData.mpiRecvBufferRequest(), commData.mpiRecvBufferStatus());

-  for( integer i = 0; i < numNeighbors; ++i )
-  {
-    neighbors[i].mpiISendReceiveData( cv,
-                                      commData.mpiSendBufferRequest( i ),
-                                      rawExchanged[i],
-                                      commData.mpiRecvBufferRequest( i ),
-                                      commId,
-                                      MPI_COMM_GEOSX );
+    std::map<MpiRank, Exchange> output;
+    for (auto i = 0; i < numNeighbors; ++i)
+    {
+      output[MpiRank{neighbors[i].neighborRank()}] = convertExchange(rawExchanged[i]);
+    }
+    return output;
   }
-  MpiWrapper::waitAll( numNeighbors, commData.mpiSendBufferRequest(), commData.mpiSendBufferStatus() );
-  MpiWrapper::waitAll( numNeighbors, commData.mpiRecvBufferRequest(), commData.mpiRecvBufferStatus() );

-  std::map< MpiRank, Exchange > output;
-  for( auto i = 0; i < numNeighbors; ++i )
+  /**
+   * @brief
+   * @tparam T Typically NodeGlbIdx or a container of NodeGlbIdx (for edges and faces)
+   * @param exchanged
+   * @param curRank
+   * @param neighbors
+   * @return
+   */
+  template<typename T>
+  std::map<std::set<MpiRank>, std::set<T>>
+  buildIntersectionBuckets(std::map<MpiRank, std::set<T> const &> const &exchanged,
+                           MpiRank curRank,
+                           std::set<MpiRank> const &neighbors)
   {
-    output[MpiRank{ neighbors[i].neighborRank() }] = convertExchange( rawExchanged[i] );
-  }
-  return output;
-}
-
-/**
- * @brief
- * @tparam T Typically NodeGloIdx or a container of NodeGlbIdx (for edges and faces)
- * @param exchanged
- * @param curRank
- * @param neighbors
- * @return
- */
-template< typename T >
-std::map< std::set< MpiRank >, std::set< T > >
-buildIntersectionBuckets( std::map< MpiRank, std::set< T > const & > const & exchanged,
-                          MpiRank curRank,
-                          std::set< MpiRank > const & neighbors )
-{
-  std::map< T, std::set< MpiRank > > counts;  // TODO Use better intersection algorithms?
-  // We "register" all the edges of the current rank: they are the only one we're interested in.
-  for( T const & node: exchanged.at( curRank ) )
-  {
-    counts.emplace_hint( counts.end(), node, std::set< MpiRank >{ curRank } );
-  }
+    // counts - which ranks use each (node, edge, face)
+    std::map<T, std::set<MpiRank>> counts; // TODO Use better intersection algorithms?

-  // We now loop on the neighbor edges.
-  // If a neighbor has an edge in common with the current rank, they we store it.
-  for( MpiRank const & neighborRank: neighbors )  // This does not include the current rank.
-  {
-    for( T const & node: exchanged.at( neighborRank ) )
+    // We "register" all the edges of the current rank: they are the only ones we're interested in.
+    // i.e. here exchanged holds just one kind of entity (nodes, edges, or faces); we add a key to
+    // counts for each entity and record that it is used by the current rank
+    for (T const &node : exchanged.at(curRank))
+    {
+      counts.emplace_hint(counts.end(), node, std::set<MpiRank>{curRank});
+    }
+
+    // We now loop on the neighbor edges.
+    // If a neighbor has an edge in common with the current rank, then we store it.
+    for (MpiRank const &neighborRank : neighbors) // This does not include the current rank.
     {
-      auto it = counts.find( node );
-      if( it != counts.cend() )  // TODO Extract `counts.cend()` out of the loop.
+      // for each of the (nodes, edges, faces) on the boundary of the other ranks,
+      // record that that entry is also used by the neighbor rank
+      for (T const &node : exchanged.at(neighborRank))
       {
-        it->second.insert( neighborRank );
+        auto it = counts.find(node);
+        if (it != counts.cend()) // TODO Extract `counts.cend()` out of the loop.
+        {
+          it->second.insert(neighborRank);
+        }
       }
     }
-  }

-  std::map< std::set< MpiRank >, std::set< T > > nodeBuckets;
-  for( auto const & [node, ranks]: counts )
-  {
-    if( ranks.find( curRank ) != ranks.cend() )
+    // counts is essentially the inverse of the relationship we want,
+    // so now invert it: the key becomes the set of sharing MPI ranks and the value the set of (nodes, edges, faces)
+    std::map<std::set<MpiRank>, std::set<T>> nodeBuckets;
+    for (auto const &[node, ranks] : counts)
     {
-      nodeBuckets[ranks].insert( node );
+      if (ranks.find(curRank) != ranks.cend())
+      {
+        nodeBuckets[ranks].insert(node);
+      }
     }
+    return nodeBuckets;
   }
-  return nodeBuckets;
-}
-
-/**
- * @brief Compute the intersection between for the ranks based on the information @p exchanged.
- * @param exchanged Geometrical information provided by the ranks (including the current rank).
- * @param curRank The current MPI rank.
- * @param neighbors excluding current rank
- * @return The intersection buckets.
- */
-Buckets buildIntersectionBuckets( std::map< MpiRank, Exchange > const & exchanged,
-                                  MpiRank curRank,
-                                  std::set< MpiRank > const & neighbors )
-{
-  std::map< MpiRank, std::set< NodeGlbIdx > const & > nodeInfo;
-  for( auto const & [rank, exchange]: exchanged )
-  {
-    nodeInfo.emplace( rank, exchange.nodes );
-  }
-  std::map< std::set< MpiRank >, std::set< NodeGlbIdx > > const nodeBuckets = buildIntersectionBuckets( nodeInfo, curRank, neighbors );

-  std::map< MpiRank, std::set< Edge > const & > edgeInfo;
-  for( auto const & [rank, exchange]: exchanged )
+  /**
+   * @brief Compute the intersections between the ranks based on the information @p exchanged.
+   * @param exchanged Geometrical information provided by the ranks (including the current rank).
+   * @param curRank The current MPI rank.
+   * @param neighbors excluding current rank
+   * @return The intersection buckets.
+   */
+  Buckets buildIntersectionBuckets(std::map<MpiRank, Exchange> const &exchanged,
+                                   MpiRank curRank,
+                                   std::set<MpiRank> const &neighbors)
   {
-    edgeInfo.emplace( rank, exchange.edges );
-  }
-  std::map< std::set< MpiRank >, std::set< Edge > > const edgeBuckets = buildIntersectionBuckets( edgeInfo, curRank, neighbors );
+    // create storage for the set of nodes owned by each rank, then fill it using the exchange info;
+    // this will be passed to the function which computes the intersections
+    std::map<MpiRank, std::set<NodeGlbIdx> const &> nodeInfo;
+    for (auto const &[rank, exchange] : exchanged)
+    {
+      nodeInfo.emplace(rank, exchange.nodes);
+    }
+    std::map<std::set<MpiRank>, std::set<NodeGlbIdx>> const nodeBuckets = buildIntersectionBuckets(nodeInfo, curRank, neighbors);

-  // For faces, the algorithm can be a tad simpler because faces can be shared by at most 2 ranks.
-  std::map< std::set< MpiRank >, std::set< Face > > faceBuckets;
-  std::set< Face > curFaces = exchanged.at( curRank ).faces;
-  for( MpiRank const & neighborRank: neighbors )  // This does not include the current rank.
+    // Repeat the process for the edges
+    std::map<MpiRank, std::set<Edge> const &> edgeInfo;
+    for (auto const &[rank, exchange] : exchanged)
+    {
+      edgeInfo.emplace(rank, exchange.edges);
+    }
+    std::map<std::set<MpiRank>, std::set<Edge>> const edgeBuckets = buildIntersectionBuckets(edgeInfo, curRank, neighbors);
+
+    // For faces, the algorithm can be a tad simpler because faces can be shared by at most 2 ranks.
+    std::map<std::set<MpiRank>, std::set<Face>> faceBuckets;
+    std::set<Face> curFaces = exchanged.at(curRank).faces;
+    for (MpiRank const &neighborRank : neighbors) // This does not include the current rank.
    {
-    for( Face const & face: exchanged.at( neighborRank ).faces )
+      for (Face const &face : exchanged.at(neighborRank).faces)
       {
-      auto it = curFaces.find( face );
-      if( it != curFaces.cend() )
+        auto it = curFaces.find(face);
+        if (it != curFaces.cend())
         {
-        faceBuckets[{ curRank, neighborRank }].insert( *it );
-        curFaces.erase( it );
+          faceBuckets[{curRank, neighborRank}].insert(*it);
+          curFaces.erase(it);
+        }
       }
     }
-  }
-  faceBuckets[{ curRank }] = curFaces;
+    faceBuckets[{curRank}] = curFaces;
+
+    return {nodeBuckets, edgeBuckets, faceBuckets};
+  }

-  return { nodeBuckets, edgeBuckets, faceBuckets };
-}
+  /**
+   * @brief Estimate an upper bound of the serialized size of the size buckets @p buckets.
+   * @param buckets
+   * @return Size in bytes.
+   * @details @c MPI_Scan requires a fixed buffer size.
+   * An a-priori estimation of the maximum size of the buffer leads to crazy amounts of memory
+   * because of the combinatorial pattern of the rank intersections.
+   * Therefore, we use an initial MPI communication to get a better estimation.
+   * @note We don't care about the @c nodes because their global numbering is already done.
+   */
+  std::size_t buildMaxBufferSize(Buckets const &buckets) // TODO give the size buckets instead?
+  {
+    // helper lambda which computes the serialized-size contribution of one bucket map;
+    // the per-rank results are then summed across ranks (MpiWrapper::sum below)
+    auto const f = [](auto const &bucket) -> std::size_t
+    {
+      std::size_t size = std::size(bucket);
+      for (auto const &[ranks, _] : bucket)
+      {
+        size += std::size(ranks) + 1 + 1; // One `+1` for the size of the ranks set, the other one for the offset
+      }
+      return size;
+    };
+    return MpiWrapper::sum(f(buckets.edges) + f(buckets.faces)); // Add the ScannedOffsets::{edge, face}Restart
+  }

-/**
- * @brief Estimate an upper bound of the serialized size of the size @p buckets.
- * @param buckets
- * @return Size in bytes.
- * @details @c MPI_Scan requires a fixed buffer size.
- * An a-priori estimation of the maximum size of the buffer leads to crazy amounts of memory
- * because of the combinatorial pattern of the rank intersections.
- * Therefore, we use an initial MPI communication to get a better estimation.
- * @node We don't care about the @c nodes because their global numbering is already done.
- */
-std::size_t buildMaxBufferSize( Buckets const & buckets ) // TODO give the size buckets instead?
-{
-  auto const f = []( auto const & bucket ) -> std::size_t
+  struct BucketSizes
   {
-    std::size_t size = std::size( bucket );
-    for( auto const & [ranks, _]: bucket )
-    {
-      size += std::size( ranks ) + 1 + 1;  // One `+1` for the size of the ranks set, the other one for the offset
-    }
-    return size;
+    using mapping = std::map<std::set<MpiRank>, localIndex>;
+    mapping edges;
+    mapping faces;
   };
-  return MpiWrapper::sum( f( buckets.edges ) + f( buckets.faces ) );  // Add the ScannedOffsets::{edge, face}Restart
-}

+  void to_json(json &j,
+               const BucketSizes &v)
+  {
+    j = json{{"edges", v.edges},
+             {"faces", v.faces}};
+  }

-struct BucketSizes
-{
-  using mapping = std::map< std::set< MpiRank >, localIndex >;
-  mapping edges;
-  mapping faces;
-};
+  void from_json(const json &j,
+                 BucketSizes &v)
+  {
+    v.edges = j.at("edges").get<BucketSizes::mapping>();
+    v.faces = j.at("faces").get<BucketSizes::mapping>();
+  }

-void to_json( json & j,
-              const BucketSizes & v )
-{
-  j = json{ { "edges", v.edges },
-            { "faces", v.faces } };
-}
+  std::vector<std::uint8_t> serialize(BucketSizes const &sizes)
+  {
+    return json::to_cbor(json(sizes));
+  }

-void from_json( const json & j,
-                BucketSizes & v )
-{
-  v.edges = j.at( "edges" ).get< BucketSizes::mapping >();
-  v.faces = j.at( "faces" ).get< BucketSizes::mapping >();
-}
+  std::vector<std::uint8_t> serialize(BucketOffsets const &offsets)
+  {
+    return json::to_cbor(json(offsets));
+  }

-std::vector< std::uint8_t > serialize( BucketSizes const & sizes )
-{
-  return json::to_cbor( json( sizes ) );
-}
+  /**
+   * @brief Compute the sizes of the intersection buckets.
+   * @param buckets The intersection buckets.
+   * @return The buckets of sizes.
+   */
+  BucketSizes getBucketSize(Buckets const &buckets)
+  {
+    BucketSizes output;

-std::vector< std::uint8_t > serialize( BucketOffsets const & offsets )
-{
-  return json::to_cbor( json( offsets ) );
-}
+    for (auto const &[ranks, edges] : buckets.edges)
+    {
+      output.edges.emplace_hint(output.edges.end(), ranks, std::size(edges));
+    }

-/**
- * @brief Compute the sizes of the intersection buckets.
- * @param buckets The intersection buckets.
- * @return The buckets of sizes.
- */
-BucketSizes getBucketSize( Buckets const & buckets )
-{
-  BucketSizes output;
+    for (auto const &[ranks, faces] : buckets.faces)
+    {
+      output.faces.emplace_hint(output.faces.end(), ranks, std::size(faces));
+    }

-  for( auto const & [ranks, edges]: buckets.edges )
-  {
-    output.edges.emplace_hint( output.edges.end(), ranks, std::size( edges ) );
+    return output;
   }

-  for( auto const & [ranks, faces]: buckets.faces )
+  /**
+   * @brief
+   * @tparam V Container of std::uint8_t
+   * @param data
+   * @return
+   */
+  template<class V>
+  BucketOffsets deserialize(V const &data)
   {
-    output.faces.emplace_hint( output.faces.end(), ranks, std::size( faces ) );
+    return json::from_cbor(data, false).template get<BucketOffsets>();
   }

-  return output;
-}
+  /**
+   * @brief Custom MPI reduction function. Merges the sizes of the bucket from current rank into numbering offsets.
+   * @param[in] in Contains the reduced result from the previous ranks. Needs to be unpacked into a @c BucketOffsets instance.
+   * @param[inout] inout As an @e input, contains a pointer to the @c BucketSizes instance from the current rank.
+   * As an @e output, will contain the (packed) instance of @c BucketOffsets after the @c reduction.
+   * The @e output instance will be used by both current and next ranks.
+   * @param[in] len Number of data.
+   * @param[in] dataType Type of data. Meant to be @c MPI_BYTE for the current case.
+   */
+  void f(void *in,
+         void *inout,
+         int *len,
+         MPI_Datatype *dataType)
+  {
+    GEOS_ASSERT_EQ(*dataType, MPI_BYTE);

-/**
- * @brief
- * @tparam V Container of std::uint8_t
- * @param data
- * @return
- */
-template< class V >
-BucketOffsets deserialize( V const & data )
-{
-  return json::from_cbor( data, false ).template get< BucketOffsets >();
-}
-
-/**
- * @brief Custom MPI reduction function. Merges the sizes of the bucket from current rank into numbering offsets.
- * @param[in] in Contains the reduced result from the previous ranks. Needs to be unpacked into a @c BucketOffsets instance.
- * @param[inout] inout As an @e input, contains a pointer to the @c BucketSizes instance from the current rank.
- * As an @e output, will contained the (packed) instance of @c BucketOffsets after the @c reduction.
- * The @e output instance will be used by both current and next ranks.
- * @param[in] len Number of data.
- * @param[in] dataType Type of data. Mean to be @c MPI_BYTE for the current case.
- */
-void f( void * in,
-        void * inout,
-        int * len,
-        MPI_Datatype * dataType )
-{
-  GEOS_ASSERT_EQ( *dataType, MPI_BYTE );
+    MpiRank const curRank{MpiWrapper::commRank()};
+
+    // offsets provided by the previous rank(s)
+    BucketOffsets const offsets = deserialize(Span<std::uint8_t>((std::uint8_t *)in, *len));
+
+    // Sizes provided by the current rank, under the form of a pointer to the data.
+    // No need to serialize, since we're on the same rank.
+    std::uintptr_t addr;
+    std::memcpy(&addr, inout, sizeof(std::uintptr_t));
+    BucketSizes const *sizes = reinterpret_cast<BucketSizes const *>(addr);

-  MpiRank const curRank{ MpiWrapper::commRank() };
+    BucketOffsets updatedOffsets;
+    updatedOffsets.edges = updateBucketOffsets<EdgeGlbIdx>(sizes->edges, offsets.edges, curRank);
+    updatedOffsets.faces = updateBucketOffsets<FaceGlbIdx>(sizes->faces, offsets.faces, curRank);

-  // offsets provided by the previous rank(s)
-  BucketOffsets const offsets = deserialize( Span< std::uint8_t >( (std::uint8_t *) in, *len ) );
+    // Serialize the updated offsets, so they get sent to the next rank.
+    std::vector<std::uint8_t> const serialized = serialize(updatedOffsets);
+    std::memcpy(inout, serialized.data(), serialized.size());
+  }

-  // Sizes provided by the current rank, under the form of a pointer to the data.
-  // No need to serialize, since we're on the same rank.
-  std::uintptr_t addr;
-  std::memcpy( &addr, inout, sizeof( std::uintptr_t ) );
-  BucketSizes const * sizes = reinterpret_cast<BucketSizes const *>(addr);
+  std::tuple<Buckets, BucketOffsets> doTheNewGlobalNumbering(vtkSmartPointer<vtkDataSet> mesh,
+                                                             std::set<MpiRank> const &neighbors)
+  {
+    // Now we exchange the data with our neighbors.
+    MpiRank const curRank{MpiWrapper::commRank()};

-  BucketOffsets updatedOffsets;
-  updatedOffsets.edges = updateBucketOffsets< EdgeGlbIdx >( sizes->edges, offsets.edges, curRank );
-  updatedOffsets.faces = updateBucketOffsets< FaceGlbIdx >( sizes->faces, offsets.faces, curRank );
+    // ncs holds communicators to the neighboring ranks
+    std::vector<NeighborCommunicator> ncs;
+    ncs.reserve(neighbors.size());
+    for (MpiRank const &rank : neighbors)
+    {
+      ncs.emplace_back(rank.get());
+    }

-  // Serialize the updated offsets, so they get sent to the next rank.
-  std::vector< std::uint8_t > const serialized = serialize( updatedOffsets );
-  std::memcpy( inout, serialized.data(), serialized.size() );
-}
+    CommID const commId = CommunicationTools::getInstance().getCommID();

-std::tuple< Buckets, BucketOffsets > doTheNewGlobalNumbering( vtkSmartPointer< vtkDataSet > mesh,
-                                                              std::set< MpiRank > const & neighbors )
-{
-  // Now we exchange the data with our neighbors.
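+    // Sketch of the scan dataflow used further below (assuming three ranks), since the
+    // in/inout buffer juggling in f is easy to misread:
+    //   rank 0: sendBuffer = serialize( offsets_0 );  recvBuffer = sendBuffer  (f is not called)
+    //   rank 1: recvBuffer = f( in = recvBuffer_0, inout = &sizes_1 )  ->  serialized offsets_1
+    //   rank 2: recvBuffer = f( in = recvBuffer_1, inout = &sizes_2 )  ->  serialized offsets_2
+    // so after MPI_Scan each rank holds the offsets accumulated over all lower ranks
+    // plus its own bucket sizes, which is exactly what the sequential numbering needs.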
-  MpiRank const curRank{ MpiWrapper::commRank() };
+    // On the current rank, we have a map from the neighbor MPI rank to a corresponding set of exchanged data (sets of nodes, edges, faces)
+    // buildExchangeData(mesh) just extracts the boundary (faces) of the current rank mesh
+    // exchange() actually does the communication, so that the output exchanged maps from neighbor rank to their boundary data
+    std::map<MpiRank, Exchange> exchanged = exchange(int(commId), ncs, buildExchangeData(mesh));

-  std::vector< NeighborCommunicator > ncs;
-  ncs.reserve( neighbors.size() );
-  for( MpiRank const & rank: neighbors )
-  {
-    ncs.emplace_back( rank.get() );
-  }
+    // Then we just put the full mesh data for the current rank into the corresponding slot of exchanged; I think this is just for convenience
+    exchanged[MpiRank{MpiWrapper::commRank()}] = buildFullData(mesh);

-  CommID const commId = CommunicationTools::getInstance().getCommID();
-  std::map< MpiRank, Exchange > exchanged = exchange( int( commId ), ncs, buildExchangeData( mesh ) );
-  exchanged[MpiRank{ MpiWrapper::commRank() }] = buildFullData( mesh );
+    //std::cout << "exchanged on rank " << curRank << " -> " << json( exchanged[MpiRank{ MpiWrapper::commRank() }] ) << std::endl;

-// std::cout << "exchanged on rank " << curRank << " -> " << json( exchanged[MpiRank{ MpiWrapper::commRank() }] ) << std::endl;
+    // Buckets - for each of (nodes, edges, faces), map from set of MPI ranks to the set of (nodes, edges, faces) shared by those ranks
+    Buckets const buckets = buildIntersectionBuckets(exchanged, curRank, neighbors);

-  Buckets const buckets = buildIntersectionBuckets( exchanged, curRank, neighbors );
+    // Next step is communication.
+    // We will be passing around info about each rank's buckets, and to do this we need to know the size of the data to be sent
+    // Technically this size varies, as each rank has a different number of buckets
+    // For MPI scan, we need a fixed send/recv size, so we estimate with a large buffer size
+    // We don't want to do this too naively, as just taking the total possible combinations of rank intersections leads to a massive size
+    // Instead, we do an MPI operation here to estimate an appropriate size
+    // The idea is to just sum the size of buckets across the ranks
+    std::size_t const maxBufferSize = 100 * buildMaxBufferSize(buckets);

-  std::size_t const maxBufferSize = 100 * buildMaxBufferSize( buckets );
+    std::vector<std::uint8_t> sendBuffer(maxBufferSize);
+    std::vector<std::uint8_t> recvBuffer(maxBufferSize);

-  std::vector< std::uint8_t > sendBuffer( maxBufferSize );
-  std::vector< std::uint8_t > recvBuffer( maxBufferSize );
+    // The idea is that each rank needs to know where to start indexing the (nodes, edges, faces) it owns
+    // To do this, we need BucketOffsets, which are like buckets, but instead associate a global index to each set of MPI ranks, which marks the index of the first (node, edge, face) in that bucket
+    // To get this, we need to know the size of each bucket on the current rank, stored in BucketSizes
+    BucketSizes const sizes = getBucketSize(buckets);
+    BucketOffsets offsets;
+    if (curRank == 0_mpi)
+    {
+      // The `MPI_Scan` process will not call the reduction operator for rank 0.
+      // So we need to reduce for ourselves.
+      // updateBucketOffsets just takes the bucket sizes on the current rank,
+      // the previously computed offsets from other MPI ranks (none in this case),
+      // and then does the additions so that the global indices for the current buckets are known in offsets
+      offsets.edges = updateBucketOffsets<EdgeGlbIdx>(sizes.edges, {{{0_mpi}, 0_egi}}, curRank);
+      offsets.faces = updateBucketOffsets<FaceGlbIdx>(sizes.faces, {{{0_mpi}, 0_fgi}}, curRank);
+      // Still we need to send this reduction to the following rank, by copying it to the send buffer.
+      std::vector<std::uint8_t> const bytes = serialize(offsets);
+      std::memcpy(sendBuffer.data(), bytes.data(), bytes.size());
+    }
+    else
+    {
+      // For the other ranks, the reduction operator will be called during the `Mpi_Scan` process.
+      // So unlike for rank 0, we do not have to do it ourselves.
+      // In order to provide the `sizes` to the reduction operator,
+      // since `sizes` will only be used on the current rank,
+      // we'll provide the information as a pointer to the instance.
+      // The reduction operator will then compute the new offsets and send them to the following rank.
+      std::uintptr_t const addr = reinterpret_cast<std::uintptr_t>(&sizes);
+      std::memcpy(sendBuffer.data(), &addr, sizeof(std::uintptr_t));
+    }

-  BucketSizes const sizes = getBucketSize( buckets );
-  BucketOffsets offsets;
-  if( curRank == 0_mpi )
-  {
-    // The `MPI_Scan` process will not call the reduction operator for rank 0.
-    // So we need to reduce ourselves for ourselves.
-    offsets.edges = updateBucketOffsets< EdgeGlbIdx >( sizes.edges, { { { 0_mpi, }, 0_egi } }, curRank );
-    offsets.faces = updateBucketOffsets< FaceGlbIdx >( sizes.faces, { { { 0_mpi, }, 0_fgi } }, curRank );
-    // Still we need to send this reduction to the following rank, by copying to it to the send buffer.
-    std::vector< std::uint8_t > const bytes = serialize( offsets );
-    std::memcpy( sendBuffer.data(), bytes.data(), bytes.size() );
-  }
-  else
-  {
-    // For the other ranks, the reduction operator will be called during the `Mpi_Scan` process.
-    // So unlike for rank 0, we do not have to do it ourselves.
-    // In order to provide the `sizes` to the reduction operator,
-    // since `sizes` will only be used on the current rank,
-    // we'll provide the information as a pointer to the instance.
-    // The reduction operator will then compute the new offsets and send them to the following rank.
-    std::uintptr_t const addr = reinterpret_cast<std::uintptr_t>(&sizes);
-    std::memcpy( sendBuffer.data(), &addr, sizeof( std::uintptr_t ) );
-  }
+    // f is our MPI reduction function; all it essentially does is call updateBucketOffsets as above,
+    // but it also handles the packing/unpacking of the transferred data
+    // Note that, right now, packing/unpacking is done via json; this should be changed to the GEOS packing routines
+    MPI_Op op;
+    MPI_Op_create(f, false, &op);

-  MPI_Op op;
-  MPI_Op_create( f, false, &op );
+    MPI_Scan(sendBuffer.data(), recvBuffer.data(), maxBufferSize, MPI_BYTE, op, MPI_COMM_WORLD);

-  MPI_Scan( sendBuffer.data(), recvBuffer.data(), maxBufferSize, MPI_BYTE, op, MPI_COMM_WORLD );
+    if (curRank != 0_mpi)
+    {
+      offsets = deserialize(recvBuffer);
+    }

-  if( curRank != 0_mpi )
-  {
-    offsets = deserialize( recvBuffer );
+    // With the offsets for each bucket, we are done with global numbering,
+    // as each (edge, face) is numbered by starting from the offset for that bucket and counting up
+    return {std::move(buckets), std::move(offsets)};
   }

-  return { std::move( buckets ), std::move( offsets ) };
-}
-
 } // end of namespace
\ No newline at end of file

From 8fb37d7f7d54f6ac416a082288a6d3688c76a81c Mon Sep 17 00:00:00 2001
From: Ryan Aronson
Date: Mon, 24 Jun 2024 18:14:57 +0000
Subject: [PATCH 2/9] comments for constructing owned and present mesh data
 (Ryan's version)

---
 .../mesh/generators/NewGhosting.cpp        | 61 +++++++++++++++++--
 .../mesh/generators/NewGlobalNumbering.cpp |  7 ++-
 2 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/src/coreComponents/mesh/generators/NewGhosting.cpp b/src/coreComponents/mesh/generators/NewGhosting.cpp
index da484f44b93..413cd7aa9d2 100644
--- a/src/coreComponents/mesh/generators/NewGhosting.cpp
+++ b/src/coreComponents/mesh/generators/NewGhosting.cpp
@@ -84,6 +84,9 @@ MaxGlbIdcs gatherOffset( vtkSmartPointer< vtkDataSet > mesh,
                          EdgeGlbIdx const & maxEdgeId,
                          FaceGlbIdx const & maxFaceId )
 {
+  // MaxGlbIdcs is just a struct of the 4 index types (ints under the hood)
+  // edge and face max (global) index values are passed in,
+  // we use a little lambda below to get the node and cell max indices
   MaxGlbIdcs offsets{ NodeGlbIdx{ 0 }, maxEdgeId, maxFaceId, CellGlbIdx{ 0 } };

   auto const extract = []( vtkDataArray * globalIds ) -> vtkIdType
@@ -96,6 +99,9 @@ MaxGlbIdcs gatherOffset( vtkSmartPointer< vtkDataSet > mesh,
   offsets.nodes = NodeGlbIdx{ extract( mesh->GetPointData()->GetGlobalIds() ) };
   offsets.cells = CellGlbIdx{ extract( mesh->GetCellData()->GetGlobalIds() ) };

+  // I would have to look in detail at what these MPI lines do,
+  // but I'm fairly sure they are just handling type matters so that you can do communications
+  // with the MaxGlbIdcs type
   // Otherwise, use `MPI_Type_create_struct`.
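+  // A guess at what the elided type plumbing amounts to (the actual calls sit outside
+  // this hunk, so treat this as a hypothetical sketch): since the four underlying types
+  // are identical (see the static_asserts below), the struct can travel as one
+  // contiguous block of four integers, e.g.
+  //   MPI_Datatype t;
+  //   MPI_Type_contiguous( 4, MPI_LONG_LONG_INT, &t );
+  //   MPI_Type_commit( &t );
+  // and the custom op g then reduces the four entries element-wise with the max.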
 static_assert( std::is_same_v< NodeGlbIdx::UnderlyingType, EdgeGlbIdx::UnderlyingType > );
 static_assert( std::is_same_v< NodeGlbIdx::UnderlyingType, FaceGlbIdx::UnderlyingType > );
@@ -110,6 +116,8 @@ MaxGlbIdcs gatherOffset( vtkSmartPointer< vtkDataSet > mesh,
   MPI_Op op;
   MPI_Op_create( g, true, &op );

+  // Do an allReduce so that every rank knows the max index for (nodes, edges, faces, cells)
+  // g is the custom reduction function which takes the max for each of (nodes, edges, faces, cells)
   MaxGlbIdcs result( offsets );
   MPI_Allreduce( &offsets, &result, 1, t, op, MPI_COMM_WORLD );

@@ -170,13 +178,25 @@ std::tuple< MeshGraph, MeshGraph > buildMeshGraph( vtkSmartPointer< vtkDataSet >
                                                    BucketOffsets const & offsets,
                                                    MpiRank curRank )
 {
+  // MeshGraph (defined in BuildPods.hpp) is just a struct of maps which map:
+  // cell global index to vector of entries of type (face global index, extra stuff defining orientation of face) for each of the adjacent faces
+  // face global index to vector of entries of type (edge global index plus orientation) for each of the adjacent edges
+  // edge global index to tuple of node global indices
+  // node global index to 3d array of position
+  // We distinguish between the owned data and the present data
+  // present data is stuff that is owned by another rank, but is already present on the current rank because of the boundary exchange done previously.
+  // Note that present just includes the 2D boundary between ranks; we still need the cell info and the other faces, edges, and nodes which make up those cells
+  // (and further if we wanted deeper ghosting)
   MeshGraph owned, present;

+  // Any (node, edge, face, cell) is owned by the lowest rank on which it appears,
+  // so we define a lambda to compute if the current rank owns some piece of data
   auto const isCurrentRankOwning = [&curRank]( std::set< MpiRank > const & ranks ) -> bool
   {
     return curRank == *std::min_element( std::cbegin( ranks ), std::cend( ranks ) );
   };

+  // Get the nodal positions for nodes on the current rank mesh and store in the appropriate MeshGraph (owned/present)
   std::map< NodeGlbIdx, vtkIdType > const n2vtk = buildNgiToVtk( mesh );
   for( auto const & [ranks, nodes]: buckets.nodes )
   {
@@ -188,10 +208,13 @@ std::tuple< MeshGraph, MeshGraph > buildMeshGraph( vtkSmartPointer< vtkDataSet >
     }
   }

+  // Loop through edge data and assign to the owned/present graph
   for( auto const & [ranks, edges]: buckets.edges )
   {
     auto & e2n = isCurrentRankOwning( ranks ) ? owned.e2n : present.e2n;
-    EdgeGlbIdx egi = offsets.edges.at( ranks ); // TODO hack
+    // I'm not sure why this is a 'hack'; this seems like the way I would do it:
+    // namely get the edge offset for this bucket, then count upwards in global id for each edge in the bucket
+    EdgeGlbIdx egi = offsets.edges.at( ranks ); // TODO hack
     for( Edge const & edge: edges )
     {
       e2n[egi] = edge;
@@ -206,33 +229,40 @@ std::tuple< MeshGraph, MeshGraph > buildMeshGraph( vtkSmartPointer< vtkDataSet >
     e2n.insert( std::cbegin( m ), std::cend( m ) );
   }

-  // Simple inversion
+  // Simple inversion of the e2n (which didn't care about ownership) to a node2edge (which therefore also doesn't care about ownership)
+  // this is used in constructing the face2edge data below
   std::map< std::tuple< NodeGlbIdx, NodeGlbIdx >, EdgeGlbIdx > n2e;
   for( auto const & [e, n]: e2n )
   {
     n2e[n] = e; // TODO what about ownership?
   }

+  // Loop through the face data to assign to the owned/present graph
   for( auto const & [ranks, faces]: buckets.faces )
   {
     auto & f2e = isCurrentRankOwning( ranks ) ?
 owned.f2e : present.f2e;
+    // grab the initial index for faces in this bucket and then loop over these faces
     FaceGlbIdx fgi = offsets.faces.at( ranks );
     for( Face face: faces ) // Intentional copy for the future `emplace_back`.
     {
-      face.emplace_back( face.front() ); // Trick to build the edges.
+      face.emplace_back( face.front() ); // Trick to build the edges. Namely you need to build the edge that connects the last node to the first
       for( std::size_t i = 0; i < face.size() - 1; ++i )
       {
+        // An edge is just the pair of node global indices, plus a boolean telling you the order
+        // This is placed into an EdgeInfo struct (defined in BuildPods.hpp, but it's just this info)
         NodeGlbIdx const & n0 = face[i], & n1 = face[i + 1];
         std::pair< NodeGlbIdx, NodeGlbIdx > const p0 = std::make_pair( n0, n1 );
         std::pair< NodeGlbIdx, NodeGlbIdx > const p1 = std::minmax( n0, n1 );
-        EdgeInfo const info = { n2e.at( p1 ), std::uint8_t{ p0 != p1 } };
-        f2e[fgi].emplace_back( info );
+        EdgeInfo const info = { n2e.at( p1 ), std::uint8_t{ p0 != p1 } }; // n2e is used to get the edge global index from the node global indices
+        f2e[fgi].emplace_back( info ); // assign data to the proper graph
       }
-      ++fgi;
+      ++fgi; // increment the offset for the bucket
     }
   }

+  // create a map from node global indices to face global index which does not care about ownership, similar to n2e above
+  // this will be used in generating cell2face data, like how n2e was used in generating face2edge
   std::map< std::vector< NodeGlbIdx >, FaceGlbIdx > n2f;
   for( auto const & [ranks, faces]: buckets.faces )
   {
@@ -253,6 +283,7 @@ std::tuple< MeshGraph, MeshGraph > buildMeshGraph( vtkSmartPointer< vtkDataSet >
     // TODO copy paste?
     for( auto f = 0; f < cell->GetNumberOfFaces(); ++f )
     {
+      // for each of the faces in the cell, get the global ids of the nodes
       vtkCell * face = cell->GetFace( f );
       vtkIdList * pids = face->GetPointIds();
       std::vector< NodeGlbIdx > faceNodes( pids->GetNumberOfIds() );
@@ -262,9 +293,17 @@ std::tuple< MeshGraph, MeshGraph > buildMeshGraph( vtkSmartPointer< vtkDataSet >
         vtkIdType const ngi = globalPtIds->GetValue( nli );
         faceNodes[i] = NodeGlbIdx{ ngi };
       }
+
+      // these nodes will be in the vtk ordering; we want a consistent ordering for each face
+      // reorderFaceNodes (defined in NewGlobalNumbering) does this by starting with the smallest node index,
+      // and then iterating in the direction of the smaller of its neighbors
       bool isFlipped;
       std::uint8_t start;
       std::vector< NodeGlbIdx > const reorderedFaceNodes = reorderFaceNodes( faceNodes, isFlipped, start );
+
+      // add the face data to the vector for this cell in the owned graph.
+      // note there will never be any 'present' cells, as the earlier exchange only communicates the 2D boundary between ranks
+      // note we are using the n2f built above to go from the vector of nodes (properly ordered) to the face global ID
       owned.c2f[gci].emplace_back( FaceInfo{ n2f.at( reorderedFaceNodes ), isFlipped, start } );
     }
   }
@@ -1131,10 +1170,20 @@ void doTheNewGhosting( vtkSmartPointer< vtkDataSet > mesh,

   GEOS_LOG_RANK( "offsets on rank " << curRank << " -> " << json( offsets ) );

+  // Now we grab the biggest global indices for each of (nodes, edges, faces, cells), as this will help build the adjacency matrix
+  // gatherOffset does an allReduce of the max indices on each rank
+  // note we use the extra entry in the offsets for the next rank, which specifies where it starts, to get the max index on this rank
+  // so we don't have to do any extra counting or anything in the current rank buckets
   MpiRank const nextRank = curRank + 1_mpi;
   MaxGlbIdcs const matrixOffsets = gatherOffset( mesh, offsets.edges.at( { nextRank } ) - 1_egi, offsets.faces.at( { nextRank } ) - 1_fgi );
   std::cout << "matrixOffsets on rank " << curRank << " -> " << json( matrixOffsets ) << std::endl;

+  // Now we build the data describing the mesh on the current rank.
+  // owned and present are structs of maps which map:
+  // cell global index to vector of entries of type (face global index, extra stuff defining orientation of face) for each of the adjacent faces
+  // face global index to vector of entries of type (edge global index plus orientation) for each of the adjacent edges
+  // edge global index to tuple of node global indices
+  // node global index to 3d array of position
   auto const [owned, present] = buildMeshGraph( mesh, buckets, offsets, curRank ); // TODO change into buildOwnedMeshGraph?
 //  if( curRank == 1_mpi )
 //  {
diff --git a/src/coreComponents/mesh/generators/NewGlobalNumbering.cpp b/src/coreComponents/mesh/generators/NewGlobalNumbering.cpp
index 9b28694099b..e9fc77c6cdb 100644
--- a/src/coreComponents/mesh/generators/NewGlobalNumbering.cpp
+++ b/src/coreComponents/mesh/generators/NewGlobalNumbering.cpp
@@ -99,22 +99,27 @@ namespace geos::ghosting

     return boundaryCellIdxs;
   }
-
+  // Reorder nodes by starting with the smallest node index, and then iterating in the direction of the smaller of its neighbors
   Face reorderFaceNodes(std::vector<NodeGlbIdx> const &nodes, bool &isFlipped, std::uint8_t &start) // TODO unit test
   {
     std::size_t const n = nodes.size();

     std::vector<NodeGlbIdx> f(nodes);

+    // Get the smallest global node Id in the face
     auto const cit = std::min_element(std::cbegin(nodes), std::cend(nodes));
     auto const minIdx = std::distance(std::cbegin(nodes), cit);

+    // get the adjacent node global IDs in the vector (treated as a circular buffer)
     std::size_t const prevIdx = minIdx == 0 ? n - 1 : minIdx - 1;
     std::size_t const nextIdx = minIdx == intConv<int>(n) - 1 ? 0 : minIdx + 1;

+    // we will order by starting with the smallest ID and then iterating in the direction of its smaller neighbor
+    // this is encoded by the isFlipped boolean (true if we iterate "left" from the start)
     start = intConv<std::uint8_t>(minIdx);
     isFlipped = nodes[prevIdx] < nodes[nextIdx];

+    // return the re-ordered nodes vector; isFlipped and start have also been set by this function
     std::size_t const pivot = isFlipped ?
 n - 1 - minIdx : minIdx;
     if (isFlipped)
     {

From 02e899ee24a2fd6367a997e147d7c520c6c98b6c Mon Sep 17 00:00:00 2001
From: Ryan Aronson
Date: Tue, 25 Jun 2024 20:30:35 +0000
Subject: [PATCH 3/9] comments for construction of local adjacency matrix data
 by each rank (Ryan's version)

---
 .../mesh/generators/NewGhosting.cpp | 70 ++++++++++++++++---
 1 file changed, 61 insertions(+), 9 deletions(-)

diff --git a/src/coreComponents/mesh/generators/NewGhosting.cpp b/src/coreComponents/mesh/generators/NewGhosting.cpp
index 98c8c5b2218..8ac94be79fc 100644
--- a/src/coreComponents/mesh/generators/NewGhosting.cpp
+++ b/src/coreComponents/mesh/generators/NewGhosting.cpp
@@ -398,6 +398,9 @@ TriCrsMatrix multiply( int commSize,
   return result_d0_2;
 }

+// Helper class which converts between matrix indices and (node, edge, face, cell) indices.
+// The matrix rows/cols represent all the nodes, then all the edges, then all the faces, then all the cells,
+// so conversion just means adding/subtracting the proper offsets
 class FindGeometricalType
 {
 public:
@@ -491,6 +494,14 @@ class FindGeometricalType
   TriGlbIdx const m_numEntries;
 };

+// Helper function to encode geometrical information into ints which can be entered into the adjacency matrix
+// For example, a face is connected to multiple edges, so in the face row there will be a nonzero entry in the col for each edge
+// But if we just put a 1 in each, we lose information like what order the edges were in and what order the nodes were in
+// So instead we put in a value such that we can reconstruct this info if needed later
+// For encoding the edges within a face, N = 2, basis = numEdges, array = { start node (0,1), index of edge within face },
+// so that result = start_node*numEdges + index_in_face
+// For encoding faces within a cell, N = 3, basis = numFaces, array = { isFlipped, start_index, index of face in cell },
+// so that result = (isFlipped*numFaces + start_index)*numFaces + index_in_cell
 template< std::uint8_t N >
 std::size_t encode( std::size_t const & basis,
                     std::array< std::size_t, N > const & array )
@@ -526,6 +537,7 @@ std::array< std::size_t, N > decode( std::size_t const & basis,
   return result;
 }

+// Structure which describes the adjacency matrix for the mesh graph with the data on the current rank
 struct Adjacency
 {
   std::vector< TriGlbIdx > ownedNodesIdcs;
   std::vector< TriGlbIdx > ownedGlbIdcs;
   std::vector< TriGlbIdx > otherGlbIdcs;
   std::vector< TriLocIdx > numEntriesPerRow;
   std::vector< std::vector< TriGlbIdx > > indices;
   std::vector< std::vector< TriScalarInt > > values;
 };

+// Function to populate the adjacency struct defined directly above
+// Note that we really build I+A, as in the end we will essentially be doing graph traversal by multiplying by (I+A)^n, ((I+A)^T)^n
+// Including I gives you everything in the graph 'up to' n steps away; without it you only get things 'exactly' n steps away
 Adjacency buildAdjacency( MeshGraph
const & owned, std::vector< std::vector< TriGlbIdx > > & indices = adjacency.indices; std::vector< std::vector< TriScalarInt > > & values = adjacency.values; + // we can go ahead and set sizes for the current rank matrix data ownedNodesIdcs.reserve( std::size( owned.n2pos ) ); ownedGlbIdcs.reserve( numOwned ); otherGlbIdcs.reserve( numOther ); @@ -560,6 +577,9 @@ Adjacency buildAdjacency( MeshGraph const & owned, indices.reserve( numOwned ); values.reserve( numOwned ); + // fill the matrix index data for the 'present' geometrical entities (nodes, edges, faces) + // Note that here we just note the indices, we dont fill any matrix entry or anything + // The filling of the matrix entries is done by the owning rank for( auto const & [ngi, _]: present.n2pos ) { otherGlbIdcs.emplace_back( convert.fromNodeGlbIdx( ngi ) ); @@ -572,9 +592,10 @@ Adjacency buildAdjacency( MeshGraph const & owned, { otherGlbIdcs.emplace_back( convert.fromFaceGlbIdx( fgi ) ); } - std::sort( std::begin( otherGlbIdcs ), std::end( otherGlbIdcs ) ); - GEOS_ASSERT_EQ( numOther, std::size( otherGlbIdcs ) ); + std::sort( std::begin( otherGlbIdcs ), std::end( otherGlbIdcs ) ); // I think that the data in n2pos, e2n, f2e was not necessarily sorted + GEOS_ASSERT_EQ( numOther, std::size( otherGlbIdcs ) ); // ensure we added the correct amount of stuff + // Now do owned data for( auto const & [ngi, _]: owned.n2pos ) { // Nodes depend on no other geometrical entity, @@ -594,11 +615,15 @@ Adjacency buildAdjacency( MeshGraph const & owned, // To keep the symmetry with the faces (see below), // - we store the local index (0 or 1) to express this information. // - we store the number of underlying nodes (2) on the diagonal. - size_t constexpr numNodes = std::tuple_size_v< decltype( nodes ) >; + size_t constexpr numNodes = std::tuple_size_v< decltype( nodes ) >; // should always be 2 + // This rank owns this row of the matrix ownedGlbIdcs.emplace_back( convert.fromEdgeGlbIdx( egi ) ); numEntriesPerRow.push_back( numNodes + 1 ); // `+1` comes from the diagonal + + // for the row corresponding to this edge global index, have entries in col corresponding to the + // node global indices it connects, and an entry on the diagonal indices.emplace_back( std::vector< TriGlbIdx >{ convert.fromNodeGlbIdx( std::get< 0 >( nodes ) ), convert.fromNodeGlbIdx( std::get< 1 >( nodes ) ), ownedGlbIdcs.back() } ); @@ -612,24 +637,33 @@ Adjacency buildAdjacency( MeshGraph const & owned, // Faces point to multiple edges, but their edges can be flipped w.r.t. the canonical edges // (ie minimal node global index comes first). // In order not to lose this information (held by an `EdgeInfo` instance), we serialise - // - the `EdgeInfo` instance, + // - the `EdgeInfo` instance, (recall `EdgeInfo` is just a struct with edge global ID and start node (0,1), see BuildPods.hpp) // - plus the order in which the edges are appearing, // as an integer and store it as an entry in the matrix. // On the diagonal, we'll store the number of edges of the face.
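+ //
+ // For illustration (hypothetical numbers, not from this patch): a face with numEdges = 4 whose
+ // edge at position i = 2 is traversed starting from its node 1 would be stored as
+ //   std::size_t const v = 1 + encode< 2 >( 4, { 1, 2 } );            // 1 + (1 * 4 + 2) = 7
+ // and the ghosting reconstruction further below recovers the information with
+ //   std::array< std::size_t, 2 > const d = decode< 2 >( 4, v - 1 );  // d = { 1, 2 }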
std::size_t const numEdges = std::size( edges ); + // This rank owns this row of the matrix ownedGlbIdcs.emplace_back( convert.fromFaceGlbIdx( fgi ) ); numEntriesPerRow.push_back( numEdges + 1 ); // `+1` comes from the diagonal + + // go ahead and add a vector of the correct size to represent the col indices and values for this row of the matrix (not yet populated) std::vector< TriGlbIdx > & ind = indices.emplace_back( numEntriesPerRow.back() ); std::vector< TriScalarInt > & val = values.emplace_back( numEntriesPerRow.back() ); + + // populate the data for this row of the matrix for( std::size_t i = 0; i < numEdges; ++i ) { + // col index is just obtained from edge global id in EdgeInfo for this edge EdgeInfo const & edgeInfo = edges[i]; ind[i] = convert.fromEdgeGlbIdx( edgeInfo.index ); + // use a single int for each edge col index to represent if the edge is flipped and position of edge in face + // in this case v = 1 + start_node*numEdges + index_in_face (again add 1 so that zero indexing becomes 1 indexing) std::size_t const v = 1 + encode< 2 >( numEdges, { edgeInfo.start, i } ); val[i] = intConv< TriScalarInt >( v ); } + // Add diagonal data ind.back() = ownedGlbIdcs.back(); val.back() = intConv< TriScalarInt >( numEdges ); } @@ -637,26 +671,40 @@ Adjacency buildAdjacency( MeshGraph const & owned, { // The same comment as for faces and edges applies for cells and faces (see above). // The main differences being that - // - the `FaceInfo` as a little more information, - // - the diagonal stores the type of the cell which conveys the number of face, nodes, ordering... + // - the `FaceInfo` as a little more information, + // (recall FaceInfo is a struct which holds the global face index, the starting node index, and boolean telling which direction the nodes should be traversed) + // - the diagonal stores the type of the cell which conveys the number of face, nodes, ordering... (note this is still a TODO, right now its just numFaces) std::size_t const numFaces = std::size( faces ); + // This rank owns this row of the matrix ownedGlbIdcs.emplace_back( convert.fromCellGlbIdx( cgi ) ); numEntriesPerRow.push_back( numFaces + 1 ); // `+1` comes from the diagonal + + // go ahead and add a vector of the correct size to represent the col indices and values for this row of the matrix (not yet populated) std::vector< TriGlbIdx > & ind = indices.emplace_back( numEntriesPerRow.back() ); std::vector< TriScalarInt > & val = values.emplace_back( numEntriesPerRow.back() ); + + // populate the data for this row of the matrix for( std::size_t i = 0; i < numFaces; ++i ) { + // col index is just gotten from the global id in FaceInfo for this face FaceInfo const & faceInfo = faces[i]; ind[i] = convert.fromFaceGlbIdx( faceInfo.index ); + + // Encode data with a single int like above, but now int tells us start_node, direction (flipped), and position of face in cell + // in this case v = 1 + (isFlipped*numFaces + start_index)*numFaces + index_in_cell (again add 1 so that zero indexing becomes 1 indexing) std::size_t const v = 1 + encode< 3 >( numFaces, { faceInfo.isFlipped, faceInfo.start, i } ); val[i] = intConv< TriScalarInt >( v ); } + // add diagonal data ind.back() = ownedGlbIdcs.back(); val.back() = intConv< TriScalarInt >( numFaces ); // TODO This should be Hex and the not the number of faces... 
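+
+ // For illustration (hypothetical numbers, not from this patch): in a hex cell (numFaces = 6),
+ // a face at position i = 3 which is flipped and starts at index 2 would be stored as
+ //   std::size_t const v = 1 + encode< 3 >( 6, { 1, 2, 3 } );         // 1 + ((1 * 6 + 2) * 6 + 3) = 52
+ //   std::array< std::size_t, 3 > const d = decode< 3 >( 6, v - 1 );  // d = { 1, 2, 3 }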
} - std::sort( std::begin( ownedGlbIdcs ), std::end( ownedGlbIdcs ) ); + std::sort( std::begin( ownedGlbIdcs ), std::end( ownedGlbIdcs ) ); + // I think that the data in n2pos, e2n, f2e was not necessarily sorted + // Im not sure why you dont need to sort the other data? - Ask Thomas + GEOS_ASSERT_EQ( numOwned, std::size( ownedGlbIdcs ) ); GEOS_ASSERT_EQ( numOwned, intConv< std::size_t >( numEntriesPerRow.size() ) ); @@ -677,14 +725,18 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & { using Teuchos::RCP; + // instantiate class which converts between matrix index and (node, edge, face, cell) index + // the max global indices of (node, edge, face, cell) describe where we change geometrical elements in the matrix indices, + // where all the geometrical entities are stacked one after the other FindGeometricalType const convert( gis.nodes, gis.edges, gis.faces, gis.cells ); using Geom = FindGeometricalType::Geom; std::size_t const n = convert.numEntries(); // Total number of entries in the graph. + // Build the adjacency matrix data for this rank (each rank fills in the values for geometry that it 'owns', but notes the geometry that is 'present') Adjacency const adjacency = buildAdjacency( owned, present, convert ); std::size_t const numOwned = std::size( adjacency.ownedGlbIdcs ); - std::size_t const numOther = std::size( adjacency.otherGlbIdcs ); + std::size_t const numOther = std::size( adjacency.otherGlbIdcs ); // Corresponds to the `present` geometrical entities // Aliases std::vector< TriGlbIdx > const & ownedNodesIdcs = adjacency.ownedNodesIdcs; From 28b15416b5062eaa532681ff54e19476930b9204 Mon Sep 17 00:00:00 2001 From: Ryan Aronson Date: Tue, 25 Jun 2024 23:48:42 +0000 Subject: [PATCH 4/9] Comments through ghostingFootprint (Ryan's version) --- .../mesh/generators/NewGhosting.cpp | 90 +++++++++++++++---- 1 file changed, 74 insertions(+), 16 deletions(-) diff --git a/src/coreComponents/mesh/generators/NewGhosting.cpp b/src/coreComponents/mesh/generators/NewGhosting.cpp index 8ac94be79fc..56e31384497 100644 --- a/src/coreComponents/mesh/generators/NewGhosting.cpp +++ b/src/coreComponents/mesh/generators/NewGhosting.cpp @@ -359,13 +359,27 @@ TriCrsMatrix multiply( int commSize, Teuchos::RCP< TriMap const > ownedMap = upward.getRowMap(); Teuchos::RCP< TriMap const > mpiMap = indicator.getRangeMap(); - // Upward (n -> e -> f -> c) + // row j of indicator was just ones in columns corresponding to present entities on rank j + // row i of upward was constructed as having nonzeros in the columns of entities on which the entity corresponding to row i directly depended + // Begin graph traversal by matrix matrix multiplying + // Trilinos matrix matrix multiply https://docs.trilinos.org/dev/packages/tpetra/doc/html/namespaceTpetra_1_1MatrixMatrix.html#a4c3132fa56ba6d5b071f053486529111 + + // Upward (n -> e -> f -> c) + + // u0_0 (num_entities x num_rank) = upward (num_entities x num_entities) * indicator^T (num_entities x num_rank) + // Using the `upward` interpretation, multiplying upward by each column of indicator^T would give + // a column vector noting all the faces in the global mesh which contain the edges owned by that rank, all the global cells which contain the owned faces, etc + // These are the columns of the result `result_u0_0` TriCrsMatrix result_u0_0( ownedMap, commSize ); Tpetra::MatrixMatrix::Multiply( upward, false, indicator, true, result_u0_0, false ); result_u0_0.fillComplete( mpiMap, ownedMap ); Tpetra::MatrixMarket::Writer< TriCrsMatrix
>::writeSparseFile( "/tmp/matrices/result-0.mat", result_u0_0 ); + // We repeat this process to make sure we catch all the dependencies, but im not convinced once isnt enough + // In theory, you iterate until the matrix stops changing + // TODO: test this hypothesis + // TODO: you might also be able to just do one multiplication with upward + upward^T to do everything at once, but perhaps decoding becomes an issue TriCrsMatrix result_u0_1( ownedMap, commSize ); Tpetra::MatrixMatrix::Multiply( upward, false, result_u0_0, false, result_u0_1, false ); result_u0_1.fillComplete( mpiMap, ownedMap ); @@ -376,8 +390,14 @@ TriCrsMatrix multiply( int commSize, result_u0_2.fillComplete( mpiMap, ownedMap ); Tpetra::MatrixMarket::Writer< TriCrsMatrix >::writeSparseFile( "/tmp/matrices/result-2.mat", result_u0_2 ); - // Downward (c -> f -> e -> n) + // Now we know, for each rank, all the geometrical quantities in the global mesh which include something from that rank + // Downward (c -> f -> e -> n) + // Note that we still need to do a traversal in the "opposite direction" + // Consider starting with some edge, and applying the process above. + // You will recover the faces containing it, and the cells which contain those + // What you wont get, for example, is any other (faces, edges, nodes) in that cell + // Thus we repeat the process with downward = upward^T TriCrsMatrix result_d0_0( ownedMap, commSize ); Tpetra::MatrixMatrix::Multiply( upward, true, result_u0_2, false, result_d0_0, false ); result_d0_0.fillComplete( mpiMap, ownedMap ); @@ -395,6 +415,10 @@ TriCrsMatrix multiply( int commSize, MpiWrapper::barrier(); + // Now we know, for each rank, the set of all global geometric entities which are included by any geometric entity that includes something from that rank + // (what a mouthful) + // Note that we have NOT made a distinction between present and owned on the rank though + return result_d0_2; } @@ -550,7 +574,7 @@ struct Adjacency // Function to populate the adjacency struct defined directly above // Note that we really build I+A, as in the end we will essentially be doing graph traversal by multiplying by (I+A)^n, ((I+A)^T)^n -// Inlcuding I gives you everything in the graph 'up to' n steps away, without it you only get things 'exactly' n steps away +// Including I gives you everything in the graph 'up to' n steps away, without it you only get things 'exactly' n steps away Adjacency buildAdjacency( MeshGraph const & owned, MeshGraph const & present, FindGeometricalType const & convert ) @@ -704,6 +728,10 @@ Adjacency buildAdjacency( MeshGraph const & owned, std::sort( std::begin( ownedGlbIdcs ), std::end( ownedGlbIdcs ) ); // I think that the data in n2pos, e2n, f2e was not necessarily sorted // Im not sure why you dont need to sort the other data? - Ask Thomas + // TODO: this may be a mistake, what we should do is make a copy and sort that, or sort the other data in the same order + // The sorted version is used later to compared the needed indices with the already available indices to determine what needs to be ghosted. 
+ // However, sorting here I think makes it so that you no longer know which row in the global matrix each entry in the vectors corresponds to + // May have been working if everything was already sorted, so the sort was a no-op GEOS_ASSERT_EQ( numOwned, std::size( ownedGlbIdcs ) ); @@ -723,6 +751,9 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & MaxGlbIdcs const & gis, MpiRank curRank ) { + // Trilinos tools package smart reference counting pointer, see + // https://docs.trilinos.org/dev/packages/teuchos/doc/html/classTeuchos_1_1RCP.html#details + // https://docs.trilinos.org/dev/packages/teuchos/doc/html/RefCountPtrBeginnersGuideSAND.pdf using Teuchos::RCP; // instantiate class which converts between matrix index and (node, edge, face, cell) index @@ -746,45 +777,68 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & std::vector< std::vector< TriGlbIdx > > const & indices = adjacency.indices; std::vector< std::vector< TriScalarInt > > const & values = adjacency.values; + // Makes a Trilinos tools MPI communicator and returns a smart pointer to it RCP< TriComm const > const comm = make_rcp< Teuchos::MpiComm< int > const >( MPI_COMM_GEOSX ); + // Makes a const TriMap, passing the arguments to the constructor of TriMap, returns smart pointer to it + // note TriMap = Tpetra::Map< TriLocIdx, TriGlbIdx >, created using this constructor https://docs.trilinos.org/dev/packages/tpetra/doc/html/classTpetra_1_1Map.html#a4a21c6654fc9fb6db05644a52cc0128d + // Ultimately just collecting who owns what parts of the global matrix auto const ownedMap = make_rcp< TriMap const >( Tpetra::global_size_t( n ), ownedGlbIdcs.data(), TriLocIdx( numOwned ), TriGlbIdx{ 0 }, comm ); // The `upward` matrix offers a representation of the graph connections. // The matrix is square. Each size being the number of geometrical entities. + // The matrix is just the assembled, global adjacency. + // Note that the matrix will end up being lower triangular, and banded because edge rows only have nonzeros in the node columns, faces only have nonzeros with edges, etc + // Quick note on why we call it upward: + // Consider a vector where we put a 1 in the row corresponding to some edge with matrix index i. + // Then multiply upward times this vector. The result has nonzeros in the same row i (because we filled the diagonal of the matrix) + // and also any other rows where column i had nonzeros, which is exactly the indices of the faces containing the edge.
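+ // For illustration, take a tiny hypothetical graph with entities ordered [ n0, n1, e0, f0 ],
+ // where e0 = (n0, n1) and f0 contains e0: upward * [ 0, 0, 1, 0 ]^T is non-zero exactly at
+ // rows e0 (the diagonal term) and f0 (the only row with a non-zero in the e0 column).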
+ // Note TriCrsMatrix = Tpetra::CrsMatrix< TriScalarInt , TriLocIdx, TriGlbIdx >; https://docs.trilinos.org/dev/packages/tpetra/doc/html/classTpetra_1_1CrsMatrix.html TriCrsMatrix upward( ownedMap, numEntriesPerRow() ); + // Fill the global matrix rows that this rank owns + //(note that this is where the sorting things I mentioned above becomes an issue, if you reorder only the ownedGlobalIDs you assign the wrong rows) for( std::size_t i = 0; i < numOwned; ++i ) { std::vector< TriGlbIdx > const & rowIndices = indices[i]; std::vector< TriScalarInt > const & rowValues = values[i]; GEOS_ASSERT_EQ( std::size( rowIndices ), numEntriesPerRow[i] ); GEOS_ASSERT_EQ( std::size( rowValues ), numEntriesPerRow[i] ); + //https://docs.trilinos.org/dev/packages/tpetra/doc/html/classTpetra_1_1CrsMatrix.html#a8d4784081c59f27391ad825c4dae5e86 upward.insertGlobalValues( ownedGlbIdcs[i], TriLocIdx( std::size( rowIndices ) ), rowValues.data(), rowIndices.data() ); } + // Signal that we are done changing the values/structure of the global adjacency matrix + // https://docs.trilinos.org/dev/packages/tpetra/doc/html/classTpetra_1_1CrsMatrix.html#aa985b225a24d2f74602e25b38b4430af upward.fillComplete( ownedMap, ownedMap ); + + // Note this can cause some issues if /tmp doesnt exist, etc (at least it did in my codespace) Tpetra::MatrixMarket::Writer< TriCrsMatrix >::writeSparseFile( "/tmp/matrices/upward.mat", upward ); int const commSize( MpiWrapper::commSize() ); + // Now we are going to build the various other matrices which will be used in conjunction with the adjacency to ghost + // Now let's build the domain indicator matrix. - // It's rectangular, number of rows being the number of MPI ranks, - // number of columns being the number of nodes in the mesh graph, - // ie the number of geometrical entities in the mesh. + // It's rectangular, number of rows being the number of MPI ranks (so each rank builds its row), + // number of columns being the number of nodes in the mesh graph, ie the number of geometrical entities in the mesh. // It contains one for every time the geometrical entity is present on the rank. - // By present, we do not meant that the ranks owns it: just that it's already available on the rank. - auto const mpiMap = make_rcp< TriMap const >( Tpetra::global_size_t( commSize ), TriGlbIdx{ 0 }, comm ); // Let the current rank get the appropriate index in this map. + // By present, we do not mean that the ranks owns it: just that it's already available on the rank. + auto const mpiMap = make_rcp< TriMap const >( Tpetra::global_size_t( commSize ), TriGlbIdx{ 0 }, comm ); // Let the current rank get the appropriate index in this map, total size is just num ranks. 
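+ // For illustration, reusing the hypothetical [ n0, n1, e0, f0 ] example with 2 ranks:
+ // if rank 0 owns n0 and e0 and additionally has n1 present, row 0 of indicator is [ 1, 1, 1, 0 ].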
TriCrsMatrix indicator( mpiMap, numOwned + numOther ); + // ones is a vector of size numOwned of 1s, a vector of size numOther of 1s, or a vector with a single entry of 1, whichever is biggest + // this is just a little trick to ensure you have a long enough vector of 1s to enter into the matrix std::vector< TriScalarInt > const ones( std::max( { numOwned, numOther, std::size_t( 1 ) } ), 1 ); + + // Insert into the row corresponding to my rank, putting ones in the column for every owned and present geometrical entity indicator.insertGlobalValues( TriGlbIdx( curRank.get() ), TriLocIdx( numOwned ), ones.data(), ownedGlbIdcs.data() ); indicator.insertGlobalValues( TriGlbIdx( curRank.get() ), TriLocIdx( numOther ), ones.data(), otherGlbIdcs.data() ); - indicator.fillComplete( ownedMap, mpiMap ); + indicator.fillComplete( ownedMap, mpiMap ); // Done modifying the entries/structure of matrix // The `ownership` matrix is a diagonal square matrix. - // Each size being the number of geometrical entities. + // Each size being the number of geometrical entities (so same size as adjacency). // The value of the diagonal term will be the owning rank. - // By means of matrices multiplications, it will be possible to exchange the ownerships information across the ranks. + // By means of matrix multiplications, it will be possible to exchange the ownerships information across the ranks. // TODO Could we use an Epetra_Vector as a diagonal matrix? std::vector< TriScalarInt > myRank( 1, curRank.get() ); TriCrsMatrix ownership( ownedMap, 1 ); @@ -795,6 +849,7 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & ownership.fillComplete( ownedMap, ownedMap ); Tpetra::MatrixMarket::Writer< TriCrsMatrix >::writeSparseFile( "/tmp/matrices/ownership.mat", ownership ); + // Log some info on these last 2 matrices created for debugging if( curRank == 0_mpi ) { GEOS_LOG_RANK( "indicator.NumGlobalCols() = " << indicator.getGlobalNumCols() ); @@ -806,24 +861,27 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & // The `ghostingFootprint` matrix is rectangular, // the number of columns being the number of MPI ranks, - // the number of rows being the number of nodes in the mesh graph. + // the number of rows being the number of nodes in the mesh graph, ie the total number of mesh geometric quantities + // (So it is the same size as the transpose of the domain indicator matrix) // - // For any MPI rank (ie column), a non-zero entry means + // For any MPI rank (ie column), a non-zero entry in a row means // that the corresponding geometrical entry has to be eventually present onto the rank // for the ghosting to be effective. // // From `ghostingFootprint` we can extract where the current rank has to send any owned graph node. + // Computed using the custom multiply function defined above, + // which does the equivalent of graph traversal by repeated multiplications of upward and upward^T TriCrsMatrix ghostingFootprint( multiply( commSize, indicator, upward ) ); ghostingFootprint.resumeFill(); - ghostingFootprint.setAllToScalar( 1. ); + ghostingFootprint.setAllToScalar( 1. ); // We put `1` everywhere there's a non-zero entry, so we'll be able to compose with the `ownership` matrix. ghostingFootprint.fillComplete( mpiMap, ownedMap ); Tpetra::MatrixMarket::Writer< TriCrsMatrix >::writeSparseFile( "/tmp/matrices/ghostingFootprint.mat", ghostingFootprint ); - // We put `1` everywhere there's a non-zero entry, so we'll be able to compose with the `ownership` matrix.
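+ // For illustration, in the hypothetical 2-rank example: if f0 must eventually live on rank 1
+ // (whether it is already there or still needs to be ghosted), ghostingFootprint has a 1 at
+ // row f0, column 1; column r thus collects everything that rank r must end up with.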
// FIXME TODO WARNING From `ghostingFootprint` extract where I have to send // Then, perform the ghostingFootprint * ownership matrix multiplication, // so I get to know from whom I'll receive. + // just some output for sanity checks if( curRank == 0_mpi ) { GEOS_LOG_RANK( "ghosted->NumGlobalCols() = " << ghostingFootprint.getGlobalNumCols() ); @@ -831,7 +889,7 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & } // The `ghostExchange` matrix is rectangular, - // the number of columns being the number of nodes in the mesh graph, + // the number of columns being the number of nodes in the mesh graph, ie the total number of mesh geometric quantities // the number of rows being the number of MPI ranks. // // As the result of the multiplication between the `ghostingFootprint` matrix and the `ownership` matrix, From 1e6a07f117a17556bc3ddc0d10b97758abf3e822 Mon Sep 17 00:00:00 2001 From: Ryan Aronson Date: Wed, 26 Jun 2024 20:25:23 +0000 Subject: [PATCH 5/9] Comments through construction of GhostSend/GhostRecv for each rank (Ryan's version) --- .../mesh/generators/NewGhosting.cpp | 71 ++++++++++++++++--- 1 file changed, 61 insertions(+), 10 deletions(-) diff --git a/src/coreComponents/mesh/generators/NewGhosting.cpp b/src/coreComponents/mesh/generators/NewGhosting.cpp index 56e31384497..ea67c36a30a 100644 --- a/src/coreComponents/mesh/generators/NewGhosting.cpp +++ b/src/coreComponents/mesh/generators/NewGhosting.cpp @@ -891,19 +891,26 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & // The `ghostExchange` matrix is rectangular, // the number of columns being the number of nodes in the mesh graph, ie the total number of mesh geometric quantities // the number of rows being the number of MPI ranks. + // ghostExchange (num_ranks x num_entities) = ghostingFootprint^T (num_ranks x num_entities) * ownership (num_entities x num_entities, diagonal) + // recall that ghostingFootprint has all the nonzeros set to 1 + // diagonal matrix just multiplies each col of ghostingFootprint^T (row of ghostingFootprint) by the corresponding diagonal entry + // So each column of ghostExchange will have nonzeros which are all the same value, with the value being the rank owning that entity + // Thus the nonzeros in row i tell us the ranks which own the entities "adjacent" to the entities on rank i // // As the result of the multiplication between the `ghostingFootprint` matrix and the `ownership` matrix, // for each row owned (ie at the current MPI rank index), - // the value of the `ghostExchange` matrix term will provide the actual owning rank for all the . + // the value of the `ghostExchange` matrix term will provide the actual owning rank for all the needed geometric entities. // // From `ghostExchange` we can extract which other rank will send to the current rank any graph node. - TriCrsMatrix ghostExchange( mpiMap, 10000 ); // TODO having `0` there should be working! + TriCrsMatrix ghostExchange( mpiMap, 10000 ); // TODO having `0` there should be working! We need to get a better estimate of number of entries Tpetra::MatrixMatrix::Multiply( ghostingFootprint, true, ownership, false, ghostExchange, false ); ghostExchange.fillComplete( ownedMap, mpiMap ); // TODO Do I have to work with `ghostingFootprint` if I already have `ghostExchange` which may convey more information? + // As we shall see, below we go back to working with ghostingFootprint // TODO Maybe because of the ownership of the ranks: one is also the "scaled" transposed of the other.
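+ // For illustration, in the hypothetical 2-rank example: if rank 0 needs n1 and f0 and both are
+ // owned by rank 1, then row 0 of ghostExchange holds the value 1 (the owner's rank) in the
+ // n1 and f0 columns.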
+ // debug output if( curRank == 0_mpi ) { GEOS_LOG_RANK( "ghostExchange->NumGlobalCols() = " << ghostExchange.getGlobalNumCols() ); @@ -911,20 +918,32 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & } Tpetra::MatrixMarket::Writer< TriCrsMatrix >::writeSparseFile( "/tmp/matrices/ghostInfo.mat", ghostExchange ); + // Now, for each of the entities owned by this current rank, we collect all the other ranks to which this entity is sent + + // GhostSend defined in BuildPods.hpp, + // struct of maps for each (node, edge, face, cell) giving the set of ranks each entity (given by node, edge, face, cell global index) must be sent to + GhostSend send; + + // temp data structures used in process std::size_t extracted = 0; Teuchos::Array< TriScalarInt > extractedValues; Teuchos::Array< TriGlbIdx > extractedIndices; - GhostSend send; + // Loop over the owned indices in the global matrix (owned geometric entities) for( TriGlbIdx const & index: ownedGlbIdcs ) { + // get the number of ranks which needed this entity and resize our temp data storage std::size_t const length = ghostingFootprint.getNumEntriesInGlobalRow( index ); extractedValues.resize( length ); // note we never actually use the values, just needed the indices where the nonzeros were extractedIndices.resize( length ); + // copy data into temp https://docs.trilinos.org/dev/packages/tpetra/doc/html/classTpetra_1_1RowMatrix.html#a5fd9651c8e5d1cc7dead6cadac342978 ghostingFootprint.getGlobalRowCopy( index, extractedIndices(), extractedValues(), extracted ); GEOS_ASSERT_EQ( extracted, length ); + // neighbors will be the set of mpi ranks which need this entity std::set< MpiRank > neighbors; + + // the ranks are the columns of ghostingFootprint, we just extracted the nonzeros for( std::size_t i = 0; i < extracted; ++i ) { MpiRank const rank( extractedIndices[i] ); @@ -934,11 +953,14 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & } } + // if this entity is not needed by anyone else, great, we're done if( std::empty( neighbors ) ) { continue; } + // if there are ranks which need this entity, we convert the global matrix index back to the (node, edge, face, cell) global index + // and add to the GhostSend struct switch( convert.getGeometricalType( index ) ) { case Geom::NODE: @@ -966,41 +988,69 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & GEOS_ERROR( "Internal error" ); } } - } + } // end loop over owned entities + + // Now we can look at the dual problem, namely for this current rank, what data is needed from other ranks + // Now we also have to pay attention to what non-owned data was already present on the rank + // Note that we are back to working with ghostExchange, not ghostingFootprint + // recall that the nonzeros in each row of this matrix told us which other ranks' data was needed for the current row rank std::size_t const numNeededIndices = ghostExchange.getNumEntriesInGlobalRow( TriGlbIdx( curRank.get() ) ); + + // copy into same temp vars as above extractedValues.resize( numNeededIndices ); extractedIndices.resize( numNeededIndices ); ghostExchange.getGlobalRowCopy( TriGlbIdx( curRank.get() ), extractedIndices(), extractedValues(), extracted ); GEOS_ASSERT_EQ( extracted, numNeededIndices ); + // create a set containing the indices needed, i.e.
the geometric entities belonging to other ranks (we use a set so we can later use set operations) std::set< TriGlbIdx > const allNeededIndices( std::cbegin( extractedIndices ), std::cend( extractedIndices ) ); - std::set< TriGlbIdx > receivedIndices; // The graph nodes that my neighbors will send me. + // create a set which will contain the geometric entities that my neighbors will send me. + std::set< TriGlbIdx > receivedIndices; + + GEOS_ASSERT_EQ( std::size( allNeededIndices ), numNeededIndices ); // we should not have lost anything by converting to a set + + // Fill receivedIndices by finding the needed indices for this rank which were not owned by this rank std::set_difference( std::cbegin( allNeededIndices ), std::cend( allNeededIndices ), std::cbegin( ownedGlbIdcs ), std::cend( ownedGlbIdcs ), std::inserter( receivedIndices, std::end( receivedIndices ) ) ); - std::vector< TriGlbIdx > notPresentIndices; // The graphs nodes that are nor owned neither present by/on the current rank. + + // of course, some of these indices were already communicated, they were the `present` data + // create a vector of entities which are needed, but neither owned nor present by/on the current rank. + std::vector< TriGlbIdx > notPresentIndices; std::set_difference( std::cbegin( receivedIndices ), std::cend( receivedIndices ), std::cbegin( otherGlbIdcs ), std::cend( otherGlbIdcs ), std::back_inserter( notPresentIndices ) ); + + // a bit below we will see that we need to actually send the positions of the ghosted nodes (everything else is just indices so far) + // so here we go through the notPresentIndices and make note of which are nodes (as opposed to edges, faces, cells) std::vector< TriGlbIdx > notPresentNodes; std::copy_if( std::cbegin( notPresentIndices ), std::cend( notPresentIndices ), std::back_inserter( notPresentNodes ), [&]( TriGlbIdx const & i ) { return convert.getGeometricalType( i ) == Geom::NODE; } ); - GEOS_ASSERT_EQ( std::size( allNeededIndices ), numNeededIndices ); + + // Finally we can construct our received entity information + // GhostRecv defined in BuildPods.hpp, + // struct of maps for each (node, edge, face, cell) giving the rank each entity (given by node, edge, face, cell global index) is coming from GhostRecv recv; for( std::size_t i = 0; i < extracted; ++i ) { + // matrix index of required entity TriGlbIdx const & index = extractedIndices[i]; + + // TODO: should this be notPresentIndices?? if( receivedIndices.find( index ) == std::cend( receivedIndices ) ) // TODO make a map `receivedIndices -> mpi rank` { continue; } - MpiRank const sender( extractedValues[i] ); + // get the owning rank of the required entity + MpiRank const sender( extractedValues[i] ); // finally making use of the values from ghostExchange + + // convert matrix index to geometry index and populate GhostRecv switch( convert.getGeometricalType( index ) ) { case Geom::NODE: @@ -1030,13 +1080,14 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & } } + // debug output // if( curRank == 1_mpi or curRank == 2_mpi ) // { // GEOS_LOG_RANK( "recv.edges = " << json( recv.edges ) ); // GEOS_LOG_RANK( "send.edges = " << json( send.edges ) ); // } - // At this point, each rank knows what it has to send to and what it is going to receive from the other ranks. + // At this point, each rank knows what it has to send and to whom and what it is going to receive from the other ranks.
// // The remaining action is about // - retrieving the additional graph information From 716a0827a4ed945e3ad02437f2a26a818bcd991a Mon Sep 17 00:00:00 2001 From: Ryan Aronson Date: Wed, 26 Jun 2024 22:47:05 +0000 Subject: [PATCH 6/9] complete comments for performGhosting (Ryan's version) --- .../mesh/generators/NewGhosting.cpp | 101 +++++++++++++++--- 1 file changed, 87 insertions(+), 14 deletions(-) diff --git a/src/coreComponents/mesh/generators/NewGhosting.cpp b/src/coreComponents/mesh/generators/NewGhosting.cpp index ea67c36a30a..da66f5120e1 100644 --- a/src/coreComponents/mesh/generators/NewGhosting.cpp +++ b/src/coreComponents/mesh/generators/NewGhosting.cpp @@ -1041,7 +1041,8 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & // matrix index of required entity TriGlbIdx const & index = extractedIndices[i]; - // TODO: should this be notPresentIndices?? + // Note this is not notPresentIndices, as information which is defined on geometric entities + // which happen to already be present will still need to be communicated in the future if( receivedIndices.find( index ) == std::cend( receivedIndices ) ) // TODO make a map `receivedIndices -> mpi rank` { continue, @@ -1096,10 +1097,13 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & // knowing the index of the nodes is not enough. // // In order to do that, we build the `missingIndicator` matrix, which is rectangular: - // - The number of columns is the number of graph nodes in the mesh graph. + // - The number of columns is the number of graph nodes in the mesh graph, ie the total number of geometrical entities in the mesh. // - The number of rows is the total number of graph nodes that are missing on ranks. + // For me this was confusing, as its a little different from what we've done before + // Each rank is missing some quantity of mesh entities, and we are going to MpiAllreduce to get the total across all ranks, and this is the number of rows in the global matrix + // Its going to be incredibly sparse, in fact only one nonzero per row marking the index of the entity in the graph // Note that the same quantity can be missing on multiple ranks, and that's handled. - // - The non-zero terms equal `1`, meaning that a given quantity (column) is missing on a given rank (row). + // - The non-zero terms equal `1`, meaning that a given quantity (column) is missing on a given rank (row, each rank owns a chunk of rows). // // Combining `missingIndicator` with the adjacency matrix which conveys a lot of connections information, // we'll be able to create the final `missingIndices` matrix, @@ -1120,21 +1124,31 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & // that actually are physical nodes in the mesh. // - The number of rows is the total number of graph nodes (that actually are physical nodes in the mesh) that are missing on ranks. // - The non-zero terms equal `1`, meaning that a given quantity (column) is missing on a given rank (row).
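+ // For illustration (hypothetical counts): if rank 0 is missing { f0 } and rank 1 is missing
+ // { e0, f0 }, the reduction below yields 3 global rows: rank 0 owns row 0 (a 1 in the f0
+ // column) and rank 1 owns rows 1 and 2 (a 1 in the e0 and f0 columns respectively).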
+ + // Group together the number of missing mesh entities and the number of missing mesh nodes in particular for this rank std::size_t const numLocMissingCompound[2] = { std::size( notPresentIndices ), std::size( notPresentNodes ) }; + // Now we can communicate to get the sum over missing entities and missing mesh nodes over all the ranks with a simple reduction std::size_t numGlbMissingCompound[2] = { 0, 0 }; MpiWrapper::allReduce( numLocMissingCompound, numGlbMissingCompound, 2, MPI_SUM ); + + // break the above back into individual variables now that parallel reduction is done std::size_t const & numLocMissingIndices = numLocMissingCompound[0]; std::size_t const & numLocMissingNodes = numLocMissingCompound[1]; Tpetra::global_size_t const numGlbMissingIndices = numGlbMissingCompound[0]; Tpetra::global_size_t const numGlbMissingNodes = numGlbMissingCompound[1]; - std::size_t const numGlbNodes = gis.nodes.get() + 1; // TODO Use strongly typed integers + std::size_t const numGlbNodes = gis.nodes.get() + 1; // TODO Use strongly typed integers, note gis was the input variable describing global index offsets std::size_t const numOwnedNodes = std::size( ownedNodesIdcs ); // TODO Use strongly typed integers - auto const missingIndicesMap = make_rcp< TriMap const >( numGlbMissingIndices, numGlbMissingIndices, TriGlbIdx{ 0 }, comm ); + // Now we are going to make new ownership maps for the new matrices describing missing indices/nodes + // Different constructor, just using sizes + // https://docs.trilinos.org/dev/packages/tpetra/doc/html/classTpetra_1_1Map.html#a7984262c1e6ab71ad3717fd96ed76052 + auto const missingIndicesMap = make_rcp< TriMap const >( numGlbMissingIndices, numGlbMissingIndices, TriGlbIdx{ 0 }, comm ); // TODO: Should second one be local?? auto const missingNodesMap = make_rcp< TriMap const >( numGlbMissingNodes, numLocMissingNodes, TriGlbIdx{ 0 }, comm ); // Following information is needed to compute the global row. + // https://docs.trilinos.org/dev/packages/tpetra/doc/html/classTpetra_1_1Map.html#a0124c1b96f046c824123fb0ff5a9c978 + // getting the index in the global matrix corresponding to local index 0 on current rank TriGlbIdx const missingIndicesOffset = missingIndicesMap->getGlobalElement( TriLocIdx{ 0 } ); // TODO Hocus Pocus TriGlbIdx const missingNodesOffset = missingNodesMap->getGlobalElement( TriLocIdx{ 0 } ); // TODO Hocus Pocus @@ -1142,18 +1156,30 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & TriCrsMatrix missingIndicator( missingIndicesMap, 1 ); for( std::size_t i = 0; i < numLocMissingIndices; ++i ) { - missingIndicator.insertGlobalValues( missingIndicesOffset + TriGlbIdx( i ), TriLocIdx( 1 ), ones.data(), &notPresentIndices[i] ); + // every row corresponds to a missing entity on this rank + // simply insert a 1 into the column of the entity given by notPresentIndices + // note each rank owns a chunk of consecutive rows + // https://docs.trilinos.org/dev/packages/tpetra/doc/html/classTpetra_1_1CrsMatrix.html#a8d4784081c59f27391ad825c4dae5e86 + missingIndicator.insertGlobalValues( missingIndicesOffset + TriGlbIdx( i ), TriLocIdx( 1 ), ones.data(), &notPresentIndices[i] ); // note we are re-using ones from wayy back, just need a pointer to something with 1 } missingIndicator.fillComplete( ownedMap, missingIndicesMap ); - // `missingIndices` will contain the missing connectivity information. - TriCrsMatrix missingIndices( missingIndicesMap, 1000 ); // TODO having `0` there should be working!
+ // Now build `missingIndices` which will contain the missing connectivity information. Same size as missingIndicator + // missingIndices (num_global_missing_indices x num_entities) = missingIndicator (num_global_missing_indices x num_entities) * upward (num_entities x num_entities) + // every row of missingIndicator just had a 1 in the column which denotes the missing entity, upward is our adjacency matrix + // Similar to how if we do upward * (col indicator vector) we go up the topological chain (1 in edge slot yields faces containing edge) + // when we do (indicator row vector) * upward we get the row of upward corresponding to the indicated column, i.e. the indicated mesh entity and everything it depends on + // so if the indicator was on a face, you would get back the edges it is composed of as these are the nonzeros in the result vector + // So the full matrix just has this for every row, where each row is one of the missing entities for one of the ranks + // Complete tangent (sort of) - these pictures always help me with the visualizations of matrix multiplications, which are very helpful here: https://dzone.com/articles/visualizing-matrix + TriCrsMatrix missingIndices( missingIndicesMap, 1000 ); // TODO having `0` there should be working! (epetra could dynamically resize) This is the same TODO as before, need a way to tell tpetra how many nonzeros Tpetra::MatrixMatrix::Multiply( missingIndicator, false, upward, false, missingIndices, false ); missingIndices.fillComplete( ownedMap, missingIndicesMap ); + // Now we can repeat the process but only consider the nodes, as discussed before // Indicator matrix only for the nodes. // Note that it should be an integer matrix full of ones, - // but it's a double matrix because it's going to be multiplied a double matrix. + // but it's a double matrix because it's going to be multiplied by a double matrix (nodal coordinates). TriDblCrsMatrix missingNodesIndicator( missingNodesMap, 1 ); for( std::size_t i = 0; i < numLocMissingNodes; ++i ) { @@ -1168,6 +1194,8 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & // - Its number of columns is 3: the x, y, and z coordinates of the nodes. TriDblCrsMatrix nodePositions( ownedNodesMap, 3 ); std::vector< TriGlbIdx > const zot{ TriGlbIdx( 0 ), TriGlbIdx( 1 ), TriGlbIdx( 2 ) }; // zot: zero, one, two. + + // each rank fills the nodePositions for the nodes it owns for( auto const & [ngi, pos]: owned.n2pos ) { nodePositions.insertGlobalValues( convert.fromNodeGlbIdx( ngi ), TriLocIdx( 3 ), pos.data(), zot.data() ); @@ -1176,22 +1204,36 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & nodePositions.fillComplete( threeMap, ownedNodesMap ); // `missingNodePos` will contain the missing node positions. - TriDblCrsMatrix missingNodePos( missingNodesMap, 1000 ); // TODO having `0` there should be working! + // Analogous to missingIndices, each row is a node missing from one of the ranks, and the cols are the position of that node + TriDblCrsMatrix missingNodePos( missingNodesMap, 1000 ); // TODO having `0` there should be working!
Same TODO for sizing Tpetra::MatrixMatrix::Multiply( missingNodesIndicator, false, nodePositions, false, missingNodePos, false ); missingNodePos.fillComplete( threeMap, missingNodesMap ); + // Okie dokie, with all that we can finally construct the last output, which is the MeshGraph corresponding to ghosted info + // (recall we already had MeshGraphs for the owned and present info) + // (and recall a MeshGraph is just a struct of maps which take you from node/edge/face/cell global index to its data, + // ex: face global id maps to a vector of edges, each of which are encoded by EdgeInfo (so it has id, start node)) + // Notice that this information is exactly what is in missingIndices and missingNodePos! + // Moreover, we will be able to get back 'all' the information by decoding the entries of missingIndices! MeshGraph ghosts; Teuchos::Array< double > extractedNodePos( 3 ); + + // Loop over the missing mesh entities for( std::size_t i = 0; i < numLocMissingIndices; ++i ) { + // This is the missing entity that will be ghosted, this will be the key in one of the ghosted maps TriGlbIdx const index = notPresentIndices[i]; Geom const geometricalType = convert.getGeometricalType( index ); + // get the number of entities the missing one depends on (downward, c -> f -> e -> n) std::size_t const length = missingIndices.getNumEntriesInGlobalRow( missingIndicesOffset + TriGlbIdx( i ) ); + + // reuse same temp vars from before to store the row data extractedValues.resize( length ); extractedIndices.resize( length ); missingIndices.getGlobalRowCopy( missingIndicesOffset + TriGlbIdx( i ), extractedIndices(), extractedValues(), extracted ); GEOS_ASSERT_EQ( extracted, length ); + if( geometricalType == Geom::NODE ) { // The case of nodes is a bit different from the other cases, @@ -1212,32 +1254,40 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & continue; } - auto const cit = std::find( std::cbegin( extractedIndices ), std::cend( extractedIndices ), index ); + // Now we are either edge, face, or cell + + // Find the index of the current missing entity in the data from the row of the matrix + auto const cit = std::find( std::cbegin( extractedIndices ), std::cend( extractedIndices ), index ); // I kind of think it should always be the last one, since upward is (lower) triangular + // Get the diagonal value std::size_t const numGeomQuantitiesIdx = intConv< std::size_t >( std::distance( std::cbegin( extractedIndices ), cit ) ); TriScalarInt const & numGeomQuantities = extractedValues[numGeomQuantitiesIdx]; - GEOS_ASSERT_EQ( extracted, intConv< std::size_t >( numGeomQuantities + 1 ) ); + GEOS_ASSERT_EQ( extracted, intConv< std::size_t >( numGeomQuantities + 1 ) ); // extracted is the output from Trilinos telling you how many entries were pulled out when we copied the row switch( geometricalType ) { case Geom::EDGE: { TriScalarInt const & numNodes = numGeomQuantities; // Alias - GEOS_ASSERT_EQ( numNodes, 2 ); + GEOS_ASSERT_EQ( numNodes, 2 ); // an edge always has 2 nodes std::array< NodeGlbIdx, 2 > order{}; + // loop over the entries extracted from the row of the matrix for( std::size_t ii = 0; ii < extracted; ++ii ) { + // skip diagonal if( ii == numGeomQuantitiesIdx ) { continue; } + // use `order` to store [start node index, end node index] NodeGlbIdx const ngi = convert.toNodeGlbIdx( extractedIndices[ii] ); TriScalarInt const ord = extractedValues[ii] - 1; GEOS_ASSERT( ord == 0 or ord == 1 ); order[ord] = ngi; } GEOS_ASSERT_EQ( std::size( order ), intConv< std::size_t >(
numNodes ) ); + // populate this info into ghosts EdgeGlbIdx const egi = convert.toEdgeGlbIdx( index ); std::tuple< NodeGlbIdx, NodeGlbIdx > & tmp = ghosts.e2n[egi]; std::get< 0 >( tmp ) = order[0]; @@ -1248,19 +1298,27 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & { TriScalarInt const & numEdges = numGeomQuantities; // Alias std::map< integer, EdgeInfo > order; + + // loop over the entries extracted from the row of the matrix for( std::size_t ii = 0; ii < extracted; ++ii ) { + // skip diagonal if( ii == numGeomQuantitiesIdx ) { continue; } + // get the edge global index and decode the matrix entry to get each edge's start node and the edges index in the face EdgeGlbIdx const egi = convert.toEdgeGlbIdx( extractedIndices[ii] ); + // array is [start node, index within face] std::array< std::size_t, 2 > const decoded = decode< 2 >( numEdges, extractedValues[ii] - 1 ); + // `order` holds all the edge infos in the order the edges appeared in the original face order[decoded[1]] = { egi, intConv< std::uint8_t >( decoded[0] ) }; GEOS_ASSERT( decoded[0] == 0 or decoded[0] == 1 ); } GEOS_ASSERT_EQ( std::size( order ), intConv< std::size_t >( numEdges ) ); + + // use face global index and `order` to populate ghosts FaceGlbIdx const fgi = convert.toFaceGlbIdx( index ); std::vector< EdgeInfo > & tmp = ghosts.f2e[fgi]; tmp.resize( numEdges ); @@ -1275,18 +1333,26 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & { TriScalarInt const & numFaces = numGeomQuantities; // Alias // TODO This should receive the cell type instead. std::map< integer, FaceInfo > order; + + // loop over the entries extracted from the row of the matrix for( std::size_t ii = 0; ii < extracted; ++ii ) { + // skip diagonal if( ii == numGeomQuantitiesIdx ) { continue; } + // get the face global index and decode the entries to get isFlipped, start index, and index of face within cell FaceGlbIdx const fgi = convert.toFaceGlbIdx( extractedIndices[ii] ); + // array is [isFlipped, start index, and index of face within cell] std::array< std::size_t, 3 > const decoded = decode< 3 >( numFaces, extractedValues[ii] - 1 ); + // `order` holds the face info in the order the faces appeared in the original cell order[decoded[2]] = { fgi, intConv< bool >( decoded[0] ), intConv< std::uint8_t >( decoded[1] ) }; } GEOS_ASSERT_EQ( std::size( order ), intConv< std::size_t >( numFaces ) ); + + // use cell global index and `order` to populate ghosts CellGlbIdx const cgi = convert.toCellGlbIdx( index ); std::vector< FaceInfo > & tmp = ghosts.c2f[cgi]; tmp.resize( numFaces ); @@ -1298,7 +1364,7 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & } default: { - GEOS_ERROR( "Internal error." ); + GEOS_ERROR( "Internal error in performGhosting. Could not recognize geometric type of mesh entity" ); } } } @@ -1323,6 +1389,8 @@ std::tuple< MeshGraph, GhostRecv, GhostSend > performGhosting( MeshGraph const & // GEOS_LOG_RANK( "ghosts_n2ps = " << json( ghosts.n2pos ) ); // } + // Now we are done!!! + return { std::move( ghosts ), std::move( recv ), std::move( send ) }; } @@ -1364,6 +1432,11 @@ void doTheNewGhosting( vtkSmartPointer< vtkDataSet > mesh, // } // MpiWrapper::barrier(); + // Next do the atual ghosting - i.e. 
figure out the information each rank must send to and receive from other ranks + // This is where we use parallel linear algebra package (right now, Trilinos) to build an adjacency matrix for the entire mesh (treated as a graph) + // By doing some clever matrix products we can figure out which ranks need what automagically + // `ghosts` is also a MeshGraph like owned and present, but described the geometric entities which are ghosted onto the rank from another owner + // recv and send are of type GhostRecv and GhostSend (in BuildPods.hpp) and describe what (nodes, edges, faces, cells) each rank needs to receive from and send to others auto const [ghosts, recv, send] = performGhosting( owned, present, matrixOffsets, curRank ); buildPods( owned, present, ghosts, recv, send, meshMappings ); From 0826d669b0757cb017bad543a3d0d933dcfc2203 Mon Sep 17 00:00:00 2001 From: Ryan Aronson Date: Thu, 27 Jun 2024 09:12:33 -0700 Subject: [PATCH 7/9] Update comments about sorting following fix --- src/coreComponents/mesh/generators/NewGhosting.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/coreComponents/mesh/generators/NewGhosting.cpp b/src/coreComponents/mesh/generators/NewGhosting.cpp index e310546223c..ba79af76ce4 100644 --- a/src/coreComponents/mesh/generators/NewGhosting.cpp +++ b/src/coreComponents/mesh/generators/NewGhosting.cpp @@ -616,7 +616,7 @@ Adjacency buildAdjacency( MeshGraph const & owned, { otherGlbIdcs.emplace_back( convert.fromFaceGlbIdx( fgi ) ); } - std::sort( std::begin( otherGlbIdcs ), std::end( otherGlbIdcs ) ); // I think that the data in n2pos, e2n, f2e was not necessarily sorted + std::sort( std::begin( otherGlbIdcs ), std::end( otherGlbIdcs ) ); // I think that the data in n2pos, e2n, f2e was not necessarily sorted, should double check this as sorting caused a bug in owned data GEOS_ASSERT_EQ( numOther, std::size( otherGlbIdcs ) ); // ensure we added the correct amount of stuff // Now do owned data @@ -725,13 +725,6 @@ Adjacency buildAdjacency( MeshGraph const & owned, ind.back() = ownedGlbIdcs.back(); val.back() = intConv< TriScalarInt >( numFaces ); // TODO This should be Hex and the not the number of faces... } - - // I think that the data in n2pos, e2n, f2e was not necessarily sorted - // Im not sure why you dont need to sort the other data? - Ask Thomas - // TODO: this may be a mistake, what we should do is make a copy and sort that, or sort the other data in the same order - // The sorted version is used later to compared the needed indices with the already available indices to determine what needs to be ghosted. 
- // However, sorting here I think makes it so that you no longer know which row in the global matrix each entry in the vectors corresponds to - // May have been working if everything was already sorted, so the sort was a no-op GEOS_ASSERT_EQ( numOwned, std::size( ownedGlbIdcs ) ); GEOS_ASSERT_EQ( numOwned, intConv< std::size_t >( numEntriesPerRow.size() ) ); From 9cf0c5ab9cdf6c20db13b1f70a10b34af2fa6522 Mon Sep 17 00:00:00 2001 From: Ryan Aronson Date: Fri, 28 Jun 2024 17:54:43 +0000 Subject: [PATCH 8/9] finished comments of doTheNewGhosting (Ryan's version) --- .../mesh/generators/BuildPods.cpp | 41 +++++++++++++++++-- .../mesh/generators/NewGhosting.cpp | 6 ++- 2 files changed, 42 insertions(+), 5 deletions(-) diff --git a/src/coreComponents/mesh/generators/BuildPods.cpp b/src/coreComponents/mesh/generators/BuildPods.cpp index f41e984e341..1bb51c61785 100644 --- a/src/coreComponents/mesh/generators/BuildPods.cpp +++ b/src/coreComponents/mesh/generators/BuildPods.cpp @@ -55,11 +55,14 @@ std::map< GI, toLocIdx_t< GI > > buildGlobalToLocalMap( std::set< GI > const & g return g2l; } +// Consolidate the mesh graphs on the current rank into one object MeshGraph mergeMeshGraph( MeshGraph const & owned, MeshGraph const & present, MeshGraph const & ghosts ) { + // initialize the consolidated version with owned MeshGraph result{ owned }; + // insert the present and ghost data for( MeshGraph const & graph: { present, ghosts } ) { result.c2f.insert( std::cbegin( graph.c2f ), std::cend( graph.c2f ) ); @@ -89,6 +92,7 @@ struct UpwardMappings std::map< NodeLocIdx, std::vector< CellLocIdx > > n2c; }; +// converts generic map to arrayOfArrays template< class T, class U > ArrayOfArrays< localIndex > convertToAoA( std::map< T, std::vector< U > > const & t2u ) { @@ -138,6 +142,7 @@ array2d< localIndex, P > convertToA2d( std::map< T, std::vector< U > > const & t return t2u_; } +// converts map to unordered map template< class GI > unordered_map< globalIndex, localIndex > convertGlobalToLocalMap( std::map< GI, toLocIdx_t< GI > > const & g2l ) { @@ -150,6 +155,7 @@ unordered_map< globalIndex, localIndex > convertGlobalToLocalMap( std::map< GI, return g2l_; } +// removes MPIRank typed int, changes from std::vector to array1d template< typename LI > std::map< integer, array1d< localIndex > > toFlavorlessMapping( std::map< MpiRank, std::vector< LI > > const & input ) { @@ -219,6 +225,8 @@ FaceMgrImpl makeFlavorlessFaceMgrImpl( std::size_t const & numFaces, toFlavorlessMapping( recv ) ); } +// Function to create a nodeManager from our local upward mapping data +// By flavorless, we are indicating that we are changing data-structures to GEOS ones, etc NodeMgrImpl makeFlavorlessNodeMgrImpl( std::size_t const & numNodes, std::map< NodeGlbIdx, std::array< double, 3 > > const & n2pos, std::map< NodeLocIdx, std::vector< EdgeLocIdx > > const & n2e, @@ -282,6 +290,7 @@ DownwardMappings buildDownwardMappings( GlobalToLocal const & g2l, DownwardMappings res; // Building the `e2n` (edges to nodes) mapping + // Simply looping through the meshgraph and converting to local indices for( auto const & [egi, ngis]: graph.e2n ) { NodeLocIdx const nli0 = g2l.nodes.at( std::get< 0 >( ngis ) ); @@ -290,8 +299,10 @@ DownwardMappings buildDownwardMappings( GlobalToLocal const & g2l, } // Building the `f2n` (face to nodes) and `f2e` (faces to edges) mappings + // Loop through meshgraph for( auto const & [fgi, edgeInfos]: graph.f2e ) { + // convert to face local index FaceLocIdx const & fli = g2l.faces.at( fgi ); std::vector< NodeLocIdx
> & nodes = res.f2n[fli]; @@ -300,18 +311,23 @@ DownwardMappings buildDownwardMappings( GlobalToLocal const & g2l, nodes.reserve( std::size( edgeInfos ) ); edges.reserve( std::size( edgeInfos ) ); + // Loop over the edges connected to the face for( EdgeInfo const & edgeInfo: edgeInfos ) { + // use the meshgraph again to go from edge to node, convert to local indices to fill face2node std::tuple< NodeGlbIdx, NodeGlbIdx > const & ngis = graph.e2n.at( edgeInfo.index ); nodes.emplace_back( edgeInfo.start == 0 ? g2l.nodes.at( std::get< 0 >( ngis ) ) : g2l.nodes.at( std::get< 1 >( ngis ) ) ); + // just convert edge global id to local to fill face2edge edges.emplace_back( g2l.edges.at( edgeInfo.index ) ); } } // Building the `c2n` (cell to nodes), `c2e` (cell to edges) and `c2f` (cell to faces) mappings + // loop through meshgraph cells2face for( auto const & [cgi, faceInfos]: graph.c2f ) { + // convert to cell local index CellLocIdx const & cli = g2l.cells.at( cgi ); std::vector< FaceLocIdx > & faces = res.c2f[cli]; @@ -321,12 +337,14 @@ DownwardMappings buildDownwardMappings( GlobalToLocal const & g2l, edges.reserve( std::size( faceInfos ) ); // c2f + // simple conversion of global to local index is all thats needed for( FaceInfo const & faceInfo: faceInfos ) { faces.emplace_back( g2l.faces.at( faceInfo.index ) ); } // c2e + // loop over the faces in the cell and collect all their edges std::set< EdgeLocIdx > tmpEdges; for( FaceLocIdx const & fli: faces ) { @@ -336,16 +354,19 @@ DownwardMappings buildDownwardMappings( GlobalToLocal const & g2l, edges.assign( std::cbegin( tmpEdges ), std::cend( tmpEdges ) ); // c2n + // Note how we are hard-coded for hexs FaceInfo const & bottomFace = faceInfos.at( 4 ); // (0, 3, 2, 1) // TODO depends on element type. FaceInfo const & topFace = faceInfos.at( 5 ); // (4, 5, 6, 7) std::vector< NodeLocIdx > const & bottomNodes = res.f2n.at( g2l.faces.at( bottomFace.index ) ); std::vector< NodeLocIdx > const & topNodes = res.f2n.at( g2l.faces.at( topFace.index ) ); + // resetFaceNodes resets the order using the flipped and start info std::vector< NodeLocIdx > const bn = resetFaceNodes( bottomNodes, bottomFace.isFlipped, bottomFace.start ); std::vector< NodeLocIdx > const tn = resetFaceNodes( topNodes, topFace.isFlipped, topFace.start ); // TODO carefully check the ordering... + // looks like this is geos order std::array< NodeLocIdx, 8 > const tmp{ bn[0], bn[3], bn[2], bn[1], tn[0], tn[1], tn[2], tn[3] }; res.c2n[cli] = { tmp[0], tmp[1], tmp[3], tmp[2], tmp[4], tmp[5], tmp[7], tmp[6] }; } @@ -494,8 +515,12 @@ void buildPods( MeshGraph const & owned, GhostSend const & send, MeshMappingImpl & meshMappings ) { + // start by merging the 3 mesh graphs for the different types of data on the graph into 1 struct MeshGraph const graph = mergeMeshGraph( owned, present, ghosts ); + // create GlobalToLocal, which maps global IDs to rank local IDs for (nodes, edges, faces, cells) + // mapKeys is just a utility which extracts keys + // buildGlobalToLocalMap just takes the global ids and maps them to 1, 2, 3, ...
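+ // For illustration, the lookups below then go through e.g. g2l.nodes.at( ngi ) to turn a
+ // NodeGlbIdx into its NodeLocIdx, exactly as buildDownwardMappings does above.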
@@ -494,8 +515,12 @@ void buildPods( MeshGraph const & owned,
                 GhostSend const & send,
                 MeshMappingImpl & meshMappings )
 {
+  // start by merging the 3 mesh graphs for the different kinds of data on the rank (owned, present, ghosts) into 1 struct
   MeshGraph const graph = mergeMeshGraph( owned, present, ghosts );
 
+  // create GlobalToLocal, which maps global IDs to rank-local IDs for (nodes, edges, faces, cells)
+  // mapKeys is just a utility which extracts the keys of a map
+  // buildGlobalToLocalMap just takes the global ids and maps them to 1, 2, 3, ...
   GlobalToLocal const g2l{ buildGlobalToLocalMap( mapKeys< std::set >( graph.n2pos ) ),
                            buildGlobalToLocalMap( mapKeys< std::set >( graph.e2n ) ),
@@ -503,9 +528,17 @@ void buildPods( MeshGraph const & owned,
                            buildGlobalToLocalMap( mapKeys< std::set >( graph.c2f ) ) };
 
+  // Now we build the mappings used by GEOS
+  // Downward - edges2nodes, faces2edges, faces2nodes, cells2faces, cells2edges, cells2nodes
+  // The difference is that these use local indices, which is why we built g2l
+  // there are assumptions that we are working on hexahedra here
   DownwardMappings const downwardMappings = buildDownwardMappings( g2l, graph );
+  // invert the downward mappings to get e2f, f2c, n2e, n2f, n2c
   UpwardMappings const upwardMappings = buildUpwardMappings( downwardMappings );
 
+  // Now we can make our node manager
+  // NodeMgrImpl inherits from the node manager (see pods.hpp), an interface with getters and setters
+  // We basically have all the data already; this function just casts it into the proper GEOS types
   NodeMgrImpl nodeMgr = makeFlavorlessNodeMgrImpl( std::size( g2l.nodes ),
                                                    graph.n2pos,
                                                    upwardMappings.n2e,
@@ -514,14 +547,14 @@ void buildPods( MeshGraph const & owned,
                                                    g2l.nodes,
                                                    invertGhostSend( send.nodes, g2l.nodes ),
                                                    invertGhostRecv( recv.nodes, g2l.nodes ) );
-
+  // same for the edge manager
   EdgeMgrImpl edgeMgr = makeFlavorlessEdgeMgrImpl( std::size( g2l.edges ),
                                                    downwardMappings.e2n,
                                                    upwardMappings.e2f,
                                                    g2l.edges,
                                                    invertGhostSend( send.edges, g2l.edges ),
                                                    invertGhostRecv( recv.edges, g2l.edges ) );
-
+  // same for the face manager
   FaceMgrImpl faceMgr = makeFlavorlessFaceMgrImpl( std::size( g2l.faces ),
                                                    downwardMappings.f2n,
                                                    downwardMappings.f2e,
@@ -529,7 +562,7 @@ void buildPods( MeshGraph const & owned,
                                                    g2l.faces,
                                                    invertGhostSend( send.faces, g2l.faces ),
                                                    invertGhostRecv( recv.faces, g2l.faces ) );
-
+  // same for the cell block
   CellBlkImpl cellBlock = makeFlavorlessCellBlkImpl( std::size( g2l.cells ),
                                                      downwardMappings.c2n,
                                                      downwardMappings.c2e,
@@ -537,9 +570,9 @@ void buildPods( MeshGraph const & owned,
                                                      g2l.cells,
                                                      invertGhostSend( send.cells, g2l.cells ),
                                                      invertGhostRecv( recv.cells, g2l.cells ) );
-
   CellMgrImpl cellMgr( std::move( cellBlock ) );
 
+  // populate meshMappings with the created managers
   meshMappings.setCellMgr( std::move( cellMgr ) );
   meshMappings.setEdgeMgr( std::move( edgeMgr ) );
   meshMappings.setFaceMgr( std::move( faceMgr ) );
diff --git a/src/coreComponents/mesh/generators/NewGhosting.cpp b/src/coreComponents/mesh/generators/NewGhosting.cpp
index da66f5120e1..47ffc7e38e4 100644
--- a/src/coreComponents/mesh/generators/NewGhosting.cpp
+++ b/src/coreComponents/mesh/generators/NewGhosting.cpp
@@ -1432,13 +1432,17 @@ void doTheNewGhosting( vtkSmartPointer< vtkDataSet > mesh,
 //  }
 //  MpiWrapper::barrier();
 
-  // Next do the atual ghosting - i.e. figure out the information each rank must send to and receive from other ranks
+  // Next do the actual ghosting - i.e. figure out the information each rank must send to and receive from other ranks
   // This is where we use the parallel linear algebra package (right now, Trilinos) to build an adjacency matrix for the entire mesh (treated as a graph)
   // By doing some clever matrix products we can figure out which ranks need what automagically (see the toy example below)
   // `ghosts` is also a MeshGraph like owned and present, but describes the geometric entities which are ghosted onto the rank from another owner
   // recv and send are of type GhostRecv and GhostSend (in BuildPods.hpp) and describe what (nodes, edges, faces, cells) each rank needs to receive from and send to others
   auto const [ghosts, recv, send] = performGhosting( owned, present, matrixOffsets, curRank );
 
+  // Finally, we use everything we have to populate the mappings that interface with the rest of GEOS
+  // Note that we have already done the ghosting, so these mappings are set once and for all
+  // This is unlike the previous implementation, which populated the mappings from the current rank's info alone and then tried to fix up any inconsistencies during ghosting
+  // The output is the populated meshMappings, which contains the node manager, edge manager, etc., as well as the ghost send/recv information
   buildPods( owned, present, ghosts, recv, send, meshMappings );
 }
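The "clever matrix products" remark is worth a concrete illustration. In the toy example below (dense arrays and made-up numbers; the real code uses Trilinos sparse matrices over the whole mesh graph), A is an entity-by-rank incidence matrix, and the entries of A^T A count the entities each pair of ranks shares, so a nonzero off-diagonal entry flags a pair of ranks that must exchange ghost data:

    #include <array>
    #include <cstdio>

    int main()
    {
      // Toy incidence matrix: 4 mesh entities (rows) x 3 ranks (columns);
      // a[e][r] == 1 means entity e is present on rank r.
      std::array< std::array< int, 3 >, 4 > const a{ { { 1, 1, 0 },
                                                       { 0, 1, 1 },
                                                       { 1, 0, 0 },
                                                       { 0, 0, 1 } } };

      // (A^T A)[r][s] counts the entities shared by ranks r and s:
      // the diagonal holds each rank's own entity count, while a nonzero
      // off-diagonal entry means the two ranks touch the same entities.
      for( int r = 0; r < 3; ++r )
      {
        for( int s = 0; s < 3; ++s )
        {
          int shared = 0;
          for( int e = 0; e < 4; ++e )
          {
            shared += a[e][r] * a[e][s];
          }
          std::printf( "%d ", shared );
        }
        std::printf( "\n" );
      }
      return 0;
    }

Here ranks 0 and 1 share one entity, ranks 1 and 2 share another, and ranks 0 and 2 share nothing, so only the first two pairs need to communicate.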
From 4e56707ba6861e1bad1856763d18da387b3e7339 Mon Sep 17 00:00:00 2001
From: Ryan Aronson
Date: Mon, 1 Jul 2024 23:46:00 +0000
Subject: [PATCH 9/9] make new ghosting active/inactive by setting command line flag (-g 1)

---
 .../common/initializeEnvironment.hpp |  3 +++
 .../mainInterface/ProblemManager.cpp | 20 ++++++++++---------
 .../mainInterface/ProblemManager.hpp |  7 +------
 .../mainInterface/initialization.cpp |  7 +++++++
 4 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/src/coreComponents/common/initializeEnvironment.hpp b/src/coreComponents/common/initializeEnvironment.hpp
index 044b1977294..b91ac24dc80 100644
--- a/src/coreComponents/common/initializeEnvironment.hpp
+++ b/src/coreComponents/common/initializeEnvironment.hpp
@@ -88,6 +88,9 @@ struct CommandLineOptions
 
   /// Print memory usage in data repository
   real64 printMemoryUsage = -1.0;
+
+  /// Use new ghosting strategy
+  integer useNewGhosting = 0;
 };
 
 /**
diff --git a/src/coreComponents/mainInterface/ProblemManager.cpp b/src/coreComponents/mainInterface/ProblemManager.cpp
index cf71bee0eff..4fbd8eb5869 100644
--- a/src/coreComponents/mainInterface/ProblemManager.cpp
+++ b/src/coreComponents/mainInterface/ProblemManager.cpp
@@ -142,11 +142,11 @@ ProblemManager::ProblemManager( conduit::Node & root ):
     setRestartFlags( RestartFlags::WRITE ).
     setDescription( "Whether to disallow using pinned memory allocations for MPI communication buffers." );
 
-  m_useNewGhosting = 1;
-//  registerWrapper( viewKeysStruct::useNewGhostingString(), &m_useNewGhosting ).
-//    setInputFlag( InputFlags::OPTIONAL ).
-//    setApplyDefaultValue( 0 ).
-//    setDescription( "Controls the use of the new ghosting implementation." );
+  commandLine.registerWrapper< integer >( viewKeys.useNewGhosting.key() ).
+    setApplyDefaultValue( 0 ).
+    setRestartFlags( RestartFlags::WRITE ).
+ setDescription( "Whether to use new ghosting strategy" ); + } ProblemManager::~ProblemManager() @@ -191,6 +191,7 @@ void ProblemManager::parseCommandLineInput() commandLine.getReference< integer >( viewKeys.overridePartitionNumbers ) = opts.overridePartitionNumbers; commandLine.getReference< integer >( viewKeys.useNonblockingMPI ) = opts.useNonblockingMPI; commandLine.getReference< integer >( viewKeys.suppressPinned ) = opts.suppressPinned; + commandLine.getReference< integer >( viewKeys.useNewGhosting ) = opts.useNewGhosting; string & outputDirectory = commandLine.getReference< string >( viewKeys.outputDirectory ); outputDirectory = opts.outputDirectory; @@ -633,10 +634,12 @@ void ProblemManager::generateMesh() GEOS_MARK_FUNCTION; DomainPartition & domain = getDomainPartition(); + Group const & commandLine = getGroup< Group >( groupKeys.commandLine ); + MeshManager & meshManager = this->getGroup< MeshManager >( groupKeys.meshManager ); - GEOS_LOG_RANK( "m_useNewGhosting = " << m_useNewGhosting ); - meshManager.generateMeshes( m_useNewGhosting, domain ); + GEOS_LOG_RANK( "useNewGhosting = " << commandLine.getReference< integer >( viewKeys.useNewGhosting ) ); + meshManager.generateMeshes( commandLine.getReference< integer >( viewKeys.useNewGhosting ), domain ); // get all the discretizations from the numerical methods. // map< pair< mesh body name, pointer to discretization>, array of region names > @@ -658,7 +661,7 @@ void ProblemManager::generateMesh() } else { - if( m_useNewGhosting ) + if( commandLine.getReference< integer >( viewKeys.useNewGhosting ) ) { GEOS_LOG_RANK_0( "Generating the mesh levels for the new ghosting." ); generateMeshLevelFreeFct( meshBody.getMeshMappings(), @@ -685,7 +688,6 @@ void ProblemManager::generateMesh() } } ); - Group const & commandLine = this->getGroup< Group >( groupKeys.commandLine ); integer const useNonblockingMPI = commandLine.getReference< integer >( viewKeys.useNonblockingMPI ); // domain.setupBaseLevelMeshGlobalInfo(); diff --git a/src/coreComponents/mainInterface/ProblemManager.hpp b/src/coreComponents/mainInterface/ProblemManager.hpp index 24da2841f82..f4fe46d05e7 100644 --- a/src/coreComponents/mainInterface/ProblemManager.hpp +++ b/src/coreComponents/mainInterface/ProblemManager.hpp @@ -230,9 +230,7 @@ class ProblemManager : public dataRepository::Group dataRepository::ViewKey outputDirectory = {"outputDirectory"}; ///< Output directory key dataRepository::ViewKey useNonblockingMPI = {"useNonblockingMPI"}; ///< Flag to use non-block MPI key dataRepository::ViewKey suppressPinned = {"suppressPinned"}; ///< Flag to suppress use of pinned memory key - constexpr static char const * useNewGhostingString() - { return "useNewGhosting"; } - + dataRepository::ViewKey useNewGhosting = {"useNewGhosting"}; ///< Flag to use new ghosting strategies } viewKeys; ///< Command line input viewKeys /// Child group viewKeys @@ -385,9 +383,6 @@ class ProblemManager : public dataRepository::Group /// The FieldSpecificationManager FieldSpecificationManager * m_fieldSpecificationManager; - - /// Whether we should use the new ghosting implementation. 
-  integer m_useNewGhosting = 0;
 };
 
 } /* namespace geos */
diff --git a/src/coreComponents/mainInterface/initialization.cpp b/src/coreComponents/mainInterface/initialization.cpp
index 75ea5de3b61..0d28cfaf534 100644
--- a/src/coreComponents/mainInterface/initialization.cpp
+++ b/src/coreComponents/mainInterface/initialization.cpp
@@ -103,6 +103,7 @@ std::unique_ptr< CommandLineOptions > parseCommandLineOptions( int argc, char *
     TRACE_DATA_MIGRATION,
     MEMORY_USAGE,
     PAUSE_FOR,
+    NEW_GHOST,
   };
 
   const option::Descriptor usage[] =
@@ -123,6 +124,7 @@ std::unique_ptr< CommandLineOptions > parseCommandLineOptions( int argc, char *
     { TRACE_DATA_MIGRATION, 0, "", "trace-data-migration", Arg::None, "\t--trace-data-migration, \t Trace host-device data migration" },
     { MEMORY_USAGE, 0, "m", "memory-usage", Arg::nonEmpty, "\t-m, --memory-usage, \t Minimum threshold for printing out memory allocations in a member of the data repository." },
     { PAUSE_FOR, 0, "", "pause-for", Arg::numeric, "\t--pause-for, \t Pause geosx for a given number of seconds before starting execution" },
+    { NEW_GHOST, 0, "g", "new-ghosting", Arg::numeric, "\t-g, --new-ghosting, \t Use new ghosting strategy" },
     { 0, 0, nullptr, nullptr, nullptr, nullptr }
   };
 
@@ -237,6 +239,11 @@ std::unique_ptr< CommandLineOptions > parseCommandLineOptions( int argc, char *
         std::this_thread::sleep_for( std::chrono::seconds( duration ) );
       }
       break;
+      case NEW_GHOST:
+      {
+        commandLineOptions->useNewGhosting = 1;
+      }
+      break;
     }
   }
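With this final patch the new ghosting path is opt-in at run time rather than hard-wired on. A hypothetical launch, assuming the usual geosx invocation with -i for the input file (only the -g/--new-ghosting flag comes from this patch, and note that passing the flag with any numeric value enables the feature):

    # enable the new ghosting strategy on 4 ranks (input file name is made up)
    mpirun -n 4 geosx -i problem.xml -g 1

    # omit the flag to keep the default (useNewGhosting = 0, legacy ghosting)
    mpirun -n 4 geosx -i problem.xml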