From 91acdafcf7d5cc0754ca7b31f286ad6f6f950eca Mon Sep 17 00:00:00 2001
From: dkachuma <dickkachuma@googlemail.com>
Date: Thu, 13 Jun 2024 12:33:47 -0500
Subject: [PATCH 1/9] Implement linear solve

---
 .../interfaces/blaslapack/BlasLapackLA.cpp    | 171 +++++++++++++-----
 .../interfaces/blaslapack/BlasLapackLA.hpp    |  74 ++++++++
 2 files changed, 203 insertions(+), 42 deletions(-)

diff --git a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp
index d05bbf8aef2..3b57caeeedf 100644
--- a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp
+++ b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp
@@ -379,6 +379,97 @@ void matrixLeastSquaresSolutionSolve( arraySlice2d< real64, USD > const & A,
   GEOS_ERROR_IF( INFO != 0, "The algorithm computing matrix linear system failed to converge." );
 }
 
+template< int USD, bool INPLACE >
+struct MatrixInPlace
+{
+  using type = arraySlice2d< real64, USD >;
+};
+
+template< int USD >
+struct MatrixInPlace< USD, false >
+{
+  using type = arraySlice2d< real64 const, USD >;
+};
+
+template< bool INPLACE, int USD >
+void solveLinearSystem( typename MatrixInPlace< USD, INPLACE >::type const & A,
+                        arraySlice2d< real64 const, USD > const & B,
+                        arraySlice2d< real64, USD > const & X )
+{
+  // --- Check that source matrix is square
+  int const N = LvArray::integerConversion< int >( A.size( 0 ) );
+  GEOS_ASSERT_MSG( N > 0 &&
+                   N == A.size( 1 ),
+                   "Matrix must be square" );
+
+  // --- Check that rhs B has appropriate dimensions
+  GEOS_ASSERT_MSG( B.size( 0 ) == N,
+                   "right-hand-side matrix has wrong dimensions" );
+  int const M = LvArray::integerConversion< int >( B.size( 1 ) );
+
+  // --- Check that solution X has appropriate dimensions
+  GEOS_ASSERT_MSG( X.size( 0 ) == N &&
+                   X.size( 1 ) == M,
+                   "solution matrix has wrong dimensions" );
+
+  real64 * matrixData = nullptr;
+  array2d< real64 > LU;   // Space for LU-factors
+  if constexpr ( INPLACE )
+  {
+    matrixData = A.dataIfContiguous();
+  }
+  else
+  {
+    LU.resize( N, N );
+    matrixData = LU.data();
+    // Direct copy here ignoring permutation
+    int const INCX = 1;
+    int const INCY = 1;
+    int const K = LvArray::integerConversion< int >( A.size( ) );
+    GEOS_dcopy( &K, A.dataIfContiguous(), &INCX, matrixData, &INCY );
+  }
+
+  array1d< int > IPIV( N );
+  int INFO;
+  char const TRANS = (USD == MatrixLayout::ROW_MAJOR) ? 'T' : 'N';
+
+  GEOS_dgetrf( &N, &N, matrixData, &N, IPIV.data(), &INFO );
+
+  GEOS_ASSERT_MSG( INFO == 0, "LAPACK dgetrf error code: " << INFO );
+
+  // If copyB is false then it means that on entry B == X
+  if constexpr ( !INPLACE )
+  {
+    int const INCX = 1;
+    int const INCY = 1;
+    int const K = LvArray::integerConversion< int >( B.size( ) );
+    GEOS_dcopy( &K, B.dataIfContiguous(), &INCX, X.dataIfContiguous(), &INCY );
+  }
+
+  GEOS_dgetrs( &TRANS, &N, &M, matrixData, &N, IPIV.data(), X.dataIfContiguous(), &N, &INFO );
+
+  GEOS_ASSERT_MSG( INFO == 0, "LAPACK dgetrs error code: " << INFO );
+}
+
+template< bool INPLACE, int USD >
+void solveLinearSystem( typename MatrixInPlace< USD, INPLACE >::type const & A,
+                        arraySlice1d< real64 const > const & b,
+                        arraySlice1d< real64 > const & x )
+{
+  // --- Check that b and x have the same size
+  int const N = LvArray::integerConversion< int >( b.size( 0 ) );
+  GEOS_ASSERT_MSG( 0 < N && x.size() == N,
+                   "right-hand-side and/or solution have wrong dimensions" );
+
+  // Create 2d slices
+  int const dims[2] = {N, 1};
+  int const strides[2] = {1, 1};
+  arraySlice2d< real64 const, USD > B( b.dataIfContiguous(), dims, strides );
+  arraySlice2d< real64, USD > X( x.dataIfContiguous(), dims, strides );
+
+  solveLinearSystem< INPLACE >( A, B, X );
+}
+
 } // namespace detail
 
 real64 BlasLapackLA::determinant( arraySlice2d< real64 const, MatrixLayout::ROW_MAJOR > const & A )
@@ -885,60 +976,56 @@ void BlasLapackLA::matrixEigenvalues( MatRowMajor< real64 const > const & A,
   matrixEigenvalues( AT.toSliceConst(), lambda );
 }
 
-void BlasLapackLA::solveLinearSystem( MatColMajor< real64 const > const & A,
+void BlasLapackLA::solveLinearSystem( MatRowMajor< real64 const > const & A,
                                       arraySlice1d< real64 const > const & rhs,
                                       arraySlice1d< real64 > const & solution )
 {
-  // --- Check that source matrix is square
-  int const NN = LvArray::integerConversion< int >( A.size( 0 ));
-  GEOS_ASSERT_MSG( NN > 0 &&
-                   NN == A.size( 1 ),
-                   "Matrix must be square" );
-
-  // --- Check that rhs and solution have appropriate dimension
-  GEOS_ASSERT_MSG( rhs.size( 0 ) == NN,
-                   "right-hand-side vector has wrong dimensions" );
-
-  GEOS_ASSERT_MSG( solution.size( 0 ) == NN,
-                   "solution vector has wrong dimensions" );
-
-  array1d< int > IPIV;
-  IPIV.resize( NN );
-  int const NRHS = 1; // we only allow for 1 rhs vector.
-  int INFO;
-
-  // make a copy of A, since dgeev destroys contents
-  array2d< real64, MatrixLayout::COL_MAJOR_PERM > ACOPY( A.size( 0 ), A.size( 1 ) );
-  BlasLapackLA::matrixCopy( A, ACOPY );
-
-  // copy the rhs in the solution vector
-  BlasLapackLA::vectorCopy( rhs, solution );
-
-  GEOS_dgetrf( &NN, &NN, ACOPY.data(), &NN, IPIV.data(), &INFO );
+  detail::solveLinearSystem< false, MatrixLayout::ROW_MAJOR >( A, rhs, solution );
+}
 
-  GEOS_ASSERT_MSG( INFO == 0, "LAPACK dgetrf error code: " << INFO );
+void BlasLapackLA::solveLinearSystem( MatColMajor< real64 const > const & A,
+                                      arraySlice1d< real64 const > const & rhs,
+                                      arraySlice1d< real64 > const & solution )
+{
+  detail::solveLinearSystem< false, MatrixLayout::COL_MAJOR >( A, rhs, solution );
+}
 
-  GEOS_dgetrs( "N", &NN, &NRHS, ACOPY.data(), &NN, IPIV.data(), solution.dataIfContiguous(), &NN, &INFO );
+void BlasLapackLA::solveLinearSystem( MatRowMajor< real64 > const & A,
+                                      Vec< real64 > const & rhs )
+{
+  detail::solveLinearSystem< true, MatrixLayout::ROW_MAJOR >( A, rhs.toSliceConst(), rhs );
+}
 
-  GEOS_ASSERT_MSG( INFO == 0, "LAPACK dgetrs error code: " << INFO );
+void BlasLapackLA::solveLinearSystem( MatColMajor< real64 > const & A,
+                                      Vec< real64 > const & rhs )
+{
+  detail::solveLinearSystem< true, MatrixLayout::COL_MAJOR >( A, rhs.toSliceConst(), rhs );
 }
 
 void BlasLapackLA::solveLinearSystem( MatRowMajor< real64 const > const & A,
-                                      arraySlice1d< real64 const > const & rhs,
-                                      arraySlice1d< real64 > const & solution )
+                                      MatRowMajor< real64 const > const & rhs,
+                                      MatRowMajor< real64 > const & solution )
 {
-  array2d< real64, MatrixLayout::COL_MAJOR_PERM > AT( A.size( 0 ), A.size( 1 ) );
+  detail::solveLinearSystem< false, MatrixLayout::ROW_MAJOR >( A, rhs, solution );
+}
 
-  // convert A to a column major format
-  for( int i = 0; i < A.size( 0 ); ++i )
-  {
-    for( int j = 0; j < A.size( 1 ); ++j )
-    {
-      AT( i, j ) = A( i, j );
-    }
-  }
+void BlasLapackLA::solveLinearSystem( MatColMajor< real64 const > const & A,
+                                      MatColMajor< real64 const > const & rhs,
+                                      MatColMajor< real64 > const & solution )
+{
+  detail::solveLinearSystem< false, MatrixLayout::COL_MAJOR >( A, rhs, solution );
+}
 
-  solveLinearSystem( AT.toSliceConst(), rhs, solution );
+void BlasLapackLA::solveLinearSystem( MatRowMajor< real64 > const & A,
+                                      MatRowMajor< real64 > const & rhs )
+{
+  detail::solveLinearSystem< true, MatrixLayout::ROW_MAJOR >( A, rhs.toSliceConst(), rhs );
+}
+
+void BlasLapackLA::solveLinearSystem( MatColMajor< real64 > const & A,
+                                      MatColMajor< real64 > const & rhs )
+{
+  detail::solveLinearSystem< true, MatrixLayout::COL_MAJOR >( A, rhs.toSliceConst(), rhs );
 }
 
 void BlasLapackLA::matrixLeastSquaresSolutionSolve( arraySlice2d< real64 const, MatrixLayout::ROW_MAJOR > const & A,
diff --git a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.hpp b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.hpp
index adc72007e49..63bd7fad626 100644
--- a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.hpp
+++ b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.hpp
@@ -467,6 +467,80 @@ struct BlasLapackLA
                                  Vec< real64 const > const & rhs,
                                  Vec< real64 > const & solution );
 
+  /**
+   * @brief Solves the linear system ;
+   * \p A  \p solution = \p rhs.
+   *
+   * @details The method is intended for the solution of a small dense linear system.
+   * This solves the system in-place without allocating extra memory for the matrix or the solution. This means
+   * that at on exit the matrix is modified replaced by the LU factors and the right hand side vector is
+   * replaced by the solution.
+   * It employs lapack method dgetr.
+   *
+   * @param [in/out]  A GEOSX array2d. The matrix. On exit this will be replaced by the factorisation of A
+   * @param [in/out]  rhs GEOSX array1d. The right hand side. On exit this will be the solution
+   */
+  static void solveLinearSystem( MatRowMajor< real64 > const & A,
+                                 Vec< real64 > const & rhs );
+
+  /**
+   * @copydoc solveLinearSystem( MatRowMajor< real64 > const &, Vec< real64 > const & )
+   *
+   * @note this function first applies a matrix permutation and then calls the row major version of the function.
+   */
+  static void solveLinearSystem( MatColMajor< real64 > const & A,
+                                 Vec< real64 > const & rhs );
+
+  /**
+   * @brief Solves the linear system ;
+   * \p A  \p solution = \p rhs.
+   *
+   * @details The method is intended for the solution of a small dense linear system in which A is an NxN matrix, the
+   * right-hand-side and the solution are matrices of size NxM.
+   * It employs lapack method dgetr.
+   *
+   * @param [in]  A GEOSX array2d.
+   * @param [in]  rhs GEOSX array2d.
+   * @param [out] solution GEOSX array2d.
+   */
+  static void solveLinearSystem( MatRowMajor< real64 const > const & A,
+                                 MatRowMajor< real64 const > const & rhs,
+                                 MatRowMajor< real64 > const & solution );
+
+  /**
+   * @copydoc solveLinearSystem( MatRowMajor< real64 const > const &, MatRowMajor< real64 const > const &, MatRowMajor< const > const & )
+   *
+   * @note this function first applies a matrix permutation and then calls the row major version of the function.
+   */
+  static void solveLinearSystem( MatColMajor< real64 const > const & A,
+                                 MatColMajor< real64 const > const & rhs,
+                                 MatColMajor< real64 > const & solution );
+
+  /**
+   * @brief Solves the linear system ;
+   * \p A  \p solution = \p rhs.
+   *
+   * @details The method is intended for the solution of a small dense linear system in which A is an NxN matrix, the
+   * right-hand-side and the solution are matrices of size NxM.
+   * This solves the system in-place without allocating extra memory for the matrix or the solution. This means
+   * that at on exit the matrix is modified replaced by the LU factors and the right hand side vector is
+   * replaced by the solution.
+   * It employs lapack method dgetr.
+   *
+   * @param [in/out]  A GEOSX array2d. The matrix. On exit this will be replaced by the factorisation of A
+   * @param [in/out]  rhs GEOSX array1d. The right hand side. On exit this will be the solution
+   */
+  static void solveLinearSystem( MatRowMajor< real64 > const & A,
+                                 MatRowMajor< real64 > const & rhs );
+
+  /**
+   * @copydoc solveLinearSystem( MatRowMajor< real64 > const &, MatRowMajor< real64 > const & )
+   *
+   * @note this function first applies a matrix permutation and then calls the row major version of the function.
+   */
+  static void solveLinearSystem( MatColMajor< real64 > const & A,
+                                 MatColMajor< real64 > const & rhs );
+
   /**
    * @brief Vector copy;
    * \p Y = \p X.

From a6e7fba0b4d1b4a560a6befd9b52c38d2c812012 Mon Sep 17 00:00:00 2001
From: dkachuma <dickkachuma@googlemail.com>
Date: Mon, 17 Jun 2024 14:37:06 -0500
Subject: [PATCH 2/9] Add some unit tests

---
 .../blaslapack/BlasLapackFunctions.h          |  13 +
 .../interfaces/blaslapack/BlasLapackLA.cpp    |  70 ++-
 .../unitTests/CMakeLists.txt                  |   3 +-
 .../unitTests/testSolveLinearSystem.cpp       | 499 ++++++++++++++++++
 4 files changed, 580 insertions(+), 5 deletions(-)
 create mode 100644 src/coreComponents/denseLinearAlgebra/unitTests/testSolveLinearSystem.cpp

diff --git a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackFunctions.h b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackFunctions.h
index fe66dcfc2ce..706b805da05 100644
--- a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackFunctions.h
+++ b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackFunctions.h
@@ -175,6 +175,19 @@ void GEOS_dgels( char const * TRANS,
                  int const * LWORK,
                  int * INFO );
 
+#define GEOS_dtrsm FORTRAN_MANGLE( dtrsm )
+void GEOS_dtrsm( char const * SIDE,
+char const * UPLO,
+char const * TRANSA,
+char const * DIAG,
+int const * M,
+                     int const * N,
+                     double const * ALPHA,
+                 double * A,
+                 int const * LDA,
+                 double * B,
+                 int const * LDB );
+
 }
 #endif
 
diff --git a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp
index 3b57caeeedf..c93113c7687 100644
--- a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp
+++ b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp
@@ -412,6 +412,11 @@ void solveLinearSystem( typename MatrixInPlace< USD, INPLACE >::type const & A,
                    X.size( 1 ) == M,
                    "solution matrix has wrong dimensions" );
 
+  // --- Check that everything is contiguous
+  GEOS_ASSERT_MSG( A.isContiguous(), "Matrix is not contiguous" );
+  GEOS_ASSERT_MSG( B.isContiguous(), "right-hand-side matrix is not contiguous" );
+  GEOS_ASSERT_MSG( X.isContiguous(), "solution matrix is not contiguous" );
+
   real64 * matrixData = nullptr;
   array2d< real64 > LU;   // Space for LU-factors
   if constexpr ( INPLACE )
@@ -431,13 +436,22 @@ void solveLinearSystem( typename MatrixInPlace< USD, INPLACE >::type const & A,
 
   array1d< int > IPIV( N );
   int INFO;
-  char const TRANS = (USD == MatrixLayout::ROW_MAJOR) ? 'T' : 'N';
+  char transA[3] = {'N','T','T'};
+  char matA[2] = {'L','U'};
+  char const TRANS = (USD == MatrixLayout::ROW_MAJOR) ? transA[0] : 'N';
 
   GEOS_dgetrf( &N, &N, matrixData, &N, IPIV.data(), &INFO );
 
   GEOS_ASSERT_MSG( INFO == 0, "LAPACK dgetrf error code: " << INFO );
 
-  // If copyB is false then it means that on entry B == X
+  std::cout << "IPIV:";
+  for( integer i = 0; i < N; ++i )
+  {
+    std::cout << " " << IPIV[i];
+  }
+  std::cout << "\n";
+
+  // If INPLACE is false then it means that on entry B == X
   if constexpr ( !INPLACE )
   {
     int const INCX = 1;
@@ -446,7 +460,55 @@ void solveLinearSystem( typename MatrixInPlace< USD, INPLACE >::type const & A,
     GEOS_dcopy( &K, B.dataIfContiguous(), &INCX, X.dataIfContiguous(), &INCY );
   }
 
-  GEOS_dgetrs( &TRANS, &N, &M, matrixData, &N, IPIV.data(), X.dataIfContiguous(), &N, &INFO );
+  auto printMatrix = []( auto const & AA )
+  {
+    int const MA = LvArray::integerConversion< int >( AA.size( 0 ) );
+    int const NA = LvArray::integerConversion< int >( AA.size( 1 ) );
+    std::cout << std::fixed << std::setprecision( 4 );
+    for( int i = 0; i < MA; i++ )
+    {
+      for( int j = 0; j < NA; j++ )
+      {
+        std::cout << " " << std::setw( 10 ) << AA( i, j );
+      }
+      std::cout << "\n";
+    }
+    std::cout << "------------------------------------------------\n";
+  };
+
+  if constexpr (USD == MatrixLayout::ROW_MAJOR)
+  {
+    printMatrix( LU );
+    //int const K1 = 1;
+    //int const K2 = N;
+    //int const INCX = 1;
+    //GEOS_dlaswp( &M, X.dataIfContiguous(), &N, &K1, &K2, IPIV.data(), &INCX );
+
+    printMatrix( X );
+    double const ALPHA = 1.0;
+    GEOS_dtrsm( "R", &matA[0], &transA[1], "N", &N, &M, &ALPHA, matrixData, &N, X.dataIfContiguous(), &N );
+    printMatrix( X );
+    for( int i = N-1; i >= 0; i-- )
+    {
+      int j = IPIV[i] - 1;
+      for( int k = 0; k < M; ++k )
+      {
+        real64 const t = X( i, k );
+        X( i, k ) = X( j, k );
+        X( j, k ) = t;
+      }
+    }
+    printMatrix( X );    
+    GEOS_dtrsm( "R", &matA[1], &transA[2], "N", &N, &M, &ALPHA, matrixData, &N, X.dataIfContiguous(), &N );
+    printMatrix( X );
+
+//CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, 1 )
+  }
+  else
+  {
+    GEOS_UNUSED_VAR( printMatrix );
+    GEOS_dgetrs( &TRANS, &N, &M, matrixData, &N, IPIV.data(), X.dataIfContiguous(), &N, &INFO );
+  }
 
   GEOS_ASSERT_MSG( INFO == 0, "LAPACK dgetrs error code: " << INFO );
 }
@@ -459,7 +521,7 @@ void solveLinearSystem( typename MatrixInPlace< USD, INPLACE >::type const & A,
   // --- Check that b and x have the same size
   int const N = LvArray::integerConversion< int >( b.size( 0 ) );
   GEOS_ASSERT_MSG( 0 < N && x.size() == N,
-                   "right-hand-side and/or solution have wrong dimensions" );
+                   "right-hand-side and/or solution has wrong dimensions" );
 
   // Create 2d slices
   int const dims[2] = {N, 1};
diff --git a/src/coreComponents/denseLinearAlgebra/unitTests/CMakeLists.txt b/src/coreComponents/denseLinearAlgebra/unitTests/CMakeLists.txt
index 1b7d0eefb3d..114dee90aac 100644
--- a/src/coreComponents/denseLinearAlgebra/unitTests/CMakeLists.txt
+++ b/src/coreComponents/denseLinearAlgebra/unitTests/CMakeLists.txt
@@ -1,5 +1,6 @@
 set( serial_tests
-     testBlasLapack.cpp )
+     testBlasLapack.cpp
+     testSolveLinearSystem.cpp )
 
 set( dependencyList gtest denseLinearAlgebra )
 
diff --git a/src/coreComponents/denseLinearAlgebra/unitTests/testSolveLinearSystem.cpp b/src/coreComponents/denseLinearAlgebra/unitTests/testSolveLinearSystem.cpp
new file mode 100644
index 00000000000..9de9984a2fd
--- /dev/null
+++ b/src/coreComponents/denseLinearAlgebra/unitTests/testSolveLinearSystem.cpp
@@ -0,0 +1,499 @@
+/*
+ * ------------------------------------------------------------------------------------------------------------
+ * SPDX-License-Identifier: LGPL-2.1-only
+ *
+ * Copyright (c) 2018-2020 Lawrence Livermore National Security LLC
+ * Copyright (c) 2018-2020 The Board of Trustees of the Leland Stanford Junior University
+ * Copyright (c) 2018-2020 TotalEnergies
+ * Copyright (c) 2019-     GEOSX Contributors
+ * All rights reserved
+ *
+ * See top level LICENSE, COPYRIGHT, CONTRIBUTORS, NOTICE, and ACKNOWLEDGEMENTS files for details.
+ * ------------------------------------------------------------------------------------------------------------
+ */
+
+/**
+ * @file testSolveLinearSystem.cpp
+ */
+
+// Source includes
+#include "denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.hpp"
+
+#include "gtest/gtest.h"
+
+using namespace geos;
+
+using INDEX_TYPE = std::ptrdiff_t;
+
+constexpr INDEX_TYPE MAX_SIZE = 20;
+constexpr real64 machinePrecision = 1.0e2 * LvArray::NumericLimits< real64 >::epsilon;
+
+// Test matrices
+enum TestMatrixType
+{
+  LAPLACE,
+  SAMPLING,
+  REDHEFFER,
+  GRCAR
+};
+
+void random_permutation( arraySlice1d< INDEX_TYPE > const & pivot )
+{
+  INDEX_TYPE const N = LvArray::integerConversion< INDEX_TYPE >( pivot.size() );
+  GEOS_ASSERT( 2 < N );
+
+  for( INDEX_TYPE j = 0; j < N; j++ )
+  {
+    pivot[j] = j;
+  }
+
+  StackArray< real64, 1, MAX_SIZE > ordering( N );
+  BlasLapackLA::vectorRand( ordering.toSlice() );
+
+  for( INDEX_TYPE j = 0; j < N; j++ )
+  {
+    INDEX_TYPE const i = static_cast< INDEX_TYPE >(ordering[j]*N);
+    INDEX_TYPE const pi = pivot[i];
+    pivot[i] = pivot[j];
+    pivot[j] = pi;
+  }
+}
+
+template< TestMatrixType MATRIX_TYPE >
+struct TestMatrix {};
+
+template<>
+struct TestMatrix< TestMatrixType::LAPLACE >
+{
+  template< int USD >
+  static void create( arraySlice2d< real64, USD > const & A )
+  {
+    INDEX_TYPE const N = LvArray::integerConversion< INDEX_TYPE >( A.size( 0 ) );
+    GEOS_ASSERT( 2 < N );
+    LvArray::forValuesInSlice( A, []( real64 & a ){ a = 0.0; } );
+    for( INDEX_TYPE i = 0; i < N-1; i++ )
+    {
+      A( i+1, i ) = -1.0;
+      A( i+1, i+1 ) = 2.0;
+      A( i, i+1 ) = -1.0;
+    }
+    A( 0, 0 ) = 2.0;
+  }
+};
+
+template<>
+struct TestMatrix< TestMatrixType::SAMPLING >
+{
+  template< int USD >
+  static void create( arraySlice2d< real64, USD > const & A )
+  {
+    INDEX_TYPE const N = LvArray::integerConversion< INDEX_TYPE >( A.size( 0 ) );
+    GEOS_ASSERT( 2 < N );
+    StackArray< INDEX_TYPE, 1, MAX_SIZE > pivot( N );
+    random_permutation( pivot.toSlice() );
+
+    real64 minajj = LvArray::NumericLimits< real64 >::max;
+    for( INDEX_TYPE j = 1; j <= N; j++ )
+    {
+      real64 ajj = 0.0;
+      INDEX_TYPE const pj = pivot[j-1];
+      for( INDEX_TYPE i = 1; i <= N; i++ )
+      {
+        if( i != j )
+        {
+          A( i-1, pj ) = 0.0; //static_cast< real64 >(i)/static_cast< real64 >(i-j);
+          ajj += A( i-1, pj );
+        }
+      }
+      A( j-1, pj ) = ajj;
+      minajj = LvArray::math::min( ajj, minajj );
+    }
+    for( INDEX_TYPE j = 0; j < N; j++ )
+    {
+      A( j, pivot[j] ) -= minajj;
+      A( j, pivot[j] ) += 1.0;
+    }
+  }
+};
+
+template<>
+struct TestMatrix< TestMatrixType::REDHEFFER >
+{
+  template< int USD >
+  static void create( arraySlice2d< real64, USD > const & A )
+  {
+    INDEX_TYPE const N = LvArray::integerConversion< INDEX_TYPE >( A.size( 0 ) );
+    GEOS_ASSERT( 2 < N );
+    for( INDEX_TYPE i = 1; i <= N; i++ )
+    {
+      for( INDEX_TYPE j = 1; j <= N; j++ )
+      {
+        A( i-1, j-1 ) = (j==1) || ((j%i)==0) ? 1.0 : 0.0;
+      }
+    }
+  }
+};
+
+template<>
+struct TestMatrix< TestMatrixType::GRCAR >
+{
+  template< int USD >
+  static void create( arraySlice2d< real64, USD > const & A )
+  {
+    INDEX_TYPE const N = LvArray::integerConversion< INDEX_TYPE >( A.size( 0 ) );
+    GEOS_ASSERT( 4 < N );
+    LvArray::forValuesInSlice( A, []( real64 & a ){ a = 0.0; } );
+    for( INDEX_TYPE i = 0; i < N-1; i++ )
+    {
+      A( i+1, i ) = -1.0;
+    }
+    for( INDEX_TYPE c = 0; c < 4; c++ )
+    {
+      for( INDEX_TYPE i = 0; i < N-c; i++ )
+      {
+        A( i, i+c ) = 1.0;
+      }
+    }
+  }
+};
+
+// Local naive matrix-vector multiply
+template< int USD >
+void matrix_vector_multiply( arraySlice2d< real64 const, USD > const & A,
+                             arraySlice1d< real64 const > const & x,
+                             arraySlice1d< real64 > const & b )
+{
+  int const N = LvArray::integerConversion< int >( A.size( 0 ) );
+  for( INDEX_TYPE i = 0; i < N; ++i )
+  {
+    real64 bi = 0.0;
+    for( INDEX_TYPE j = 0; j < N; ++j )
+    {
+      bi += A( i, j )*x( j );
+    }
+    b( i ) = bi;
+  }
+}
+
+// Local naive matrix-matrix multiply
+template< int USD >
+void matrix_matrix_multiply( arraySlice2d< real64 const, USD > const & A,
+                             arraySlice2d< real64 const, USD > const & X,
+                             arraySlice2d< real64, USD > const & B )
+{
+  INDEX_TYPE const K = LvArray::integerConversion< INDEX_TYPE >( A.size( 1 ) );
+  INDEX_TYPE const M = LvArray::integerConversion< INDEX_TYPE >( X.size( 0 ) );
+  INDEX_TYPE const N = LvArray::integerConversion< INDEX_TYPE >( X.size( 1 ) );
+  for( INDEX_TYPE i = 0; i < M; ++i )
+  {
+    for( INDEX_TYPE j = 0; j < N; ++j )
+    {
+      real64 bij = 0.0;
+      for( INDEX_TYPE k = 0; k < K; ++k )
+      {
+        bij += A( i, k )*X( k, j );
+      }
+      B( i, j ) = bij;
+    }
+  }
+}
+
+template< typename MatrixType >
+struct ArrayType {};
+
+template<>
+struct ArrayType< Array< real64, 2, MatrixLayout::COL_MAJOR_PERM > >
+{
+  using type = Array< real64, 1 >;
+};
+template<>
+struct ArrayType< Array< real64, 2, MatrixLayout::ROW_MAJOR_PERM > >
+{
+  using type = Array< real64, 1 >;
+};
+template<>
+struct ArrayType< StackArray< real64, 2, MAX_SIZE *MAX_SIZE, MatrixLayout::COL_MAJOR_PERM > >
+{
+  using type = StackArray< real64, 1, MAX_SIZE >;
+};
+template<>
+struct ArrayType< StackArray< real64, 2, MAX_SIZE *MAX_SIZE, MatrixLayout::ROW_MAJOR_PERM > >
+{
+  using type = StackArray< real64, 1, MAX_SIZE >;
+};
+
+template< typename MatrixType >
+class LinearSolveFixture : public ::testing::Test
+{
+public:
+  using VectorType = typename ArrayType< MatrixType >::type;
+public:
+  LinearSolveFixture() = default;
+  ~LinearSolveFixture() override = default;
+
+  template< TestMatrixType TEST_MATRIX, INDEX_TYPE N >
+  void test_matrix_vector_solve() const
+  {
+    static_assert( 1 < N && N < MAX_SIZE );
+
+    MatrixType A( N, N );
+    TestMatrix< TEST_MATRIX >::create( A.toSlice() );
+
+    VectorType b ( N );
+    VectorType x ( N );
+    VectorType x0 ( N );
+
+    // Save selected values of A to check later
+    real64 const a00 = A( 0, 0 );
+    real64 const a01 = A( 0, 1 );
+    real64 const a10 = A( N-1, N-2 );
+    real64 const a11 = A( N-1, N-1 );
+
+    // Create actual solution
+    LvArray::forValuesInSlice( x0.toSlice(), []( real64 & a ){ a = 1.0; } );
+
+    // Multiply to get rhs
+    matrix_vector_multiply( A.toSliceConst(), x0.toSliceConst(), b.toSlice() );
+
+    // Save selected values of b to check later
+    real64 const b0 = b( 0 );
+    real64 const b1 = b( N-1 );
+
+    // Solve
+    BlasLapackLA::solveLinearSystem( A.toSliceConst(), b.toSliceConst(), x.toSlice() );
+
+    // Check solution
+    for( INDEX_TYPE i = 0; i < N; ++i )
+    {
+      EXPECT_NEAR( x( i ), x0( i ), machinePrecision );
+    }
+
+    // Check that we have not destroyed A
+    EXPECT_NEAR( A( 0, 0 ), a00, machinePrecision );
+    EXPECT_NEAR( A( 0, 1 ), a01, machinePrecision );
+    EXPECT_NEAR( A( N-1, N-2 ), a10, machinePrecision );
+    EXPECT_NEAR( A( N-1, N-1 ), a11, machinePrecision );
+
+    // Check that we have not destroyed b
+    EXPECT_NEAR( b( 0 ), b0, machinePrecision );
+    EXPECT_NEAR( b( N-1 ), b1, machinePrecision );
+  }
+
+  template< TestMatrixType TEST_MATRIX, INDEX_TYPE N >
+  void test_matrix_vector_solve_inplace( ) const
+  {
+    static_assert( 1 < N && N < MAX_SIZE );
+
+    MatrixType A( N, N );
+    TestMatrix< TEST_MATRIX >::create( A.toSlice() );
+
+    VectorType x ( N );
+    VectorType x0 ( N );
+
+    // Create actual solution
+    LvArray::forValuesInSlice( x0.toSlice(), []( real64 & a ){ a = 1.0; } );
+
+    // Multiply to get rhs
+    matrix_vector_multiply( A.toSliceConst(), x0.toSliceConst(), x.toSlice() );
+
+    // Solve
+    BlasLapackLA::solveLinearSystem( A, x.toSlice() );
+
+    // Check in place
+    for( INDEX_TYPE i = 0; i < N; ++i )
+    {
+      EXPECT_NEAR( x( i ), x0( i ), machinePrecision );
+    }
+  }
+
+  template< TestMatrixType TEST_MATRIX, int N, int M >
+  void test_matrix_matrix_solve( ) const
+  {
+    static_assert( 1 < N && N < MAX_SIZE );
+    static_assert( 1 <= M && M < MAX_SIZE );
+
+    MatrixType A ( N, N );
+    TestMatrix< TEST_MATRIX >::create( A.toSlice() );
+    A( 0, 1 ) = 0.3141;
+    std::cout << std::fixed << std::setprecision( 4 );
+    for( int i = 0; i < N; i++ )
+    {
+      for( int j = 0; j < N; j++ )
+      {
+        std::cout << " " << std::setw( 10 ) << A( i, j );
+      }
+      std::cout << "\n";
+    }
+    std::cout << "\n";
+
+    MatrixType B ( N, M );
+    MatrixType X ( N, M );
+    MatrixType X0 ( N, M );
+
+    real64 const a00 = A( 0, 0 );
+    real64 const a01 = A( 0, 1 );
+    real64 const a10 = A( N-1, N-2 );
+    real64 const a11 = A( N-1, N-1 );
+
+    // Create actual solution
+    // Populate matrix with random coefficients
+    BlasLapackLA::matrixRand( X0,
+                              BlasLapackLA::RandomNumberDistribution::UNIFORM_m1p1 );
+    for( int i = 0; i < N; i++ )
+    {
+      for( int j = 0; j < M; j++ )
+      {
+        X0( i, j ) = 0.1*(j+1) + (i+1);
+      }
+    }
+    std::cout << "---------------------------------------\n";
+
+
+    // Multiply to get rhs
+    matrix_matrix_multiply( A.toSliceConst(), X0.toSliceConst(), B.toSlice() );
+
+    // Save selected values of B to check later
+    real64 const b00 = B( 0, 0 );
+    real64 const b01 = B( N-1, M-1 );
+
+    // Solve
+    BlasLapackLA::solveLinearSystem( A.toSliceConst(), B.toSliceConst(), X.toSlice() );
+    for( int i = 0; i < N; i++ )
+    {
+      for( int j = 0; j < M; j++ )
+      {
+        std::cout << " " << std::setw( 10 ) << X0( i, j );
+      }
+      std::cout << "\n";
+    }
+    std::cout << "\n";
+    // Check
+    /**
+       for( INDEX_TYPE i = 0; i < N; ++i )
+       {
+       for( INDEX_TYPE j = 0; j < M; ++j )
+       {
+        EXPECT_NEAR( X( i, j ), X0( i, j ), machinePrecision );
+       }
+       }
+     */
+    // Check that we have not destroyed A
+    EXPECT_NEAR( A( 0, 0 ), a00, machinePrecision );
+    EXPECT_NEAR( A( 0, 1 ), a01, machinePrecision );
+    EXPECT_NEAR( A( N-1, N-2 ), a10, machinePrecision );
+    EXPECT_NEAR( A( N-1, N-1 ), a11, machinePrecision );
+
+    // Check that we have not destroyed b
+    EXPECT_NEAR( B( 0, 0 ), b00, machinePrecision );
+    EXPECT_NEAR( B( N-1, M-1 ), b01, machinePrecision );
+  }
+
+  template< TestMatrixType TEST_MATRIX, int N, int M >
+  void test_matrix_matrix_solve_inplace( ) const
+  {
+    static_assert( 1 < N && N < MAX_SIZE );
+    static_assert( 1 <= M && M < MAX_SIZE );
+
+    MatrixType A ( N, N );
+    TestMatrix< TEST_MATRIX >::create( A.toSlice() );
+
+    MatrixType X ( N, M );
+    MatrixType X0 ( N, M );
+
+    // Create actual solution
+    // Populate matrix with random coefficients
+    BlasLapackLA::matrixRand( X0,
+                              BlasLapackLA::RandomNumberDistribution::UNIFORM_m1p1 );
+
+    // Multiply to get rhs
+    matrix_matrix_multiply( A.toSliceConst(), X0.toSliceConst(), X.toSlice() );
+
+    // Solve
+    BlasLapackLA::solveLinearSystem( A.toSlice(), X.toSlice() );
+
+    // Check
+    for( INDEX_TYPE i = 0; i < N; ++i )
+    {
+      for( INDEX_TYPE j = 0; j < M; ++j )
+      {
+        EXPECT_NEAR( X( i, j ), X0( i, j ), machinePrecision );
+      }
+    }
+  }
+};
+
+/**
+   using LinearSolveTypes = ::testing::Types<
+   Array< real64, 2, MatrixLayout::COL_MAJOR_PERM >,
+   Array< real64, 2, MatrixLayout::ROW_MAJOR_PERM >,
+   StackArray< real64, 2, MAX_SIZE *MAX_SIZE, MatrixLayout::COL_MAJOR_PERM >,
+   StackArray< real64, 2, MAX_SIZE *MAX_SIZE, MatrixLayout::ROW_MAJOR_PERM > >;*/
+using LinearSolveTypes = ::testing::Types<
+  Array< real64, 2, MatrixLayout::COL_MAJOR_PERM >,
+  Array< real64, 2, MatrixLayout::ROW_MAJOR_PERM > >;
+TYPED_TEST_SUITE( LinearSolveFixture, LinearSolveTypes );
+
+/**
+   TYPED_TEST( LinearSolveFixture, matrix_vector_solve_laplace )
+   {
+   this->template test_matrix_vector_solve< TestMatrixType::LAPLACE, 5 >();
+   this->template test_matrix_vector_solve_inplace< TestMatrixType::LAPLACE, 5 >();
+   this->template test_matrix_vector_solve< TestMatrixType::LAPLACE, 12 >();
+   this->template test_matrix_vector_solve_inplace< TestMatrixType::LAPLACE, 12 >();
+   }
+
+   TYPED_TEST( LinearSolveFixture, matrix_vector_solve_grcar )
+   {
+   this->template test_matrix_vector_solve< TestMatrixType::GRCAR, 6 >();
+   this->template test_matrix_vector_solve_inplace< TestMatrixType::GRCAR, 6 >();
+   this->template test_matrix_vector_solve< TestMatrixType::GRCAR, 10 >();
+   this->template test_matrix_vector_solve_inplace< TestMatrixType::GRCAR, 10 >();
+   }
+
+
+   TYPED_TEST( LinearSolveFixture, matrix_matrix_solve_laplace )
+   {
+   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 1 >();
+   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 1 >();
+   this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 3 >();
+   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 3 >();
+   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 5 >();
+   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 5 >();
+   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 12 >();
+   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 12 >();
+   }
+
+   TYPED_TEST( LinearSolveFixture, matrix_matrix_solve_grcar )
+   {
+   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 1 >();
+   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 1 >();
+   this->template test_matrix_matrix_solve< TestMatrixType::GRCAR, 5, 3 >();
+   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 3 >();
+   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 5 >();
+   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 5 >();
+   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 12 >();
+   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 12 >();
+   }
+
+   TYPED_TEST( LinearSolveFixture, matrix_matrix_solve_redheffer )
+   {
+   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 1 >();
+   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 1 >();
+   this->template test_matrix_matrix_solve< TestMatrixType::REDHEFFER, 12, 3 >();
+   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 3 >();
+   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 5 >();
+   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 5 >();
+   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 12 >();
+   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 12 >();
+   }*/
+TYPED_TEST( LinearSolveFixture, matrix_matrix_solve_sampling )
+{
+  //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 1 >();
+  //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 1 >();
+  this->template test_matrix_matrix_solve< TestMatrixType::SAMPLING, 12, 3 >();
+  //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 3 >();
+  //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 5 >();
+  //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 5 >();
+  //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 12 >();
+  //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 12 >();
+}

From 84da4b05331e8b7880e259144bae1c26dbbce474 Mon Sep 17 00:00:00 2001
From: dkachuma <dickkachuma@googlemail.com>
Date: Tue, 18 Jun 2024 14:34:58 -0500
Subject: [PATCH 3/9] Use temporary space to transpose

---
 .../interfaces/blaslapack/BlasLapackLA.cpp    | 140 ++++----
 .../unitTests/testSolveLinearSystem.cpp       | 323 +++++++++---------
 2 files changed, 215 insertions(+), 248 deletions(-)

diff --git a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp
index c93113c7687..12081de3fdc 100644
--- a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp
+++ b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp
@@ -379,20 +379,8 @@ void matrixLeastSquaresSolutionSolve( arraySlice2d< real64, USD > const & A,
   GEOS_ERROR_IF( INFO != 0, "The algorithm computing matrix linear system failed to converge." );
 }
 
-template< int USD, bool INPLACE >
-struct MatrixInPlace
-{
-  using type = arraySlice2d< real64, USD >;
-};
-
-template< int USD >
-struct MatrixInPlace< USD, false >
-{
-  using type = arraySlice2d< real64 const, USD >;
-};
-
-template< bool INPLACE, int USD >
-void solveLinearSystem( typename MatrixInPlace< USD, INPLACE >::type const & A,
+template< typename T, int USD >
+void solveLinearSystem( arraySlice2d< T, USD > const & A,
                         arraySlice2d< real64 const, USD > const & B,
                         arraySlice2d< real64, USD > const & X )
 {
@@ -419,7 +407,7 @@ void solveLinearSystem( typename MatrixInPlace< USD, INPLACE >::type const & A,
 
   real64 * matrixData = nullptr;
   array2d< real64 > LU;   // Space for LU-factors
-  if constexpr ( INPLACE )
+  if constexpr ( !std::is_const< T >::value )
   {
     matrixData = A.dataIfContiguous();
   }
@@ -436,23 +424,13 @@ void solveLinearSystem( typename MatrixInPlace< USD, INPLACE >::type const & A,
 
   array1d< int > IPIV( N );
   int INFO;
-  char transA[3] = {'N','T','T'};
-  char matA[2] = {'L','U'};
-  char const TRANS = (USD == MatrixLayout::ROW_MAJOR) ? transA[0] : 'N';
+  char const TRANS = (USD == MatrixLayout::ROW_MAJOR) ? 'T' : 'N';
 
   GEOS_dgetrf( &N, &N, matrixData, &N, IPIV.data(), &INFO );
 
   GEOS_ASSERT_MSG( INFO == 0, "LAPACK dgetrf error code: " << INFO );
 
-  std::cout << "IPIV:";
-  for( integer i = 0; i < N; ++i )
-  {
-    std::cout << " " << IPIV[i];
-  }
-  std::cout << "\n";
-
-  // If INPLACE is false then it means that on entry B == X
-  if constexpr ( !INPLACE )
+  if constexpr ( std::is_const< T >::value )
   {
     int const INCX = 1;
     int const INCY = 1;
@@ -460,61 +438,69 @@ void solveLinearSystem( typename MatrixInPlace< USD, INPLACE >::type const & A,
     GEOS_dcopy( &K, B.dataIfContiguous(), &INCX, X.dataIfContiguous(), &INCY );
   }
 
-  auto printMatrix = []( auto const & AA )
+  // For row-major form, we need to reorder into col-major form
+  // This might require an extra allocation
+  real64 * solutionData = X.dataIfContiguous();
+  array2d< real64, MatrixLayout::COL_MAJOR_PERM > X0;
+  if constexpr ( USD == MatrixLayout::ROW_MAJOR )
   {
-    int const MA = LvArray::integerConversion< int >( AA.size( 0 ) );
-    int const NA = LvArray::integerConversion< int >( AA.size( 1 ) );
-    std::cout << std::fixed << std::setprecision( 4 );
-    for( int i = 0; i < MA; i++ )
+    if( 1 < M && M == N )
     {
-      for( int j = 0; j < NA; j++ )
+      // Square case: swap in place
+      for( int i = 0; i < N; i++ )
       {
-        std::cout << " " << std::setw( 10 ) << AA( i, j );
+        for( int j = i+1; j < N; j++ )
+        {
+          std::swap( X( i, j ), X( j, i ) );
+        }
       }
-      std::cout << "\n";
     }
-    std::cout << "------------------------------------------------\n";
-  };
-
-  if constexpr (USD == MatrixLayout::ROW_MAJOR)
-  {
-    printMatrix( LU );
-    //int const K1 = 1;
-    //int const K2 = N;
-    //int const INCX = 1;
-    //GEOS_dlaswp( &M, X.dataIfContiguous(), &N, &K1, &K2, IPIV.data(), &INCX );
-
-    printMatrix( X );
-    double const ALPHA = 1.0;
-    GEOS_dtrsm( "R", &matA[0], &transA[1], "N", &N, &M, &ALPHA, matrixData, &N, X.dataIfContiguous(), &N );
-    printMatrix( X );
-    for( int i = N-1; i >= 0; i-- )
+    else if( 1 < M )
     {
-      int j = IPIV[i] - 1;
-      for( int k = 0; k < M; ++k )
+      X0.resize( N, M );
+      for( int i = 0; i < N; i++ )
       {
-        real64 const t = X( i, k );
-        X( i, k ) = X( j, k );
-        X( j, k ) = t;
+        for( int j = 0; j < M; j++ )
+        {
+          X0( i, j ) = X( i, j );
+        }
       }
+      solutionData = X0.data();
     }
-    printMatrix( X );    
-    GEOS_dtrsm( "R", &matA[1], &transA[2], "N", &N, &M, &ALPHA, matrixData, &N, X.dataIfContiguous(), &N );
-    printMatrix( X );
-
-//CALL DLASWP( NRHS, B, LDB, 1, N, IPIV, 1 )
-  }
-  else
-  {
-    GEOS_UNUSED_VAR( printMatrix );
-    GEOS_dgetrs( &TRANS, &N, &M, matrixData, &N, IPIV.data(), X.dataIfContiguous(), &N, &INFO );
   }
 
+  GEOS_dgetrs( &TRANS, &N, &M, matrixData, &N, IPIV.data(), solutionData, &N, &INFO );
+
   GEOS_ASSERT_MSG( INFO == 0, "LAPACK dgetrs error code: " << INFO );
+
+  if constexpr ( USD == MatrixLayout::ROW_MAJOR )
+  {
+    if( 1 < M && M == N )
+    {
+      // Square case: swap in place
+      for( int i = 0; i < N; i++ )
+      {
+        for( int j = i+1; j < N; j++ )
+        {
+          std::swap( X( i, j ), X( j, i ) );
+        }
+      }
+    }
+    else if( 1 < M )
+    {
+      for( int i = 0; i < N; i++ )
+      {
+        for( int j = 0; j < M; j++ )
+        {
+          X( i, j ) = X0( i, j );
+        }
+      }
+    }
+  }
 }
 
-template< bool INPLACE, int USD >
-void solveLinearSystem( typename MatrixInPlace< USD, INPLACE >::type const & A,
+template< typename T, int USD >
+void solveLinearSystem( arraySlice2d< T, USD > const & A,
                         arraySlice1d< real64 const > const & b,
                         arraySlice1d< real64 > const & x )
 {
@@ -529,7 +515,7 @@ void solveLinearSystem( typename MatrixInPlace< USD, INPLACE >::type const & A,
   arraySlice2d< real64 const, USD > B( b.dataIfContiguous(), dims, strides );
   arraySlice2d< real64, USD > X( x.dataIfContiguous(), dims, strides );
 
-  solveLinearSystem< INPLACE >( A, B, X );
+  solveLinearSystem( A, B, X );
 }
 
 } // namespace detail
@@ -1042,52 +1028,52 @@ void BlasLapackLA::solveLinearSystem( MatRowMajor< real64 const > const & A,
                                       arraySlice1d< real64 const > const & rhs,
                                       arraySlice1d< real64 > const & solution )
 {
-  detail::solveLinearSystem< false, MatrixLayout::ROW_MAJOR >( A, rhs, solution );
+  detail::solveLinearSystem( A, rhs, solution );
 }
 
 void BlasLapackLA::solveLinearSystem( MatColMajor< real64 const > const & A,
                                       arraySlice1d< real64 const > const & rhs,
                                       arraySlice1d< real64 > const & solution )
 {
-  detail::solveLinearSystem< false, MatrixLayout::COL_MAJOR >( A, rhs, solution );
+  detail::solveLinearSystem( A, rhs, solution );
 }
 
 void BlasLapackLA::solveLinearSystem( MatRowMajor< real64 > const & A,
                                       Vec< real64 > const & rhs )
 {
-  detail::solveLinearSystem< true, MatrixLayout::ROW_MAJOR >( A, rhs.toSliceConst(), rhs );
+  detail::solveLinearSystem( A, rhs.toSliceConst(), rhs );
 }
 
 void BlasLapackLA::solveLinearSystem( MatColMajor< real64 > const & A,
                                       Vec< real64 > const & rhs )
 {
-  detail::solveLinearSystem< true, MatrixLayout::COL_MAJOR >( A, rhs.toSliceConst(), rhs );
+  detail::solveLinearSystem( A, rhs.toSliceConst(), rhs );
 }
 
 void BlasLapackLA::solveLinearSystem( MatRowMajor< real64 const > const & A,
                                       MatRowMajor< real64 const > const & rhs,
                                       MatRowMajor< real64 > const & solution )
 {
-  detail::solveLinearSystem< false, MatrixLayout::ROW_MAJOR >( A, rhs, solution );
+  detail::solveLinearSystem( A, rhs, solution );
 }
 
 void BlasLapackLA::solveLinearSystem( MatColMajor< real64 const > const & A,
                                       MatColMajor< real64 const > const & rhs,
                                       MatColMajor< real64 > const & solution )
 {
-  detail::solveLinearSystem< false, MatrixLayout::COL_MAJOR >( A, rhs, solution );
+  detail::solveLinearSystem( A, rhs, solution );
 }
 
 void BlasLapackLA::solveLinearSystem( MatRowMajor< real64 > const & A,
                                       MatRowMajor< real64 > const & rhs )
 {
-  detail::solveLinearSystem< true, MatrixLayout::ROW_MAJOR >( A, rhs.toSliceConst(), rhs );
+  detail::solveLinearSystem( A, rhs.toSliceConst(), rhs );
 }
 
 void BlasLapackLA::solveLinearSystem( MatColMajor< real64 > const & A,
                                       MatColMajor< real64 > const & rhs )
 {
-  detail::solveLinearSystem< true, MatrixLayout::COL_MAJOR >( A, rhs.toSliceConst(), rhs );
+  detail::solveLinearSystem( A, rhs.toSliceConst(), rhs );
 }
 
 void BlasLapackLA::matrixLeastSquaresSolutionSolve( arraySlice2d< real64 const, MatrixLayout::ROW_MAJOR > const & A,
diff --git a/src/coreComponents/denseLinearAlgebra/unitTests/testSolveLinearSystem.cpp b/src/coreComponents/denseLinearAlgebra/unitTests/testSolveLinearSystem.cpp
index 9de9984a2fd..94a2726fbc9 100644
--- a/src/coreComponents/denseLinearAlgebra/unitTests/testSolveLinearSystem.cpp
+++ b/src/coreComponents/denseLinearAlgebra/unitTests/testSolveLinearSystem.cpp
@@ -23,9 +23,7 @@
 
 using namespace geos;
 
-using INDEX_TYPE = std::ptrdiff_t;
-
-constexpr INDEX_TYPE MAX_SIZE = 20;
+constexpr int MAX_SIZE = 20;
 constexpr real64 machinePrecision = 1.0e2 * LvArray::NumericLimits< real64 >::epsilon;
 
 // Test matrices
@@ -37,28 +35,6 @@ enum TestMatrixType
   GRCAR
 };
 
-void random_permutation( arraySlice1d< INDEX_TYPE > const & pivot )
-{
-  INDEX_TYPE const N = LvArray::integerConversion< INDEX_TYPE >( pivot.size() );
-  GEOS_ASSERT( 2 < N );
-
-  for( INDEX_TYPE j = 0; j < N; j++ )
-  {
-    pivot[j] = j;
-  }
-
-  StackArray< real64, 1, MAX_SIZE > ordering( N );
-  BlasLapackLA::vectorRand( ordering.toSlice() );
-
-  for( INDEX_TYPE j = 0; j < N; j++ )
-  {
-    INDEX_TYPE const i = static_cast< INDEX_TYPE >(ordering[j]*N);
-    INDEX_TYPE const pi = pivot[i];
-    pivot[i] = pivot[j];
-    pivot[j] = pi;
-  }
-}
-
 template< TestMatrixType MATRIX_TYPE >
 struct TestMatrix {};
 
@@ -68,10 +44,10 @@ struct TestMatrix< TestMatrixType::LAPLACE >
   template< int USD >
   static void create( arraySlice2d< real64, USD > const & A )
   {
-    INDEX_TYPE const N = LvArray::integerConversion< INDEX_TYPE >( A.size( 0 ) );
-    GEOS_ASSERT( 2 < N );
+    int const N = LvArray::integerConversion< int >( A.size( 0 ) );
+    GEOS_ASSERT( 2 <= N );
     LvArray::forValuesInSlice( A, []( real64 & a ){ a = 0.0; } );
-    for( INDEX_TYPE i = 0; i < N-1; i++ )
+    for( int i = 0; i < N-1; i++ )
     {
       A( i+1, i ) = -1.0;
       A( i+1, i+1 ) = 2.0;
@@ -87,31 +63,27 @@ struct TestMatrix< TestMatrixType::SAMPLING >
   template< int USD >
   static void create( arraySlice2d< real64, USD > const & A )
   {
-    INDEX_TYPE const N = LvArray::integerConversion< INDEX_TYPE >( A.size( 0 ) );
-    GEOS_ASSERT( 2 < N );
-    StackArray< INDEX_TYPE, 1, MAX_SIZE > pivot( N );
-    random_permutation( pivot.toSlice() );
+    int const N = LvArray::integerConversion< int >( A.size( 0 ) );
+    GEOS_ASSERT( 2 <= N );
 
     real64 minajj = LvArray::NumericLimits< real64 >::max;
-    for( INDEX_TYPE j = 1; j <= N; j++ )
+    for( int j = 1; j <= N; j++ )
     {
       real64 ajj = 0.0;
-      INDEX_TYPE const pj = pivot[j-1];
-      for( INDEX_TYPE i = 1; i <= N; i++ )
+      for( int i = 1; i <= N; i++ )
       {
         if( i != j )
         {
-          A( i-1, pj ) = 0.0; //static_cast< real64 >(i)/static_cast< real64 >(i-j);
-          ajj += A( i-1, pj );
+          A( i-1, j-1 ) = static_cast< real64 >(i)/static_cast< real64 >(i-j);
+          ajj += A( i-1, j-1 );
         }
       }
-      A( j-1, pj ) = ajj;
+      A( j-1, j-1 ) = ajj;
       minajj = LvArray::math::min( ajj, minajj );
     }
-    for( INDEX_TYPE j = 0; j < N; j++ )
+    for( int j = 0; j < N; j++ )
     {
-      A( j, pivot[j] ) -= minajj;
-      A( j, pivot[j] ) += 1.0;
+      A( j, j ) -= minajj;
     }
   }
 };
@@ -122,11 +94,11 @@ struct TestMatrix< TestMatrixType::REDHEFFER >
   template< int USD >
   static void create( arraySlice2d< real64, USD > const & A )
   {
-    INDEX_TYPE const N = LvArray::integerConversion< INDEX_TYPE >( A.size( 0 ) );
-    GEOS_ASSERT( 2 < N );
-    for( INDEX_TYPE i = 1; i <= N; i++ )
+    int const N = LvArray::integerConversion< int >( A.size( 0 ) );
+    GEOS_ASSERT( 2 <= N );
+    for( int i = 1; i <= N; i++ )
     {
-      for( INDEX_TYPE j = 1; j <= N; j++ )
+      for( int j = 1; j <= N; j++ )
       {
         A( i-1, j-1 ) = (j==1) || ((j%i)==0) ? 1.0 : 0.0;
       }
@@ -140,16 +112,16 @@ struct TestMatrix< TestMatrixType::GRCAR >
   template< int USD >
   static void create( arraySlice2d< real64, USD > const & A )
   {
-    INDEX_TYPE const N = LvArray::integerConversion< INDEX_TYPE >( A.size( 0 ) );
+    int const N = LvArray::integerConversion< int >( A.size( 0 ) );
     GEOS_ASSERT( 4 < N );
     LvArray::forValuesInSlice( A, []( real64 & a ){ a = 0.0; } );
-    for( INDEX_TYPE i = 0; i < N-1; i++ )
+    for( int i = 0; i < N-1; i++ )
     {
       A( i+1, i ) = -1.0;
     }
-    for( INDEX_TYPE c = 0; c < 4; c++ )
+    for( int c = 0; c < 4; c++ )
     {
-      for( INDEX_TYPE i = 0; i < N-c; i++ )
+      for( int i = 0; i < N-c; i++ )
       {
         A( i, i+c ) = 1.0;
       }
@@ -157,6 +129,29 @@ struct TestMatrix< TestMatrixType::GRCAR >
   }
 };
 
+// Randomly reorder a matrix
+template< int USD >
+void random_permutation( arraySlice2d< real64, USD > const & A )
+{
+  int const N = LvArray::integerConversion< int >( A.size( 0 ) );
+  GEOS_ASSERT( 2 <= N );
+
+  StackArray< real64, 1, MAX_SIZE > ordering( N );
+  BlasLapackLA::vectorRand( ordering.toSlice() );
+
+  for( int j = 0; j < N; j++ )
+  {
+    int const i = static_cast< int >(ordering[j]*N);
+    if( i != j )
+    {
+      for( int k = 0; k < N; ++k )
+      {
+        std::swap( A( i, k ), A( j, k ));
+      }
+    }
+  }
+}
+
 // Local naive matrix-vector multiply
 template< int USD >
 void matrix_vector_multiply( arraySlice2d< real64 const, USD > const & A,
@@ -164,10 +159,10 @@ void matrix_vector_multiply( arraySlice2d< real64 const, USD > const & A,
                              arraySlice1d< real64 > const & b )
 {
   int const N = LvArray::integerConversion< int >( A.size( 0 ) );
-  for( INDEX_TYPE i = 0; i < N; ++i )
+  for( int i = 0; i < N; ++i )
   {
     real64 bi = 0.0;
-    for( INDEX_TYPE j = 0; j < N; ++j )
+    for( int j = 0; j < N; ++j )
     {
       bi += A( i, j )*x( j );
     }
@@ -181,15 +176,15 @@ void matrix_matrix_multiply( arraySlice2d< real64 const, USD > const & A,
                              arraySlice2d< real64 const, USD > const & X,
                              arraySlice2d< real64, USD > const & B )
 {
-  INDEX_TYPE const K = LvArray::integerConversion< INDEX_TYPE >( A.size( 1 ) );
-  INDEX_TYPE const M = LvArray::integerConversion< INDEX_TYPE >( X.size( 0 ) );
-  INDEX_TYPE const N = LvArray::integerConversion< INDEX_TYPE >( X.size( 1 ) );
-  for( INDEX_TYPE i = 0; i < M; ++i )
+  int const K = LvArray::integerConversion< int >( A.size( 1 ) );
+  int const M = LvArray::integerConversion< int >( X.size( 0 ) );
+  int const N = LvArray::integerConversion< int >( X.size( 1 ) );
+  for( int i = 0; i < M; ++i )
   {
-    for( INDEX_TYPE j = 0; j < N; ++j )
+    for( int j = 0; j < N; ++j )
     {
       real64 bij = 0.0;
-      for( INDEX_TYPE k = 0; k < K; ++k )
+      for( int k = 0; k < K; ++k )
       {
         bij += A( i, k )*X( k, j );
       }
@@ -231,13 +226,14 @@ class LinearSolveFixture : public ::testing::Test
   LinearSolveFixture() = default;
   ~LinearSolveFixture() override = default;
 
-  template< TestMatrixType TEST_MATRIX, INDEX_TYPE N >
+  template< TestMatrixType TEST_MATRIX, int N >
   void test_matrix_vector_solve() const
   {
-    static_assert( 1 < N && N < MAX_SIZE );
+    static_assert( 1 < N && N <= MAX_SIZE );
 
     MatrixType A( N, N );
     TestMatrix< TEST_MATRIX >::create( A.toSlice() );
+    random_permutation( A.toSlice() );
 
     VectorType b ( N );
     VectorType x ( N );
@@ -263,7 +259,7 @@ class LinearSolveFixture : public ::testing::Test
     BlasLapackLA::solveLinearSystem( A.toSliceConst(), b.toSliceConst(), x.toSlice() );
 
     // Check solution
-    for( INDEX_TYPE i = 0; i < N; ++i )
+    for( int i = 0; i < N; ++i )
     {
       EXPECT_NEAR( x( i ), x0( i ), machinePrecision );
     }
@@ -279,13 +275,14 @@ class LinearSolveFixture : public ::testing::Test
     EXPECT_NEAR( b( N-1 ), b1, machinePrecision );
   }
 
-  template< TestMatrixType TEST_MATRIX, INDEX_TYPE N >
+  template< TestMatrixType TEST_MATRIX, int N >
   void test_matrix_vector_solve_inplace( ) const
   {
-    static_assert( 1 < N && N < MAX_SIZE );
+    static_assert( 1 < N && N <= MAX_SIZE );
 
     MatrixType A( N, N );
     TestMatrix< TEST_MATRIX >::create( A.toSlice() );
+    random_permutation( A.toSlice() );
 
     VectorType x ( N );
     VectorType x0 ( N );
@@ -300,7 +297,7 @@ class LinearSolveFixture : public ::testing::Test
     BlasLapackLA::solveLinearSystem( A, x.toSlice() );
 
     // Check in place
-    for( INDEX_TYPE i = 0; i < N; ++i )
+    for( int i = 0; i < N; ++i )
     {
       EXPECT_NEAR( x( i ), x0( i ), machinePrecision );
     }
@@ -309,22 +306,12 @@ class LinearSolveFixture : public ::testing::Test
   template< TestMatrixType TEST_MATRIX, int N, int M >
   void test_matrix_matrix_solve( ) const
   {
-    static_assert( 1 < N && N < MAX_SIZE );
-    static_assert( 1 <= M && M < MAX_SIZE );
+    static_assert( 1 < N && N <= MAX_SIZE );
+    static_assert( 1 <= M && M <= MAX_SIZE );
 
     MatrixType A ( N, N );
     TestMatrix< TEST_MATRIX >::create( A.toSlice() );
-    A( 0, 1 ) = 0.3141;
-    std::cout << std::fixed << std::setprecision( 4 );
-    for( int i = 0; i < N; i++ )
-    {
-      for( int j = 0; j < N; j++ )
-      {
-        std::cout << " " << std::setw( 10 ) << A( i, j );
-      }
-      std::cout << "\n";
-    }
-    std::cout << "\n";
+    random_permutation( A.toSlice() );
 
     MatrixType B ( N, M );
     MatrixType X ( N, M );
@@ -339,15 +326,6 @@ class LinearSolveFixture : public ::testing::Test
     // Populate matrix with random coefficients
     BlasLapackLA::matrixRand( X0,
                               BlasLapackLA::RandomNumberDistribution::UNIFORM_m1p1 );
-    for( int i = 0; i < N; i++ )
-    {
-      for( int j = 0; j < M; j++ )
-      {
-        X0( i, j ) = 0.1*(j+1) + (i+1);
-      }
-    }
-    std::cout << "---------------------------------------\n";
-
 
     // Multiply to get rhs
     matrix_matrix_multiply( A.toSliceConst(), X0.toSliceConst(), B.toSlice() );
@@ -358,25 +336,16 @@ class LinearSolveFixture : public ::testing::Test
 
     // Solve
     BlasLapackLA::solveLinearSystem( A.toSliceConst(), B.toSliceConst(), X.toSlice() );
-    for( int i = 0; i < N; i++ )
+
+    // Check
+    for( int i = 0; i < N; ++i )
     {
-      for( int j = 0; j < M; j++ )
+      for( int j = 0; j < M; ++j )
       {
-        std::cout << " " << std::setw( 10 ) << X0( i, j );
+        EXPECT_NEAR( X( i, j ), X0( i, j ), machinePrecision );
       }
-      std::cout << "\n";
     }
-    std::cout << "\n";
-    // Check
-    /**
-       for( INDEX_TYPE i = 0; i < N; ++i )
-       {
-       for( INDEX_TYPE j = 0; j < M; ++j )
-       {
-        EXPECT_NEAR( X( i, j ), X0( i, j ), machinePrecision );
-       }
-       }
-     */
+
     // Check that we have not destroyed A
     EXPECT_NEAR( A( 0, 0 ), a00, machinePrecision );
     EXPECT_NEAR( A( 0, 1 ), a01, machinePrecision );
@@ -391,11 +360,12 @@ class LinearSolveFixture : public ::testing::Test
   template< TestMatrixType TEST_MATRIX, int N, int M >
   void test_matrix_matrix_solve_inplace( ) const
   {
-    static_assert( 1 < N && N < MAX_SIZE );
-    static_assert( 1 <= M && M < MAX_SIZE );
+    static_assert( 1 < N && N <= MAX_SIZE );
+    static_assert( 1 <= M && M <= MAX_SIZE );
 
     MatrixType A ( N, N );
     TestMatrix< TEST_MATRIX >::create( A.toSlice() );
+    random_permutation( A.toSlice() );
 
     MatrixType X ( N, M );
     MatrixType X0 ( N, M );
@@ -412,9 +382,9 @@ class LinearSolveFixture : public ::testing::Test
     BlasLapackLA::solveLinearSystem( A.toSlice(), X.toSlice() );
 
     // Check
-    for( INDEX_TYPE i = 0; i < N; ++i )
+    for( int i = 0; i < N; ++i )
     {
-      for( INDEX_TYPE j = 0; j < M; ++j )
+      for( int j = 0; j < M; ++j )
       {
         EXPECT_NEAR( X( i, j ), X0( i, j ), machinePrecision );
       }
@@ -422,78 +392,89 @@ class LinearSolveFixture : public ::testing::Test
   }
 };
 
-/**
-   using LinearSolveTypes = ::testing::Types<
-   Array< real64, 2, MatrixLayout::COL_MAJOR_PERM >,
-   Array< real64, 2, MatrixLayout::ROW_MAJOR_PERM >,
-   StackArray< real64, 2, MAX_SIZE *MAX_SIZE, MatrixLayout::COL_MAJOR_PERM >,
-   StackArray< real64, 2, MAX_SIZE *MAX_SIZE, MatrixLayout::ROW_MAJOR_PERM > >;*/
 using LinearSolveTypes = ::testing::Types<
   Array< real64, 2, MatrixLayout::COL_MAJOR_PERM >,
-  Array< real64, 2, MatrixLayout::ROW_MAJOR_PERM > >;
+  Array< real64, 2, MatrixLayout::ROW_MAJOR_PERM >,
+  StackArray< real64, 2, MAX_SIZE *MAX_SIZE, MatrixLayout::COL_MAJOR_PERM >,
+  StackArray< real64, 2, MAX_SIZE *MAX_SIZE, MatrixLayout::ROW_MAJOR_PERM > >;
 TYPED_TEST_SUITE( LinearSolveFixture, LinearSolveTypes );
 
-/**
-   TYPED_TEST( LinearSolveFixture, matrix_vector_solve_laplace )
-   {
-   this->template test_matrix_vector_solve< TestMatrixType::LAPLACE, 5 >();
-   this->template test_matrix_vector_solve_inplace< TestMatrixType::LAPLACE, 5 >();
-   this->template test_matrix_vector_solve< TestMatrixType::LAPLACE, 12 >();
-   this->template test_matrix_vector_solve_inplace< TestMatrixType::LAPLACE, 12 >();
-   }
-
-   TYPED_TEST( LinearSolveFixture, matrix_vector_solve_grcar )
-   {
-   this->template test_matrix_vector_solve< TestMatrixType::GRCAR, 6 >();
-   this->template test_matrix_vector_solve_inplace< TestMatrixType::GRCAR, 6 >();
-   this->template test_matrix_vector_solve< TestMatrixType::GRCAR, 10 >();
-   this->template test_matrix_vector_solve_inplace< TestMatrixType::GRCAR, 10 >();
-   }
-
-
-   TYPED_TEST( LinearSolveFixture, matrix_matrix_solve_laplace )
-   {
-   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 1 >();
-   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 1 >();
-   this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 3 >();
-   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 3 >();
-   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 5 >();
-   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 5 >();
-   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 12 >();
-   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 12 >();
-   }
-
-   TYPED_TEST( LinearSolveFixture, matrix_matrix_solve_grcar )
-   {
-   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 1 >();
-   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 1 >();
-   this->template test_matrix_matrix_solve< TestMatrixType::GRCAR, 5, 3 >();
-   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 3 >();
-   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 5 >();
-   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 5 >();
-   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 12 >();
-   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 12 >();
-   }
-
-   TYPED_TEST( LinearSolveFixture, matrix_matrix_solve_redheffer )
-   {
-   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 1 >();
-   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 1 >();
-   this->template test_matrix_matrix_solve< TestMatrixType::REDHEFFER, 12, 3 >();
-   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 3 >();
-   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 5 >();
-   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 5 >();
-   //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 12 >();
-   //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 12 >();
-   }*/
+TYPED_TEST( LinearSolveFixture, matrix_vector_solve_laplace )
+{
+  this->template test_matrix_vector_solve< TestMatrixType::LAPLACE, 5 >();
+  this->template test_matrix_vector_solve_inplace< TestMatrixType::LAPLACE, 5 >();
+  this->template test_matrix_vector_solve< TestMatrixType::LAPLACE, 12 >();
+  this->template test_matrix_vector_solve_inplace< TestMatrixType::LAPLACE, 12 >();
+}
+
+TYPED_TEST( LinearSolveFixture, matrix_vector_solve_grcar )
+{
+  this->template test_matrix_vector_solve< TestMatrixType::GRCAR, 6 >();
+  this->template test_matrix_vector_solve_inplace< TestMatrixType::GRCAR, 6 >();
+  this->template test_matrix_vector_solve< TestMatrixType::GRCAR, 10 >();
+  this->template test_matrix_vector_solve_inplace< TestMatrixType::GRCAR, 10 >();
+}
+
+TYPED_TEST( LinearSolveFixture, matrix_vector_solve_redheffer )
+{
+  this->template test_matrix_vector_solve< TestMatrixType::REDHEFFER, 2 >();
+  this->template test_matrix_vector_solve_inplace< TestMatrixType::REDHEFFER, 2 >();
+  this->template test_matrix_vector_solve< TestMatrixType::REDHEFFER, 8 >();
+  this->template test_matrix_vector_solve_inplace< TestMatrixType::REDHEFFER, 8 >();
+}
+
+TYPED_TEST( LinearSolveFixture, matrix_vector_solve_sampling )
+{
+  this->template test_matrix_vector_solve< TestMatrixType::SAMPLING, 3 >();
+  this->template test_matrix_vector_solve_inplace< TestMatrixType::SAMPLING, 3 >();
+  this->template test_matrix_vector_solve< TestMatrixType::SAMPLING, 20 >();
+  this->template test_matrix_vector_solve_inplace< TestMatrixType::SAMPLING, 20 >();
+}
+
+TYPED_TEST( LinearSolveFixture, matrix_matrix_solve_laplace )
+{
+  this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 1 >();
+  this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 1 >();
+  this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 3 >();
+  this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 3 >();
+  this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 5 >();
+  this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 5 >();
+  this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 12 >();
+  this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 12 >();
+}
+
+TYPED_TEST( LinearSolveFixture, matrix_matrix_solve_grcar )
+{
+  this->template test_matrix_matrix_solve< TestMatrixType::GRCAR, 5, 1 >();
+  this->template test_matrix_matrix_solve_inplace< TestMatrixType::GRCAR, 5, 1 >();
+  this->template test_matrix_matrix_solve< TestMatrixType::GRCAR, 5, 3 >();
+  this->template test_matrix_matrix_solve_inplace< TestMatrixType::GRCAR, 5, 3 >();
+  this->template test_matrix_matrix_solve< TestMatrixType::GRCAR, 5, 5 >();
+  this->template test_matrix_matrix_solve_inplace< TestMatrixType::GRCAR, 5, 5 >();
+  this->template test_matrix_matrix_solve< TestMatrixType::GRCAR, 5, 12 >();
+  this->template test_matrix_matrix_solve_inplace< TestMatrixType::GRCAR, 5, 12 >();
+}
+
+TYPED_TEST( LinearSolveFixture, matrix_matrix_solve_redheffer )
+{
+  this->template test_matrix_matrix_solve< TestMatrixType::REDHEFFER, 5, 1 >();
+  this->template test_matrix_matrix_solve_inplace< TestMatrixType::REDHEFFER, 5, 1 >();
+  this->template test_matrix_matrix_solve< TestMatrixType::REDHEFFER, 12, 3 >();
+  this->template test_matrix_matrix_solve_inplace< TestMatrixType::REDHEFFER, 5, 3 >();
+  this->template test_matrix_matrix_solve< TestMatrixType::REDHEFFER, 5, 5 >();
+  this->template test_matrix_matrix_solve_inplace< TestMatrixType::REDHEFFER, 5, 5 >();
+  this->template test_matrix_matrix_solve< TestMatrixType::REDHEFFER, 5, 12 >();
+  this->template test_matrix_matrix_solve_inplace< TestMatrixType::REDHEFFER, 5, 12 >();
+}
+
 TYPED_TEST( LinearSolveFixture, matrix_matrix_solve_sampling )
 {
-  //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 1 >();
-  //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 1 >();
+  this->template test_matrix_matrix_solve< TestMatrixType::SAMPLING, 5, 1 >();
+  this->template test_matrix_matrix_solve_inplace< TestMatrixType::SAMPLING, 5, 1 >();
   this->template test_matrix_matrix_solve< TestMatrixType::SAMPLING, 12, 3 >();
-  //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 3 >();
-  //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 5 >();
-  //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 5 >();
-  //this->template test_matrix_matrix_solve< TestMatrixType::LAPLACE, 5, 12 >();
-  //this->template test_matrix_matrix_solve_inplace< TestMatrixType::LAPLACE, 5, 12 >();
+  this->template test_matrix_matrix_solve_inplace< TestMatrixType::SAMPLING, 5, 3 >();
+  this->template test_matrix_matrix_solve< TestMatrixType::SAMPLING, 5, 5 >();
+  this->template test_matrix_matrix_solve_inplace< TestMatrixType::SAMPLING, 5, 5 >();
+  this->template test_matrix_matrix_solve< TestMatrixType::SAMPLING, 5, 12 >();
+  this->template test_matrix_matrix_solve_inplace< TestMatrixType::SAMPLING, 5, 12 >();
 }

From 0b47c5a8fca077e08a4855417cdfb55871cfad29 Mon Sep 17 00:00:00 2001
From: dkachuma <dickkachuma@googlemail.com>
Date: Tue, 18 Jun 2024 14:46:11 -0500
Subject: [PATCH 4/9] Undo addition of BLAS function

---
 .../interfaces/blaslapack/BlasLapackFunctions.h     | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackFunctions.h b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackFunctions.h
index 706b805da05..fe66dcfc2ce 100644
--- a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackFunctions.h
+++ b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackFunctions.h
@@ -175,19 +175,6 @@ void GEOS_dgels( char const * TRANS,
                  int const * LWORK,
                  int * INFO );
 
-#define GEOS_dtrsm FORTRAN_MANGLE( dtrsm )
-void GEOS_dtrsm( char const * SIDE,
-char const * UPLO,
-char const * TRANSA,
-char const * DIAG,
-int const * M,
-                     int const * N,
-                     double const * ALPHA,
-                 double * A,
-                 int const * LDA,
-                 double * B,
-                 int const * LDB );
-
 }
 #endif
 

From 1a4273ef87c76ab615bcca9e5c69af33631b428d Mon Sep 17 00:00:00 2001
From: dkachuma <dickkachuma@googlemail.com>
Date: Tue, 18 Jun 2024 15:03:56 -0500
Subject: [PATCH 5/9] arraySlice1d -> Vec

---
 .../interfaces/blaslapack/BlasLapackLA.cpp    |  8 ++--
 .../interfaces/blaslapack/BlasLapackLA.hpp    | 44 +++++++++----------
 2 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp
index 12081de3fdc..3ec9098e90e 100644
--- a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp
+++ b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp
@@ -1025,15 +1025,15 @@ void BlasLapackLA::matrixEigenvalues( MatRowMajor< real64 const > const & A,
 }
 
 void BlasLapackLA::solveLinearSystem( MatRowMajor< real64 const > const & A,
-                                      arraySlice1d< real64 const > const & rhs,
-                                      arraySlice1d< real64 > const & solution )
+                                      Vec< real64 const > const & rhs,
+                                      Vec< real64 > const & solution )
 {
   detail::solveLinearSystem( A, rhs, solution );
 }
 
 void BlasLapackLA::solveLinearSystem( MatColMajor< real64 const > const & A,
-                                      arraySlice1d< real64 const > const & rhs,
-                                      arraySlice1d< real64 > const & solution )
+                                      Vec< real64 const > const & rhs,
+                                      Vec< real64 > const & solution )
 {
   detail::solveLinearSystem( A, rhs, solution );
 }
diff --git a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.hpp b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.hpp
index 63bd7fad626..c910be4388f 100644
--- a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.hpp
+++ b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.hpp
@@ -454,16 +454,14 @@ struct BlasLapackLA
    * @param [in]  rhs GEOSX array1d.
    * @param [out] solution GEOSX array1d.
    */
-  static void solveLinearSystem( MatRowMajor< real64 const > const & A,
+  static void solveLinearSystem( MatColMajor< real64 const > const & A,
                                  Vec< real64 const > const & rhs,
                                  Vec< real64 > const & solution );
 
   /**
-   * @copydoc solveLinearSystem( MatRowMajor<real64 const> const &, Vec< real64 const > const &, Vec< real64 const > const & )
-   *
-   * @note this function first applies a matrix permutation and then calls the row major version of the function.
+   * @copydoc solveLinearSystem( MatColMajor<real64 const> const &, Vec< real64 const > const &, Vec< real64 const > const & )
    */
-  static void solveLinearSystem( MatColMajor< real64 const > const & A,
+  static void solveLinearSystem( MatRowMajor< real64 const > const & A,
                                  Vec< real64 const > const & rhs,
                                  Vec< real64 > const & solution );
 
@@ -480,15 +478,13 @@ struct BlasLapackLA
    * @param [in/out]  A GEOSX array2d. The matrix. On exit this will be replaced by the factorisation of A
    * @param [in/out]  rhs GEOSX array1d. The right hand side. On exit this will be the solution
    */
-  static void solveLinearSystem( MatRowMajor< real64 > const & A,
+  static void solveLinearSystem( MatColMajor< real64 > const & A,
                                  Vec< real64 > const & rhs );
 
   /**
-   * @copydoc solveLinearSystem( MatRowMajor< real64 > const &, Vec< real64 > const & )
-   *
-   * @note this function first applies a matrix permutation and then calls the row major version of the function.
+   * @copydoc solveLinearSystem( MatColMajor< real64 > const &, Vec< real64 > const & )
    */
-  static void solveLinearSystem( MatColMajor< real64 > const & A,
+  static void solveLinearSystem( MatRowMajor< real64 > const & A,
                                  Vec< real64 > const & rhs );
 
   /**
@@ -503,18 +499,18 @@ struct BlasLapackLA
    * @param [in]  rhs GEOSX array2d.
    * @param [out] solution GEOSX array2d.
    */
-  static void solveLinearSystem( MatRowMajor< real64 const > const & A,
-                                 MatRowMajor< real64 const > const & rhs,
-                                 MatRowMajor< real64 > const & solution );
+  static void solveLinearSystem( MatColMajor< real64 const > const & A,
+                                 MatColMajor< real64 const > const & rhs,
+                                 MatColMajor< real64 > const & solution );
 
   /**
-   * @copydoc solveLinearSystem( MatRowMajor< real64 const > const &, MatRowMajor< real64 const > const &, MatRowMajor< const > const & )
+   * @copydoc solveLinearSystem( MatColMajor< real64 const > const &, MatColMajor< real64 const > const &, MatColMajor< const > const & )
    *
-   * @note this function first applies a matrix permutation and then calls the row major version of the function.
+   * @note this function will allocate space to reorder the solution into column major form.
    */
-  static void solveLinearSystem( MatColMajor< real64 const > const & A,
-                                 MatColMajor< real64 const > const & rhs,
-                                 MatColMajor< real64 > const & solution );
+  static void solveLinearSystem( MatRowMajor< real64 const > const & A,
+                                 MatRowMajor< real64 const > const & rhs,
+                                 MatRowMajor< real64 > const & solution );
 
   /**
    * @brief Solves the linear system ;
@@ -530,16 +526,16 @@ struct BlasLapackLA
    * @param [in/out]  A GEOSX array2d. The matrix. On exit this will be replaced by the factorisation of A
    * @param [in/out]  rhs GEOSX array1d. The right hand side. On exit this will be the solution
    */
-  static void solveLinearSystem( MatRowMajor< real64 > const & A,
-                                 MatRowMajor< real64 > const & rhs );
+  static void solveLinearSystem( MatColMajor< real64 > const & A,
+                                 MatColMajor< real64 > const & rhs );
 
   /**
-   * @copydoc solveLinearSystem( MatRowMajor< real64 > const &, MatRowMajor< real64 > const & )
+   * @copydoc solveLinearSystem( MatColMajor< real64 > const &, MatRowMajor< real64 > const & )
    *
-   * @note this function first applies a matrix permutation and then calls the row major version of the function.
+   * @note this function will allocate space to reorder the solution into column major form.
    */
-  static void solveLinearSystem( MatColMajor< real64 > const & A,
-                                 MatColMajor< real64 > const & rhs );
+  static void solveLinearSystem( MatRowMajor< real64 > const & A,
+                                 MatRowMajor< real64 > const & rhs );
 
   /**
    * @brief Vector copy;

From 0ebf04f7ca0cf0a82bf4d7ba2b2b974dc264f128 Mon Sep 17 00:00:00 2001
From: dkachuma <dickkachuma@googlemail.com>
Date: Tue, 18 Jun 2024 15:14:10 -0500
Subject: [PATCH 6/9] Remove Redheffer matrix. It's singular!

---
 .../unitTests/testSolveLinearSystem.cpp       | 39 -------------------
 1 file changed, 39 deletions(-)

diff --git a/src/coreComponents/denseLinearAlgebra/unitTests/testSolveLinearSystem.cpp b/src/coreComponents/denseLinearAlgebra/unitTests/testSolveLinearSystem.cpp
index 94a2726fbc9..f962723f2ca 100644
--- a/src/coreComponents/denseLinearAlgebra/unitTests/testSolveLinearSystem.cpp
+++ b/src/coreComponents/denseLinearAlgebra/unitTests/testSolveLinearSystem.cpp
@@ -31,7 +31,6 @@ enum TestMatrixType
 {
   LAPLACE,
   SAMPLING,
-  REDHEFFER,
   GRCAR
 };
 
@@ -88,24 +87,6 @@ struct TestMatrix< TestMatrixType::SAMPLING >
   }
 };
 
-template<>
-struct TestMatrix< TestMatrixType::REDHEFFER >
-{
-  template< int USD >
-  static void create( arraySlice2d< real64, USD > const & A )
-  {
-    int const N = LvArray::integerConversion< int >( A.size( 0 ) );
-    GEOS_ASSERT( 2 <= N );
-    for( int i = 1; i <= N; i++ )
-    {
-      for( int j = 1; j <= N; j++ )
-      {
-        A( i-1, j-1 ) = (j==1) || ((j%i)==0) ? 1.0 : 0.0;
-      }
-    }
-  }
-};
-
 template<>
 struct TestMatrix< TestMatrixType::GRCAR >
 {
@@ -415,14 +396,6 @@ TYPED_TEST( LinearSolveFixture, matrix_vector_solve_grcar )
   this->template test_matrix_vector_solve_inplace< TestMatrixType::GRCAR, 10 >();
 }
 
-TYPED_TEST( LinearSolveFixture, matrix_vector_solve_redheffer )
-{
-  this->template test_matrix_vector_solve< TestMatrixType::REDHEFFER, 2 >();
-  this->template test_matrix_vector_solve_inplace< TestMatrixType::REDHEFFER, 2 >();
-  this->template test_matrix_vector_solve< TestMatrixType::REDHEFFER, 8 >();
-  this->template test_matrix_vector_solve_inplace< TestMatrixType::REDHEFFER, 8 >();
-}
-
 TYPED_TEST( LinearSolveFixture, matrix_vector_solve_sampling )
 {
   this->template test_matrix_vector_solve< TestMatrixType::SAMPLING, 3 >();
@@ -455,18 +428,6 @@ TYPED_TEST( LinearSolveFixture, matrix_matrix_solve_grcar )
   this->template test_matrix_matrix_solve_inplace< TestMatrixType::GRCAR, 5, 12 >();
 }
 
-TYPED_TEST( LinearSolveFixture, matrix_matrix_solve_redheffer )
-{
-  this->template test_matrix_matrix_solve< TestMatrixType::REDHEFFER, 5, 1 >();
-  this->template test_matrix_matrix_solve_inplace< TestMatrixType::REDHEFFER, 5, 1 >();
-  this->template test_matrix_matrix_solve< TestMatrixType::REDHEFFER, 12, 3 >();
-  this->template test_matrix_matrix_solve_inplace< TestMatrixType::REDHEFFER, 5, 3 >();
-  this->template test_matrix_matrix_solve< TestMatrixType::REDHEFFER, 5, 5 >();
-  this->template test_matrix_matrix_solve_inplace< TestMatrixType::REDHEFFER, 5, 5 >();
-  this->template test_matrix_matrix_solve< TestMatrixType::REDHEFFER, 5, 12 >();
-  this->template test_matrix_matrix_solve_inplace< TestMatrixType::REDHEFFER, 5, 12 >();
-}
-
 TYPED_TEST( LinearSolveFixture, matrix_matrix_solve_sampling )
 {
   this->template test_matrix_matrix_solve< TestMatrixType::SAMPLING, 5, 1 >();

From c73a82ff02110f836486c31438ab792c83498dd9 Mon Sep 17 00:00:00 2001
From: dkachuma <dickkachuma@googlemail.com>
Date: Tue, 18 Jun 2024 15:53:50 -0500
Subject: [PATCH 7/9] Add name generator. Clang is insisting.

---
 .../unitTests/testSolveLinearSystem.cpp          | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/coreComponents/denseLinearAlgebra/unitTests/testSolveLinearSystem.cpp b/src/coreComponents/denseLinearAlgebra/unitTests/testSolveLinearSystem.cpp
index f962723f2ca..48cf7b65dc8 100644
--- a/src/coreComponents/denseLinearAlgebra/unitTests/testSolveLinearSystem.cpp
+++ b/src/coreComponents/denseLinearAlgebra/unitTests/testSolveLinearSystem.cpp
@@ -378,7 +378,21 @@ using LinearSolveTypes = ::testing::Types<
   Array< real64, 2, MatrixLayout::ROW_MAJOR_PERM >,
   StackArray< real64, 2, MAX_SIZE *MAX_SIZE, MatrixLayout::COL_MAJOR_PERM >,
   StackArray< real64, 2, MAX_SIZE *MAX_SIZE, MatrixLayout::ROW_MAJOR_PERM > >;
-TYPED_TEST_SUITE( LinearSolveFixture, LinearSolveTypes );
+
+class NameGenerator
+{
+public:
+  template< typename T >
+  static std::string GetName( int )
+  {
+    if constexpr (std::is_same_v< T, Array< real64, 2, MatrixLayout::COL_MAJOR_PERM > >) return "col-major-heap-array";
+    if constexpr (std::is_same_v< T, Array< real64, 2, MatrixLayout::ROW_MAJOR_PERM > >) return "row-major-heap-array";
+    if constexpr (std::is_same_v< T, StackArray< real64, 2, MAX_SIZE *MAX_SIZE, MatrixLayout::COL_MAJOR_PERM > >) return "col-major-stack-array";
+    if constexpr (std::is_same_v< T, StackArray< real64, 2, MAX_SIZE *MAX_SIZE, MatrixLayout::ROW_MAJOR_PERM > >) return "row-major-stack-array";
+  }
+};
+
+TYPED_TEST_SUITE( LinearSolveFixture, LinearSolveTypes, NameGenerator );
 
 TYPED_TEST( LinearSolveFixture, matrix_vector_solve_laplace )
 {

From 6f5054ce99c3098239b5a4fb2626e8cafa996c46 Mon Sep 17 00:00:00 2001
From: dkachuma <dickkachuma@googlemail.com>
Date: Tue, 25 Jun 2024 11:18:38 -0500
Subject: [PATCH 8/9] Add helper functions

---
 .../interfaces/blaslapack/BlasLapackLA.cpp    | 62 ++++++++++---------
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp
index 3ec9098e90e..541a802861b 100644
--- a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp
+++ b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.cpp
@@ -379,6 +379,36 @@ void matrixLeastSquaresSolutionSolve( arraySlice2d< real64, USD > const & A,
   GEOS_ERROR_IF( INFO != 0, "The algorithm computing matrix linear system failed to converge." );
 }
 
+template< int USD1, int USD2 >
+GEOS_FORCE_INLINE
+void matrixCopy( int const N,
+                 int const M,
+                 arraySlice2d< real64, USD1 > const & A,
+                 arraySlice2d< real64, USD2 > const & B )
+{
+  for( int i = 0; i < N; i++ )
+  {
+    for( int j = 0; j < M; j++ )
+    {
+      B( i, j ) = A( i, j );
+    }
+  }
+}
+
+template< int USD >
+GEOS_FORCE_INLINE
+void matrixTranspose( int const N,
+                      arraySlice2d< real64, USD > const & A )
+{
+  for( int i = 0; i < N; i++ )
+  {
+    for( int j = i+1; j < N; j++ )
+    {
+      std::swap( A( i, j ), A( j, i ) );
+    }
+  }
+}
+
 template< typename T, int USD >
 void solveLinearSystem( arraySlice2d< T, USD > const & A,
                         arraySlice2d< real64 const, USD > const & B,
@@ -447,24 +477,12 @@ void solveLinearSystem( arraySlice2d< T, USD > const & A,
     if( 1 < M && M == N )
     {
       // Square case: swap in place
-      for( int i = 0; i < N; i++ )
-      {
-        for( int j = i+1; j < N; j++ )
-        {
-          std::swap( X( i, j ), X( j, i ) );
-        }
-      }
+      matrixTranspose( N, X );
     }
     else if( 1 < M )
     {
       X0.resize( N, M );
-      for( int i = 0; i < N; i++ )
-      {
-        for( int j = 0; j < M; j++ )
-        {
-          X0( i, j ) = X( i, j );
-        }
-      }
+      matrixCopy( N, M, X, X0.toSlice() );
       solutionData = X0.data();
     }
   }
@@ -478,23 +496,11 @@ void solveLinearSystem( arraySlice2d< T, USD > const & A,
     if( 1 < M && M == N )
     {
       // Square case: swap in place
-      for( int i = 0; i < N; i++ )
-      {
-        for( int j = i+1; j < N; j++ )
-        {
-          std::swap( X( i, j ), X( j, i ) );
-        }
-      }
+      matrixTranspose( N, X );
     }
     else if( 1 < M )
     {
-      for( int i = 0; i < N; i++ )
-      {
-        for( int j = 0; j < M; j++ )
-        {
-          X( i, j ) = X0( i, j );
-        }
-      }
+      matrixCopy( N, M, X0.toSlice(), X );
     }
   }
 }

From 4ec9daea777d0976c0e5c5d9ed1b4eda20b4597b Mon Sep 17 00:00:00 2001
From: dkachuma <dickkachuma@googlemail.com>
Date: Wed, 26 Jun 2024 10:56:14 -0500
Subject: [PATCH 9/9] Add warning about GPU use

---
 .../denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.hpp    | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.hpp b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.hpp
index c910be4388f..488ed59a9b9 100644
--- a/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.hpp
+++ b/src/coreComponents/denseLinearAlgebra/interfaces/blaslapack/BlasLapackLA.hpp
@@ -30,6 +30,7 @@ namespace geos
  * \class BlasLapackLA
  * \brief This class contains a collection of BLAS and LAPACK linear
  *        algebra operations for GEOSX array1d and array2d
+ * \warning These methods are currently not supported on GPUs
  */
 struct BlasLapackLA
 {