clang format

2025-02-25 18:55:30 -06:00 · 2024-06-18 11:42:00 +02:00 · 2024-06-18 11:42:00 +02:00 · 82ff782d5f
commit 82ff782d5f
parent 2b9c81fe09
3 changed files with 261 additions and 223 deletions
--- a/opm/simulators/linalg/cuistl/CuDILU.cpp
+++ b/opm/simulators/linalg/cuistl/CuDILU.cpp
@ -25,9 +25,9 @@
 #include <opm/simulators/linalg/cuistl/CuDILU.hpp>
 #include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
 #include <opm/simulators/linalg/cuistl/CuVector.hpp>
+#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
 #include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
-#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
 #include <opm/simulators/linalg/matrixblock.hh>
 #include <vector>

@ -65,7 +65,9 @@ createNaturalToReordered(Opm::SparseTable<size_t> levelSets)

 template <class M, class field_type, class GPUM>
 void
-createReorderedMatrix(const M& naturalMatrix, std::vector<int> reorderedToNatural, std::unique_ptr<GPUM>& reorderedGpuMat)
+createReorderedMatrix(const M& naturalMatrix,
+                      std::vector<int> reorderedToNatural,
+                      std::unique_ptr<GPUM>& reorderedGpuMat)
 {
    M reorderedMatrix(naturalMatrix.N(), naturalMatrix.N(), naturalMatrix.nonzeroes(), M::row_wise);
    for (auto dstRowIt = reorderedMatrix.createbegin(); dstRowIt != reorderedMatrix.createend(); ++dstRowIt) {
@ -81,22 +83,26 @@ createReorderedMatrix(const M& naturalMatrix, std::vector<int> reorderedToNatura

 template <class M, class field_type, class GPUM>
 void
-extractLowerAndUpperMatrices(const M& naturalMatrix, std::vector<int> reorderedToNatural, std::unique_ptr<GPUM>& lower, std::unique_ptr<GPUM>& upper)
+extractLowerAndUpperMatrices(const M& naturalMatrix,
+                             std::vector<int> reorderedToNatural,
+                             std::unique_ptr<GPUM>& lower,
+                             std::unique_ptr<GPUM>& upper)
 {
    const size_t new_nnz = (naturalMatrix.nonzeroes() - naturalMatrix.N()) / 2;

    M reorderedLower(naturalMatrix.N(), naturalMatrix.N(), new_nnz, M::row_wise);
    M reorderedUpper(naturalMatrix.N(), naturalMatrix.N(), new_nnz, M::row_wise);

-    for (auto lowerIt = reorderedLower.createbegin(), upperIt = reorderedUpper.createbegin(); lowerIt != reorderedLower.createend(); ++lowerIt, ++upperIt) {
+    for (auto lowerIt = reorderedLower.createbegin(), upperIt = reorderedUpper.createbegin();
+         lowerIt != reorderedLower.createend();
+         ++lowerIt, ++upperIt) {

        auto srcRow = naturalMatrix.begin() + reorderedToNatural[lowerIt.index()];

        for (auto elem = srcRow->begin(); elem != srcRow->end(); ++elem) {
            if (elem.index() < srcRow.index()) { // add index to lower matrix if under the diagonal
                lowerIt.insert(elem.index());
-            }
-            else if (elem.index() > srcRow.index()){ // add element to upper matrix if above the diagonal
+            } else if (elem.index() > srcRow.index()) { // add element to upper matrix if above the diagonal
                upperIt.insert(elem.index());
            }
        }
@ -146,10 +152,11 @@ CuDILU<M, X, Y, l>::CuDILU(const M& A, bool split_matrix)
                             A.nonzeroes()));
    if (m_split_matrix) {
        m_gpuMatrixReorderedDiag.emplace(CuVector<field_type>(blocksize_ * blocksize_ * m_cpuMatrix.N()));
-        extractLowerAndUpperMatrices<M, field_type, CuSparseMatrix<field_type>>(m_cpuMatrix, m_reorderedToNatural, m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper);
-    }
-    else{
-        createReorderedMatrix<M, field_type, CuSparseMatrix<field_type>>(m_cpuMatrix, m_reorderedToNatural, m_gpuMatrixReordered);
+        extractLowerAndUpperMatrices<M, field_type, CuSparseMatrix<field_type>>(
+            m_cpuMatrix, m_reorderedToNatural, m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper);
+    } else {
+        createReorderedMatrix<M, field_type, CuSparseMatrix<field_type>>(
+            m_cpuMatrix, m_reorderedToNatural, m_gpuMatrixReordered);
    }
    computeDiagAndMoveReorderedData();
 }
@ -171,7 +178,8 @@ CuDILU<M, X, Y, l>::apply(X& v, const Y& d)
        for (int level = 0; level < m_levelSets.size(); ++level) {
            const int numOfRowsInLevel = m_levelSets[level].size();
            if (m_split_matrix) {
-                detail::computeLowerSolveLevelSetSplit<field_type, blocksize_>(m_gpuMatrixReorderedLower->getNonZeroValues().data(),
+                detail::computeLowerSolveLevelSetSplit<field_type, blocksize_>(
+                    m_gpuMatrixReorderedLower->getNonZeroValues().data(),
                    m_gpuMatrixReorderedLower->getRowIndices().data(),
                    m_gpuMatrixReorderedLower->getColumnIndices().data(),
                    m_gpuReorderToNatural.data(),
@ -180,9 +188,9 @@ CuDILU<M, X, Y, l>::apply(X& v, const Y& d)
                    m_gpuDInv.data(),
                    d.data(),
                    v.data());
-            }
-            else{
-                detail::computeLowerSolveLevelSet<field_type, blocksize_>(m_gpuMatrixReordered->getNonZeroValues().data(),
+            } else {
+                detail::computeLowerSolveLevelSet<field_type, blocksize_>(
+                    m_gpuMatrixReordered->getNonZeroValues().data(),
                    m_gpuMatrixReordered->getRowIndices().data(),
                    m_gpuMatrixReordered->getColumnIndices().data(),
                    m_gpuReorderToNatural.data(),
@ -201,7 +209,8 @@ CuDILU<M, X, Y, l>::apply(X& v, const Y& d)
            const int numOfRowsInLevel = m_levelSets[level].size();
            levelStartIdx -= numOfRowsInLevel;
            if (m_split_matrix) {
-                detail::computeUpperSolveLevelSetSplit<field_type, blocksize_>(m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
+                detail::computeUpperSolveLevelSetSplit<field_type, blocksize_>(
+                    m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
                    m_gpuMatrixReorderedUpper->getRowIndices().data(),
                    m_gpuMatrixReorderedUpper->getColumnIndices().data(),
                    m_gpuReorderToNatural.data(),
@ -209,9 +218,9 @@ CuDILU<M, X, Y, l>::apply(X& v, const Y& d)
                    numOfRowsInLevel,
                    m_gpuDInv.data(),
                    v.data());
-            }
-            else{
-                detail::computeUpperSolveLevelSet<field_type, blocksize_>(m_gpuMatrixReordered->getNonZeroValues().data(),
+            } else {
+                detail::computeUpperSolveLevelSet<field_type, blocksize_>(
+                    m_gpuMatrixReordered->getNonZeroValues().data(),
                    m_gpuMatrixReordered->getRowIndices().data(),
                    m_gpuMatrixReordered->getColumnIndices().data(),
                    m_gpuReorderToNatural.data(),
@ -255,7 +264,8 @@ CuDILU<M, X, Y, l>::computeDiagAndMoveReorderedData()
    OPM_TIMEBLOCK(prec_update);
    {
        if (m_split_matrix) {
-            detail::copyMatDataToReorderedSplit<field_type, blocksize_>(m_gpuMatrix.getNonZeroValues().data(),
+            detail::copyMatDataToReorderedSplit<field_type, blocksize_>(
+                m_gpuMatrix.getNonZeroValues().data(),
                m_gpuMatrix.getRowIndices().data(),
                m_gpuMatrix.getColumnIndices().data(),
                m_gpuMatrixReorderedLower->getNonZeroValues().data(),
@ -265,8 +275,7 @@ CuDILU<M, X, Y, l>::computeDiagAndMoveReorderedData()
                m_gpuMatrixReorderedDiag.value().data(),
                m_gpuNaturalToReorder.data(),
                m_gpuMatrixReorderedLower->N());
-        }
-        else{
+        } else {
            detail::copyMatDataToReordered<field_type, blocksize_>(m_gpuMatrix.getNonZeroValues().data(),
                                                                   m_gpuMatrix.getRowIndices().data(),
                                                                   m_gpuMatrixReordered->getNonZeroValues().data(),
@ -279,7 +288,8 @@ CuDILU<M, X, Y, l>::computeDiagAndMoveReorderedData()
        for (int level = 0; level < m_levelSets.size(); ++level) {
            const int numOfRowsInLevel = m_levelSets[level].size();
            if (m_split_matrix) {
-                detail::computeDiluDiagonalSplit<field_type, blocksize_>(m_gpuMatrixReorderedLower->getNonZeroValues().data(),
+                detail::computeDiluDiagonalSplit<field_type, blocksize_>(
+                    m_gpuMatrixReorderedLower->getNonZeroValues().data(),
                    m_gpuMatrixReorderedLower->getRowIndices().data(),
                    m_gpuMatrixReorderedLower->getColumnIndices().data(),
                    m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
@ -291,8 +301,7 @@ CuDILU<M, X, Y, l>::computeDiagAndMoveReorderedData()
                    levelStartIdx,
                    numOfRowsInLevel,
                    m_gpuDInv.data());
-            }
-            else{
+            } else {
                detail::computeDiluDiagonal<field_type, blocksize_>(m_gpuMatrixReordered->getNonZeroValues().data(),
                                                                    m_gpuMatrixReordered->getRowIndices().data(),
                                                                    m_gpuMatrixReordered->getColumnIndices().data(),
--- a/opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.cu
+++ b/opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.cu
@ -457,8 +457,16 @@ namespace
    }

    template <class T, int blocksize>
-    __global__ void cuMoveDataToReorderedSplit(
-        T* srcMatrix, int* srcRowIndices, int* srcColumnIndices, T* dstLowerMatrix, int* dstLowerRowIndices, T* dstUpperMatrix, int* dstUpperRowIndices, T* dstDiag, int* naturalToReordered, size_t numberOfRows)
+    __global__ void cuMoveDataToReorderedSplit(T* srcMatrix,
+                                               int* srcRowIndices,
+                                               int* srcColumnIndices,
+                                               T* dstLowerMatrix,
+                                               int* dstLowerRowIndices,
+                                               T* dstUpperMatrix,
+                                               int* dstUpperRowIndices,
+                                               T* dstDiag,
+                                               int* naturalToReordered,
+                                               size_t numberOfRows)
    {
        const auto srcRow = blockDim.x * blockIdx.x + threadIdx.x;
        if (srcRow < numberOfRows) {
@ -478,13 +486,12 @@ namespace
                    dstBlock = lowerBlock;
                    ++lowerBlock;
                    dstBuffer = dstLowerMatrix;
-                }
-                else if (srcColumnIndices[srcBlock] > srcRow){ // we are writing a value to the upper triangular matrix
+                } else if (srcColumnIndices[srcBlock]
+                           > srcRow) { // we are writing a value to the upper triangular matrix
                    dstBlock = upperBlock;
                    ++upperBlock;
                    dstBuffer = dstUpperMatrix;
-                }
-                else{ // we are writing a value to the diagonal
+                } else { // we are writing a value to the diagonal
                    dstBlock = dstRow;
                    dstBuffer = dstDiag;
                }
@ -511,14 +518,16 @@ namespace

    // Kernel here is the function object of the cuda kernel
    // Returns the thread-block size that cudaOccupancyMaxPotentialBlockSize suggests for kernel k.
    // The grid size reported by the same query is written to a temporary and discarded.
    // The two trailing 0 arguments are, per the CUDA runtime API, the dynamic shared memory
    // per block and the block size limit (0 meaning no limit).
    // NOTE(review): the status returned by cudaOccupancyMaxPotentialBlockSize is ignored and
    // blockSize has no initializer, so a failed query would return an indeterminate value -
    // consider checking the returned cudaError_t.
    template <class Kernel>
-    inline int getCudaRecomendedThreadBlockSize(Kernel k){
+    inline int getCudaRecomendedThreadBlockSize(Kernel k)
+    {
        int blockSize;
        int tmpGridSize;
        cudaOccupancyMaxPotentialBlockSize(&tmpGridSize, &blockSize, k, 0, 0);
        return blockSize;
    }

-    inline int getNumberOfBlocks(int wantedThreads, int threadBlockSize){
+    inline int getNumberOfBlocks(int wantedThreads, int threadBlockSize)
+    {
        // Number of thread blocks of threadBlockSize threads needed to cover wantedThreads
        // threads: integer division rounded up (for positive arguments), so a partially
        // filled last block is still counted.
        // NOTE(review): threadBlockSize is the divisor and is not checked against zero here.
        return (wantedThreads + threadBlockSize - 1) / threadBlockSize;
    }

@ -648,8 +657,7 @@ computeDiluDiagonalSplit(T* reorderedLowerMat,
    if (blocksize <= 3) {
        int threadBlockSize = getCudaRecomendedThreadBlockSize(cuComputeLowerSolveLevelSetSplit<T, blocksize>);
        int nThreadBlocks = getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
-        cuComputeDiluDiagonalSplit<T, blocksize>
-            <<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
+        cuComputeDiluDiagonalSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
                                                                                     lowerRowIndices,
                                                                                     lowerColIndices,
                                                                                     reorderedUpperMat,
@ -677,13 +685,29 @@ copyMatDataToReordered(

 // Host-side launcher for the cuMoveDataToReorderedSplit<T, blocksize> kernel.
 // All pointer arguments are forwarded unchanged to the kernel; the kernel uses one thread per
 // source row (threads with an index >= numberOfRows do nothing), so enough blocks are launched
 // to cover numberOfRows threads.
 // NOTE(review): numberOfRows is a size_t but getNumberOfBlocks takes an int, so the row count
 // is narrowed when computing the grid size.
 template <class T, int blocksize>
 void
-copyMatDataToReorderedSplit(
-    T* srcMatrix, int* srcRowIndices, int* srcColumnIndices, T* dstLowerMatrix, int* dstLowerRowIndices, T* dstUpperMatrix, int* dstUpperRowIndices, T* dstDiag, int* naturalToReordered, size_t numberOfRows)
+copyMatDataToReorderedSplit(T* srcMatrix,
+                            int* srcRowIndices,
+                            int* srcColumnIndices,
+                            T* dstLowerMatrix,
+                            int* dstLowerRowIndices,
+                            T* dstUpperMatrix,
+                            int* dstUpperRowIndices,
+                            T* dstDiag,
+                            int* naturalToReordered,
+                            size_t numberOfRows)
 {
    // Ask for the recommended block size of the kernel that is actually launched below, so the
    // launch configuration matches that kernel's own resource usage.
    int threadBlockSize = getCudaRecomendedThreadBlockSize(cuMoveDataToReorderedSplit<T, blocksize>);
    int nThreadBlocks = getNumberOfBlocks(numberOfRows, threadBlockSize);
-    cuMoveDataToReorderedSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
-        srcMatrix, srcRowIndices, srcColumnIndices, dstLowerMatrix, dstLowerRowIndices, dstUpperMatrix, dstUpperRowIndices, dstDiag, naturalToReordered, numberOfRows);
+    cuMoveDataToReorderedSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(srcMatrix,
+                                                                                 srcRowIndices,
+                                                                                 srcColumnIndices,
+                                                                                 dstLowerMatrix,
+                                                                                 dstLowerRowIndices,
+                                                                                 dstUpperMatrix,
+                                                                                 dstUpperRowIndices,
+                                                                                 dstDiag,
+                                                                                 naturalToReordered,
+                                                                                 numberOfRows);
 }

 #define INSTANTIATE_KERNEL_WRAPPERS(T, blocksize)                                                                      \
@ -691,7 +715,8 @@ copyMatDataToReorderedSplit(
    template void copyMatDataToReordered<T, blocksize>(T*, int*, T*, int*, int*, size_t);                              \
    template void copyMatDataToReorderedSplit<T, blocksize>(T*, int*, int*, T*, int*, T*, int*, T*, int*, size_t);     \
    template void computeDiluDiagonal<T, blocksize>(T*, int*, int*, int*, int*, const int, int, T*);                   \
-    template void computeDiluDiagonalSplit<T, blocksize>(T*, int*, int*, T*, int*, int*, T*, int*, int*, const int, int, T*);\
+    template void computeDiluDiagonalSplit<T, blocksize>(                                                              \
+        T*, int*, int*, T*, int*, int*, T*, int*, int*, const int, int, T*);                                           \
    template void computeUpperSolveLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*);               \
    template void computeLowerSolveLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, const T*, const T*, T*);     \
    template void computeUpperSolveLevelSetSplit<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*);          \
--- a/tests/cuistl/test_cudilu.cpp
+++ b/tests/cuistl/test_cudilu.cpp
@ -24,12 +24,12 @@
 #include <dune/common/fmatrix.hh>
 #include <dune/istl/bcrsmatrix.hh>
 #include <memory>
+#include <opm/simulators/linalg/DILU.hpp>
 #include <opm/simulators/linalg/cuistl/CuDILU.hpp>
 #include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
 #include <opm/simulators/linalg/cuistl/CuVector.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
-#include <opm/simulators/linalg/DILU.hpp>
 #include <random>
 #include <vector>

@ -47,7 +47,9 @@ using CuFloatingPointVec = Opm::cuistl::CuVector<T>;
 using CuDilu1x1 = Opm::cuistl::CuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
 using CuDilu2x2 = Opm::cuistl::CuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;

-Sp1x1BlockMatrix get1x1BlockTestMatrix(){
+Sp1x1BlockMatrix
+get1x1BlockTestMatrix()
+{
    /*
        matA:
        1  2  0  3  0  0
@ -132,7 +134,9 @@ Sp1x1BlockMatrix get1x1BlockTestMatrix(){
    return matA;
 }

-Sp2x2BlockMatrix get2x2BlockTestMatrix(){
+Sp2x2BlockMatrix
+get2x2BlockTestMatrix()
+{
    /*
    matA:
    1  2    0  3    0  0