From 82ff782d5f300df0cf20decb94e521a843c15531 Mon Sep 17 00:00:00 2001
From: Tobias Meyer Andersen
Date: Tue, 18 Jun 2024 11:42:00 +0200
Subject: [PATCH] clang format

---
 opm/simulators/linalg/cuistl/CuDILU.cpp        | 183 +++++++--------
 .../detail/cusparse_matrix_operations.cu       | 211 ++++++++++--------
 tests/cuistl/test_cudilu.cpp                   |  90 ++++----
 3 files changed, 261 insertions(+), 223 deletions(-)

diff --git a/opm/simulators/linalg/cuistl/CuDILU.cpp b/opm/simulators/linalg/cuistl/CuDILU.cpp
index 00e11d176..f54d124ef 100644
--- a/opm/simulators/linalg/cuistl/CuDILU.cpp
+++ b/opm/simulators/linalg/cuistl/CuDILU.cpp
@@ -25,9 +25,9 @@
 #include
 #include
 #include
+#include
 #include
 #include
-#include
 #include
 #include
@@ -65,7 +65,9 @@ createNaturalToReordered(Opm::SparseTable levelSets)
 template
 void
-createReorderedMatrix(const M& naturalMatrix, std::vector<int> reorderedToNatural, std::unique_ptr& reorderedGpuMat)
+createReorderedMatrix(const M& naturalMatrix,
+                      std::vector<int> reorderedToNatural,
+                      std::unique_ptr& reorderedGpuMat)
 {
     M reorderedMatrix(naturalMatrix.N(), naturalMatrix.N(), naturalMatrix.nonzeroes(), M::row_wise);
     for (auto dstRowIt = reorderedMatrix.createbegin(); dstRowIt != reorderedMatrix.createend(); ++dstRowIt) {
@@ -81,22 +83,26 @@ createReorderedMatrix(const M& naturalMatrix, std::vector reorderedToNatura
 template
 void
-extractLowerAndUpperMatrices(const M& naturalMatrix, std::vector<int> reorderedToNatural, std::unique_ptr& lower, std::unique_ptr& upper)
+extractLowerAndUpperMatrices(const M& naturalMatrix,
+                             std::vector<int> reorderedToNatural,
+                             std::unique_ptr& lower,
+                             std::unique_ptr& upper)
 {
-    const size_t new_nnz = (naturalMatrix.nonzeroes() - naturalMatrix.N())/2;
+    const size_t new_nnz = (naturalMatrix.nonzeroes() - naturalMatrix.N()) / 2;
     M reorderedLower(naturalMatrix.N(), naturalMatrix.N(), new_nnz, M::row_wise);
     M reorderedUpper(naturalMatrix.N(), naturalMatrix.N(), new_nnz, M::row_wise);
-    for (auto lowerIt = reorderedLower.createbegin(), upperIt = reorderedUpper.createbegin(); lowerIt != reorderedLower.createend(); ++lowerIt, ++upperIt) {
+    for (auto lowerIt = reorderedLower.createbegin(), upperIt = reorderedUpper.createbegin();
+         lowerIt != reorderedLower.createend();
+         ++lowerIt, ++upperIt) {
         auto srcRow = naturalMatrix.begin() + reorderedToNatural[lowerIt.index()];
         for (auto elem = srcRow->begin(); elem != srcRow->end(); ++elem) {
-            if (elem.index() < srcRow.index()){ // add index to lower matrix if under the diagonal
+            if (elem.index() < srcRow.index()) { // add index to lower matrix if under the diagonal
                 lowerIt.insert(elem.index());
-            }
-            else if (elem.index() > srcRow.index()){ // add element to upper matrix if above the diagonal
+            } else if (elem.index() > srcRow.index()) { // add element to upper matrix if above the diagonal
                 upperIt.insert(elem.index());
             }
         }
@@ -144,12 +150,13 @@ CuDILU::CuDILU(const M& A, bool split_matrix)
", m_gpuMatrix.nonzeroes(), A.nonzeroes())); - if (m_split_matrix){ - m_gpuMatrixReorderedDiag.emplace(CuVector(blocksize_*blocksize_*m_cpuMatrix.N())); - extractLowerAndUpperMatrices>(m_cpuMatrix, m_reorderedToNatural, m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper); - } - else{ - createReorderedMatrix>(m_cpuMatrix, m_reorderedToNatural, m_gpuMatrixReordered); + if (m_split_matrix) { + m_gpuMatrixReorderedDiag.emplace(CuVector(blocksize_ * blocksize_ * m_cpuMatrix.N())); + extractLowerAndUpperMatrices>( + m_cpuMatrix, m_reorderedToNatural, m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper); + } else { + createReorderedMatrix>( + m_cpuMatrix, m_reorderedToNatural, m_gpuMatrixReordered); } computeDiagAndMoveReorderedData(); } @@ -170,27 +177,28 @@ CuDILU::apply(X& v, const Y& d) int levelStartIdx = 0; for (int level = 0; level < m_levelSets.size(); ++level) { const int numOfRowsInLevel = m_levelSets[level].size(); - if (m_split_matrix){ - detail::computeLowerSolveLevelSetSplit(m_gpuMatrixReorderedLower->getNonZeroValues().data(), - m_gpuMatrixReorderedLower->getRowIndices().data(), - m_gpuMatrixReorderedLower->getColumnIndices().data(), - m_gpuReorderToNatural.data(), - levelStartIdx, - numOfRowsInLevel, - m_gpuDInv.data(), - d.data(), - v.data()); - } - else{ - detail::computeLowerSolveLevelSet(m_gpuMatrixReordered->getNonZeroValues().data(), - m_gpuMatrixReordered->getRowIndices().data(), - m_gpuMatrixReordered->getColumnIndices().data(), - m_gpuReorderToNatural.data(), - levelStartIdx, - numOfRowsInLevel, - m_gpuDInv.data(), - d.data(), - v.data()); + if (m_split_matrix) { + detail::computeLowerSolveLevelSetSplit( + m_gpuMatrixReorderedLower->getNonZeroValues().data(), + m_gpuMatrixReorderedLower->getRowIndices().data(), + m_gpuMatrixReorderedLower->getColumnIndices().data(), + m_gpuReorderToNatural.data(), + levelStartIdx, + numOfRowsInLevel, + m_gpuDInv.data(), + d.data(), + v.data()); + } else { + detail::computeLowerSolveLevelSet( + m_gpuMatrixReordered->getNonZeroValues().data(), + m_gpuMatrixReordered->getRowIndices().data(), + m_gpuMatrixReordered->getColumnIndices().data(), + m_gpuReorderToNatural.data(), + levelStartIdx, + numOfRowsInLevel, + m_gpuDInv.data(), + d.data(), + v.data()); } levelStartIdx += numOfRowsInLevel; } @@ -200,25 +208,26 @@ CuDILU::apply(X& v, const Y& d) for (int level = m_levelSets.size() - 1; level >= 0; --level) { const int numOfRowsInLevel = m_levelSets[level].size(); levelStartIdx -= numOfRowsInLevel; - if (m_split_matrix){ - detail::computeUpperSolveLevelSetSplit(m_gpuMatrixReorderedUpper->getNonZeroValues().data(), - m_gpuMatrixReorderedUpper->getRowIndices().data(), - m_gpuMatrixReorderedUpper->getColumnIndices().data(), - m_gpuReorderToNatural.data(), - levelStartIdx, - numOfRowsInLevel, - m_gpuDInv.data(), - v.data()); - } - else{ - detail::computeUpperSolveLevelSet(m_gpuMatrixReordered->getNonZeroValues().data(), - m_gpuMatrixReordered->getRowIndices().data(), - m_gpuMatrixReordered->getColumnIndices().data(), - m_gpuReorderToNatural.data(), - levelStartIdx, - numOfRowsInLevel, - m_gpuDInv.data(), - v.data()); + if (m_split_matrix) { + detail::computeUpperSolveLevelSetSplit( + m_gpuMatrixReorderedUpper->getNonZeroValues().data(), + m_gpuMatrixReorderedUpper->getRowIndices().data(), + m_gpuMatrixReorderedUpper->getColumnIndices().data(), + m_gpuReorderToNatural.data(), + levelStartIdx, + numOfRowsInLevel, + m_gpuDInv.data(), + v.data()); + } else { + detail::computeUpperSolveLevelSet( + m_gpuMatrixReordered->getNonZeroValues().data(), + 
+                m_gpuMatrixReordered->getRowIndices().data(),
+                m_gpuMatrixReordered->getColumnIndices().data(),
+                m_gpuReorderToNatural.data(),
+                levelStartIdx,
+                numOfRowsInLevel,
+                m_gpuDInv.data(),
+                v.data());
         }
     }
 }
@@ -254,45 +263,45 @@ CuDILU::computeDiagAndMoveReorderedData()
 {
     OPM_TIMEBLOCK(prec_update);
     {
-        if (m_split_matrix){
-            detail::copyMatDataToReorderedSplit(m_gpuMatrix.getNonZeroValues().data(),
-                                                m_gpuMatrix.getRowIndices().data(),
-                                                m_gpuMatrix.getColumnIndices().data(),
-                                                m_gpuMatrixReorderedLower->getNonZeroValues().data(),
-                                                m_gpuMatrixReorderedLower->getRowIndices().data(),
-                                                m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
-                                                m_gpuMatrixReorderedUpper->getRowIndices().data(),
-                                                m_gpuMatrixReorderedDiag.value().data(),
-                                                m_gpuNaturalToReorder.data(),
-                                                m_gpuMatrixReorderedLower->N());
-        }
-        else{
+        if (m_split_matrix) {
+            detail::copyMatDataToReorderedSplit(
+                m_gpuMatrix.getNonZeroValues().data(),
+                m_gpuMatrix.getRowIndices().data(),
+                m_gpuMatrix.getColumnIndices().data(),
+                m_gpuMatrixReorderedLower->getNonZeroValues().data(),
+                m_gpuMatrixReorderedLower->getRowIndices().data(),
+                m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
+                m_gpuMatrixReorderedUpper->getRowIndices().data(),
+                m_gpuMatrixReorderedDiag.value().data(),
+                m_gpuNaturalToReorder.data(),
+                m_gpuMatrixReorderedLower->N());
+        } else {
             detail::copyMatDataToReordered(m_gpuMatrix.getNonZeroValues().data(),
-                                          m_gpuMatrix.getRowIndices().data(),
-                                          m_gpuMatrixReordered->getNonZeroValues().data(),
-                                          m_gpuMatrixReordered->getRowIndices().data(),
-                                          m_gpuNaturalToReorder.data(),
-                                          m_gpuMatrixReordered->N());
+                                           m_gpuMatrix.getRowIndices().data(),
+                                           m_gpuMatrixReordered->getNonZeroValues().data(),
+                                           m_gpuMatrixReordered->getRowIndices().data(),
+                                           m_gpuNaturalToReorder.data(),
+                                           m_gpuMatrixReordered->N());
         }
         int levelStartIdx = 0;
         for (int level = 0; level < m_levelSets.size(); ++level) {
             const int numOfRowsInLevel = m_levelSets[level].size();
-            if (m_split_matrix){
-                detail::computeDiluDiagonalSplit(m_gpuMatrixReorderedLower->getNonZeroValues().data(),
-                                                 m_gpuMatrixReorderedLower->getRowIndices().data(),
-                                                 m_gpuMatrixReorderedLower->getColumnIndices().data(),
-                                                 m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
-                                                 m_gpuMatrixReorderedUpper->getRowIndices().data(),
-                                                 m_gpuMatrixReorderedUpper->getColumnIndices().data(),
-                                                 m_gpuMatrixReorderedDiag.value().data(),
-                                                 m_gpuReorderToNatural.data(),
-                                                 m_gpuNaturalToReorder.data(),
-                                                 levelStartIdx,
-                                                 numOfRowsInLevel,
-                                                 m_gpuDInv.data());
-            }
-            else{
+            if (m_split_matrix) {
+                detail::computeDiluDiagonalSplit(
+                    m_gpuMatrixReorderedLower->getNonZeroValues().data(),
+                    m_gpuMatrixReorderedLower->getRowIndices().data(),
+                    m_gpuMatrixReorderedLower->getColumnIndices().data(),
+                    m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
+                    m_gpuMatrixReorderedUpper->getRowIndices().data(),
+                    m_gpuMatrixReorderedUpper->getColumnIndices().data(),
+                    m_gpuMatrixReorderedDiag.value().data(),
+                    m_gpuReorderToNatural.data(),
+                    m_gpuNaturalToReorder.data(),
+                    levelStartIdx,
+                    numOfRowsInLevel,
+                    m_gpuDInv.data());
+            } else {
                 detail::computeDiluDiagonal(m_gpuMatrixReordered->getNonZeroValues().data(),
                                             m_gpuMatrixReordered->getRowIndices().data(),
                                             m_gpuMatrixReordered->getColumnIndices().data(),
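Note on the math implemented above: CuDILU is the standard DILU preconditioner. Writing the block matrix as A = L + D_A + U, with strict lower part L, block diagonal D_A, and strict upper part U, DILU uses M = (D + L) D^{-1} (D + U), where the block diagonal D is chosen so that diag(M) = diag(A). computeDiagAndMoveReorderedData() computes and inverts D (stored in m_gpuDInv), and apply() performs the two triangular sweeps, reusing v for both the intermediate and the final result. With sums running over the nonzero pattern:

\[
D_{ii} = A_{ii} - \sum_{j<i} A_{ij}\, D_{jj}^{-1} A_{ji},
\]
\[
\text{forward sweep: } z_i = D_{ii}^{-1}\Bigl(d_i - \sum_{j<i} A_{ij} z_j\Bigr), \qquad
\text{backward sweep: } v_i = z_i - D_{ii}^{-1} \sum_{j>i} A_{ij} v_j .
\]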
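The level-set loops in apply() work because rows are reordered so that every row in one level set depends only on rows in earlier sets; each row in a set can therefore be handled by an independent GPU thread. The simulator obtains m_levelSets from the matrix coloring utilities in opm/simulators/linalg/GraphColoring.hpp. Purely as an illustration of the idea (this sketch is not the OPM routine, and the CSR-style rowPtr/colIdx inputs are hypothetical), level sets can be built like this:

#include <algorithm>
#include <vector>

// Assign each row the length of its longest chain of lower-triangular
// dependencies; rows that share a level can be processed in parallel.
std::vector<std::vector<int>>
buildLevelSets(const std::vector<int>& rowPtr, const std::vector<int>& colIdx)
{
    const int n = static_cast<int>(rowPtr.size()) - 1;
    std::vector<int> level(n, 0);
    int maxLevel = 0;
    for (int i = 0; i < n; ++i) {
        for (int k = rowPtr[i]; k < rowPtr[i + 1]; ++k) {
            const int j = colIdx[k];
            if (j < i) { // entry below the diagonal: row i must wait for row j
                level[i] = std::max(level[i], level[j] + 1);
            }
        }
        maxLevel = std::max(maxLevel, level[i]);
    }
    std::vector<std::vector<int>> levelSets(maxLevel + 1);
    for (int i = 0; i < n; ++i) {
        levelSets[level[i]].push_back(i); // rows in one set are independent
    }
    return levelSets;
}

Concatenating the sets in order yields exactly the reordered-to-natural index map that m_gpuReorderToNatural holds, and levelStartIdx in apply() is the running offset of each set within that concatenation.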
diff --git a/opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.cu b/opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.cu
index 703892dd3..967160c72 100644
--- a/opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.cu
+++ b/opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.cu
@@ -229,14 +229,14 @@ namespace
     template <class T, int blocksize>
     __global__ void cuComputeLowerSolveLevelSetSplit(T* mat,
-                                                int* rowIndices,
-                                                int* colIndices,
-                                                int* indexConversion,
-                                                int startIdx,
-                                                int rowsInLevelSet,
-                                                const T* dInv,
-                                                const T* d,
-                                                T* v)
+                                                     int* rowIndices,
+                                                     int* colIndices,
+                                                     int* indexConversion,
+                                                     int startIdx,
+                                                     int rowsInLevelSet,
+                                                     const T* dInv,
+                                                     const T* d,
+                                                     T* v)
     {
         const auto reorderedRowIdx = startIdx + (blockDim.x * blockIdx.x + threadIdx.x);
         if (reorderedRowIdx < rowsInLevelSet + startIdx) {
@@ -250,7 +250,7 @@ namespace
                 rhs[i] = d[naturalRowIdx * blocksize + i];
             }

-            //TODO: removce the first condition in the for loop
+            // TODO: remove the first condition in the for loop
             for (int block = nnzIdx; block < nnzIdxLim; ++block) {
                 const int col = colIndices[block];
                 mmv(&mat[block * blocksize * blocksize], &v[col * blocksize], rhs);
@@ -288,13 +288,13 @@ namespace
     template <class T, int blocksize>
     __global__ void cuComputeUpperSolveLevelSetSplit(T* mat,
-                                                int* rowIndices,
-                                                int* colIndices,
-                                                int* indexConversion,
-                                                int startIdx,
-                                                int rowsInLevelSet,
-                                                const T* dInv,
-                                                T* v)
+                                                     int* rowIndices,
+                                                     int* colIndices,
+                                                     int* indexConversion,
+                                                     int startIdx,
+                                                     int rowsInLevelSet,
+                                                     const T* dInv,
+                                                     T* v)
     {
         const auto reorderedRowIdx = startIdx + (blockDim.x * blockIdx.x + threadIdx.x);
         if (reorderedRowIdx < rowsInLevelSet + startIdx) {
@@ -381,23 +381,23 @@ namespace
     template <class T, int blocksize>
     __global__ void cuComputeDiluDiagonalSplit(T* reorderedLowerMat,
-                                          int* lowerRowIndices,
-                                          int* lowerColIndices,
-                                          T* reorderedUpperMat,
-                                          int* upperRowIndices,
-                                          int* upperColIndices,
-                                          T* diagonal,
-                                          int* reorderedToNatural,
-                                          int* naturalToReordered,
-                                          const int startIdx,
-                                          int rowsInLevelSet,
-                                          T* dInv)
+                                               int* lowerRowIndices,
+                                               int* lowerColIndices,
+                                               T* reorderedUpperMat,
+                                               int* upperRowIndices,
+                                               int* upperColIndices,
+                                               T* diagonal,
+                                               int* reorderedToNatural,
+                                               int* naturalToReordered,
+                                               const int startIdx,
+                                               int rowsInLevelSet,
+                                               T* dInv)
     {
         const auto reorderedRowIdx = startIdx + blockDim.x * blockIdx.x + threadIdx.x;
         if (reorderedRowIdx < rowsInLevelSet + startIdx) {
             const int naturalRowIdx = reorderedToNatural[reorderedRowIdx];
             const size_t lowerRowStart = lowerRowIndices[reorderedRowIdx];
-            const size_t lowerRowEnd = lowerRowIndices[reorderedRowIdx+1];
+            const size_t lowerRowEnd = lowerRowIndices[reorderedRowIdx + 1];

             T dInvTmp[blocksize * blocksize];
             for (int i = 0; i < blocksize; ++i) {
@@ -410,8 +410,8 @@ namespace
                 const int col = naturalToReordered[lowerColIndices[block]];

                 int symOppositeIdx = upperRowIndices[col];
-                for (; symOppositeIdx < upperRowIndices[col + 1]; ++symOppositeIdx){
-                    if (naturalRowIdx == upperColIndices[symOppositeIdx]){
+                for (; symOppositeIdx < upperRowIndices[col + 1]; ++symOppositeIdx) {
+                    if (naturalRowIdx == upperColIndices[symOppositeIdx]) {
                         break;
                     }
                 }
@@ -457,15 +457,23 @@ namespace
     }

     template <class T, int blocksize>
-    __global__ void cuMoveDataToReorderedSplit(
-        T* srcMatrix, int* srcRowIndices, int* srcColumnIndices, T* dstLowerMatrix, int* dstLowerRowIndices, T* dstUpperMatrix, int* dstUpperRowIndices, T* dstDiag, int* naturalToReordered, size_t numberOfRows)
+    __global__ void cuMoveDataToReorderedSplit(T* srcMatrix,
+                                               int* srcRowIndices,
+                                               int* srcColumnIndices,
+                                               T* dstLowerMatrix,
+                                               int* dstLowerRowIndices,
+                                               T* dstUpperMatrix,
+                                               int* dstUpperRowIndices,
+                                               T* dstDiag,
+                                               int* naturalToReordered,
+                                               size_t numberOfRows)
     {
         const auto srcRow = blockDim.x * blockIdx.x + threadIdx.x;
         if (srcRow < numberOfRows) {
            const auto dstRow = naturalToReordered[srcRow];
            const auto rowStart = srcRowIndices[srcRow];
-            const auto rowEnd = srcRowIndices[srcRow+1];
+            const auto rowEnd = srcRowIndices[srcRow + 1];

            auto lowerBlock = dstLowerRowIndices[dstRow];
            auto upperBlock = dstUpperRowIndices[dstRow];
@@ -474,17 +482,16 @@ namespace
                int dstBlock;
                T* dstBuffer;

-                if (srcColumnIndices[srcBlock] < srcRow){ // we are writing a value to the lower triangular matrix
+                if (srcColumnIndices[srcBlock] < srcRow) { // we are writing a value to the lower triangular matrix
                    dstBlock = lowerBlock;
                    ++lowerBlock;
                    dstBuffer = dstLowerMatrix;
-                }
-                else if (srcColumnIndices[srcBlock] > srcRow){ // we are writing a value to the upper triangular matrix
+                } else if (srcColumnIndices[srcBlock]
+                           > srcRow) { // we are writing a value to the upper triangular matrix
                    dstBlock = upperBlock;
                    ++upperBlock;
                    dstBuffer = dstUpperMatrix;
-                }
-                else{ // we are writing a value to the diagonal
+                } else { // we are writing a value to the diagonal
                    dstBlock = dstRow;
                    dstBuffer = dstDiag;
                }
@@ -511,14 +518,16 @@ namespace

    // Kernel here is the function object of the cuda kernel
    template <class Kernel>
-    inline int getCudaRecomendedThreadBlockSize(Kernel k){
+    inline int getCudaRecomendedThreadBlockSize(Kernel k)
+    {
        int blockSize;
        int tmpGridSize;
-        cudaOccupancyMaxPotentialBlockSize( &tmpGridSize, &blockSize, k, 0, 0);
+        cudaOccupancyMaxPotentialBlockSize(&tmpGridSize, &blockSize, k, 0, 0);
        return blockSize;
    }

-    inline int getNumberOfBlocks(int wantedThreads, int threadBlockSize){
+    inline int getNumberOfBlocks(int wantedThreads, int threadBlockSize)
+    {
        return (wantedThreads + threadBlockSize - 1) / threadBlockSize;
    }

@@ -557,14 +566,14 @@ computeLowerSolveLevelSet(T* reorderedMat,
 template <class T, int blocksize>
 void
 computeLowerSolveLevelSetSplit(T* reorderedMat,
-                          int* rowIndices,
-                          int* colIndices,
-                          int* indexConversion,
-                          int startIdx,
-                          int rowsInLevelSet,
-                          const T* dInv,
-                          const T* d,
-                          T* v)
+                               int* rowIndices,
+                               int* colIndices,
+                               int* indexConversion,
+                               int startIdx,
+                               int rowsInLevelSet,
+                               const T* dInv,
+                               const T* d,
+                               T* v)
 {
     int threadBlockSize = getCudaRecomendedThreadBlockSize(cuComputeLowerSolveLevelSetSplit<T, blocksize>);
     int nThreadBlocks = getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
@@ -590,13 +599,13 @@ computeUpperSolveLevelSet(T* reorderedMat,
 template <class T, int blocksize>
 void
 computeUpperSolveLevelSetSplit(T* reorderedMat,
-                          int* rowIndices,
-                          int* colIndices,
-                          int* indexConversion,
-                          int startIdx,
-                          int rowsInLevelSet,
-                          const T* dInv,
-                          T* v)
+                               int* rowIndices,
+                               int* colIndices,
+                               int* indexConversion,
+                               int startIdx,
+                               int rowsInLevelSet,
+                               const T* dInv,
+                               T* v)
 {
     int threadBlockSize = getCudaRecomendedThreadBlockSize(cuComputeLowerSolveLevelSetSplit<T, blocksize>);
     int nThreadBlocks = getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
@@ -633,34 +642,33 @@ computeDiluDiagonal(T* reorderedMat,
 template <class T, int blocksize>
 void
 computeDiluDiagonalSplit(T* reorderedLowerMat,
-                    int* lowerRowIndices,
-                    int* lowerColIndices,
-                    T* reorderedUpperMat,
-                    int* upperRowIndices,
-                    int* upperColIndices,
-                    T* diagonal,
-                    int* reorderedToNatural,
-                    int* naturalToReordered,
-                    const int startIdx,
-                    int rowsInLevelSet,
-                    T* dInv)
+                         int* lowerRowIndices,
+                         int* lowerColIndices,
+                         T* reorderedUpperMat,
+                         int* upperRowIndices,
+                         int* upperColIndices,
+                         T* diagonal,
+                         int* reorderedToNatural,
+                         int* naturalToReordered,
+                         const int startIdx,
+                         int rowsInLevelSet,
+                         T* dInv)
 {
     if (blocksize <= 3) {
         int threadBlockSize = getCudaRecomendedThreadBlockSize(cuComputeLowerSolveLevelSetSplit<T, blocksize>);
         int nThreadBlocks = getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
-        cuComputeDiluDiagonalSplit<T, blocksize>
-            <<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
-                                                 lowerRowIndices,
-                                                 lowerColIndices,
-                                                 reorderedUpperMat,
-                                                 upperRowIndices,
-                                                 upperColIndices,
-                                                 diagonal,
-                                                 reorderedToNatural,
-                                                 naturalToReordered,
-                                                 startIdx,
-                                                 rowsInLevelSet,
-                                                 dInv);
+        cuComputeDiluDiagonalSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
+                                                                                     lowerRowIndices,
+                                                                                     lowerColIndices,
+                                                                                     reorderedUpperMat,
+                                                                                     upperRowIndices,
+                                                                                     upperColIndices,
+                                                                                     diagonal,
+                                                                                     reorderedToNatural,
+                                                                                     naturalToReordered,
+                                                                                     startIdx,
+                                                                                     rowsInLevelSet,
+                                                                                     dInv);
     } else {
         OPM_THROW(std::invalid_argument, "Inverting diagonal is not implemented for blocksizes > 3");
     }
@@ -677,24 +685,41 @@ copyMatDataToReordered(
 template <class T, int blocksize>
 void
-copyMatDataToReorderedSplit(
-    T* srcMatrix, int* srcRowIndices, int* srcColumnIndices, T* dstLowerMatrix, int* dstLowerRowIndices, T* dstUpperMatrix, int* dstUpperRowIndices, T* dstDiag, int* naturalToReordered, size_t numberOfRows)
+copyMatDataToReorderedSplit(T* srcMatrix,
+                            int* srcRowIndices,
+                            int* srcColumnIndices,
+                            T* dstLowerMatrix,
+                            int* dstLowerRowIndices,
+                            T* dstUpperMatrix,
+                            int* dstUpperRowIndices,
+                            T* dstDiag,
+                            int* naturalToReordered,
+                            size_t numberOfRows)
 {
     int threadBlockSize = getCudaRecomendedThreadBlockSize(cuComputeLowerSolveLevelSetSplit<T, blocksize>);
     int nThreadBlocks = getNumberOfBlocks(numberOfRows, threadBlockSize);
-    cuMoveDataToReorderedSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
-        srcMatrix, srcRowIndices, srcColumnIndices, dstLowerMatrix, dstLowerRowIndices, dstUpperMatrix, dstUpperRowIndices, dstDiag, naturalToReordered, numberOfRows);
+    cuMoveDataToReorderedSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(srcMatrix,
+                                                                                 srcRowIndices,
+                                                                                 srcColumnIndices,
+                                                                                 dstLowerMatrix,
+                                                                                 dstLowerRowIndices,
+                                                                                 dstUpperMatrix,
+                                                                                 dstUpperRowIndices,
+                                                                                 dstDiag,
+                                                                                 naturalToReordered,
+                                                                                 numberOfRows);
 }

-#define INSTANTIATE_KERNEL_WRAPPERS(T, blocksize) \
-    template void invertDiagonalAndFlatten<T, blocksize>(T*, int*, int*, size_t, T*); \
-    template void copyMatDataToReordered<T, blocksize>(T*, int*, T*, int*, int*, size_t); \
-    template void copyMatDataToReorderedSplit<T, blocksize>(T*, int*, int*, T*, int*, T*, int*, T*, int*, size_t); \
-    template void computeDiluDiagonal<T, blocksize>(T*, int*, int*, int*, int*, const int, int, T*); \
-    template void computeDiluDiagonalSplit<T, blocksize>(T*, int*, int*, T*, int*, int*, T*, int*, int*, const int, int, T*);\
-    template void computeUpperSolveLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*); \
-    template void computeLowerSolveLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, const T*, const T*, T*); \
-    template void computeUpperSolveLevelSetSplit<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*); \
+#define INSTANTIATE_KERNEL_WRAPPERS(T, blocksize)                                                                     \
+    template void invertDiagonalAndFlatten<T, blocksize>(T*, int*, int*, size_t, T*);                                 \
+    template void copyMatDataToReordered<T, blocksize>(T*, int*, T*, int*, int*, size_t);                             \
+    template void copyMatDataToReorderedSplit<T, blocksize>(T*, int*, int*, T*, int*, T*, int*, T*, int*, size_t);    \
+    template void computeDiluDiagonal<T, blocksize>(T*, int*, int*, int*, int*, const int, int, T*);                  \
+    template void computeDiluDiagonalSplit<T, blocksize>(                                                             \
+        T*, int*, int*, T*, int*, int*, T*, int*, int*, const int, int, T*);                                          \
+    template void computeUpperSolveLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*);              \
+    template void computeLowerSolveLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, const T*, const T*, T*);    \
+    template void computeUpperSolveLevelSetSplit<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*);         \
     template void computeLowerSolveLevelSetSplit<T, blocksize>(T*, int*, int*, int*, int, int, const T*, const T*, T*);

 INSTANTIATE_KERNEL_WRAPPERS(float, 1);
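getCudaRecomendedThreadBlockSize above is a thin wrapper around the CUDA runtime's occupancy API, and getNumberOfBlocks is the usual round-up division. A self-contained sketch of the same launch-configuration pattern (dummyKernel and its arguments are placeholders, not kernels from this patch):

#include <cuda_runtime.h>

__global__ void dummyKernel(double* x, int n)
{
    const int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < n) {
        x[i] *= 2.0; // stand-in for real work
    }
}

void launchWithRecommendedBlockSize(double* x, int n)
{
    int blockSize = 0;
    int minGridSize = 0; // grid size for full occupancy; not needed here
    // Ask the runtime for the block size that maximizes occupancy of this
    // kernel, as getCudaRecomendedThreadBlockSize does in this patch.
    cudaOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, dummyKernel, 0, 0);
    // Round up so nBlocks * blockSize covers all n threads,
    // as getNumberOfBlocks does in this patch.
    const int nBlocks = (n + blockSize - 1) / blockSize;
    dummyKernel<<<nBlocks, blockSize>>>(x, n);
}

One detail worth noting when reading the wrappers: computeUpperSolveLevelSetSplit, computeDiluDiagonalSplit, and copyMatDataToReorderedSplit all query the occupancy of cuComputeLowerSolveLevelSetSplit rather than of the kernel they actually launch. The resulting block size is still a legal launch configuration, but it is tuned for a different kernel.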
diff --git a/tests/cuistl/test_cudilu.cpp b/tests/cuistl/test_cudilu.cpp
index 221948220..d18fdaf66 100644
--- a/tests/cuistl/test_cudilu.cpp
+++ b/tests/cuistl/test_cudilu.cpp
@@ -24,12 +24,12 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
 #include
 #include
-#include
 #include
 #include
@@ -44,47 +44,49 @@ using Sp2x2BlockMatrix = Dune::BCRSMatrix;
 using CuMatrix = Opm::cuistl::CuSparseMatrix;
 using CuIntVec = Opm::cuistl::CuVector;
 using CuFloatingPointVec = Opm::cuistl::CuVector;
-using CuDilu1x1 = Opm::cuistl::CuDILU;
-using CuDilu2x2 = Opm::cuistl::CuDILU;
+using CuDilu1x1 = Opm::cuistl::CuDILU;
+using CuDilu2x2 = Opm::cuistl::CuDILU;

-Sp1x1BlockMatrix get1x1BlockTestMatrix(){
-/*
-   matA:
-   1 2 0 3 0 0
-   4 5 0 6 0 7
-   0 0 8 0 0 0
-   9 10 0 11 12 0
-   0 0 0 13 14 0
-   0 15 0 0 0 16
+Sp1x1BlockMatrix
+get1x1BlockTestMatrix()
+{
+    /*
+        matA:
+        1 2 0 3 0 0
+        4 5 0 6 0 7
+        0 0 8 0 0 0
+        9 10 0 11 12 0
+        0 0 0 13 14 0
+        0 15 0 0 0 16

-   Expected reordering:
-   1 2 0 3 0 0
-   0 0 8 0 0 0
-   4 5 0 6 0 7
-   9 10 0 11 12 0
-   0 15 0 0 0 16
-   0 0 0 13 14 0
+        Expected reordering:
+        1 2 0 3 0 0
+        0 0 8 0 0 0
+        4 5 0 6 0 7
+        9 10 0 11 12 0
+        0 15 0 0 0 16
+        0 0 0 13 14 0

-   Expected lowerTriangularReorderedMatrix:
-   0 0 0 0 0 0
-   0 0 0 0 0 0
-   4 0 0 0 0 0
-   9 10 0 0 0 0
-   0 15 0 0 0 0
-   0 0 0 13 0 0
+        Expected lowerTriangularReorderedMatrix:
+        0 0 0 0 0 0
+        0 0 0 0 0 0
+        4 0 0 0 0 0
+        9 10 0 0 0 0
+        0 15 0 0 0 0
+        0 0 0 13 0 0

-   Expected lowerTriangularReorderedMatrix:
-   0 2 0 3 0 0
-   0 0 0 0 0 0
-   0 0 0 6 0 7
-   0 0 0 0 12 0
-   0 0 0 0 0 0
-*/
+        Expected upperTriangularReorderedMatrix:
+        0 2 0 3 0 0
+        0 0 0 0 0 0
+        0 0 0 6 0 7
+        0 0 0 0 12 0
+        0 0 0 0 0 0
+        0 0 0 0 0 0
+    */
     const int N = 6;
     const int nonZeroes = 16;
-    //Create the Dune A matrix
+    // Create the Dune A matrix
     Sp1x1BlockMatrix matA(N, N, nonZeroes, Sp1x1BlockMatrix::row_wise);
     for (auto row = matA.createbegin(); row != matA.createend(); ++row) {
         row.insert(row.index());
@@ -132,7 +134,9 @@ Sp1x1BlockMatrix get1x1BlockTestMatrix(){
     return matA;
 }

-Sp2x2BlockMatrix get2x2BlockTestMatrix(){
+Sp2x2BlockMatrix
+get2x2BlockTestMatrix()
+{
     /*
     matA:
     1 2 0 3 0 0
     4 5 0 6 0 7
     0 0 8 0 0 0
     9 10 0 11 12 0
     0 0 0 13 14 0
     0 15 0 0 0 16
     */
     const int N = 3;
     const int nonZeroes = 9;
-    //Create the Dune A matrix
+    // Create the Dune A matrix
     Sp2x2BlockMatrix matA(N, N, nonZeroes, Sp2x2BlockMatrix::row_wise);
     for (auto row = matA.createbegin(); row != matA.createend(); ++row) {
         row.insert(row.index());
@@ -215,13 +219,13 @@ BOOST_AUTO_TEST_CASE(TestDiluApply)

     // put results in std::vector
     std::vector cpudilures;
-    for (auto e : h_output){
+    for (auto e : h_output) {
         cpudilures.push_back(e);
     }
     auto cudilures = d_output.asStdVector();

     // check that CuDilu results matches that of CPU dilu
-    for (size_t i = 0; i < cudilures.size(); ++i){
+    for (size_t i = 0; i < cudilures.size(); ++i) {
         BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7);
     }
 }
@@ -255,14 +259,14 @@ BOOST_AUTO_TEST_CASE(TestDiluApplyBlocked)

     auto cudilures = d_output.asStdVector();
     std::vector cpudilures;
-    for (auto v : h_output){
-        for (auto e : v){
+    for (auto v : h_output) {
+        for (auto e : v) {
             cpudilures.push_back(e);
         }
     }

     // check that the values are close
-    for (size_t i = 0; i < cudilures.size(); ++i){
+    for (size_t i = 0; i < cudilures.size(); ++i) {
         BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7);
     }
 }
@@ -316,13 +320,13 @@ BOOST_AUTO_TEST_CASE(TestDiluInitAndUpdateLarge)

     // put results in std::vector
     std::vector cpudilures;
-    for (auto e : h_output){
+    for (auto e : h_output) {
         cpudilures.push_back(e);
     }
     auto cudilures = d_output.asStdVector();

     // check that CuDilu results matches that of CPU dilu
-    for (size_t i = 0; i < cudilures.size(); ++i){
+    for (size_t i = 0; i < cudilures.size(); ++i) {
         BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7);
     }
 }
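For readers new to this code, a sketch of how the preconditioner in this patch is driven, mirroring the constructor CuDILU(const M& A, bool split_matrix) and apply(X& v, const Y& d) seen above. The CuDILU template argument list, the CuVector construction from a std::vector, and the update() entry point are assumptions based on the test file and the Dune preconditioner interface, not guaranteed by this patch:

#include <vector>

#include <dune/common/fmatrix.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <opm/simulators/linalg/cuistl/CuDILU.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>

int main()
{
    using M = Dune::BCRSMatrix<Dune::FieldMatrix<double, 1, 1>>;
    using V = Opm::cuistl::CuVector<double>;

    // Tiny 2x2 toy matrix in natural ordering, assembled Dune-style.
    M A(2, 2, 4, M::row_wise);
    for (auto row = A.createbegin(); row != A.createend(); ++row) {
        row.insert(0);
        row.insert(1);
    }
    A[0][0] = 4.0; A[0][1] = 1.0;
    A[1][0] = 1.0; A[1][1] = 4.0;

    // split_matrix = true selects the separate lower/diagonal/upper storage
    // added by the split code paths in this patch.
    Opm::cuistl::CuDILU<M, V, V> prec(A, /*split_matrix=*/true);

    V d(std::vector<double> {1.0, 2.0}); // right-hand side, copied to the GPU
    V v(2);                              // preconditioned result
    prec.apply(v, d);                    // v = M^{-1} d via the two level-set sweeps
    prec.update();                       // assumed re-factorization hook after A's values change
    return 0;
}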