/* Copyright 2022-2023 SINTEF AS This file is part of the Open Porous Media project (OPM). OPM is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OPM is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OPM. If not, see . */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include // This file is based on the guide at https://docs.nvidia.com/cuda/cusparse/index.html#csrilu02_solve , // it highly recommended to read that before proceeding. namespace Opm::cuistl { template CuSeqILU0::CuSeqILU0(const M& A, field_type w) : m_underlyingMatrix(A) , m_w(w) , m_LU(CuSparseMatrix::fromMatrix(detail::makeMatrixWithNonzeroDiagonal(A))) , m_temporaryStorage(m_LU.N() * m_LU.blockSize()) , m_descriptionL(detail::createLowerDiagonalDescription()) , m_descriptionU(detail::createUpperDiagonalDescription()) , m_cuSparseHandle(detail::CuSparseHandle::getInstance()) { // Some sanity check OPM_ERROR_IF(A.N() != m_LU.N(), fmt::format("CuSparse matrix not same size as DUNE matrix. {} vs {}.", m_LU.N(), A.N())); OPM_ERROR_IF( A[0][0].N() != m_LU.blockSize(), fmt::format("CuSparse matrix not same blocksize as DUNE matrix. {} vs {}.", m_LU.blockSize(), A[0][0].N())); OPM_ERROR_IF( A.N() * A[0][0].N() != m_LU.dim(), fmt::format("CuSparse matrix not same dimension as DUNE matrix. {} vs {}.", m_LU.dim(), A.N() * A[0][0].N())); OPM_ERROR_IF(A.nonzeroes() != m_LU.nonzeroes(), fmt::format("CuSparse matrix not same number of non zeroes as DUNE matrix. {} vs {}. ", m_LU.nonzeroes(), A.nonzeroes())); updateILUConfiguration(); } template void CuSeqILU0::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b) { } template void CuSeqILU0::apply(X& v, const Y& d) { // We need to pass the solve routine a scalar to multiply. // In our case this scalar is 1.0 const field_type one = 1.0; const auto numberOfRows = detail::to_int(m_LU.N()); const auto numberOfNonzeroBlocks = detail::to_int(m_LU.nonzeroes()); const auto blockSize = detail::to_int(m_LU.blockSize()); auto nonZeroValues = m_LU.getNonZeroValues().data(); auto rowIndices = m_LU.getRowIndices().data(); auto columnIndices = m_LU.getColumnIndices().data(); // Solve L m_temporaryStorage = d OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_solve(m_cuSparseHandle.get(), detail::CUSPARSE_MATRIX_ORDER, CUSPARSE_OPERATION_NON_TRANSPOSE, numberOfRows, numberOfNonzeroBlocks, &one, m_descriptionL->get(), nonZeroValues, rowIndices, columnIndices, blockSize, m_infoL.get(), d.data(), m_temporaryStorage.data(), CUSPARSE_SOLVE_POLICY_USE_LEVEL, m_buffer->data())); // Solve U v = m_temporaryStorage OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_solve(m_cuSparseHandle.get(), detail::CUSPARSE_MATRIX_ORDER, CUSPARSE_OPERATION_NON_TRANSPOSE, numberOfRows, numberOfNonzeroBlocks, &one, m_descriptionU->get(), nonZeroValues, rowIndices, columnIndices, blockSize, m_infoU.get(), m_temporaryStorage.data(), v.data(), CUSPARSE_SOLVE_POLICY_USE_LEVEL, m_buffer->data())); v *= m_w; } template void CuSeqILU0::post([[maybe_unused]] X& x) { } template Dune::SolverCategory::Category CuSeqILU0::category() const { return Dune::SolverCategory::sequential; } template void CuSeqILU0::update() { m_LU.updateNonzeroValues(detail::makeMatrixWithNonzeroDiagonal(m_underlyingMatrix)); createILU(); } template void CuSeqILU0::analyzeMatrix() { if (!m_buffer) { OPM_THROW(std::runtime_error, "Buffer not initialized. Call findBufferSize() then initialize with the appropiate size."); } const auto numberOfRows = detail::to_int(m_LU.N()); const auto numberOfNonzeroBlocks = detail::to_int(m_LU.nonzeroes()); const auto blockSize = detail::to_int(m_LU.blockSize()); auto nonZeroValues = m_LU.getNonZeroValues().data(); auto rowIndices = m_LU.getRowIndices().data(); auto columnIndices = m_LU.getColumnIndices().data(); // analysis of ilu LU decomposition OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrilu02_analysis(m_cuSparseHandle.get(), detail::CUSPARSE_MATRIX_ORDER, numberOfRows, numberOfNonzeroBlocks, m_LU.getDescription().get(), nonZeroValues, rowIndices, columnIndices, blockSize, m_infoM.get(), CUSPARSE_SOLVE_POLICY_USE_LEVEL, m_buffer->data())); // Make sure we can decompose the matrix. int structuralZero; auto statusPivot = cusparseXbsrilu02_zeroPivot(m_cuSparseHandle.get(), m_infoM.get(), &structuralZero); OPM_ERROR_IF(statusPivot != CUSPARSE_STATUS_SUCCESS, fmt::format("Found a structural zero at A({}, {}). Could not decompose LU \\approx A.\n\n A has " "dimension {}, and has {} nonzeroes.", structuralZero, structuralZero, m_LU.N(), m_LU.nonzeroes())); // analysis of ilu apply OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_analysis(m_cuSparseHandle.get(), detail::CUSPARSE_MATRIX_ORDER, CUSPARSE_OPERATION_NON_TRANSPOSE, numberOfRows, numberOfNonzeroBlocks, m_descriptionL->get(), nonZeroValues, rowIndices, columnIndices, blockSize, m_infoL.get(), CUSPARSE_SOLVE_POLICY_USE_LEVEL, m_buffer->data())); OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_analysis(m_cuSparseHandle.get(), detail::CUSPARSE_MATRIX_ORDER, CUSPARSE_OPERATION_NON_TRANSPOSE, numberOfRows, numberOfNonzeroBlocks, m_descriptionU->get(), nonZeroValues, rowIndices, columnIndices, blockSize, m_infoU.get(), CUSPARSE_SOLVE_POLICY_USE_LEVEL, m_buffer->data())); m_analysisDone = true; } template size_t CuSeqILU0::findBufferSize() { // We have three calls that need buffers: // 1) LU decomposition // 2) solve Lv = y // 3) solve Ux = z // we combine these buffers into one since it is not used across calls, const auto numberOfRows = detail::to_int(m_LU.N()); const auto numberOfNonzeroBlocks = detail::to_int(m_LU.nonzeroes()); const auto blockSize = detail::to_int(m_LU.blockSize()); auto nonZeroValues = m_LU.getNonZeroValues().data(); auto rowIndices = m_LU.getRowIndices().data(); auto columnIndices = m_LU.getColumnIndices().data(); int bufferSizeM = 0; OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrilu02_bufferSize(m_cuSparseHandle.get(), detail::CUSPARSE_MATRIX_ORDER, numberOfRows, numberOfNonzeroBlocks, m_LU.getDescription().get(), nonZeroValues, rowIndices, columnIndices, blockSize, m_infoM.get(), &bufferSizeM)); int bufferSizeL = 0; OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_bufferSize(m_cuSparseHandle.get(), detail::CUSPARSE_MATRIX_ORDER, CUSPARSE_OPERATION_NON_TRANSPOSE, numberOfRows, numberOfNonzeroBlocks, m_descriptionL->get(), nonZeroValues, rowIndices, columnIndices, blockSize, m_infoL.get(), &bufferSizeL)); int bufferSizeU = 0; OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_bufferSize(m_cuSparseHandle.get(), detail::CUSPARSE_MATRIX_ORDER, CUSPARSE_OPERATION_NON_TRANSPOSE, numberOfRows, numberOfNonzeroBlocks, m_descriptionL->get(), nonZeroValues, rowIndices, columnIndices, blockSize, m_infoU.get(), &bufferSizeU)); OPM_ERROR_IF(bufferSizeL <= 0, fmt::format("bufferSizeL is non-positive. Given value is {}.", bufferSizeL)); OPM_ERROR_IF(bufferSizeU <= 0, fmt::format("bufferSizeU is non-positive. Given value is {}.", bufferSizeU)); OPM_ERROR_IF(bufferSizeM <= 0, fmt::format("bufferSizeM is non-positive. Given value is {}.", bufferSizeM)); return size_t(std::max(bufferSizeL, std::max(bufferSizeU, bufferSizeM))); } template void CuSeqILU0::createILU() { OPM_ERROR_IF(!m_buffer, "Buffer not initialized. Call findBufferSize() then initialize with the appropiate size."); OPM_ERROR_IF(!m_analysisDone, "Analyzis of matrix not done. Call analyzeMatrix() first."); const auto numberOfRows = detail::to_int(m_LU.N()); const auto numberOfNonzeroBlocks = detail::to_int(m_LU.nonzeroes()); const auto blockSize = detail::to_int(m_LU.blockSize()); auto nonZeroValues = m_LU.getNonZeroValues().data(); auto rowIndices = m_LU.getRowIndices().data(); auto columnIndices = m_LU.getColumnIndices().data(); OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrilu02(m_cuSparseHandle.get(), detail::CUSPARSE_MATRIX_ORDER, numberOfRows, numberOfNonzeroBlocks, m_LU.getDescription().get(), nonZeroValues, rowIndices, columnIndices, blockSize, m_infoM.get(), CUSPARSE_SOLVE_POLICY_USE_LEVEL, m_buffer->data())); // We need to do this here as well. The first call was to check that we could decompose the system A=LU // the second call here is to make sure we can solve LUx=y int structuralZero; // cusparseXbsrilu02_zeroPivot() calls cudaDeviceSynchronize() auto statusPivot = cusparseXbsrilu02_zeroPivot(m_cuSparseHandle.get(), m_infoM.get(), &structuralZero); OPM_ERROR_IF( statusPivot != CUSPARSE_STATUS_SUCCESS, fmt::format("Found a structucal zero at LU({}, {}). Could not solve LUx = y.", structuralZero, structuralZero)); } template void CuSeqILU0::updateILUConfiguration() { auto bufferSize = findBufferSize(); if (!m_buffer || m_buffer->dim() < bufferSize) { m_buffer.reset(new CuVector((bufferSize + sizeof(field_type) - 1) / sizeof(field_type))); } analyzeMatrix(); createILU(); } } // namespace Opm::cuistl #define INSTANTIATE_CUSEQILU0_DUNE(realtype, blockdim) \ template class ::Opm::cuistl::CuSeqILU0>, \ ::Opm::cuistl::CuVector, \ ::Opm::cuistl::CuVector>; \ template class ::Opm::cuistl::CuSeqILU0>, \ ::Opm::cuistl::CuVector, \ ::Opm::cuistl::CuVector> INSTANTIATE_CUSEQILU0_DUNE(double, 1); INSTANTIATE_CUSEQILU0_DUNE(double, 2); INSTANTIATE_CUSEQILU0_DUNE(double, 3); INSTANTIATE_CUSEQILU0_DUNE(double, 4); INSTANTIATE_CUSEQILU0_DUNE(double, 5); INSTANTIATE_CUSEQILU0_DUNE(double, 6); INSTANTIATE_CUSEQILU0_DUNE(float, 1); INSTANTIATE_CUSEQILU0_DUNE(float, 2); INSTANTIATE_CUSEQILU0_DUNE(float, 3); INSTANTIATE_CUSEQILU0_DUNE(float, 4); INSTANTIATE_CUSEQILU0_DUNE(float, 5); INSTANTIATE_CUSEQILU0_DUNE(float, 6);