/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see .
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
// This file is based on the guide at https://docs.nvidia.com/cuda/cusparse/index.html#csrilu02_solve ,
// it highly recommended to read that before proceeding.
namespace Opm::cuistl
{
template
CuSeqILU0::CuSeqILU0(const M& A, field_type w)
: m_underlyingMatrix(A)
, m_w(w)
, m_LU(CuSparseMatrix::fromMatrix(detail::makeMatrixWithNonzeroDiagonal(A)))
, m_temporaryStorage(m_LU.N() * m_LU.blockSize())
, m_descriptionL(detail::createLowerDiagonalDescription())
, m_descriptionU(detail::createUpperDiagonalDescription())
, m_cuSparseHandle(detail::CuSparseHandle::getInstance())
{
// Some sanity check
OPM_ERROR_IF(A.N() != m_LU.N(),
fmt::format("CuSparse matrix not same size as DUNE matrix. {} vs {}.", m_LU.N(), A.N()));
OPM_ERROR_IF(
A[0][0].N() != m_LU.blockSize(),
fmt::format("CuSparse matrix not same blocksize as DUNE matrix. {} vs {}.", m_LU.blockSize(), A[0][0].N()));
OPM_ERROR_IF(
A.N() * A[0][0].N() != m_LU.dim(),
fmt::format("CuSparse matrix not same dimension as DUNE matrix. {} vs {}.", m_LU.dim(), A.N() * A[0][0].N()));
OPM_ERROR_IF(A.nonzeroes() != m_LU.nonzeroes(),
fmt::format("CuSparse matrix not same number of non zeroes as DUNE matrix. {} vs {}. ",
m_LU.nonzeroes(),
A.nonzeroes()));
updateILUConfiguration();
}
template
void
CuSeqILU0::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
{
}
template
void
CuSeqILU0::apply(X& v, const Y& d)
{
// We need to pass the solve routine a scalar to multiply.
// In our case this scalar is 1.0
const field_type one = 1.0;
const auto numberOfRows = detail::to_int(m_LU.N());
const auto numberOfNonzeroBlocks = detail::to_int(m_LU.nonzeroes());
const auto blockSize = detail::to_int(m_LU.blockSize());
auto nonZeroValues = m_LU.getNonZeroValues().data();
auto rowIndices = m_LU.getRowIndices().data();
auto columnIndices = m_LU.getColumnIndices().data();
// Solve L m_temporaryStorage = d
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_solve(m_cuSparseHandle.get(),
detail::CUSPARSE_MATRIX_ORDER,
CUSPARSE_OPERATION_NON_TRANSPOSE,
numberOfRows,
numberOfNonzeroBlocks,
&one,
m_descriptionL->get(),
nonZeroValues,
rowIndices,
columnIndices,
blockSize,
m_infoL.get(),
d.data(),
m_temporaryStorage.data(),
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
m_buffer->data()));
// Solve U v = m_temporaryStorage
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_solve(m_cuSparseHandle.get(),
detail::CUSPARSE_MATRIX_ORDER,
CUSPARSE_OPERATION_NON_TRANSPOSE,
numberOfRows,
numberOfNonzeroBlocks,
&one,
m_descriptionU->get(),
nonZeroValues,
rowIndices,
columnIndices,
blockSize,
m_infoU.get(),
m_temporaryStorage.data(),
v.data(),
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
m_buffer->data()));
v *= m_w;
}
template
void
CuSeqILU0::post([[maybe_unused]] X& x)
{
}
template
Dune::SolverCategory::Category
CuSeqILU0::category() const
{
return Dune::SolverCategory::sequential;
}
template
void
CuSeqILU0::update()
{
m_LU.updateNonzeroValues(detail::makeMatrixWithNonzeroDiagonal(m_underlyingMatrix));
createILU();
}
template
void
CuSeqILU0::analyzeMatrix()
{
if (!m_buffer) {
OPM_THROW(std::runtime_error,
"Buffer not initialized. Call findBufferSize() then initialize with the appropiate size.");
}
const auto numberOfRows = detail::to_int(m_LU.N());
const auto numberOfNonzeroBlocks = detail::to_int(m_LU.nonzeroes());
const auto blockSize = detail::to_int(m_LU.blockSize());
auto nonZeroValues = m_LU.getNonZeroValues().data();
auto rowIndices = m_LU.getRowIndices().data();
auto columnIndices = m_LU.getColumnIndices().data();
// analysis of ilu LU decomposition
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrilu02_analysis(m_cuSparseHandle.get(),
detail::CUSPARSE_MATRIX_ORDER,
numberOfRows,
numberOfNonzeroBlocks,
m_LU.getDescription().get(),
nonZeroValues,
rowIndices,
columnIndices,
blockSize,
m_infoM.get(),
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
m_buffer->data()));
// Make sure we can decompose the matrix.
int structuralZero;
auto statusPivot = cusparseXbsrilu02_zeroPivot(m_cuSparseHandle.get(), m_infoM.get(), &structuralZero);
OPM_ERROR_IF(statusPivot != CUSPARSE_STATUS_SUCCESS,
fmt::format("Found a structural zero at A({}, {}). Could not decompose LU \\approx A.\n\n A has "
"dimension {}, and has {} nonzeroes.",
structuralZero,
structuralZero,
m_LU.N(),
m_LU.nonzeroes()));
// analysis of ilu apply
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_analysis(m_cuSparseHandle.get(),
detail::CUSPARSE_MATRIX_ORDER,
CUSPARSE_OPERATION_NON_TRANSPOSE,
numberOfRows,
numberOfNonzeroBlocks,
m_descriptionL->get(),
nonZeroValues,
rowIndices,
columnIndices,
blockSize,
m_infoL.get(),
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
m_buffer->data()));
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_analysis(m_cuSparseHandle.get(),
detail::CUSPARSE_MATRIX_ORDER,
CUSPARSE_OPERATION_NON_TRANSPOSE,
numberOfRows,
numberOfNonzeroBlocks,
m_descriptionU->get(),
nonZeroValues,
rowIndices,
columnIndices,
blockSize,
m_infoU.get(),
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
m_buffer->data()));
m_analysisDone = true;
}
template
size_t
CuSeqILU0::findBufferSize()
{
// We have three calls that need buffers:
// 1) LU decomposition
// 2) solve Lv = y
// 3) solve Ux = z
// we combine these buffers into one since it is not used across calls,
const auto numberOfRows = detail::to_int(m_LU.N());
const auto numberOfNonzeroBlocks = detail::to_int(m_LU.nonzeroes());
const auto blockSize = detail::to_int(m_LU.blockSize());
auto nonZeroValues = m_LU.getNonZeroValues().data();
auto rowIndices = m_LU.getRowIndices().data();
auto columnIndices = m_LU.getColumnIndices().data();
int bufferSizeM = 0;
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrilu02_bufferSize(m_cuSparseHandle.get(),
detail::CUSPARSE_MATRIX_ORDER,
numberOfRows,
numberOfNonzeroBlocks,
m_LU.getDescription().get(),
nonZeroValues,
rowIndices,
columnIndices,
blockSize,
m_infoM.get(),
&bufferSizeM));
int bufferSizeL = 0;
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_bufferSize(m_cuSparseHandle.get(),
detail::CUSPARSE_MATRIX_ORDER,
CUSPARSE_OPERATION_NON_TRANSPOSE,
numberOfRows,
numberOfNonzeroBlocks,
m_descriptionL->get(),
nonZeroValues,
rowIndices,
columnIndices,
blockSize,
m_infoL.get(),
&bufferSizeL));
int bufferSizeU = 0;
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrsv2_bufferSize(m_cuSparseHandle.get(),
detail::CUSPARSE_MATRIX_ORDER,
CUSPARSE_OPERATION_NON_TRANSPOSE,
numberOfRows,
numberOfNonzeroBlocks,
m_descriptionL->get(),
nonZeroValues,
rowIndices,
columnIndices,
blockSize,
m_infoU.get(),
&bufferSizeU));
OPM_ERROR_IF(bufferSizeL <= 0, fmt::format("bufferSizeL is non-positive. Given value is {}.", bufferSizeL));
OPM_ERROR_IF(bufferSizeU <= 0, fmt::format("bufferSizeU is non-positive. Given value is {}.", bufferSizeU));
OPM_ERROR_IF(bufferSizeM <= 0, fmt::format("bufferSizeM is non-positive. Given value is {}.", bufferSizeM));
return size_t(std::max(bufferSizeL, std::max(bufferSizeU, bufferSizeM)));
}
template
void
CuSeqILU0::createILU()
{
OPM_ERROR_IF(!m_buffer, "Buffer not initialized. Call findBufferSize() then initialize with the appropiate size.");
OPM_ERROR_IF(!m_analysisDone, "Analyzis of matrix not done. Call analyzeMatrix() first.");
const auto numberOfRows = detail::to_int(m_LU.N());
const auto numberOfNonzeroBlocks = detail::to_int(m_LU.nonzeroes());
const auto blockSize = detail::to_int(m_LU.blockSize());
auto nonZeroValues = m_LU.getNonZeroValues().data();
auto rowIndices = m_LU.getRowIndices().data();
auto columnIndices = m_LU.getColumnIndices().data();
OPM_CUSPARSE_SAFE_CALL(detail::cusparseBsrilu02(m_cuSparseHandle.get(),
detail::CUSPARSE_MATRIX_ORDER,
numberOfRows,
numberOfNonzeroBlocks,
m_LU.getDescription().get(),
nonZeroValues,
rowIndices,
columnIndices,
blockSize,
m_infoM.get(),
CUSPARSE_SOLVE_POLICY_USE_LEVEL,
m_buffer->data()));
// We need to do this here as well. The first call was to check that we could decompose the system A=LU
// the second call here is to make sure we can solve LUx=y
int structuralZero;
// cusparseXbsrilu02_zeroPivot() calls cudaDeviceSynchronize()
auto statusPivot = cusparseXbsrilu02_zeroPivot(m_cuSparseHandle.get(), m_infoM.get(), &structuralZero);
OPM_ERROR_IF(
statusPivot != CUSPARSE_STATUS_SUCCESS,
fmt::format("Found a structucal zero at LU({}, {}). Could not solve LUx = y.", structuralZero, structuralZero));
}
template
void
CuSeqILU0::updateILUConfiguration()
{
auto bufferSize = findBufferSize();
if (!m_buffer || m_buffer->dim() < bufferSize) {
m_buffer.reset(new CuVector((bufferSize + sizeof(field_type) - 1) / sizeof(field_type)));
}
analyzeMatrix();
createILU();
}
} // namespace Opm::cuistl
#define INSTANTIATE_CUSEQILU0_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::CuSeqILU0>, \
::Opm::cuistl::CuVector, \
::Opm::cuistl::CuVector>; \
template class ::Opm::cuistl::CuSeqILU0>, \
::Opm::cuistl::CuVector, \
::Opm::cuistl::CuVector>
INSTANTIATE_CUSEQILU0_DUNE(double, 1);
INSTANTIATE_CUSEQILU0_DUNE(double, 2);
INSTANTIATE_CUSEQILU0_DUNE(double, 3);
INSTANTIATE_CUSEQILU0_DUNE(double, 4);
INSTANTIATE_CUSEQILU0_DUNE(double, 5);
INSTANTIATE_CUSEQILU0_DUNE(double, 6);
INSTANTIATE_CUSEQILU0_DUNE(float, 1);
INSTANTIATE_CUSEQILU0_DUNE(float, 2);
INSTANTIATE_CUSEQILU0_DUNE(float, 3);
INSTANTIATE_CUSEQILU0_DUNE(float, 4);
INSTANTIATE_CUSEQILU0_DUNE(float, 5);
INSTANTIATE_CUSEQILU0_DUNE(float, 6);