/*
Copyright 2022-2023 SINTEF AS
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see .
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
namespace
{
std::vector
createReorderedToNatural(Opm::SparseTable levelSets)
{
auto res = std::vector(Opm::cuistl::detail::to_size_t(levelSets.dataSize()));
int globCnt = 0;
for (auto row : levelSets) {
for (auto col : row) {
OPM_ERROR_IF(Opm::cuistl::detail::to_size_t(globCnt) >= res.size(),
fmt::format("Internal error. globCnt = {}, res.size() = {}", globCnt, res.size()));
res[globCnt++] = static_cast(col);
}
}
return res;
}
std::vector
createNaturalToReordered(Opm::SparseTable levelSets)
{
auto res = std::vector(Opm::cuistl::detail::to_size_t(levelSets.dataSize()));
int globCnt = 0;
for (auto row : levelSets) {
for (auto col : row) {
OPM_ERROR_IF(Opm::cuistl::detail::to_size_t(globCnt) >= res.size(),
fmt::format("Internal error. globCnt = {}, res.size() = {}", globCnt, res.size()));
res[col] = globCnt++;
}
}
return res;
}
template
void
createReorderedMatrix(const M& naturalMatrix,
std::vector reorderedToNatural,
std::unique_ptr& reorderedGpuMat)
{
M reorderedMatrix(naturalMatrix.N(), naturalMatrix.N(), naturalMatrix.nonzeroes(), M::row_wise);
for (auto dstRowIt = reorderedMatrix.createbegin(); dstRowIt != reorderedMatrix.createend(); ++dstRowIt) {
auto srcRow = naturalMatrix.begin() + reorderedToNatural[dstRowIt.index()];
// For elements in A
for (auto elem = srcRow->begin(); elem != srcRow->end(); elem++) {
dstRowIt.insert(elem.index());
}
}
reorderedGpuMat.reset(new auto(Opm::cuistl::CuSparseMatrix::fromMatrix(reorderedMatrix, true)));
}
template
void
extractLowerAndUpperMatrices(const M& naturalMatrix,
std::vector reorderedToNatural,
std::unique_ptr& lower,
std::unique_ptr& upper)
{
const size_t new_nnz = (naturalMatrix.nonzeroes() - naturalMatrix.N()) / 2;
M reorderedLower(naturalMatrix.N(), naturalMatrix.N(), new_nnz, M::row_wise);
M reorderedUpper(naturalMatrix.N(), naturalMatrix.N(), new_nnz, M::row_wise);
for (auto lowerIt = reorderedLower.createbegin(), upperIt = reorderedUpper.createbegin();
lowerIt != reorderedLower.createend();
++lowerIt, ++upperIt) {
auto srcRow = naturalMatrix.begin() + reorderedToNatural[lowerIt.index()];
for (auto elem = srcRow->begin(); elem != srcRow->end(); ++elem) {
if (elem.index() < srcRow.index()) { // add index to lower matrix if under the diagonal
lowerIt.insert(elem.index());
} else if (elem.index() > srcRow.index()) { // add element to upper matrix if above the diagonal
upperIt.insert(elem.index());
}
}
}
lower.reset(new auto(Opm::cuistl::CuSparseMatrix::fromMatrix(reorderedLower, true)));
upper.reset(new auto(Opm::cuistl::CuSparseMatrix::fromMatrix(reorderedUpper, true)));
return;
}
} // NAMESPACE
namespace Opm::cuistl
{
template
CuDILU::CuDILU(const M& A, bool split_matrix)
: m_cpuMatrix(A)
, m_levelSets(Opm::getMatrixRowColoring(m_cpuMatrix, Opm::ColoringType::LOWER))
, m_reorderedToNatural(createReorderedToNatural(m_levelSets))
, m_naturalToReordered(createNaturalToReordered(m_levelSets))
, m_gpuMatrix(CuSparseMatrix::fromMatrix(m_cpuMatrix, true))
, m_gpuMatrixReordered(nullptr)
, m_gpuMatrixReorderedLower(nullptr)
, m_gpuMatrixReorderedUpper(nullptr)
, m_gpuNaturalToReorder(m_naturalToReordered)
, m_gpuReorderToNatural(m_reorderedToNatural)
, m_gpuDInv(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize())
, m_split_matrix(split_matrix)
{
// TODO: Should in some way verify that this matrix is symmetric, only do it debug mode?
// Some sanity check
OPM_ERROR_IF(A.N() != m_gpuMatrix.N(),
fmt::format("CuSparse matrix not same size as DUNE matrix. {} vs {}.", m_gpuMatrix.N(), A.N()));
OPM_ERROR_IF(A[0][0].N() != m_gpuMatrix.blockSize(),
fmt::format("CuSparse matrix not same blocksize as DUNE matrix. {} vs {}.",
m_gpuMatrix.blockSize(),
A[0][0].N()));
OPM_ERROR_IF(A.N() * A[0][0].N() != m_gpuMatrix.dim(),
fmt::format("CuSparse matrix not same dimension as DUNE matrix. {} vs {}.",
m_gpuMatrix.dim(),
A.N() * A[0][0].N()));
OPM_ERROR_IF(A.nonzeroes() != m_gpuMatrix.nonzeroes(),
fmt::format("CuSparse matrix not same number of non zeroes as DUNE matrix. {} vs {}. ",
m_gpuMatrix.nonzeroes(),
A.nonzeroes()));
if (m_split_matrix) {
m_gpuMatrixReorderedDiag.emplace(CuVector(blocksize_ * blocksize_ * m_cpuMatrix.N()));
extractLowerAndUpperMatrices>(
m_cpuMatrix, m_reorderedToNatural, m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper);
} else {
createReorderedMatrix>(
m_cpuMatrix, m_reorderedToNatural, m_gpuMatrixReordered);
}
computeDiagAndMoveReorderedData();
}
template
void
CuDILU::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
{
}
template
void
CuDILU::apply(X& v, const Y& d)
{
OPM_TIMEBLOCK(prec_apply);
{
// ScopedTimer timer("apply", apply_time);
int levelStartIdx = 0;
for (int level = 0; level < m_levelSets.size(); ++level) {
const int numOfRowsInLevel = m_levelSets[level].size();
if (m_split_matrix) {
detail::computeLowerSolveLevelSetSplit(
m_gpuMatrixReorderedLower->getNonZeroValues().data(),
m_gpuMatrixReorderedLower->getRowIndices().data(),
m_gpuMatrixReorderedLower->getColumnIndices().data(),
m_gpuReorderToNatural.data(),
levelStartIdx,
numOfRowsInLevel,
m_gpuDInv.data(),
d.data(),
v.data());
} else {
detail::computeLowerSolveLevelSet(
m_gpuMatrixReordered->getNonZeroValues().data(),
m_gpuMatrixReordered->getRowIndices().data(),
m_gpuMatrixReordered->getColumnIndices().data(),
m_gpuReorderToNatural.data(),
levelStartIdx,
numOfRowsInLevel,
m_gpuDInv.data(),
d.data(),
v.data());
}
levelStartIdx += numOfRowsInLevel;
}
levelStartIdx = m_cpuMatrix.N();
// upper triangular solve: (D + U_A) v = Dy
for (int level = m_levelSets.size() - 1; level >= 0; --level) {
const int numOfRowsInLevel = m_levelSets[level].size();
levelStartIdx -= numOfRowsInLevel;
if (m_split_matrix) {
detail::computeUpperSolveLevelSetSplit(
m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
m_gpuMatrixReorderedUpper->getRowIndices().data(),
m_gpuMatrixReorderedUpper->getColumnIndices().data(),
m_gpuReorderToNatural.data(),
levelStartIdx,
numOfRowsInLevel,
m_gpuDInv.data(),
v.data());
} else {
detail::computeUpperSolveLevelSet(
m_gpuMatrixReordered->getNonZeroValues().data(),
m_gpuMatrixReordered->getRowIndices().data(),
m_gpuMatrixReordered->getColumnIndices().data(),
m_gpuReorderToNatural.data(),
levelStartIdx,
numOfRowsInLevel,
m_gpuDInv.data(),
v.data());
}
}
}
}
template
void
CuDILU::post([[maybe_unused]] X& x)
{
}
template
Dune::SolverCategory::Category
CuDILU::category() const
{
return Dune::SolverCategory::sequential;
}
template
void
CuDILU::update()
{
OPM_TIMEBLOCK(prec_update);
{
m_gpuMatrix.updateNonzeroValues(m_cpuMatrix, true); // send updated matrix to the gpu
computeDiagAndMoveReorderedData();
}
}
template
void
CuDILU::computeDiagAndMoveReorderedData()
{
OPM_TIMEBLOCK(prec_update);
{
if (m_split_matrix) {
detail::copyMatDataToReorderedSplit(
m_gpuMatrix.getNonZeroValues().data(),
m_gpuMatrix.getRowIndices().data(),
m_gpuMatrix.getColumnIndices().data(),
m_gpuMatrixReorderedLower->getNonZeroValues().data(),
m_gpuMatrixReorderedLower->getRowIndices().data(),
m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
m_gpuMatrixReorderedUpper->getRowIndices().data(),
m_gpuMatrixReorderedDiag.value().data(),
m_gpuNaturalToReorder.data(),
m_gpuMatrixReorderedLower->N());
} else {
detail::copyMatDataToReordered(m_gpuMatrix.getNonZeroValues().data(),
m_gpuMatrix.getRowIndices().data(),
m_gpuMatrixReordered->getNonZeroValues().data(),
m_gpuMatrixReordered->getRowIndices().data(),
m_gpuNaturalToReorder.data(),
m_gpuMatrixReordered->N());
}
int levelStartIdx = 0;
for (int level = 0; level < m_levelSets.size(); ++level) {
const int numOfRowsInLevel = m_levelSets[level].size();
if (m_split_matrix) {
detail::computeDiluDiagonalSplit(
m_gpuMatrixReorderedLower->getNonZeroValues().data(),
m_gpuMatrixReorderedLower->getRowIndices().data(),
m_gpuMatrixReorderedLower->getColumnIndices().data(),
m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
m_gpuMatrixReorderedUpper->getRowIndices().data(),
m_gpuMatrixReorderedUpper->getColumnIndices().data(),
m_gpuMatrixReorderedDiag.value().data(),
m_gpuReorderToNatural.data(),
m_gpuNaturalToReorder.data(),
levelStartIdx,
numOfRowsInLevel,
m_gpuDInv.data());
} else {
detail::computeDiluDiagonal(m_gpuMatrixReordered->getNonZeroValues().data(),
m_gpuMatrixReordered->getRowIndices().data(),
m_gpuMatrixReordered->getColumnIndices().data(),
m_gpuReorderToNatural.data(),
m_gpuNaturalToReorder.data(),
levelStartIdx,
numOfRowsInLevel,
m_gpuDInv.data());
}
levelStartIdx += numOfRowsInLevel;
}
}
}
} // namespace Opm::cuistl
#define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
template class ::Opm::cuistl::CuDILU>, \
::Opm::cuistl::CuVector, \
::Opm::cuistl::CuVector>; \
template class ::Opm::cuistl::CuDILU>, \
::Opm::cuistl::CuVector, \
::Opm::cuistl::CuVector>
INSTANTIATE_CUDILU_DUNE(double, 1);
INSTANTIATE_CUDILU_DUNE(double, 2);
INSTANTIATE_CUDILU_DUNE(double, 3);
INSTANTIATE_CUDILU_DUNE(double, 4);
INSTANTIATE_CUDILU_DUNE(double, 5);
INSTANTIATE_CUDILU_DUNE(double, 6);
INSTANTIATE_CUDILU_DUNE(float, 1);
INSTANTIATE_CUDILU_DUNE(float, 2);
INSTANTIATE_CUDILU_DUNE(float, 3);
INSTANTIATE_CUDILU_DUNE(float, 4);
INSTANTIATE_CUDILU_DUNE(float, 5);
INSTANTIATE_CUDILU_DUNE(float, 6);