2023-11-22 07:29:43 -06:00
/*
Copyright 2022 - 2023 SINTEF AS
This file is part of the Open Porous Media project ( OPM ) .
OPM is free software : you can redistribute it and / or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation , either version 3 of the License , or
( at your option ) any later version .
OPM is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
GNU General Public License for more details .
You should have received a copy of the GNU General Public License
along with OPM . If not , see < http : //www.gnu.org/licenses/>.
*/
2024-06-19 06:14:59 -05:00
# include <chrono>
# include <config.h>
2023-11-22 07:29:43 -06:00
# include <dune/common/fmatrix.hh>
# include <dune/istl/bcrsmatrix.hh>
# include <fmt/core.h>
2024-06-19 06:14:59 -05:00
# include <limits>
2023-11-22 07:29:43 -06:00
# include <opm/common/ErrorMacros.hpp>
2024-06-19 06:14:59 -05:00
# include <opm/common/TimingMacros.hpp>
# include <opm/simulators/linalg/GraphColoring.hpp>
2024-08-22 08:32:21 -05:00
# include <opm/simulators/linalg/gpuistl/detail/autotuner.hpp>
# include <opm/simulators/linalg/gpuistl/GpuDILU.hpp>
# include <opm/simulators/linalg/gpuistl/GpuSparseMatrix.hpp>
# include <opm/simulators/linalg/gpuistl/GpuVector.hpp>
# include <opm/simulators/linalg/gpuistl/detail/coloringAndReorderingUtils.hpp>
2024-08-23 04:15:18 -05:00
# include <opm/simulators/linalg/gpuistl/detail/gpusparse_matrix_operations.hpp>
2024-08-22 08:32:21 -05:00
# include <opm/simulators/linalg/gpuistl/detail/preconditionerKernels/DILUKernels.hpp>
2023-11-22 07:29:43 -06:00
# include <opm/simulators/linalg/matrixblock.hh>
2024-06-18 08:44:19 -05:00
# include <tuple>
2024-08-12 02:54:51 -05:00
# include <functional>
# include <utility>
2024-08-22 03:07:06 -05:00
# include <string>
2024-08-22 06:52:50 -05:00
namespace Opm : : gpuistl
2023-11-22 07:29:43 -06:00
{
template < class M , class X , class Y , int l >
2024-08-22 07:28:33 -05:00
GpuDILU < M , X , Y , l > : : GpuDILU ( const M & A , bool splitMatrix , bool tuneKernels )
2023-11-22 07:29:43 -06:00
: m_cpuMatrix ( A )
, m_levelSets ( Opm : : getMatrixRowColoring ( m_cpuMatrix , Opm : : ColoringType : : LOWER ) )
2024-06-19 06:14:59 -05:00
, m_reorderedToNatural ( detail : : createReorderedToNatural ( m_levelSets ) )
, m_naturalToReordered ( detail : : createNaturalToReordered ( m_levelSets ) )
2024-08-22 08:14:33 -05:00
, m_gpuMatrix ( GpuSparseMatrix < field_type > : : fromMatrix ( m_cpuMatrix , true ) )
2023-11-22 07:29:43 -06:00
, m_gpuNaturalToReorder ( m_naturalToReordered )
, m_gpuReorderToNatural ( m_reorderedToNatural )
, m_gpuDInv ( m_gpuMatrix . N ( ) * m_gpuMatrix . blockSize ( ) * m_gpuMatrix . blockSize ( ) )
2024-06-18 08:44:19 -05:00
, m_splitMatrix ( splitMatrix )
, m_tuneThreadBlockSizes ( tuneKernels )
2023-11-22 07:29:43 -06:00
{
// TODO: Should in some way verify that this matrix is symmetric, only do it debug mode?
// Some sanity check
OPM_ERROR_IF ( A . N ( ) ! = m_gpuMatrix . N ( ) ,
fmt : : format ( " CuSparse matrix not same size as DUNE matrix. {} vs {}. " , m_gpuMatrix . N ( ) , A . N ( ) ) ) ;
OPM_ERROR_IF ( A [ 0 ] [ 0 ] . N ( ) ! = m_gpuMatrix . blockSize ( ) ,
fmt : : format ( " CuSparse matrix not same blocksize as DUNE matrix. {} vs {}. " ,
m_gpuMatrix . blockSize ( ) ,
A [ 0 ] [ 0 ] . N ( ) ) ) ;
OPM_ERROR_IF ( A . N ( ) * A [ 0 ] [ 0 ] . N ( ) ! = m_gpuMatrix . dim ( ) ,
fmt : : format ( " CuSparse matrix not same dimension as DUNE matrix. {} vs {}. " ,
m_gpuMatrix . dim ( ) ,
A . N ( ) * A [ 0 ] [ 0 ] . N ( ) ) ) ;
OPM_ERROR_IF ( A . nonzeroes ( ) ! = m_gpuMatrix . nonzeroes ( ) ,
fmt : : format ( " CuSparse matrix not same number of non zeroes as DUNE matrix. {} vs {}. " ,
m_gpuMatrix . nonzeroes ( ) ,
A . nonzeroes ( ) ) ) ;
2024-06-26 08:34:47 -05:00
if ( m_splitMatrix ) {
2024-08-22 08:20:20 -05:00
m_gpuMatrixReorderedDiag = std : : make_unique < GpuVector < field_type > > ( blocksize_ * blocksize_ * m_cpuMatrix . N ( ) ) ;
2024-06-19 06:14:59 -05:00
std : : tie ( m_gpuMatrixReorderedLower , m_gpuMatrixReorderedUpper )
2024-08-22 08:14:33 -05:00
= detail : : extractLowerAndUpperMatrices < M , field_type , GpuSparseMatrix < field_type > > ( m_cpuMatrix ,
2024-06-19 06:14:59 -05:00
m_reorderedToNatural ) ;
2024-08-20 08:06:59 -05:00
}
else {
2024-08-22 08:14:33 -05:00
m_gpuMatrixReordered = detail : : createReorderedMatrix < M , field_type , GpuSparseMatrix < field_type > > (
2024-06-19 06:14:59 -05:00
m_cpuMatrix , m_reorderedToNatural ) ;
2024-05-31 03:36:46 -05:00
}
2024-08-20 08:06:59 -05:00
computeDiagAndMoveReorderedData ( m_moveThreadBlockSize , m_DILUFactorizationThreadBlockSize ) ;
2024-06-18 08:44:19 -05:00
2024-06-19 06:14:59 -05:00
if ( m_tuneThreadBlockSizes ) {
2024-06-18 08:44:19 -05:00
tuneThreadBlockSizes ( ) ;
}
2023-11-22 07:29:43 -06:00
}
template < class M , class X , class Y , int l >
void
2024-08-22 07:28:33 -05:00
GpuDILU < M , X , Y , l > : : pre ( [[maybe_unused]] X& x, [[maybe_unused]] Y & b )
2023-11-22 07:29:43 -06:00
{
}
template < class M , class X , class Y , int l >
void
2024-08-22 07:28:33 -05:00
GpuDILU < M , X , Y , l > : : apply ( X & v , const Y & d )
2023-11-22 07:29:43 -06:00
{
OPM_TIMEBLOCK ( prec_apply ) ;
2024-05-31 03:36:46 -05:00
{
2024-08-20 08:06:59 -05:00
apply ( v , d , m_lowerSolveThreadBlockSize , m_upperSolveThreadBlockSize ) ;
}
}
template < class M , class X , class Y , int l >
void
2024-08-22 07:28:33 -05:00
GpuDILU < M , X , Y , l > : : apply ( X & v , const Y & d , int lowerSolveThreadBlockSize , int upperSolveThreadBlockSize )
2024-08-20 08:06:59 -05:00
{
int levelStartIdx = 0 ;
for ( int level = 0 ; level < m_levelSets . size ( ) ; + + level ) {
const int numOfRowsInLevel = m_levelSets [ level ] . size ( ) ;
if ( m_splitMatrix ) {
detail : : DILU : : solveLowerLevelSetSplit < field_type , blocksize_ > (
m_gpuMatrixReorderedLower - > getNonZeroValues ( ) . data ( ) ,
m_gpuMatrixReorderedLower - > getRowIndices ( ) . data ( ) ,
m_gpuMatrixReorderedLower - > getColumnIndices ( ) . data ( ) ,
m_gpuReorderToNatural . data ( ) ,
levelStartIdx ,
numOfRowsInLevel ,
m_gpuDInv . data ( ) ,
d . data ( ) ,
v . data ( ) ,
lowerSolveThreadBlockSize ) ;
} else {
detail : : DILU : : solveLowerLevelSet < field_type , blocksize_ > (
m_gpuMatrixReordered - > getNonZeroValues ( ) . data ( ) ,
m_gpuMatrixReordered - > getRowIndices ( ) . data ( ) ,
m_gpuMatrixReordered - > getColumnIndices ( ) . data ( ) ,
m_gpuReorderToNatural . data ( ) ,
levelStartIdx ,
numOfRowsInLevel ,
m_gpuDInv . data ( ) ,
d . data ( ) ,
v . data ( ) ,
lowerSolveThreadBlockSize ) ;
2024-05-31 03:36:46 -05:00
}
2024-08-20 08:06:59 -05:00
levelStartIdx + = numOfRowsInLevel ;
}
2023-11-22 07:29:43 -06:00
2024-08-20 08:06:59 -05:00
levelStartIdx = m_cpuMatrix . N ( ) ;
// upper triangular solve: (D + U_A) v = Dy
for ( int level = m_levelSets . size ( ) - 1 ; level > = 0 ; - - level ) {
const int numOfRowsInLevel = m_levelSets [ level ] . size ( ) ;
levelStartIdx - = numOfRowsInLevel ;
if ( m_splitMatrix ) {
detail : : DILU : : solveUpperLevelSetSplit < field_type , blocksize_ > (
m_gpuMatrixReorderedUpper - > getNonZeroValues ( ) . data ( ) ,
m_gpuMatrixReorderedUpper - > getRowIndices ( ) . data ( ) ,
m_gpuMatrixReorderedUpper - > getColumnIndices ( ) . data ( ) ,
m_gpuReorderToNatural . data ( ) ,
levelStartIdx ,
numOfRowsInLevel ,
m_gpuDInv . data ( ) ,
v . data ( ) ,
upperSolveThreadBlockSize ) ;
} else {
detail : : DILU : : solveUpperLevelSet < field_type , blocksize_ > (
m_gpuMatrixReordered - > getNonZeroValues ( ) . data ( ) ,
m_gpuMatrixReordered - > getRowIndices ( ) . data ( ) ,
m_gpuMatrixReordered - > getColumnIndices ( ) . data ( ) ,
m_gpuReorderToNatural . data ( ) ,
levelStartIdx ,
numOfRowsInLevel ,
m_gpuDInv . data ( ) ,
v . data ( ) ,
upperSolveThreadBlockSize ) ;
2024-05-31 03:36:46 -05:00
}
2023-11-22 07:29:43 -06:00
}
}
2024-08-20 08:06:59 -05:00
2023-11-22 07:29:43 -06:00
template < class M , class X , class Y , int l >
void
2024-08-22 07:28:33 -05:00
GpuDILU < M , X , Y , l > : : post ( [[maybe_unused]] X & x )
2023-11-22 07:29:43 -06:00
{
}
template < class M , class X , class Y , int l >
Dune : : SolverCategory : : Category
2024-08-22 07:28:33 -05:00
GpuDILU < M , X , Y , l > : : category ( ) const
2023-11-22 07:29:43 -06:00
{
return Dune : : SolverCategory : : sequential ;
}
template < class M , class X , class Y , int l >
void
2024-08-22 07:28:33 -05:00
GpuDILU < M , X , Y , l > : : update ( )
2023-11-22 07:29:43 -06:00
{
OPM_TIMEBLOCK ( prec_update ) ;
2024-05-31 03:36:46 -05:00
{
2024-08-20 08:06:59 -05:00
update ( m_moveThreadBlockSize , m_DILUFactorizationThreadBlockSize ) ;
2024-05-31 03:36:46 -05:00
}
}
2023-11-22 07:29:43 -06:00
2024-05-31 03:36:46 -05:00
template < class M , class X , class Y , int l >
void
2024-08-22 07:28:33 -05:00
GpuDILU < M , X , Y , l > : : update ( int moveThreadBlockSize , int factorizationBlockSize )
2024-05-31 03:36:46 -05:00
{
2024-08-20 08:06:59 -05:00
m_gpuMatrix . updateNonzeroValues ( m_cpuMatrix , true ) ; // send updated matrix to the gpu
computeDiagAndMoveReorderedData ( moveThreadBlockSize , factorizationBlockSize ) ;
}
template < class M , class X , class Y , int l >
void
2024-08-22 07:28:33 -05:00
GpuDILU < M , X , Y , l > : : computeDiagAndMoveReorderedData ( int moveThreadBlockSize , int factorizationBlockSize )
2024-08-20 08:06:59 -05:00
{
if ( m_splitMatrix ) {
detail : : copyMatDataToReorderedSplit < field_type , blocksize_ > (
m_gpuMatrix . getNonZeroValues ( ) . data ( ) ,
m_gpuMatrix . getRowIndices ( ) . data ( ) ,
m_gpuMatrix . getColumnIndices ( ) . data ( ) ,
m_gpuMatrixReorderedLower - > getNonZeroValues ( ) . data ( ) ,
m_gpuMatrixReorderedLower - > getRowIndices ( ) . data ( ) ,
m_gpuMatrixReorderedUpper - > getNonZeroValues ( ) . data ( ) ,
m_gpuMatrixReorderedUpper - > getRowIndices ( ) . data ( ) ,
m_gpuMatrixReorderedDiag - > data ( ) ,
m_gpuNaturalToReorder . data ( ) ,
m_gpuMatrixReorderedLower - > N ( ) ,
moveThreadBlockSize ) ;
} else {
detail : : copyMatDataToReordered < field_type , blocksize_ > ( m_gpuMatrix . getNonZeroValues ( ) . data ( ) ,
m_gpuMatrix . getRowIndices ( ) . data ( ) ,
m_gpuMatrixReordered - > getNonZeroValues ( ) . data ( ) ,
m_gpuMatrixReordered - > getRowIndices ( ) . data ( ) ,
m_gpuNaturalToReorder . data ( ) ,
m_gpuMatrixReordered - > N ( ) ,
moveThreadBlockSize ) ;
}
int levelStartIdx = 0 ;
for ( int level = 0 ; level < m_levelSets . size ( ) ; + + level ) {
const int numOfRowsInLevel = m_levelSets [ level ] . size ( ) ;
2024-06-26 08:34:47 -05:00
if ( m_splitMatrix ) {
2024-08-20 08:06:59 -05:00
detail : : DILU : : computeDiluDiagonalSplit < field_type , blocksize_ > (
2024-06-18 04:42:00 -05:00
m_gpuMatrixReorderedLower - > getNonZeroValues ( ) . data ( ) ,
m_gpuMatrixReorderedLower - > getRowIndices ( ) . data ( ) ,
2024-08-20 08:06:59 -05:00
m_gpuMatrixReorderedLower - > getColumnIndices ( ) . data ( ) ,
2024-06-18 04:42:00 -05:00
m_gpuMatrixReorderedUpper - > getNonZeroValues ( ) . data ( ) ,
m_gpuMatrixReorderedUpper - > getRowIndices ( ) . data ( ) ,
2024-08-20 08:06:59 -05:00
m_gpuMatrixReorderedUpper - > getColumnIndices ( ) . data ( ) ,
2024-06-26 08:31:52 -05:00
m_gpuMatrixReorderedDiag - > data ( ) ,
2024-08-20 08:06:59 -05:00
m_gpuReorderToNatural . data ( ) ,
2024-06-18 04:42:00 -05:00
m_gpuNaturalToReorder . data ( ) ,
2024-08-20 08:06:59 -05:00
levelStartIdx ,
numOfRowsInLevel ,
m_gpuDInv . data ( ) ,
factorizationBlockSize ) ;
2024-06-18 04:42:00 -05:00
} else {
2024-08-20 08:06:59 -05:00
detail : : DILU : : computeDiluDiagonal < field_type , blocksize_ > (
m_gpuMatrixReordered - > getNonZeroValues ( ) . data ( ) ,
m_gpuMatrixReordered - > getRowIndices ( ) . data ( ) ,
m_gpuMatrixReordered - > getColumnIndices ( ) . data ( ) ,
m_gpuReorderToNatural . data ( ) ,
m_gpuNaturalToReorder . data ( ) ,
levelStartIdx ,
numOfRowsInLevel ,
m_gpuDInv . data ( ) ,
factorizationBlockSize ) ;
2024-05-31 03:36:46 -05:00
}
2024-08-20 08:06:59 -05:00
levelStartIdx + = numOfRowsInLevel ;
2023-11-22 07:29:43 -06:00
}
}
2024-06-18 08:44:19 -05:00
template < class M , class X , class Y , int l >
void
2024-08-22 07:28:33 -05:00
GpuDILU < M , X , Y , l > : : tuneThreadBlockSizes ( )
2024-06-18 08:44:19 -05:00
{
2024-08-12 02:54:51 -05:00
// tune the thread-block size of the update function
2024-08-20 08:06:59 -05:00
auto tuneMoveThreadBlockSizeInUpdate = [ this ] ( int moveThreadBlockSize ) {
this - > update ( moveThreadBlockSize , m_DILUFactorizationThreadBlockSize ) ;
} ;
2024-08-22 03:07:06 -05:00
m_moveThreadBlockSize = detail : : tuneThreadBlockSize ( tuneMoveThreadBlockSizeInUpdate , " Kernel moving data to reordered matrix " ) ;
2024-08-20 08:06:59 -05:00
auto tuneFactorizationThreadBlockSizeInUpdate = [ this ] ( int factorizationThreadBlockSize ) {
this - > update ( m_moveThreadBlockSize , factorizationThreadBlockSize ) ;
} ;
2024-08-22 03:07:06 -05:00
m_DILUFactorizationThreadBlockSize = detail : : tuneThreadBlockSize ( tuneFactorizationThreadBlockSizeInUpdate , " Kernel computing DILU factorization " ) ;
2024-08-12 02:54:51 -05:00
// tune the thread-block size of the apply
2024-08-22 08:20:20 -05:00
GpuVector < field_type > tmpV ( m_gpuMatrix . N ( ) * m_gpuMatrix . blockSize ( ) ) ;
GpuVector < field_type > tmpD ( m_gpuMatrix . N ( ) * m_gpuMatrix . blockSize ( ) ) ;
2024-06-18 08:44:19 -05:00
tmpD = 1 ;
2024-08-20 08:06:59 -05:00
auto tuneLowerSolveThreadBlockSizeInApply = [ this , & tmpV , & tmpD ] ( int lowerSolveThreadBlockSize ) {
this - > apply ( tmpV , tmpD , lowerSolveThreadBlockSize , m_DILUFactorizationThreadBlockSize ) ;
} ;
2024-08-22 03:07:06 -05:00
m_lowerSolveThreadBlockSize = detail : : tuneThreadBlockSize ( tuneLowerSolveThreadBlockSizeInApply , " Kernel computing a lower triangular solve for a level set " ) ;
2024-08-20 08:06:59 -05:00
auto tuneUpperSolveThreadBlockSizeInApply = [ this , & tmpV , & tmpD ] ( int upperSolveThreadBlockSize ) {
this - > apply ( tmpV , tmpD , m_moveThreadBlockSize , upperSolveThreadBlockSize ) ;
} ;
2024-08-22 03:07:06 -05:00
m_upperSolveThreadBlockSize = detail : : tuneThreadBlockSize ( tuneUpperSolveThreadBlockSizeInApply , " Kernel computing an upper triangular solve for a level set " ) ;
2024-06-18 08:44:19 -05:00
}
2024-08-22 06:52:50 -05:00
} // namespace Opm::gpuistl
2023-11-22 07:29:43 -06:00
# define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \
2024-08-22 07:28:33 -05:00
template class : : Opm : : gpuistl : : GpuDILU < Dune : : BCRSMatrix < Dune : : FieldMatrix < realtype , blockdim , blockdim > > , \
2024-08-22 08:20:20 -05:00
: : Opm : : gpuistl : : GpuVector < realtype > , \
: : Opm : : gpuistl : : GpuVector < realtype > > ; \
2024-08-22 07:28:33 -05:00
template class : : Opm : : gpuistl : : GpuDILU < Dune : : BCRSMatrix < Opm : : MatrixBlock < realtype , blockdim , blockdim > > , \
2024-08-22 08:20:20 -05:00
: : Opm : : gpuistl : : GpuVector < realtype > , \
: : Opm : : gpuistl : : GpuVector < realtype > >
2023-11-22 07:29:43 -06:00
INSTANTIATE_CUDILU_DUNE ( double , 1 ) ;
INSTANTIATE_CUDILU_DUNE ( double , 2 ) ;
INSTANTIATE_CUDILU_DUNE ( double , 3 ) ;
INSTANTIATE_CUDILU_DUNE ( double , 4 ) ;
INSTANTIATE_CUDILU_DUNE ( double , 5 ) ;
INSTANTIATE_CUDILU_DUNE ( double , 6 ) ;
INSTANTIATE_CUDILU_DUNE ( float , 1 ) ;
INSTANTIATE_CUDILU_DUNE ( float , 2 ) ;
INSTANTIATE_CUDILU_DUNE ( float , 3 ) ;
INSTANTIATE_CUDILU_DUNE ( float , 4 ) ;
INSTANTIATE_CUDILU_DUNE ( float , 5 ) ;
INSTANTIATE_CUDILU_DUNE ( float , 6 ) ;