Make DILU and ILU0 level-set solve function names consistent

This commit is contained in:
Tobias Meyer Andersen 2024-07-02 11:32:29 +02:00
parent 033723cf14
commit a41dfc5a6e
6 changed files with 56 additions and 56 deletions

View File

@ -103,7 +103,7 @@ CuDILU<M, X, Y, l>::apply(X& v, const Y& d)
for (int level = 0; level < m_levelSets.size(); ++level) {
const int numOfRowsInLevel = m_levelSets[level].size();
if (m_splitMatrix) {
detail::DILU::computeLowerSolveLevelSetSplit<field_type, blocksize_>(
detail::DILU::solveLowerLevelSetSplit<field_type, blocksize_>(
m_gpuMatrixReorderedLower->getNonZeroValues().data(),
m_gpuMatrixReorderedLower->getRowIndices().data(),
m_gpuMatrixReorderedLower->getColumnIndices().data(),
@ -115,7 +115,7 @@ CuDILU<M, X, Y, l>::apply(X& v, const Y& d)
v.data(),
m_applyThreadBlockSize);
} else {
detail::DILU::computeLowerSolveLevelSet<field_type, blocksize_>(
detail::DILU::solveLowerLevelSet<field_type, blocksize_>(
m_gpuMatrixReordered->getNonZeroValues().data(),
m_gpuMatrixReordered->getRowIndices().data(),
m_gpuMatrixReordered->getColumnIndices().data(),
@ -136,7 +136,7 @@ CuDILU<M, X, Y, l>::apply(X& v, const Y& d)
const int numOfRowsInLevel = m_levelSets[level].size();
levelStartIdx -= numOfRowsInLevel;
if (m_splitMatrix) {
detail::DILU::computeUpperSolveLevelSetSplit<field_type, blocksize_>(
detail::DILU::solveUpperLevelSetSplit<field_type, blocksize_>(
m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
m_gpuMatrixReorderedUpper->getRowIndices().data(),
m_gpuMatrixReorderedUpper->getColumnIndices().data(),
@ -147,7 +147,7 @@ CuDILU<M, X, Y, l>::apply(X& v, const Y& d)
v.data(),
m_applyThreadBlockSize);
} else {
detail::DILU::computeUpperSolveLevelSet<field_type, blocksize_>(
detail::DILU::solveUpperLevelSet<field_type, blocksize_>(
m_gpuMatrixReordered->getNonZeroValues().data(),
m_gpuMatrixReordered->getRowIndices().data(),
m_gpuMatrixReordered->getColumnIndices().data(),

View File

@ -104,7 +104,7 @@ CuILU0_OPM_Impl<M, X, Y, l>::apply(X& v, const Y& d)
for (int level = 0; level < m_levelSets.size(); ++level) {
const int numOfRowsInLevel = m_levelSets[level].size();
if (m_splitMatrix) {
detail::ILU0::ILULowerSolveLevelSetSplit<field_type, blocksize_>(
detail::ILU0::solveLowerLevelSetSplit<field_type, blocksize_>(
m_gpuMatrixReorderedLower->getNonZeroValues().data(),
m_gpuMatrixReorderedLower->getRowIndices().data(),
m_gpuMatrixReorderedLower->getColumnIndices().data(),
@ -116,7 +116,7 @@ CuILU0_OPM_Impl<M, X, Y, l>::apply(X& v, const Y& d)
v.data(),
m_applyThreadBlockSize);
} else {
detail::ILU0::ILULowerSolveLevelSet<field_type, blocksize_>(
detail::ILU0::solveLowerLevelSet<field_type, blocksize_>(
m_gpuReorderedLU->getNonZeroValues().data(),
m_gpuReorderedLU->getRowIndices().data(),
m_gpuReorderedLU->getColumnIndices().data(),
@ -136,7 +136,7 @@ CuILU0_OPM_Impl<M, X, Y, l>::apply(X& v, const Y& d)
const int numOfRowsInLevel = m_levelSets[level].size();
levelStartIdx -= numOfRowsInLevel;
if (m_splitMatrix) {
detail::ILU0::ILUUpperSolveLevelSetSplit<field_type, blocksize_>(
detail::ILU0::solveUpperLevelSetSplit<field_type, blocksize_>(
m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
m_gpuMatrixReorderedUpper->getRowIndices().data(),
m_gpuMatrixReorderedUpper->getColumnIndices().data(),
@ -147,7 +147,7 @@ CuILU0_OPM_Impl<M, X, Y, l>::apply(X& v, const Y& d)
v.data(),
m_applyThreadBlockSize);
} else {
detail::ILU0::ILUUpperSolveLevelSet<field_type, blocksize_>(
detail::ILU0::solveUpperLevelSet<field_type, blocksize_>(
m_gpuReorderedLU->getNonZeroValues().data(),
m_gpuReorderedLU->getRowIndices().data(),
m_gpuReorderedLU->getColumnIndices().data(),

View File

@ -29,7 +29,7 @@ namespace
{
template <class T, int blocksize>
__global__ void cuComputeLowerSolveLevelSet(T* mat,
__global__ void cuSolveLowerLevelSet(T* mat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -60,7 +60,7 @@ namespace
}
template <class T, int blocksize>
__global__ void cuComputeLowerSolveLevelSetSplit(T* mat,
__global__ void cuSolveLowerLevelSetSplit(T* mat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -94,7 +94,7 @@ namespace
template <class T, int blocksize>
__global__ void cuComputeUpperSolveLevelSet(T* mat,
__global__ void cuSolveUpperLevelSet(T* mat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -119,7 +119,7 @@ namespace
}
template <class T, int blocksize>
__global__ void cuComputeUpperSolveLevelSetSplit(T* mat,
__global__ void cuSolveUpperLevelSetSplit(T* mat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -270,7 +270,7 @@ namespace
// perform the lower solve for all rows in the same level set
template <class T, int blocksize>
void
computeLowerSolveLevelSet(T* reorderedMat,
solveLowerLevelSet(T* reorderedMat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -281,16 +281,16 @@ computeLowerSolveLevelSet(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuComputeLowerSolveLevelSet<T, blocksize>, thrBlockSize);
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuComputeLowerSolveLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
cuSolveLowerLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v);
}
template <class T, int blocksize>
void
computeLowerSolveLevelSetSplit(T* reorderedMat,
solveLowerLevelSetSplit(T* reorderedMat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -301,15 +301,15 @@ computeLowerSolveLevelSetSplit(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuComputeLowerSolveLevelSetSplit<T, blocksize>, thrBlockSize);
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuComputeLowerSolveLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
cuSolveLowerLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v);
}
// perform the upper solve for all rows in the same level set
template <class T, int blocksize>
void
computeUpperSolveLevelSet(T* reorderedMat,
solveUpperLevelSet(T* reorderedMat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -319,15 +319,15 @@ computeUpperSolveLevelSet(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuComputeUpperSolveLevelSet<T, blocksize>, thrBlockSize);
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuComputeUpperSolveLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
cuSolveUpperLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
}
template <class T, int blocksize>
void
computeUpperSolveLevelSetSplit(T* reorderedMat,
solveUpperLevelSetSplit(T* reorderedMat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -337,9 +337,9 @@ computeUpperSolveLevelSetSplit(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuComputeUpperSolveLevelSetSplit<T, blocksize>, thrBlockSize);
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuComputeUpperSolveLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
cuSolveUpperLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
}
@ -412,10 +412,10 @@ computeDiluDiagonalSplit(T* reorderedLowerMat,
template void computeDiluDiagonal<T, blocksize>(T*, int*, int*, int*, int*, const int, int, T*, int); \
template void computeDiluDiagonalSplit<T, blocksize>( \
T*, int*, int*, T*, int*, int*, T*, int*, int*, const int, int, T*, int); \
template void computeUpperSolveLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*, int); \
template void computeLowerSolveLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, const T*, const T*, T*, int); \
template void computeUpperSolveLevelSetSplit<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*, int); \
template void computeLowerSolveLevelSetSplit<T, blocksize>(T*, int*, int*, int*, int, int, const T*, const T*, T*, int);
template void solveUpperLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*, int); \
template void solveLowerLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, const T*, const T*, T*, int); \
template void solveUpperLevelSetSplit<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*, int); \
template void solveLowerLevelSetSplit<T, blocksize>(T*, int*, int*, int*, int, int, const T*, const T*, T*, int);
INSTANTIATE_KERNEL_WRAPPERS(float, 1);
INSTANTIATE_KERNEL_WRAPPERS(float, 2);

View File

@ -44,7 +44,7 @@ namespace Opm::cuistl::detail::DILU
* @param [out] v Will store the results of the lower solve
*/
template <class T, int blocksize>
void computeLowerSolveLevelSet(T* reorderedMat,
void solveLowerLevelSet(T* reorderedMat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -72,7 +72,7 @@ void computeLowerSolveLevelSet(T* reorderedMat,
* @param [out] v Will store the results of the lower solve
*/
template <class T, int blocksize>
void computeLowerSolveLevelSetSplit(T* reorderedUpperMat,
void solveLowerLevelSetSplit(T* reorderedUpperMat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -99,7 +99,7 @@ void computeLowerSolveLevelSetSplit(T* reorderedUpperMat,
* solve
*/
template <class T, int blocksize>
void computeUpperSolveLevelSet(T* reorderedMat,
void solveUpperLevelSet(T* reorderedMat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -125,7 +125,7 @@ void computeUpperSolveLevelSet(T* reorderedMat,
* solve
*/
template <class T, int blocksize>
void computeUpperSolveLevelSetSplit(T* reorderedUpperMat,
void solveUpperLevelSetSplit(T* reorderedUpperMat,
int* rowIndices,
int* colIndices,
int* indexConversion,

View File

@ -201,7 +201,7 @@ namespace
}
template <class T, int blocksize>
__global__ void cuILULowerSolveLevelSet(T* mat,
__global__ void cuSolveLowerLevelSet(T* mat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -233,7 +233,7 @@ namespace
}
template <class T, int blocksize>
__global__ void cuILUUpperSolveLevelSet(T* mat,
__global__ void cuSolveUpperLevelSet(T* mat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -263,7 +263,7 @@ namespace
}
template <class T, int blocksize>
__global__ void cuILULowerSolveLevelSetSplit(T* mat,
__global__ void cuSolveLowerLevelSetSplit(T* mat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -295,7 +295,7 @@ namespace
}
template <class T, int blocksize>
__global__ void cuILUUpperSolveLevelSetSplit(T* mat,
__global__ void cuSolveUpperLevelSetSplit(T* mat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -327,7 +327,7 @@ namespace
template <class T, int blocksize>
void
ILULowerSolveLevelSet(T* reorderedMat,
solveLowerLevelSet(T* reorderedMat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -337,15 +337,15 @@ ILULowerSolveLevelSet(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuILULowerSolveLevelSet<T, blocksize>, thrBlockSize);
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuILULowerSolveLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
cuSolveLowerLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, d, v);
}
// perform the upper solve for all rows in the same level set
template <class T, int blocksize>
void
ILUUpperSolveLevelSet(T* reorderedMat,
solveUpperLevelSet(T* reorderedMat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -354,15 +354,15 @@ ILUUpperSolveLevelSet(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuILUUpperSolveLevelSet<T, blocksize>, thrBlockSize);
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSet<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuILUUpperSolveLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
cuSolveUpperLevelSet<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, v);
}
template <class T, int blocksize>
void
ILULowerSolveLevelSetSplit(T* reorderedMat,
solveLowerLevelSetSplit(T* reorderedMat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -373,15 +373,15 @@ ILULowerSolveLevelSetSplit(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuILULowerSolveLevelSetSplit<T, blocksize>, thrBlockSize);
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveLowerLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuILULowerSolveLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
cuSolveLowerLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, d, v);
}
// perform the upper solve for all rows in the same level set
template <class T, int blocksize>
void
ILUUpperSolveLevelSetSplit(T* reorderedMat,
solveUpperLevelSetSplit(T* reorderedMat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -391,9 +391,9 @@ ILUUpperSolveLevelSetSplit(T* reorderedMat,
T* v,
int thrBlockSize)
{
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuILUUpperSolveLevelSetSplit<T, blocksize>, thrBlockSize);
int threadBlockSize = ::Opm::cuistl::detail::getCudaRecomendedThreadBlockSize(cuSolveUpperLevelSetSplit<T, blocksize>, thrBlockSize);
int nThreadBlocks = ::Opm::cuistl::detail::getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
cuILUUpperSolveLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
cuSolveUpperLevelSetSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
reorderedMat, rowIndices, colIndices, indexConversion, startIdx, rowsInLevelSet, dInv, v);
}
@ -433,12 +433,12 @@ void LUFactorizationSplit(T* reorderedLowerMat,
}
#define INSTANTIATE_KERNEL_WRAPPERS(T, blocksize) \
template void ILUUpperSolveLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, T*, int); \
template void ILULowerSolveLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*, int); \
template void solveUpperLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, T*, int); \
template void solveLowerLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*, int); \
template void LUFactorization<T, blocksize>(T*, int*, int*, int*, int*, size_t, int, int); \
template void LUFactorizationSplit<T, blocksize>(T*, int*, int*, T*, int*, int*, T*, int*, int*, const int, int, int); \
template void ILUUpperSolveLevelSetSplit<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*, int); \
template void ILULowerSolveLevelSetSplit<T, blocksize>(T*, int*, int*, int*, int, int, const T*, const T*, T*, int);
template void solveUpperLevelSetSplit<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*, int); \
template void solveLowerLevelSetSplit<T, blocksize>(T*, int*, int*, int*, int, int, const T*, const T*, T*, int);
INSTANTIATE_KERNEL_WRAPPERS(float, 1);
INSTANTIATE_KERNEL_WRAPPERS(float, 2);

View File

@ -38,7 +38,7 @@ namespace Opm::cuistl::detail::ILU0
* @param threadBlockSize The number of threads per threadblock. Leave as -1 if no thread block size has been chosen yet
*/
template <class T, int blocksize>
void ILUUpperSolveLevelSet(T* reorderedMat,
void solveUpperLevelSet(T* reorderedMat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -63,7 +63,7 @@ void ILUUpperSolveLevelSet(T* reorderedMat,
* @param threadBlockSize The number of threads per threadblock. Leave as -1 if no thread block size has been chosen yet
*/
template <class T, int blocksize>
void ILULowerSolveLevelSet(T* reorderedMat,
void solveLowerLevelSet(T* reorderedMat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -90,7 +90,7 @@ void ILULowerSolveLevelSet(T* reorderedMat,
* @param threadBlockSize The number of threads per threadblock. Leave as -1 if no thread block size has been chosen yet
*/
template <class T, int blocksize>
void ILUUpperSolveLevelSetSplit(T* reorderedMat,
void solveUpperLevelSetSplit(T* reorderedMat,
int* rowIndices,
int* colIndices,
int* indexConversion,
@ -118,7 +118,7 @@ void ILUUpperSolveLevelSetSplit(T* reorderedMat,
* @param threadBlockSize The number of threads per threadblock. Leave as -1 if no thread block size has been chosen yet
*/
template <class T, int blocksize>
void ILULowerSolveLevelSetSplit(T* reorderedLowerMat,
void solveLowerLevelSetSplit(T* reorderedLowerMat,
int* rowIndices,
int* colIndices,
int* indexConversion,