mirror of https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00

clang format

This commit is contained in:
parent 2b9c81fe09
commit 82ff782d5f
@@ -25,9 +25,9 @@
 #include <opm/simulators/linalg/cuistl/CuDILU.hpp>
 #include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
 #include <opm/simulators/linalg/cuistl/CuVector.hpp>
+#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
 #include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
-#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
 #include <opm/simulators/linalg/matrixblock.hh>
 #include <vector>

@@ -65,7 +65,9 @@ createNaturalToReordered(Opm::SparseTable<size_t> levelSets)

 template <class M, class field_type, class GPUM>
 void
-createReorderedMatrix(const M& naturalMatrix, std::vector<int> reorderedToNatural, std::unique_ptr<GPUM>& reorderedGpuMat)
+createReorderedMatrix(const M& naturalMatrix,
+                      std::vector<int> reorderedToNatural,
+                      std::unique_ptr<GPUM>& reorderedGpuMat)
 {
     M reorderedMatrix(naturalMatrix.N(), naturalMatrix.N(), naturalMatrix.nonzeroes(), M::row_wise);
     for (auto dstRowIt = reorderedMatrix.createbegin(); dstRowIt != reorderedMatrix.createend(); ++dstRowIt) {
@@ -81,22 +83,26 @@ createReorderedMatrix(const M& naturalMatrix, std::vector<int> reorderedToNatura

 template <class M, class field_type, class GPUM>
 void
-extractLowerAndUpperMatrices(const M& naturalMatrix, std::vector<int> reorderedToNatural, std::unique_ptr<GPUM>& lower, std::unique_ptr<GPUM>& upper)
+extractLowerAndUpperMatrices(const M& naturalMatrix,
+                             std::vector<int> reorderedToNatural,
+                             std::unique_ptr<GPUM>& lower,
+                             std::unique_ptr<GPUM>& upper)
 {
-    const size_t new_nnz = (naturalMatrix.nonzeroes() - naturalMatrix.N())/2;
+    const size_t new_nnz = (naturalMatrix.nonzeroes() - naturalMatrix.N()) / 2;

     M reorderedLower(naturalMatrix.N(), naturalMatrix.N(), new_nnz, M::row_wise);
     M reorderedUpper(naturalMatrix.N(), naturalMatrix.N(), new_nnz, M::row_wise);

-    for (auto lowerIt = reorderedLower.createbegin(), upperIt = reorderedUpper.createbegin(); lowerIt != reorderedLower.createend(); ++lowerIt, ++upperIt) {
+    for (auto lowerIt = reorderedLower.createbegin(), upperIt = reorderedUpper.createbegin();
+         lowerIt != reorderedLower.createend();
+         ++lowerIt, ++upperIt) {

         auto srcRow = naturalMatrix.begin() + reorderedToNatural[lowerIt.index()];

         for (auto elem = srcRow->begin(); elem != srcRow->end(); ++elem) {
-            if (elem.index() < srcRow.index()){ // add index to lower matrix if under the diagonal
+            if (elem.index() < srcRow.index()) { // add index to lower matrix if under the diagonal
                 lowerIt.insert(elem.index());
-            }
-            else if (elem.index() > srcRow.index()){ // add element to upper matrix if above the diagonal
+            } else if (elem.index() > srcRow.index()) { // add element to upper matrix if above the diagonal
                 upperIt.insert(elem.index());
             }
         }
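Editor's note on the hunk above: new_nnz = (nonzeroes - N) / 2 assumes the sparsity pattern is structurally symmetric, so the off-diagonal entries divide evenly between the two triangles. A minimal standalone sketch of the same routing on a plain CSR structure (hypothetical Csr type, scalar entries; the real code builds Dune BCRSMatrix patterns and uploads them as CuSparseMatrix objects):

// Illustrative sketch, not part of the commit: split a plain CSR matrix
// into strictly lower and strictly upper parts by comparing each entry's
// column index against its row index. Hypothetical types for clarity.
#include <vector>

struct Csr {
    std::vector<int> rowPtr;  // size N+1
    std::vector<int> col;     // column index per nonzero
    std::vector<double> val;  // one scalar per nonzero (blocksize 1)
};

void splitLowerUpper(const Csr& a, Csr& lower, Csr& upper)
{
    const int n = static_cast<int>(a.rowPtr.size()) - 1;
    lower = Csr {};
    upper = Csr {};
    lower.rowPtr.push_back(0);
    upper.rowPtr.push_back(0);
    for (int i = 0; i < n; ++i) {
        for (int k = a.rowPtr[i]; k < a.rowPtr[i + 1]; ++k) {
            if (a.col[k] < i) {            // below the diagonal -> L
                lower.col.push_back(a.col[k]);
                lower.val.push_back(a.val[k]);
            } else if (a.col[k] > i) {     // above the diagonal -> U
                upper.col.push_back(a.col[k]);
                upper.val.push_back(a.val[k]);
            }                              // diagonal blocks are stored separately
        }
        lower.rowPtr.push_back(static_cast<int>(lower.col.size()));
        upper.rowPtr.push_back(static_cast<int>(upper.col.size()));
    }
}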
@@ -144,12 +150,13 @@ CuDILU<M, X, Y, l>::CuDILU(const M& A, bool split_matrix)
                     fmt::format("CuSparse matrix not same number of non zeroes as DUNE matrix. {} vs {}. ",
                                 m_gpuMatrix.nonzeroes(),
                                 A.nonzeroes()));
-    if (m_split_matrix){
-        m_gpuMatrixReorderedDiag.emplace(CuVector<field_type>(blocksize_*blocksize_*m_cpuMatrix.N()));
-        extractLowerAndUpperMatrices<M, field_type, CuSparseMatrix<field_type>>(m_cpuMatrix, m_reorderedToNatural, m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper);
-    }
-    else{
-        createReorderedMatrix<M, field_type, CuSparseMatrix<field_type>>(m_cpuMatrix, m_reorderedToNatural, m_gpuMatrixReordered);
+    if (m_split_matrix) {
+        m_gpuMatrixReorderedDiag.emplace(CuVector<field_type>(blocksize_ * blocksize_ * m_cpuMatrix.N()));
+        extractLowerAndUpperMatrices<M, field_type, CuSparseMatrix<field_type>>(
+            m_cpuMatrix, m_reorderedToNatural, m_gpuMatrixReorderedLower, m_gpuMatrixReorderedUpper);
+    } else {
+        createReorderedMatrix<M, field_type, CuSparseMatrix<field_type>>(
+            m_cpuMatrix, m_reorderedToNatural, m_gpuMatrixReordered);
     }
     computeDiagAndMoveReorderedData();
 }
@@ -170,27 +177,28 @@ CuDILU<M, X, Y, l>::apply(X& v, const Y& d)
     int levelStartIdx = 0;
     for (int level = 0; level < m_levelSets.size(); ++level) {
         const int numOfRowsInLevel = m_levelSets[level].size();
-        if (m_split_matrix){
-            detail::computeLowerSolveLevelSetSplit<field_type, blocksize_>(m_gpuMatrixReorderedLower->getNonZeroValues().data(),
-                                                                           m_gpuMatrixReorderedLower->getRowIndices().data(),
-                                                                           m_gpuMatrixReorderedLower->getColumnIndices().data(),
-                                                                           m_gpuReorderToNatural.data(),
-                                                                           levelStartIdx,
-                                                                           numOfRowsInLevel,
-                                                                           m_gpuDInv.data(),
-                                                                           d.data(),
-                                                                           v.data());
-        }
-        else{
-            detail::computeLowerSolveLevelSet<field_type, blocksize_>(m_gpuMatrixReordered->getNonZeroValues().data(),
-                                                                      m_gpuMatrixReordered->getRowIndices().data(),
-                                                                      m_gpuMatrixReordered->getColumnIndices().data(),
-                                                                      m_gpuReorderToNatural.data(),
-                                                                      levelStartIdx,
-                                                                      numOfRowsInLevel,
-                                                                      m_gpuDInv.data(),
-                                                                      d.data(),
-                                                                      v.data());
+        if (m_split_matrix) {
+            detail::computeLowerSolveLevelSetSplit<field_type, blocksize_>(
+                m_gpuMatrixReorderedLower->getNonZeroValues().data(),
+                m_gpuMatrixReorderedLower->getRowIndices().data(),
+                m_gpuMatrixReorderedLower->getColumnIndices().data(),
+                m_gpuReorderToNatural.data(),
+                levelStartIdx,
+                numOfRowsInLevel,
+                m_gpuDInv.data(),
+                d.data(),
+                v.data());
+        } else {
+            detail::computeLowerSolveLevelSet<field_type, blocksize_>(
+                m_gpuMatrixReordered->getNonZeroValues().data(),
+                m_gpuMatrixReordered->getRowIndices().data(),
+                m_gpuMatrixReordered->getColumnIndices().data(),
+                m_gpuReorderToNatural.data(),
+                levelStartIdx,
+                numOfRowsInLevel,
+                m_gpuDInv.data(),
+                d.data(),
+                v.data());
         }
         levelStartIdx += numOfRowsInLevel;
     }
@@ -200,25 +208,26 @@ CuDILU<M, X, Y, l>::apply(X& v, const Y& d)
     for (int level = m_levelSets.size() - 1; level >= 0; --level) {
         const int numOfRowsInLevel = m_levelSets[level].size();
         levelStartIdx -= numOfRowsInLevel;
-        if (m_split_matrix){
-            detail::computeUpperSolveLevelSetSplit<field_type, blocksize_>(m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
-                                                                           m_gpuMatrixReorderedUpper->getRowIndices().data(),
-                                                                           m_gpuMatrixReorderedUpper->getColumnIndices().data(),
-                                                                           m_gpuReorderToNatural.data(),
-                                                                           levelStartIdx,
-                                                                           numOfRowsInLevel,
-                                                                           m_gpuDInv.data(),
-                                                                           v.data());
-        }
-        else{
-            detail::computeUpperSolveLevelSet<field_type, blocksize_>(m_gpuMatrixReordered->getNonZeroValues().data(),
-                                                                      m_gpuMatrixReordered->getRowIndices().data(),
-                                                                      m_gpuMatrixReordered->getColumnIndices().data(),
-                                                                      m_gpuReorderToNatural.data(),
-                                                                      levelStartIdx,
-                                                                      numOfRowsInLevel,
-                                                                      m_gpuDInv.data(),
-                                                                      v.data());
+        if (m_split_matrix) {
+            detail::computeUpperSolveLevelSetSplit<field_type, blocksize_>(
+                m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
+                m_gpuMatrixReorderedUpper->getRowIndices().data(),
+                m_gpuMatrixReorderedUpper->getColumnIndices().data(),
+                m_gpuReorderToNatural.data(),
+                levelStartIdx,
+                numOfRowsInLevel,
+                m_gpuDInv.data(),
+                v.data());
+        } else {
+            detail::computeUpperSolveLevelSet<field_type, blocksize_>(
+                m_gpuMatrixReordered->getNonZeroValues().data(),
+                m_gpuMatrixReordered->getRowIndices().data(),
+                m_gpuMatrixReordered->getColumnIndices().data(),
+                m_gpuReorderToNatural.data(),
+                levelStartIdx,
+                numOfRowsInLevel,
+                m_gpuDInv.data(),
+                v.data());
         }
     }
 }
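Editor's note: the two loops above implement level-set (wavefront) scheduling. Rows within one level set depend only on rows in earlier levels, so each kernel launch can solve a whole level in parallel, and the upper (backward) solve walks the levels in reverse. A serial sketch of that schedule, with a hypothetical solveRow callback (the real kernels additionally apply dInv and the natural/reordered index maps):

// Illustrative sketch, not part of the commit.
#include <functional>
#include <vector>

void lowerSolveByLevels(const std::vector<std::vector<int>>& levelSets,
                        const std::function<void(int)>& solveRow)
{
    for (const auto& level : levelSets) { // forward substitution: levels in order
        for (int row : level) {           // rows in a level are independent;
            solveRow(row);                // the GPU handles them concurrently
        }
    }
}

void upperSolveByLevels(const std::vector<std::vector<int>>& levelSets,
                        const std::function<void(int)>& solveRow)
{
    // Backward substitution visits the levels in reverse order.
    for (auto it = levelSets.rbegin(); it != levelSets.rend(); ++it) {
        for (int row : *it) {
            solveRow(row);
        }
    }
}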
@@ -254,45 +263,45 @@ CuDILU<M, X, Y, l>::computeDiagAndMoveReorderedData()
 {
     OPM_TIMEBLOCK(prec_update);
     {
-        if (m_split_matrix){
-            detail::copyMatDataToReorderedSplit<field_type, blocksize_>(m_gpuMatrix.getNonZeroValues().data(),
-                                                                        m_gpuMatrix.getRowIndices().data(),
-                                                                        m_gpuMatrix.getColumnIndices().data(),
-                                                                        m_gpuMatrixReorderedLower->getNonZeroValues().data(),
-                                                                        m_gpuMatrixReorderedLower->getRowIndices().data(),
-                                                                        m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
-                                                                        m_gpuMatrixReorderedUpper->getRowIndices().data(),
-                                                                        m_gpuMatrixReorderedDiag.value().data(),
-                                                                        m_gpuNaturalToReorder.data(),
-                                                                        m_gpuMatrixReorderedLower->N());
-        }
-        else{
+        if (m_split_matrix) {
+            detail::copyMatDataToReorderedSplit<field_type, blocksize_>(
+                m_gpuMatrix.getNonZeroValues().data(),
+                m_gpuMatrix.getRowIndices().data(),
+                m_gpuMatrix.getColumnIndices().data(),
+                m_gpuMatrixReorderedLower->getNonZeroValues().data(),
+                m_gpuMatrixReorderedLower->getRowIndices().data(),
+                m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
+                m_gpuMatrixReorderedUpper->getRowIndices().data(),
+                m_gpuMatrixReorderedDiag.value().data(),
+                m_gpuNaturalToReorder.data(),
+                m_gpuMatrixReorderedLower->N());
+        } else {
             detail::copyMatDataToReordered<field_type, blocksize_>(m_gpuMatrix.getNonZeroValues().data(),
                                                                    m_gpuMatrix.getRowIndices().data(),
                                                                    m_gpuMatrixReordered->getNonZeroValues().data(),
                                                                    m_gpuMatrixReordered->getRowIndices().data(),
                                                                    m_gpuNaturalToReorder.data(),
                                                                    m_gpuMatrixReordered->N());
         }

         int levelStartIdx = 0;
         for (int level = 0; level < m_levelSets.size(); ++level) {
             const int numOfRowsInLevel = m_levelSets[level].size();
-            if (m_split_matrix){
-                detail::computeDiluDiagonalSplit<field_type, blocksize_>(m_gpuMatrixReorderedLower->getNonZeroValues().data(),
-                                                                         m_gpuMatrixReorderedLower->getRowIndices().data(),
-                                                                         m_gpuMatrixReorderedLower->getColumnIndices().data(),
-                                                                         m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
-                                                                         m_gpuMatrixReorderedUpper->getRowIndices().data(),
-                                                                         m_gpuMatrixReorderedUpper->getColumnIndices().data(),
-                                                                         m_gpuMatrixReorderedDiag.value().data(),
-                                                                         m_gpuReorderToNatural.data(),
-                                                                         m_gpuNaturalToReorder.data(),
-                                                                         levelStartIdx,
-                                                                         numOfRowsInLevel,
-                                                                         m_gpuDInv.data());
-            }
-            else{
+            if (m_split_matrix) {
+                detail::computeDiluDiagonalSplit<field_type, blocksize_>(
+                    m_gpuMatrixReorderedLower->getNonZeroValues().data(),
+                    m_gpuMatrixReorderedLower->getRowIndices().data(),
+                    m_gpuMatrixReorderedLower->getColumnIndices().data(),
+                    m_gpuMatrixReorderedUpper->getNonZeroValues().data(),
+                    m_gpuMatrixReorderedUpper->getRowIndices().data(),
+                    m_gpuMatrixReorderedUpper->getColumnIndices().data(),
+                    m_gpuMatrixReorderedDiag.value().data(),
+                    m_gpuReorderToNatural.data(),
+                    m_gpuNaturalToReorder.data(),
+                    levelStartIdx,
+                    numOfRowsInLevel,
+                    m_gpuDInv.data());
+            } else {
                 detail::computeDiluDiagonal<field_type, blocksize_>(m_gpuMatrixReordered->getNonZeroValues().data(),
                                                                     m_gpuMatrixReordered->getRowIndices().data(),
                                                                     m_gpuMatrixReordered->getColumnIndices().data(),
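Editor's note: the diagonal kernels invoked above implement the DILU recursion. For each row i taken in level order, D_i = A_ii - sum over j < i with A_ij and A_ji nonzero of A_ij * D_j^{-1} * A_ji, and dInv stores the inverted result. A scalar (blocksize 1) sketch under that reading, with a hypothetical accessor a(i, j) returning A_ij or 0 (the real kernels invert small dense blocks and process rows level set by level set):

// Illustrative sketch, not part of the commit: data dependence of the
// DILU diagonal. dInv[i] ends up holding 1 / D_i.
#include <vector>

template <class Mat>
void diluDiagonalScalar(const Mat& a, int n, std::vector<double>& dInv)
{
    dInv.assign(n, 0.0);
    for (int i = 0; i < n; ++i) {          // rows in earlier levels come first
        double di = a(i, i);
        for (int j = 0; j < i; ++j) {
            if (a(i, j) != 0.0 && a(j, i) != 0.0) {
                di -= a(i, j) * dInv[j] * a(j, i); // uses already-finished D_j
            }
        }
        dInv[i] = 1.0 / di;
    }
}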
@@ -229,14 +229,14 @@ namespace

 template <class T, int blocksize>
 __global__ void cuComputeLowerSolveLevelSetSplit(T* mat,
                                                  int* rowIndices,
                                                  int* colIndices,
                                                  int* indexConversion,
                                                  int startIdx,
                                                  int rowsInLevelSet,
                                                  const T* dInv,
                                                  const T* d,
                                                  T* v)
 {
     const auto reorderedRowIdx = startIdx + (blockDim.x * blockIdx.x + threadIdx.x);
     if (reorderedRowIdx < rowsInLevelSet + startIdx) {
@@ -250,7 +250,7 @@ namespace
             rhs[i] = d[naturalRowIdx * blocksize + i];
         }

-        //TODO: remove the first condition in the for loop
+        // TODO: remove the first condition in the for loop
         for (int block = nnzIdx; block < nnzIdxLim; ++block) {
             const int col = colIndices[block];
             mmv<T, blocksize>(&mat[block * blocksize * blocksize], &v[col * blocksize], rhs);
@@ -288,13 +288,13 @@ namespace

 template <class T, int blocksize>
 __global__ void cuComputeUpperSolveLevelSetSplit(T* mat,
                                                  int* rowIndices,
                                                  int* colIndices,
                                                  int* indexConversion,
                                                  int startIdx,
                                                  int rowsInLevelSet,
                                                  const T* dInv,
                                                  T* v)
 {
     const auto reorderedRowIdx = startIdx + (blockDim.x * blockIdx.x + threadIdx.x);
     if (reorderedRowIdx < rowsInLevelSet + startIdx) {
@@ -381,23 +381,23 @@ namespace

 template <class T, int blocksize>
 __global__ void cuComputeDiluDiagonalSplit(T* reorderedLowerMat,
                                            int* lowerRowIndices,
                                            int* lowerColIndices,
                                            T* reorderedUpperMat,
                                            int* upperRowIndices,
                                            int* upperColIndices,
                                            T* diagonal,
                                            int* reorderedToNatural,
                                            int* naturalToReordered,
                                            const int startIdx,
                                            int rowsInLevelSet,
                                            T* dInv)
 {
     const auto reorderedRowIdx = startIdx + blockDim.x * blockIdx.x + threadIdx.x;
     if (reorderedRowIdx < rowsInLevelSet + startIdx) {
         const int naturalRowIdx = reorderedToNatural[reorderedRowIdx];
         const size_t lowerRowStart = lowerRowIndices[reorderedRowIdx];
-        const size_t lowerRowEnd = lowerRowIndices[reorderedRowIdx+1];
+        const size_t lowerRowEnd = lowerRowIndices[reorderedRowIdx + 1];

         T dInvTmp[blocksize * blocksize];
         for (int i = 0; i < blocksize; ++i) {
@@ -410,8 +410,8 @@ namespace
             const int col = naturalToReordered[lowerColIndices[block]];

             int symOppositeIdx = upperRowIndices[col];
-            for (; symOppositeIdx < upperRowIndices[col + 1]; ++symOppositeIdx){
-                if (naturalRowIdx == upperColIndices[symOppositeIdx]){
+            for (; symOppositeIdx < upperRowIndices[col + 1]; ++symOppositeIdx) {
+                if (naturalRowIdx == upperColIndices[symOppositeIdx]) {
                     break;
                 }
             }
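Editor's note: the loop above scans row `col` of the upper matrix for the entry whose column equals naturalRowIdx, i.e. it locates the storage index of the symmetric opposite block A_ji of A_ij. A plain-CSR sketch of the same search, with hypothetical parameter names:

// Illustrative sketch, not part of the commit.
#include <vector>

int findSymmetricOpposite(const std::vector<int>& rowPtr,
                          const std::vector<int>& colInd,
                          int row,       // row j to search (the column of A_ij)
                          int wantedCol) // natural index i of the original row
{
    for (int k = rowPtr[row]; k < rowPtr[row + 1]; ++k) {
        if (colInd[k] == wantedCol) {
            return k;  // storage index of A_ji in the value array
        }
    }
    return -1;         // structurally missing: pattern not symmetric
}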
@@ -457,15 +457,23 @@ namespace
 }

 template <class T, int blocksize>
-__global__ void cuMoveDataToReorderedSplit(
-    T* srcMatrix, int* srcRowIndices, int* srcColumnIndices, T* dstLowerMatrix, int* dstLowerRowIndices, T* dstUpperMatrix, int* dstUpperRowIndices, T* dstDiag, int* naturalToReordered, size_t numberOfRows)
+__global__ void cuMoveDataToReorderedSplit(T* srcMatrix,
+                                           int* srcRowIndices,
+                                           int* srcColumnIndices,
+                                           T* dstLowerMatrix,
+                                           int* dstLowerRowIndices,
+                                           T* dstUpperMatrix,
+                                           int* dstUpperRowIndices,
+                                           T* dstDiag,
+                                           int* naturalToReordered,
+                                           size_t numberOfRows)
 {
     const auto srcRow = blockDim.x * blockIdx.x + threadIdx.x;
     if (srcRow < numberOfRows) {

         const auto dstRow = naturalToReordered[srcRow];
         const auto rowStart = srcRowIndices[srcRow];
-        const auto rowEnd = srcRowIndices[srcRow+1];
+        const auto rowEnd = srcRowIndices[srcRow + 1];

         auto lowerBlock = dstLowerRowIndices[dstRow];
         auto upperBlock = dstUpperRowIndices[dstRow];
@@ -474,17 +482,16 @@ namespace
             int dstBlock;
             T* dstBuffer;

-            if (srcColumnIndices[srcBlock] < srcRow){ // we are writing a value to the lower triangular matrix
+            if (srcColumnIndices[srcBlock] < srcRow) { // we are writing a value to the lower triangular matrix
                 dstBlock = lowerBlock;
                 ++lowerBlock;
                 dstBuffer = dstLowerMatrix;
-            }
-            else if (srcColumnIndices[srcBlock] > srcRow){ // we are writing a value to the upper triangular matrix
+            } else if (srcColumnIndices[srcBlock]
+                       > srcRow) { // we are writing a value to the upper triangular matrix
                 dstBlock = upperBlock;
                 ++upperBlock;
                 dstBuffer = dstUpperMatrix;
-            }
-            else{ // we are writing a value to the diagonal
+            } else { // we are writing a value to the diagonal
                 dstBlock = dstRow;
                 dstBuffer = dstDiag;
             }
@@ -511,14 +518,16 @@ namespace

 // Kernel here is the function object of the cuda kernel
 template <class Kernel>
-inline int getCudaRecomendedThreadBlockSize(Kernel k){
+inline int getCudaRecomendedThreadBlockSize(Kernel k)
+{
     int blockSize;
     int tmpGridSize;
-    cudaOccupancyMaxPotentialBlockSize( &tmpGridSize, &blockSize, k, 0, 0);
+    cudaOccupancyMaxPotentialBlockSize(&tmpGridSize, &blockSize, k, 0, 0);
     return blockSize;
 }

-inline int getNumberOfBlocks(int wantedThreads, int threadBlockSize){
+inline int getNumberOfBlocks(int wantedThreads, int threadBlockSize)
+{
     return (wantedThreads + threadBlockSize - 1) / threadBlockSize;
 }

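Editor's note: cudaOccupancyMaxPotentialBlockSize is the standard CUDA runtime helper for picking a block size that maximizes occupancy for a given kernel, and getNumberOfBlocks is the usual ceiling division for the grid size. A small usage sketch combining the two (hypothetical kernel `scale`; error handling omitted):

// Illustrative sketch, not part of the commit.
#include <cuda_runtime.h>

__global__ void scale(double* x, double alpha, int n)
{
    const int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < n) {
        x[i] *= alpha;
    }
}

void launchScale(double* d_x, double alpha, int n)
{
    int blockSize = 0;
    int minGridSize = 0; // grid size needed for full occupancy; unused here
    cudaOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, scale, 0, 0);
    const int nBlocks = (n + blockSize - 1) / blockSize; // ceiling division
    scale<<<nBlocks, blockSize>>>(d_x, alpha, n);
}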
@@ -557,14 +566,14 @@ computeLowerSolveLevelSet(T* reorderedMat,
 template <class T, int blocksize>
 void
 computeLowerSolveLevelSetSplit(T* reorderedMat,
                                int* rowIndices,
                                int* colIndices,
                                int* indexConversion,
                                int startIdx,
                                int rowsInLevelSet,
                                const T* dInv,
                                const T* d,
                                T* v)
 {
     int threadBlockSize = getCudaRecomendedThreadBlockSize(cuComputeLowerSolveLevelSetSplit<T, blocksize>);
     int nThreadBlocks = getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
@@ -590,13 +599,13 @@ computeUpperSolveLevelSet(T* reorderedMat,
 template <class T, int blocksize>
 void
 computeUpperSolveLevelSetSplit(T* reorderedMat,
                                int* rowIndices,
                                int* colIndices,
                                int* indexConversion,
                                int startIdx,
                                int rowsInLevelSet,
                                const T* dInv,
                                T* v)
 {
     int threadBlockSize = getCudaRecomendedThreadBlockSize(cuComputeLowerSolveLevelSetSplit<T, blocksize>);
     int nThreadBlocks = getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
@@ -633,34 +642,33 @@ computeDiluDiagonal(T* reorderedMat,
 template <class T, int blocksize>
 void
 computeDiluDiagonalSplit(T* reorderedLowerMat,
                          int* lowerRowIndices,
                          int* lowerColIndices,
                          T* reorderedUpperMat,
                          int* upperRowIndices,
                          int* upperColIndices,
                          T* diagonal,
                          int* reorderedToNatural,
                          int* naturalToReordered,
                          const int startIdx,
                          int rowsInLevelSet,
                          T* dInv)
 {
     if (blocksize <= 3) {
         int threadBlockSize = getCudaRecomendedThreadBlockSize(cuComputeLowerSolveLevelSetSplit<T, blocksize>);
         int nThreadBlocks = getNumberOfBlocks(rowsInLevelSet, threadBlockSize);
-        cuComputeDiluDiagonalSplit<T, blocksize>
-            <<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
-                                                 lowerRowIndices,
-                                                 lowerColIndices,
-                                                 reorderedUpperMat,
-                                                 upperRowIndices,
-                                                 upperColIndices,
-                                                 diagonal,
-                                                 reorderedToNatural,
-                                                 naturalToReordered,
-                                                 startIdx,
-                                                 rowsInLevelSet,
-                                                 dInv);
+        cuComputeDiluDiagonalSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(reorderedLowerMat,
+                                                                                     lowerRowIndices,
+                                                                                     lowerColIndices,
+                                                                                     reorderedUpperMat,
+                                                                                     upperRowIndices,
+                                                                                     upperColIndices,
+                                                                                     diagonal,
+                                                                                     reorderedToNatural,
+                                                                                     naturalToReordered,
+                                                                                     startIdx,
+                                                                                     rowsInLevelSet,
+                                                                                     dInv);
     } else {
         OPM_THROW(std::invalid_argument, "Inverting diagonal is not implemented for blocksizes > 3");
     }
@@ -677,24 +685,41 @@ copyMatDataToReordered(

 template <class T, int blocksize>
 void
-copyMatDataToReorderedSplit(
-    T* srcMatrix, int* srcRowIndices, int* srcColumnIndices, T* dstLowerMatrix, int* dstLowerRowIndices, T* dstUpperMatrix, int* dstUpperRowIndices, T* dstDiag, int* naturalToReordered, size_t numberOfRows)
+copyMatDataToReorderedSplit(T* srcMatrix,
+                            int* srcRowIndices,
+                            int* srcColumnIndices,
+                            T* dstLowerMatrix,
+                            int* dstLowerRowIndices,
+                            T* dstUpperMatrix,
+                            int* dstUpperRowIndices,
+                            T* dstDiag,
+                            int* naturalToReordered,
+                            size_t numberOfRows)
 {
     int threadBlockSize = getCudaRecomendedThreadBlockSize(cuComputeLowerSolveLevelSetSplit<T, blocksize>);
     int nThreadBlocks = getNumberOfBlocks(numberOfRows, threadBlockSize);
-    cuMoveDataToReorderedSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(
-        srcMatrix, srcRowIndices, srcColumnIndices, dstLowerMatrix, dstLowerRowIndices, dstUpperMatrix, dstUpperRowIndices, dstDiag, naturalToReordered, numberOfRows);
+    cuMoveDataToReorderedSplit<T, blocksize><<<nThreadBlocks, threadBlockSize>>>(srcMatrix,
+                                                                                 srcRowIndices,
+                                                                                 srcColumnIndices,
+                                                                                 dstLowerMatrix,
+                                                                                 dstLowerRowIndices,
+                                                                                 dstUpperMatrix,
+                                                                                 dstUpperRowIndices,
+                                                                                 dstDiag,
+                                                                                 naturalToReordered,
+                                                                                 numberOfRows);
 }

-#define INSTANTIATE_KERNEL_WRAPPERS(T, blocksize)                                                                   \
-    template void invertDiagonalAndFlatten<T, blocksize>(T*, int*, int*, size_t, T*);                               \
-    template void copyMatDataToReordered<T, blocksize>(T*, int*, T*, int*, int*, size_t);                           \
-    template void copyMatDataToReorderedSplit<T, blocksize>(T*, int*, int*, T*, int*, T*, int*, T*, int*, size_t);  \
-    template void computeDiluDiagonal<T, blocksize>(T*, int*, int*, int*, int*, const int, int, T*);                \
-    template void computeDiluDiagonalSplit<T, blocksize>(T*, int*, int*, T*, int*, int*, T*, int*, int*, const int, int, T*);\
-    template void computeUpperSolveLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*);            \
-    template void computeLowerSolveLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, const T*, const T*, T*);  \
-    template void computeUpperSolveLevelSetSplit<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*);       \
+#define INSTANTIATE_KERNEL_WRAPPERS(T, blocksize)                                                                      \
+    template void invertDiagonalAndFlatten<T, blocksize>(T*, int*, int*, size_t, T*);                                  \
+    template void copyMatDataToReordered<T, blocksize>(T*, int*, T*, int*, int*, size_t);                              \
+    template void copyMatDataToReorderedSplit<T, blocksize>(T*, int*, int*, T*, int*, T*, int*, T*, int*, size_t);     \
+    template void computeDiluDiagonal<T, blocksize>(T*, int*, int*, int*, int*, const int, int, T*);                   \
+    template void computeDiluDiagonalSplit<T, blocksize>(                                                              \
+        T*, int*, int*, T*, int*, int*, T*, int*, int*, const int, int, T*);                                           \
+    template void computeUpperSolveLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*);               \
+    template void computeLowerSolveLevelSet<T, blocksize>(T*, int*, int*, int*, int, int, const T*, const T*, T*);     \
+    template void computeUpperSolveLevelSetSplit<T, blocksize>(T*, int*, int*, int*, int, int, const T*, T*);          \
     template void computeLowerSolveLevelSetSplit<T, blocksize>(T*, int*, int*, int*, int, int, const T*, const T*, T*);

 INSTANTIATE_KERNEL_WRAPPERS(float, 1);
@@ -24,12 +24,12 @@
 #include <dune/common/fmatrix.hh>
 #include <dune/istl/bcrsmatrix.hh>
 #include <memory>
+#include <opm/simulators/linalg/DILU.hpp>
 #include <opm/simulators/linalg/cuistl/CuDILU.hpp>
 #include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
 #include <opm/simulators/linalg/cuistl/CuVector.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
 #include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
-#include <opm/simulators/linalg/DILU.hpp>
 #include <random>
 #include <vector>

@@ -44,47 +44,49 @@ using Sp2x2BlockMatrix = Dune::BCRSMatrix<FM2x2>;
 using CuMatrix = Opm::cuistl::CuSparseMatrix<T>;
 using CuIntVec = Opm::cuistl::CuVector<int>;
 using CuFloatingPointVec = Opm::cuistl::CuVector<T>;
 using CuDilu1x1 = Opm::cuistl::CuDILU<Sp1x1BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;
 using CuDilu2x2 = Opm::cuistl::CuDILU<Sp2x2BlockMatrix, CuFloatingPointVec, CuFloatingPointVec>;

-Sp1x1BlockMatrix get1x1BlockTestMatrix(){
+Sp1x1BlockMatrix
+get1x1BlockTestMatrix()
+{
     /*
        matA:
        1 2 0 3 0 0
        4 5 0 6 0 7
        0 0 8 0 0 0
        9 10 0 11 12 0
        0 0 0 13 14 0
        0 15 0 0 0 16

        Expected reordering:
        1 2 0 3 0 0
        0 0 8 0 0 0
        4 5 0 6 0 7
        9 10 0 11 12 0
        0 15 0 0 0 16
        0 0 0 13 14 0

        Expected lowerTriangularReorderedMatrix:
        0 0 0 0 0 0
        0 0 0 0 0 0
        4 0 0 0 0 0
        9 10 0 0 0 0
        0 15 0 0 0 0
        0 0 0 13 0 0

        Expected upperTriangularReorderedMatrix:
        0 2 0 3 0 0
        0 0 0 0 0 0
        0 0 0 6 0 7
        0 0 0 0 12 0
        0 0 0 0 0 0
     */

     const int N = 6;
     const int nonZeroes = 16;

-    //Create the Dune A matrix
+    // Create the Dune A matrix
     Sp1x1BlockMatrix matA(N, N, nonZeroes, Sp1x1BlockMatrix::row_wise);
     for (auto row = matA.createbegin(); row != matA.createend(); ++row) {
         row.insert(row.index());
@@ -132,7 +134,9 @@ Sp1x1BlockMatrix get1x1BlockTestMatrix(){
     return matA;
 }

-Sp2x2BlockMatrix get2x2BlockTestMatrix(){
+Sp2x2BlockMatrix
+get2x2BlockTestMatrix()
+{
     /*
        matA:
        1 2 0 3 0 0
@@ -148,7 +152,7 @@ Sp2x2BlockMatrix get2x2BlockTestMatrix(){
     const int N = 3;
     const int nonZeroes = 9;

-    //Create the Dune A matrix
+    // Create the Dune A matrix
     Sp2x2BlockMatrix matA(N, N, nonZeroes, Sp2x2BlockMatrix::row_wise);
     for (auto row = matA.createbegin(); row != matA.createend(); ++row) {
         row.insert(row.index());
@@ -215,13 +219,13 @@ BOOST_AUTO_TEST_CASE(TestDiluApply)

     // put results in std::vector
     std::vector<T> cpudilures;
-    for (auto e : h_output){
+    for (auto e : h_output) {
         cpudilures.push_back(e);
     }
     auto cudilures = d_output.asStdVector();

     // check that CuDilu results matches that of CPU dilu
-    for (size_t i = 0; i < cudilures.size(); ++i){
+    for (size_t i = 0; i < cudilures.size(); ++i) {
         BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7);
     }
 }
@@ -255,14 +259,14 @@ BOOST_AUTO_TEST_CASE(TestDiluApplyBlocked)

     auto cudilures = d_output.asStdVector();
     std::vector<T> cpudilures;
-    for (auto v : h_output){
-        for (auto e : v){
+    for (auto v : h_output) {
+        for (auto e : v) {
             cpudilures.push_back(e);
         }
     }

     // check that the values are close
-    for (size_t i = 0; i < cudilures.size(); ++i){
+    for (size_t i = 0; i < cudilures.size(); ++i) {
         BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7);
     }
 }
@@ -316,13 +320,13 @@ BOOST_AUTO_TEST_CASE(TestDiluInitAndUpdateLarge)

     // put results in std::vector
     std::vector<T> cpudilures;
-    for (auto e : h_output){
+    for (auto e : h_output) {
         cpudilures.push_back(e);
     }
     auto cudilures = d_output.asStdVector();

     // check that CuDilu results matches that of CPU dilu
-    for (size_t i = 0; i < cudilures.size(); ++i){
+    for (size_t i = 0; i < cudilures.size(); ++i) {
         BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7);
     }
 }