mirror of
https://github.com/OPM/opm-simulators.git
synced 2024-12-22 07:23:27 -06:00
Merge pull request #4852 from multitalentloes/cujac_preconditioner
Cujac preconditioner
This commit is contained in:
commit
8b480a0a49
@ -659,7 +659,10 @@ if(CUDA_FOUND)
|
||||
cuda_safe_call
|
||||
cuda_check_last_error
|
||||
cublas_handle
|
||||
cujac
|
||||
cusparse_handle
|
||||
cuSparse_matrix_operations
|
||||
cuVector_operations
|
||||
cuvector
|
||||
cusparsematrix
|
||||
cuseqilu0
|
||||
|
@ -153,20 +153,24 @@ endif()
|
||||
if(CUDA_FOUND)
|
||||
# CUISTL SOURCE
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/CuBlasHandle.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.cu)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/CuSparseHandle.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuVector.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/vector_operations.cu)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuSparseMatrix.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuJac.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuSeqILU0.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/set_device.cpp)
|
||||
|
||||
# CUISTL HEADERS
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cuda_check_last_error.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuJac.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuVector.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuSparseMatrix.hpp)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp)
|
||||
@ -284,18 +288,21 @@ if(CUDA_FOUND)
|
||||
if(USE_BDA_BRIDGE)
|
||||
list(APPEND TEST_SOURCE_FILES tests/test_cusparseSolver.cpp)
|
||||
endif()
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparse_safe_call.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_converttofloatadapter.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cublas_handle.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cublas_safe_call.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparse_safe_call.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuda_safe_call.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuda_check_last_error.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cublas_handle.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparse_handle.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuvector.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparsematrix.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_safe_conversion.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuseqilu0.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_converttofloatadapter.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cujac.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuowneroverlapcopy.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuseqilu0.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparse_handle.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuSparse_matrix_operations.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparsematrix.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuvector.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuVector_operations.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_safe_conversion.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/cuistl/test_solver_adapter.cpp)
|
||||
|
||||
|
||||
|
@ -47,6 +47,7 @@
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuJac.hpp>
|
||||
|
||||
#endif
|
||||
|
||||
@ -239,6 +240,17 @@ struct StandardPreconditioners
|
||||
auto wrapped = std::make_shared<Opm::cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
|
||||
return wrapped;
|
||||
});
|
||||
|
||||
F::addCreator("CUJac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
|
||||
const double w = prm.get<double>("relaxation", 1.0);
|
||||
using field_type = typename V::field_type;
|
||||
using CuJac = typename Opm::cuistl::CuJac<M, Opm::cuistl::CuVector<field_type>, Opm::cuistl::CuVector<field_type>>;
|
||||
auto cuJac = std::make_shared<CuJac>(op.getmat(), w);
|
||||
|
||||
auto adapted = std::make_shared<Opm::cuistl::PreconditionerAdapter<V, V, CuJac>>(cuJac);
|
||||
auto wrapped = std::make_shared<Opm::cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm);
|
||||
return wrapped;
|
||||
});
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -445,6 +457,13 @@ struct StandardPreconditioners<Operator,Dune::Amg::SequentialInformation>
|
||||
return converted;
|
||||
|
||||
});
|
||||
|
||||
F::addCreator("CUJac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
|
||||
const double w = prm.get<double>("relaxation", 1.0);
|
||||
using field_type = typename V::field_type;
|
||||
using CUJac = typename Opm::cuistl::CuJac<M, Opm::cuistl::CuVector<field_type>, Opm::cuistl::CuVector<field_type>>;
|
||||
return std::make_shared<Opm::cuistl::PreconditionerAdapter<V, V, CUJac>>(std::make_shared<CUJac>(op.getmat(), w));
|
||||
});
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
148
opm/simulators/linalg/cuistl/CuJac.cpp
Normal file
148
opm/simulators/linalg/cuistl/CuJac.cpp
Normal file
@ -0,0 +1,148 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <cusparse.h>
|
||||
#include <dune/common/fmatrix.hh>
|
||||
#include <dune/common/fvector.hh>
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <dune/istl/bvector.hh>
|
||||
#include <fmt/core.h>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuJac.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
|
||||
#include <opm/simulators/linalg/matrixblock.hh>
|
||||
|
||||
namespace Opm::cuistl
|
||||
{
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
CuJac<M, X, Y, l>::CuJac(const M& A, field_type w)
|
||||
: m_cpuMatrix(A)
|
||||
, m_relaxationFactor(w)
|
||||
, m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(A))
|
||||
, m_diagInvFlattened(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize())
|
||||
{
|
||||
// Some sanity check
|
||||
OPM_ERROR_IF(A.N() != m_gpuMatrix.N(),
|
||||
fmt::format("CuSparse matrix not same size as DUNE matrix. {} vs {}.", m_gpuMatrix.N(), A.N()));
|
||||
OPM_ERROR_IF(A[0][0].N() != m_gpuMatrix.blockSize(),
|
||||
fmt::format("CuSparse matrix not same blocksize as DUNE matrix. {} vs {}.",
|
||||
m_gpuMatrix.blockSize(),
|
||||
A[0][0].N()));
|
||||
OPM_ERROR_IF(A.N() * A[0][0].N() != m_gpuMatrix.dim(),
|
||||
fmt::format("CuSparse matrix not same dimension as DUNE matrix. {} vs {}.",
|
||||
m_gpuMatrix.dim(),
|
||||
A.N() * A[0][0].N()));
|
||||
OPM_ERROR_IF(A.nonzeroes() != m_gpuMatrix.nonzeroes(),
|
||||
fmt::format("CuSparse matrix not same number of non zeroes as DUNE matrix. {} vs {}. ",
|
||||
m_gpuMatrix.nonzeroes(),
|
||||
A.nonzeroes()));
|
||||
|
||||
// Compute the inverted diagonal of A and store it in a vector format in m_diagInvFlattened
|
||||
detail::invertDiagonalAndFlatten(m_gpuMatrix.getNonZeroValues().data(),
|
||||
m_gpuMatrix.getRowIndices().data(),
|
||||
m_gpuMatrix.getColumnIndices().data(),
|
||||
m_gpuMatrix.N(),
|
||||
m_gpuMatrix.blockSize(),
|
||||
m_diagInvFlattened.data());
|
||||
}
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuJac<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
|
||||
{
|
||||
}
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuJac<M, X, Y, l>::apply(X& v, const Y& d)
|
||||
{
|
||||
// Jacobi preconditioner: x_{n+1} = x_n + w * (D^-1 * (b - Ax_n) )
|
||||
// Working with defect d and update v it we only need to set v = w*(D^-1)*d
|
||||
|
||||
// Compute the MV product where the matrix is diagonal and therefore stored as a vector.
|
||||
// The product is thus computed as a hadamard product.
|
||||
detail::weightedDiagMV(m_diagInvFlattened.data(),
|
||||
m_gpuMatrix.N(),
|
||||
m_gpuMatrix.blockSize(),
|
||||
m_relaxationFactor,
|
||||
d.data(),
|
||||
v.data());
|
||||
}
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuJac<M, X, Y, l>::post([[maybe_unused]] X& x)
|
||||
{
|
||||
}
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
Dune::SolverCategory::Category
|
||||
CuJac<M, X, Y, l>::category() const
|
||||
{
|
||||
return Dune::SolverCategory::sequential;
|
||||
}
|
||||
|
||||
template <class M, class X, class Y, int l>
|
||||
void
|
||||
CuJac<M, X, Y, l>::update()
|
||||
{
|
||||
m_gpuMatrix.updateNonzeroValues(m_cpuMatrix);
|
||||
detail::invertDiagonalAndFlatten(m_gpuMatrix.getNonZeroValues().data(),
|
||||
m_gpuMatrix.getRowIndices().data(),
|
||||
m_gpuMatrix.getColumnIndices().data(),
|
||||
m_gpuMatrix.N(),
|
||||
m_gpuMatrix.blockSize(),
|
||||
m_diagInvFlattened.data());
|
||||
}
|
||||
|
||||
} // namespace Opm::cuistl
|
||||
#define INSTANTIATE_CUJAC_DUNE(realtype, blockdim) \
|
||||
template class ::Opm::cuistl::CuJac<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
|
||||
::Opm::cuistl::CuVector<realtype>, \
|
||||
::Opm::cuistl::CuVector<realtype>>; \
|
||||
template class ::Opm::cuistl::CuJac<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
|
||||
::Opm::cuistl::CuVector<realtype>, \
|
||||
::Opm::cuistl::CuVector<realtype>>
|
||||
|
||||
INSTANTIATE_CUJAC_DUNE(double, 1);
|
||||
INSTANTIATE_CUJAC_DUNE(double, 2);
|
||||
INSTANTIATE_CUJAC_DUNE(double, 3);
|
||||
INSTANTIATE_CUJAC_DUNE(double, 4);
|
||||
INSTANTIATE_CUJAC_DUNE(double, 5);
|
||||
INSTANTIATE_CUJAC_DUNE(double, 6);
|
||||
|
||||
INSTANTIATE_CUJAC_DUNE(float, 1);
|
||||
INSTANTIATE_CUJAC_DUNE(float, 2);
|
||||
INSTANTIATE_CUJAC_DUNE(float, 3);
|
||||
INSTANTIATE_CUJAC_DUNE(float, 4);
|
||||
INSTANTIATE_CUJAC_DUNE(float, 5);
|
||||
INSTANTIATE_CUJAC_DUNE(float, 6);
|
109
opm/simulators/linalg/cuistl/CuJac.hpp
Normal file
109
opm/simulators/linalg/cuistl/CuJac.hpp
Normal file
@ -0,0 +1,109 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUJAC_HPP
|
||||
#define OPM_CUJAC_HPP
|
||||
|
||||
#include <dune/istl/preconditioner.hh>
|
||||
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp>
|
||||
|
||||
|
||||
|
||||
namespace Opm::cuistl
|
||||
{
|
||||
//! \brief Jacobi preconditioner on the GPU.
|
||||
//!
|
||||
//! \note This is a fast but weak preconditioner
|
||||
//!
|
||||
//! \tparam M The matrix type to operate on
|
||||
//! \tparam X Type of the update
|
||||
//! \tparam Y Type of the defect
|
||||
//! \tparam l Ignored. Just there to have the same number of template arguments
|
||||
//! as other preconditioners.
|
||||
//!
|
||||
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
|
||||
//! arguments in case of future additions.
|
||||
template <class M, class X, class Y, int l = 1>
|
||||
class CuJac : public Dune::PreconditionerWithUpdate<X, Y>
|
||||
{
|
||||
public:
|
||||
//! \brief The matrix type the preconditioner is for.
|
||||
using matrix_type = typename std::remove_const<M>::type;
|
||||
//! \brief The domain type of the preconditioner.
|
||||
using domain_type = X;
|
||||
//! \brief The range type of the preconditioner.
|
||||
using range_type = Y;
|
||||
//! \brief The field type of the preconditioner.
|
||||
using field_type = typename X::field_type;
|
||||
|
||||
//! \brief Constructor.
|
||||
//!
|
||||
//! Constructor gets all parameters to operate the prec.
|
||||
//! \param A The matrix to operate on.
|
||||
//! \param w The relaxation factor.
|
||||
//!
|
||||
CuJac(const M& A, field_type w);
|
||||
|
||||
//! \brief Prepare the preconditioner.
|
||||
//! \note Does nothing at the time being.
|
||||
virtual void pre(X& x, Y& b) override;
|
||||
|
||||
//! \brief Apply the preconditoner.
|
||||
virtual void apply(X& v, const Y& d) override;
|
||||
|
||||
//! \brief Post processing
|
||||
//! \note Does nothing at the moment
|
||||
virtual void post(X& x) override;
|
||||
|
||||
//! Category of the preconditioner (see SolverCategory::Category)
|
||||
virtual Dune::SolverCategory::Category category() const override;
|
||||
|
||||
//! \brief Updates the matrix data.
|
||||
virtual void update() override;
|
||||
|
||||
|
||||
//! \returns false
|
||||
static constexpr bool shouldCallPre()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
//! \returns false
|
||||
static constexpr bool shouldCallPost()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
//! \brief Reference to the underlying matrix
|
||||
const M& m_cpuMatrix;
|
||||
//! \brief The relaxation factor to use.
|
||||
const field_type m_relaxationFactor;
|
||||
//! \brief The A matrix stored on the gpu
|
||||
CuSparseMatrix<field_type> m_gpuMatrix;
|
||||
//! \brief the diagonal of cuMatrix inverted, and then flattened to fit in a vector
|
||||
CuVector<field_type> m_diagInvFlattened;
|
||||
};
|
||||
} // end namespace Opm::cuistl
|
||||
|
||||
#endif
|
@ -26,6 +26,7 @@
|
||||
#include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
|
||||
namespace Opm::cuistl
|
||||
@ -361,6 +362,18 @@ public:
|
||||
*/
|
||||
void setZeroAtIndexSet(const CuVector<int>& indexSet);
|
||||
|
||||
// Slow method that creates a string representation of a CuVector for debug purposes
|
||||
std::string toDebugString()
|
||||
{
|
||||
std::vector<T> v = asStdVector();
|
||||
std::string res = "";
|
||||
for (T element : v){
|
||||
res += std::to_string(element) + " ";
|
||||
}
|
||||
res += std::to_string(v[v.size()-1]);
|
||||
return res;
|
||||
}
|
||||
|
||||
private:
|
||||
T* m_dataOnDevice = nullptr;
|
||||
|
||||
@ -374,5 +387,6 @@ private:
|
||||
|
||||
void assertHasElements() const;
|
||||
};
|
||||
|
||||
} // namespace Opm::cuistl
|
||||
#endif
|
||||
|
@ -0,0 +1,174 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
|
||||
#include <stdexcept>
|
||||
namespace Opm::cuistl::detail
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
// TODO: combine the three following kernels into one using template arguments so that
|
||||
// no compile time optimization is lost while making the code more compact
|
||||
template <class T>
|
||||
__global__ void cuInvertDiagonalAndFlattenBlocksize1(
|
||||
T* matNonZeroValues, int* rowIndices, int* colIndices, size_t numberOfRows, T* vec)
|
||||
{
|
||||
const auto row = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int blocksize = 1;
|
||||
|
||||
if (row < numberOfRows) {
|
||||
size_t nnzIdx = rowIndices[row];
|
||||
size_t nnzIdxLim = rowIndices[row + 1];
|
||||
|
||||
// this loop will cause some extra checks that we are within the limit in the case of the diagonal having a
|
||||
// zero element
|
||||
while (colIndices[nnzIdx] != row && nnzIdx <= nnzIdxLim) {
|
||||
++nnzIdx;
|
||||
}
|
||||
|
||||
// diagBlock points to the start of where the diagonal block is stored
|
||||
T* diagBlock = &matNonZeroValues[blocksize * blocksize * nnzIdx];
|
||||
// vecBlock points to the start of the block element in the vector where the inverse of the diagonal block
|
||||
// element should be stored
|
||||
T* vecBlock = &vec[blocksize * blocksize * row];
|
||||
|
||||
vecBlock[0] = 1.0 / (diagBlock[0]);
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void cuInvertDiagonalAndFlattenBlocksize2(
|
||||
T* matNonZeroValues, int* rowIndices, int* colIndices, size_t numberOfRows, T* vec)
|
||||
{
|
||||
const auto row = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int blocksize = 2;
|
||||
|
||||
if (row < numberOfRows) {
|
||||
size_t nnzIdx = rowIndices[row];
|
||||
size_t nnzIdxLim = rowIndices[row + 1];
|
||||
|
||||
// this loop will cause some extra checks that we are within the limit in the case of the diagonal having a
|
||||
// zero element
|
||||
while (colIndices[nnzIdx] != row && nnzIdx <= nnzIdxLim) {
|
||||
++nnzIdx;
|
||||
}
|
||||
|
||||
// diagBlock points to the start of where the diagonal block is stored
|
||||
T* diagBlock = &matNonZeroValues[blocksize * blocksize * nnzIdx];
|
||||
// vecBlock points to the start of the block element in the vector where the inverse of the diagonal block
|
||||
// element should be stored
|
||||
T* vecBlock = &vec[blocksize * blocksize * row];
|
||||
|
||||
// based on Dune implementation
|
||||
T detInv = 1.0 / (diagBlock[0] * diagBlock[3] - diagBlock[1] * diagBlock[2]);
|
||||
vecBlock[0] = diagBlock[3] * detInv;
|
||||
vecBlock[1] = -diagBlock[1] * detInv;
|
||||
vecBlock[2] = -diagBlock[2] * detInv;
|
||||
vecBlock[3] = diagBlock[0] * detInv;
|
||||
}
|
||||
}
|
||||
template <class T>
|
||||
__global__ void cuInvertDiagonalAndFlattenBlocksize3(
|
||||
T* matNonZeroValues, int* rowIndices, int* colIndices, size_t numberOfRows, T* vec)
|
||||
{
|
||||
const auto row = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int blocksize = 3;
|
||||
|
||||
if (row < numberOfRows) {
|
||||
size_t nnzIdx = rowIndices[row];
|
||||
size_t nnzIdxLim = rowIndices[row + 1];
|
||||
|
||||
// this loop will cause some extra checks that we are within the limit in the case of the diagonal having a
|
||||
// zero element
|
||||
while (colIndices[nnzIdx] != row && nnzIdx <= nnzIdxLim) {
|
||||
++nnzIdx;
|
||||
}
|
||||
|
||||
// diagBlock points to the start of where the diagonal block is stored
|
||||
T* diagBlock = &matNonZeroValues[blocksize * blocksize * nnzIdx];
|
||||
// vecBlock points to the start of the block element in the vector where the inverse of the diagonal block
|
||||
// element should be stored
|
||||
T* vecBlock = &vec[blocksize * blocksize * row];
|
||||
|
||||
// based on Dune implementation
|
||||
T t4 = diagBlock[0] * diagBlock[4];
|
||||
T t6 = diagBlock[0] * diagBlock[5];
|
||||
T t8 = diagBlock[1] * diagBlock[3];
|
||||
T t10 = diagBlock[2] * diagBlock[3];
|
||||
T t12 = diagBlock[1] * diagBlock[6];
|
||||
T t14 = diagBlock[2] * diagBlock[6];
|
||||
|
||||
T t17 = 1.0
|
||||
/ (t4 * diagBlock[8] - t6 * diagBlock[7] - t8 * diagBlock[8] + t10 * diagBlock[7] + t12 * diagBlock[5]
|
||||
- t14 * diagBlock[4]); // t17 is 1/determinant
|
||||
|
||||
vecBlock[0] = (diagBlock[4] * diagBlock[8] - diagBlock[5] * diagBlock[7]) * t17;
|
||||
vecBlock[1] = -(diagBlock[1] * diagBlock[8] - diagBlock[2] * diagBlock[7]) * t17;
|
||||
vecBlock[2] = (diagBlock[1] * diagBlock[5] - diagBlock[2] * diagBlock[4]) * t17;
|
||||
vecBlock[3] = -(diagBlock[3] * diagBlock[8] - diagBlock[5] * diagBlock[6]) * t17;
|
||||
vecBlock[4] = (diagBlock[0] * diagBlock[8] - t14) * t17;
|
||||
vecBlock[5] = -(t6 - t10) * t17;
|
||||
vecBlock[6] = (diagBlock[3] * diagBlock[7] - diagBlock[4] * diagBlock[6]) * t17;
|
||||
vecBlock[7] = -(diagBlock[0] * diagBlock[7] - t12) * t17;
|
||||
vecBlock[8] = (t4 - t8) * t17;
|
||||
}
|
||||
}
|
||||
|
||||
constexpr inline size_t getThreads([[maybe_unused]] size_t numberOfRows)
|
||||
{
|
||||
return 1024;
|
||||
}
|
||||
|
||||
inline size_t getBlocks(size_t numberOfRows)
|
||||
{
|
||||
const auto threads = getThreads(numberOfRows);
|
||||
return (numberOfRows + threads - 1) / threads;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
template <class T>
|
||||
void
|
||||
invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t numberOfRows, size_t blocksize, T* vec)
|
||||
{
|
||||
switch (blocksize) {
|
||||
case 1:
|
||||
cuInvertDiagonalAndFlattenBlocksize1<<<getBlocks(numberOfRows), getThreads(numberOfRows)>>>(
|
||||
mat, rowIndices, colIndices, numberOfRows, vec);
|
||||
break;
|
||||
case 2:
|
||||
cuInvertDiagonalAndFlattenBlocksize2<<<getBlocks(numberOfRows), getThreads(numberOfRows)>>>(
|
||||
mat, rowIndices, colIndices, numberOfRows, vec);
|
||||
break;
|
||||
case 3:
|
||||
cuInvertDiagonalAndFlattenBlocksize3<<<getBlocks(numberOfRows), getThreads(numberOfRows)>>>(
|
||||
mat, rowIndices, colIndices, numberOfRows, vec);
|
||||
break;
|
||||
default:
|
||||
// TODO: Figure out what is why it did not produce an error or any output in the output stream or the DBG file
|
||||
// when I forced this case to execute
|
||||
OPM_THROW(std::invalid_argument, "Inverting diagonal is not implemented for blocksizes > 3");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template void invertDiagonalAndFlatten(double*, int*, int*, size_t, size_t, double*);
|
||||
template void invertDiagonalAndFlatten(float*, int*, int*, size_t, size_t, float*);
|
||||
|
||||
} // namespace Opm::cuistl::detail
|
@ -0,0 +1,39 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#ifndef OPM_CUISTL_CUSPARSE_MATRIX_OPERATIONS_HPP
|
||||
#define OPM_CUISTL_CUSPARSE_MATRIX_OPERATIONS_HPP
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
namespace Opm::cuistl::detail
|
||||
{
|
||||
|
||||
/**
|
||||
* @brief This function receives a matrix, and the inverse of the matrix containing only its diagonal is stored in d_vec
|
||||
* @param mat pointer to GPU memory containing nonzerovalues of the sparse matrix
|
||||
* @param rowIndices Pointer to vector on GPU containing row indices compliant wiht bsr format
|
||||
* @param colIndices Pointer to vector on GPU containing col indices compliant wiht bsr format
|
||||
* @param numberOfRows Integer describing the number of rows in the matrix
|
||||
* @param blocksize Integer describing the sidelength of a block in the sparse matrix
|
||||
* @param[out] vec Pointer to the vector where the inverse of the diagonal matrix should be stored
|
||||
*/
|
||||
template <class T>
|
||||
void invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t numberOfRows, size_t blocksize, T* vec);
|
||||
|
||||
} // namespace Opm::cuistl::detail
|
||||
#endif
|
@ -16,8 +16,10 @@
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
|
||||
// TODO: [perf] Get rid of thrust.
|
||||
#include <stdexcept>
|
||||
#include <thrust/device_ptr.h>
|
||||
#include <thrust/reduce.h>
|
||||
namespace Opm::cuistl::detail
|
||||
@ -45,6 +47,27 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
template <class T, int blocksize>
|
||||
__global__ void weightedDiagMV(
|
||||
const T* squareBlockVector, const size_t numberOfElements, const T w, const T* globalSrcVec, T* globalDstVec)
|
||||
{
|
||||
const auto globalIndex = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
if (globalIndex < numberOfElements) {
|
||||
const T* localBlock = (squareBlockVector + (blocksize * blocksize * globalIndex));
|
||||
const T* localSrcVec = (globalSrcVec + (blocksize * globalIndex));
|
||||
T* localDstVec = (globalDstVec + (blocksize * globalIndex));
|
||||
|
||||
for (int i = 0; i < blocksize; ++i) {
|
||||
T rowResult = 0.0;
|
||||
for (int j = 0; j < blocksize; ++j) {
|
||||
rowResult += localBlock[i * blocksize + j] * localSrcVec[j];
|
||||
}
|
||||
localDstVec[i] = rowResult * w;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void
|
||||
elementWiseMultiplyKernel(const T* a, const T* b, T* buffer, size_t numberOfElements, const int* indices)
|
||||
@ -109,4 +132,36 @@ template double innerProductAtIndices(const double*, const double*, double* buff
|
||||
template float innerProductAtIndices(const float*, const float*, float* buffer, size_t, const int*);
|
||||
template int innerProductAtIndices(const int*, const int*, int* buffer, size_t, const int*);
|
||||
|
||||
} // namespace Opm::cuistl::impl
|
||||
|
||||
template <class T>
|
||||
void
|
||||
weightedDiagMV(const T* squareBlockVector,
|
||||
const size_t numberOfElements,
|
||||
const size_t blocksize,
|
||||
T relaxationFactor,
|
||||
const T* srcVec,
|
||||
T* dstVec)
|
||||
{
|
||||
switch (blocksize) {
|
||||
case 1:
|
||||
weightedDiagMV<T, 1><<<getBlocks(numberOfElements), getThreads(numberOfElements)>>>(
|
||||
squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
|
||||
break;
|
||||
case 2:
|
||||
weightedDiagMV<T, 2><<<getBlocks(numberOfElements), getThreads(numberOfElements)>>>(
|
||||
squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
|
||||
break;
|
||||
case 3:
|
||||
weightedDiagMV<T, 3><<<getBlocks(numberOfElements), getThreads(numberOfElements)>>>(
|
||||
squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec);
|
||||
break;
|
||||
default:
|
||||
OPM_THROW(std::invalid_argument, "blockvector Hadamard product not implemented for blocksize>3");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template void weightedDiagMV(const double*, const size_t, const size_t, double, const double*, double*);
|
||||
template void weightedDiagMV(const float*, const size_t, const size_t, float, const float*, float*);
|
||||
|
||||
} // namespace Opm::cuistl::detail
|
||||
|
@ -54,5 +54,25 @@ void setZeroAtIndexSet(T* deviceData, size_t numberOfElements, const int* indice
|
||||
*/
|
||||
template <class T>
|
||||
T innerProductAtIndices(const T* deviceA, const T* deviceB, T* buffer, size_t numberOfElements, const int* indices);
|
||||
|
||||
/**
|
||||
* @brief Compue the weighted matrix vector product where the matrix is diagonal, the diagonal is a vector, meaning we
|
||||
* compute the Hadamard product.
|
||||
* @param squareBlockVector A CuVector whose elements are NxN matrix blocks
|
||||
* @param numberOfRows The number of rows in the vector
|
||||
* @param blocksize The sidelength of the square block elements in the vector
|
||||
* @param src_vec A pointer to the data of the CuVector we multiply the blockvector with
|
||||
* @param[out] dst_vec A pointer to the data of the CuVector we store the result in
|
||||
*
|
||||
* @note This is implemented as a faster way to multiply a diagonal matrix with a blockvector. We need only store the
|
||||
* diagonal of the matrix and use this product.
|
||||
*/
|
||||
template <class T>
|
||||
void weightedDiagMV(const T* squareBlockVector,
|
||||
const size_t numberOfRows,
|
||||
const size_t blocksize,
|
||||
T relaxationFactor,
|
||||
const T* srcVec,
|
||||
T* dstVec);
|
||||
} // namespace Opm::cuistl::detail
|
||||
#endif
|
||||
|
182
tests/cuistl/test_cuSparse_matrix_operations.cpp
Normal file
182
tests/cuistl/test_cuSparse_matrix_operations.cpp
Normal file
@ -0,0 +1,182 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCuSparseMatrixOperations
|
||||
#include <boost/mpl/list.hpp>
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <cuda_runtime.h>
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp>
|
||||
|
||||
using NumericTypes = boost::mpl::list<double, float>;
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith3By3Blocks, T, NumericTypes)
|
||||
{
|
||||
const size_t blocksize = 3;
|
||||
const size_t N = 2;
|
||||
const int nonZeroes = 3;
|
||||
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
|
||||
using SpMatrix = Dune::BCRSMatrix<M>;
|
||||
/*
|
||||
create this sparse matrix
|
||||
| |1 2 3| | 1 0 0| |
|
||||
| |5 2 3| | 0 1 0| |
|
||||
| |2 1 1| | 0 0 1| |
|
||||
| |
|
||||
| |0 0 0| |-1 0 0| |
|
||||
| |0 0 0| | 0 -1 0| |
|
||||
| |0 0 0| | 0 0 -1| |
|
||||
|
||||
The diagonal elements inverted, and put in a vector should look like this
|
||||
| |-1/4 1/4 0| |
|
||||
| | 1/4 -4/5 3| |
|
||||
| | 1/4 3/4 -2| |
|
||||
| |
|
||||
| | -1 0 0| |
|
||||
| | 0 -1 0| |
|
||||
| | 0 0 -1| |
|
||||
*/
|
||||
|
||||
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
|
||||
for (auto row = B.createbegin(); row != B.createend(); ++row) {
|
||||
row.insert(row.index());
|
||||
if (row.index() == 0) {
|
||||
row.insert(row.index() + 1);
|
||||
}
|
||||
}
|
||||
|
||||
B[0][0][0][0] = 1.0;
|
||||
B[0][0][0][1] = 2.0;
|
||||
B[0][0][0][2] = 3.0;
|
||||
B[0][0][1][0] = 5.0;
|
||||
B[0][0][1][1] = 2.0;
|
||||
B[0][0][1][2] = 3.0;
|
||||
B[0][0][2][0] = 2.0;
|
||||
B[0][0][2][1] = 1.0;
|
||||
B[0][0][2][2] = 1.0;
|
||||
|
||||
B[0][1][0][0] = 1.0;
|
||||
B[0][1][1][1] = 1.0;
|
||||
B[0][1][2][2] = 1.0;
|
||||
|
||||
B[1][1][0][0] = -1.0;
|
||||
B[1][1][1][1] = -1.0;
|
||||
B[1][1][2][2] = -1.0;
|
||||
|
||||
Opm::cuistl::CuSparseMatrix<T> m = Opm::cuistl::CuSparseMatrix<T>::fromMatrix(B);
|
||||
Opm::cuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
|
||||
|
||||
Opm::cuistl::detail::invertDiagonalAndFlatten(m.getNonZeroValues().data(),
|
||||
m.getRowIndices().data(),
|
||||
m.getColumnIndices().data(),
|
||||
N,
|
||||
blocksize,
|
||||
dInvDiag.data());
|
||||
|
||||
std::vector<T> expectedInvDiag {-1.0 / 4.0,
|
||||
1.0 / 4.0,
|
||||
0.0,
|
||||
1.0 / 4.0,
|
||||
-5.0 / 4.0,
|
||||
3.0,
|
||||
1.0 / 4.0,
|
||||
3.0 / 4.0,
|
||||
-2.0,
|
||||
-1.0,
|
||||
0.0,
|
||||
0.0,
|
||||
0.0,
|
||||
-1.0,
|
||||
0.0,
|
||||
0.0,
|
||||
0.0,
|
||||
-1.0};
|
||||
std::vector<T> computedInvDiag = dInvDiag.asStdVector();
|
||||
|
||||
BOOST_REQUIRE_EQUAL(expectedInvDiag.size(), computedInvDiag.size());
|
||||
for (size_t i = 0; i < expectedInvDiag.size(); ++i) {
|
||||
BOOST_CHECK_CLOSE(expectedInvDiag[i], computedInvDiag[i], 1e-7);
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith2By2Blocks, T, NumericTypes)
|
||||
{
|
||||
const size_t blocksize = 2;
|
||||
const size_t N = 2;
|
||||
const int nonZeroes = 3;
|
||||
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
|
||||
using SpMatrix = Dune::BCRSMatrix<M>;
|
||||
/*
|
||||
create this sparse matrix
|
||||
| | 1 2| | 1 0| |
|
||||
| |1/2 2| | 0 1| |
|
||||
| |
|
||||
| | 0 0| |-1 0| |
|
||||
| | 0 0| | 0 -1| |
|
||||
|
||||
The diagonal elements inverted, and put in a vector should look like this
|
||||
| | 2 - 2| |
|
||||
| |-1/2 1| |
|
||||
| |
|
||||
| | -1 0| |
|
||||
| | 0 -1| |
|
||||
*/
|
||||
|
||||
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
|
||||
for (auto row = B.createbegin(); row != B.createend(); ++row) {
|
||||
row.insert(row.index());
|
||||
if (row.index() == 0) {
|
||||
row.insert(row.index() + 1);
|
||||
}
|
||||
}
|
||||
|
||||
B[0][0][0][0] = 1.0;
|
||||
B[0][0][0][1] = 2.0;
|
||||
B[0][0][1][0] = 1.0 / 2.0;
|
||||
B[0][0][1][1] = 2.0;
|
||||
|
||||
B[0][1][0][0] = 1.0;
|
||||
B[0][1][1][1] = 1.0;
|
||||
|
||||
B[1][1][0][0] = -1.0;
|
||||
B[1][1][1][1] = -1.0;
|
||||
|
||||
Opm::cuistl::CuSparseMatrix<T> m = Opm::cuistl::CuSparseMatrix<T>::fromMatrix(B);
|
||||
Opm::cuistl::CuVector<T> dInvDiag(blocksize * blocksize * N);
|
||||
|
||||
Opm::cuistl::detail::invertDiagonalAndFlatten(m.getNonZeroValues().data(),
|
||||
m.getRowIndices().data(),
|
||||
m.getColumnIndices().data(),
|
||||
N,
|
||||
blocksize,
|
||||
dInvDiag.data());
|
||||
|
||||
std::vector<T> expectedInvDiag {2.0, -2.0, -1.0 / 2.0, 1.0, -1.0, 0.0, 0.0, -1.0};
|
||||
std::vector<T> computedInvDiag = dInvDiag.asStdVector();
|
||||
|
||||
BOOST_REQUIRE_EQUAL(expectedInvDiag.size(), computedInvDiag.size());
|
||||
for (size_t i = 0; i < expectedInvDiag.size(); ++i) {
|
||||
BOOST_CHECK_CLOSE(expectedInvDiag[i], computedInvDiag[i], 1e-7);
|
||||
}
|
||||
}
|
98
tests/cuistl/test_cuVector_operations.cpp
Normal file
98
tests/cuistl/test_cuVector_operations.cpp
Normal file
@ -0,0 +1,98 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCuVectorOperations
|
||||
#include <boost/mpl/list.hpp>
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <cuda_runtime.h>
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <opm/simulators/linalg/cuistl/CuJac.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
|
||||
|
||||
using NumericTypes = boost::mpl::list<double, float>;
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf3By3BlockVectorAndVectorVector, T, NumericTypes)
|
||||
{
|
||||
/*
|
||||
Example in the test for multiplying by element a blockvector with a vector of vectors
|
||||
| |1 2 3| | | |3| | | |10| |
|
||||
| |5 2 3| | X | |2| | = | |22| |
|
||||
| |2 1 1| | | |1| | | |10| |
|
||||
*/
|
||||
|
||||
const size_t blocksize = 3;
|
||||
const size_t N = 1;
|
||||
const T weight = 1.0;
|
||||
|
||||
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 5.0, 2.0, 3.0, 2.0, 1.0, 2.0});
|
||||
std::vector<T> hostVecVector({3.0, 2.0, 1.0});
|
||||
std::vector<T> hostDstVector({0, 0, 0});
|
||||
Opm::cuistl::CuVector<T> deviceBlockVector(hostBlockVector);
|
||||
Opm::cuistl::CuVector<T> deviceVecVector(hostVecVector);
|
||||
Opm::cuistl::CuVector<T> deviceDstVector(hostDstVector);
|
||||
|
||||
Opm::cuistl::detail::weightedDiagMV(
|
||||
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
|
||||
|
||||
std::vector<T> expectedVec {10.0, 22.0, 10.0};
|
||||
std::vector<T> computedVec = deviceDstVector.asStdVector();
|
||||
|
||||
BOOST_REQUIRE_EQUAL(expectedVec.size(), computedVec.size());
|
||||
for (size_t i = 0; i < expectedVec.size(); i++) {
|
||||
BOOST_CHECK_CLOSE(expectedVec[i], computedVec[i], 1e-7);
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVectorVector, T, NumericTypes)
|
||||
{
|
||||
/*
|
||||
Example in the test for multiplying by element a blockvector with a vector of vectors
|
||||
| |1 2| | | |1| | | | 3.5| |
|
||||
0.5 * | |3 4| | X | |3| | = | | 7.5| |
|
||||
| | | | | |
|
||||
| |4 3| | | |2| | | |10.0| |
|
||||
| |2 1| | | |4| | | | 4.0| |
|
||||
*/
|
||||
|
||||
const size_t blocksize = 2;
|
||||
const size_t N = 2;
|
||||
const T weight = 0.5;
|
||||
|
||||
std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 4.0, 4.0, 3.0, 2.0, 1.0});
|
||||
std::vector<T> hostVecVector({1.0, 3.0, 2.0, 4.0});
|
||||
std::vector<T> hostDstVector({0, 0, 0, 0});
|
||||
Opm::cuistl::CuVector<T> deviceBlockVector(hostBlockVector);
|
||||
Opm::cuistl::CuVector<T> deviceVecVector(hostVecVector);
|
||||
Opm::cuistl::CuVector<T> deviceDstVector(hostDstVector);
|
||||
|
||||
Opm::cuistl::detail::weightedDiagMV(
|
||||
deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data());
|
||||
|
||||
std::vector<T> expectedVec {3.5, 7.5, 10.0, 4.0};
|
||||
std::vector<T> computedVec = deviceDstVector.asStdVector();
|
||||
|
||||
BOOST_REQUIRE_EQUAL(expectedVec.size(), computedVec.size());
|
||||
for (size_t i = 0; i < expectedVec.size(); i++) {
|
||||
BOOST_CHECK_CLOSE(expectedVec[i], computedVec[i], 1e-7);
|
||||
}
|
||||
}
|
148
tests/cuistl/test_cujac.cpp
Normal file
148
tests/cuistl/test_cujac.cpp
Normal file
@ -0,0 +1,148 @@
|
||||
/*
|
||||
Copyright 2022-2023 SINTEF AS
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include <config.h>
|
||||
|
||||
#define BOOST_TEST_MODULE TestCuJac
|
||||
#include <boost/mpl/list.hpp>
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <cuda_runtime.h>
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <opm/simulators/linalg/cuistl/CuJac.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp>
|
||||
#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp>
|
||||
|
||||
using NumericTypes = boost::mpl::list<double, float>;
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes)
|
||||
{
|
||||
/*
|
||||
Test data to validate jacobi preconditioner, expected result is x_1, and relaxation factor is 0.5
|
||||
| |3 1| | 1 0| | | |2| | | | 0.5| |
|
||||
| |2 1| | 0 1| | | |1| | | |-0.5| |
|
||||
A = | | d = | | v = | |
|
||||
| |0 0| |-1 0| | | |3| | | |-1.5| |
|
||||
| |0 0| | 0 -1| | | |4| | | |-2.0| |
|
||||
*/
|
||||
const int N = 2;
|
||||
const int blocksize = 2;
|
||||
const int nonZeroes = 3;
|
||||
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
|
||||
using SpMatrix = Dune::BCRSMatrix<M>;
|
||||
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
|
||||
using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
|
||||
|
||||
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
|
||||
for (auto row = B.createbegin(); row != B.createend(); ++row) {
|
||||
row.insert(row.index());
|
||||
if (row.index() == 0) {
|
||||
row.insert(row.index() + 1);
|
||||
}
|
||||
}
|
||||
|
||||
B[0][0][0][0] = 3.0;
|
||||
B[0][0][0][1] = 1.0;
|
||||
B[0][0][1][0] = 2.0;
|
||||
B[0][0][1][1] = 1.0;
|
||||
|
||||
B[0][1][0][0] = 1.0;
|
||||
B[0][1][1][1] = 1.0;
|
||||
|
||||
B[1][1][0][0] = -1.0;
|
||||
B[1][1][1][1] = -1.0;
|
||||
|
||||
auto cujac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
|
||||
|
||||
Vector vVector(2);
|
||||
Vector dVector(2);
|
||||
dVector[0][0] = 2.0;
|
||||
dVector[0][1] = 1.0;
|
||||
dVector[1][0] = 3.0;
|
||||
dVector[1][1] = 4.0;
|
||||
|
||||
const T expectedAns[2][2] = {{1.0 / 2.0, -1.0 / 2.0}, {-3.0 / 2.0, -2.0}};
|
||||
|
||||
cujac.apply(vVector, dVector);
|
||||
BOOST_CHECK_CLOSE(vVector[0][0], expectedAns[0][0], 1e-7);
|
||||
BOOST_CHECK_CLOSE(vVector[0][1], expectedAns[0][1], 1e-7);
|
||||
BOOST_CHECK_CLOSE(vVector[1][0], expectedAns[1][0], 1e-7);
|
||||
BOOST_CHECK_CLOSE(vVector[1][1], expectedAns[1][1], 1e-7);
|
||||
}
|
||||
|
||||
BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes)
|
||||
{
|
||||
/*
|
||||
Test data to validate jacobi preconditioner, expected result is x_1, relaxation factor is 0.5
|
||||
| 3 1 1 0| |1| | 1/3|
|
||||
| 2 1 0 1| |2| | 1/2|
|
||||
A = | 0 0 -1 0| d = |1| v = |-3/2|
|
||||
| 0 0 0 -1| |1| | -2|
|
||||
*/
|
||||
const int N = 4;
|
||||
const int blocksize = 1;
|
||||
const int nonZeroes = 8;
|
||||
using M = Dune::FieldMatrix<T, blocksize, blocksize>;
|
||||
using SpMatrix = Dune::BCRSMatrix<M>;
|
||||
using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>;
|
||||
using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>;
|
||||
|
||||
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
|
||||
for (auto row = B.createbegin(); row != B.createend(); ++row) {
|
||||
row.insert(row.index());
|
||||
if (row.index() == 0) {
|
||||
row.insert(row.index() + 1);
|
||||
row.insert(row.index() + 2);
|
||||
}
|
||||
if (row.index() == 1) {
|
||||
row.insert(row.index() - 1);
|
||||
row.insert(row.index() + 2);
|
||||
}
|
||||
}
|
||||
|
||||
B[0][0][0][0] = 3.0;
|
||||
B[0][1][0][0] = 1.0;
|
||||
B[0][2][0][0] = 1.0;
|
||||
|
||||
B[1][0][0][0] = 2.0;
|
||||
B[1][1][0][0] = 1.0;
|
||||
B[1][3][0][0] = 1.0;
|
||||
|
||||
B[2][2][0][0] = -1.0;
|
||||
B[3][3][0][0] = -1.0;
|
||||
|
||||
auto cujac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5));
|
||||
|
||||
Vector vVector(4);
|
||||
Vector dVector(4);
|
||||
dVector[0] = 2.0;
|
||||
dVector[1] = 1.0;
|
||||
dVector[2] = 3.0;
|
||||
dVector[3] = 4.0;
|
||||
|
||||
const T expectedAns[4] = {1.0 / 3.0, 1.0 / 2.0, -3.0 / 2.0, -2.0};
|
||||
|
||||
cujac.apply(vVector, dVector);
|
||||
BOOST_CHECK_CLOSE(vVector[0], expectedAns[0], 1e-7);
|
||||
BOOST_CHECK_CLOSE(vVector[1], expectedAns[1], 1e-7);
|
||||
BOOST_CHECK_CLOSE(vVector[2], expectedAns[2], 1e-7);
|
||||
BOOST_CHECK_CLOSE(vVector[3], expectedAns[3], 1e-7);
|
||||
}
|
Loading…
Reference in New Issue
Block a user