refactor cuseqilu0

This commit is contained in:
Tobias Meyer Andersen
2024-08-22 15:07:53 +02:00
parent 1721a1071f
commit 0c1ea3ee4d
7 changed files with 63 additions and 63 deletions

View File

@@ -555,7 +555,7 @@ if(CUDA_FOUND)
cuVector_operations
cuvector
cusparsematrix
cuseqilu0
GpuSeqILU0
GpuOwnerOverlapCopy
solver_adapter
GpuBuffer

View File

@@ -220,7 +220,7 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuDILU.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg OpmCuILU0.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuJac.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuSeqILU0.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuSeqILU0.cpp)
ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg set_device.cpp)
# HEADERS
@@ -256,7 +256,7 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/deviceBlockOperations.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/gpuThreadUtils.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerAdapter.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuSeqILU0.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuSeqILU0.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/fix_zero_diagonal.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg PreconditionerConvertFieldTypeAdapter.hpp)
ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuOwnerOverlapCopy.hpp)
@@ -397,7 +397,7 @@ if (HAVE_CUDA)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuDILU.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuJac.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuOwnerOverlapCopy.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuseqilu0.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuSeqILU0.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_handle.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuSparse_matrix_operations.cpp)
ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparsematrix.cpp)

View File

@@ -26,7 +26,7 @@
#include <opm/simulators/linalg/hipistl/GpuDILU.hpp>
#include <opm/simulators/linalg/hipistl/OpmCuILU0.hpp>
#include <opm/simulators/linalg/hipistl/GpuJac.hpp>
#include <opm/simulators/linalg/hipistl/CuSeqILU0.hpp>
#include <opm/simulators/linalg/hipistl/GpuSeqILU0.hpp>
#include <opm/simulators/linalg/hipistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/hipistl/PreconditionerConvertFieldTypeAdapter.hpp>
#include <opm/simulators/linalg/hipistl/detail/cuda_safe_call.hpp>
@@ -35,7 +35,7 @@
#include <opm/simulators/linalg/cuistl/GpuDILU.hpp>
#include <opm/simulators/linalg/cuistl/OpmCuILU0.hpp>
#include <opm/simulators/linalg/cuistl/GpuJac.hpp>
#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp>
#include <opm/simulators/linalg/cuistl/GpuSeqILU0.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>

View File

@@ -322,14 +322,14 @@ struct StandardPreconditioners {
}
#if HAVE_CUDA
// Creator for the parallel (communicator-aware) ILU0 preconditioner.
// The registration key changes from "CUILU0" to "GPUILU0" in this hunk.
// Reads the "relaxation" property (default 1.0), builds the ILU0 on op.getmat(),
// wraps it in a PreconditionerAdapter and then in a GpuBlockPreconditioner that
// also receives the communicator `comm`.
// NOTE(review): after the key rename, a lookup of "CUILU0" will not find this
// creator unless an alias is registered elsewhere — confirm this is intended.
F::addCreator("CUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
F::addCreator("GPUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using CuILU0 = typename gpuistl::
CuSeqILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
auto cuILU0 = std::make_shared<CuILU0>(op.getmat(), w);
using GpuILU0 = typename gpuistl::
GpuSeqILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
auto gpuILU0 = std::make_shared<GpuILU0>(op.getmat(), w);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, CuILU0>>(cuILU0);
auto adapted = std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuILU0>>(gpuILU0);
auto wrapped = std::make_shared<gpuistl::GpuBlockPreconditioner<V, V, Comm>>(adapted, comm);
return wrapped;
});
@@ -582,27 +582,27 @@ struct StandardPreconditioners<Operator, Dune::Amg::SequentialInformation> {
});
#if HAVE_CUDA
// Creator for the sequential ILU0 preconditioner.
// The registration key changes from "CUILU0" to "GPUILU0" in this hunk.
// Reads the "relaxation" property (default 1.0), builds the ILU0 on op.getmat()
// and returns it wrapped in a PreconditionerAdapter.
// The local alias is spelled GpuILU0 so that it matches the parallel creator
// and the unit tests.
// NOTE(review): after the key rename, a lookup of "CUILU0" will not find this
// creator unless an alias is registered elsewhere — confirm this is intended.
F::addCreator("CUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
F::addCreator("GPUILU0", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
const double w = prm.get<double>("relaxation", 1.0);
using field_type = typename V::field_type;
using CuILU0 = typename gpuistl::
CuSeqILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, CuILU0>>(
std::make_shared<CuILU0>(op.getmat(), w));
using GpuILU0 = typename gpuistl::
GpuSeqILU0<M, gpuistl::CuVector<field_type>, gpuistl::CuVector<field_type>>;
return std::make_shared<gpuistl::PreconditionerAdapter<V, V, GpuILU0>>(
std::make_shared<GpuILU0>(op.getmat(), w));
});
// Creator for the sequential ILU0 preconditioner that runs in single precision.
// The registration key changes from "CUILU0Float" to "GPUILU0Float" in this hunk.
// A PreconditionerConvertFieldTypeAdapter is built from op.getmat(); the ILU0 is
// then constructed on converted->getConvertedMatrix() using float vector and
// matrix types, and installed as the converter's underlying preconditioner.
// The local alias is spelled GpuILU0 so that it matches the other creators
// and the unit tests.
// NOTE(review): after the key rename, a lookup of "CUILU0Float" will not find
// this creator unless an alias is registered elsewhere — confirm this is intended.
F::addCreator("CUILU0Float", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
F::addCreator("GPUILU0Float", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) {
const double w = prm.get<double>("relaxation", 1.0);
using block_type = typename V::block_type;
using VTo = Dune::BlockVector<Dune::FieldVector<float, block_type::dimension>>;
using matrix_type_to =
typename Dune::BCRSMatrix<Dune::FieldMatrix<float, block_type::dimension, block_type::dimension>>;
using CuILU0 = typename gpuistl::
CuSeqILU0<matrix_type_to, gpuistl::CuVector<float>, gpuistl::CuVector<float>>;
using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, CuILU0>;
using GpuILU0 = typename gpuistl::
GpuSeqILU0<matrix_type_to, gpuistl::CuVector<float>, gpuistl::CuVector<float>>;
using Adapter = typename gpuistl::PreconditionerAdapter<VTo, VTo, GpuILU0>;
using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter<Adapter, M, V, V>;
auto converted = std::make_shared<Converter>(op.getmat());
auto adapted = std::make_shared<Adapter>(std::make_shared<CuILU0>(converted->getConvertedMatrix(), w));
auto adapted = std::make_shared<Adapter>(std::make_shared<GpuILU0>(converted->getConvertedMatrix(), w));
converted->setUnderlyingPreconditioner(adapted);
return converted;
});

View File

@@ -25,7 +25,7 @@
#include <dune/istl/bvector.hh>
#include <fmt/core.h>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp>
#include <opm/simulators/linalg/cuistl/GpuSeqILU0.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp>
#include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp>
@@ -41,7 +41,7 @@ namespace Opm::gpuistl
{
template <class M, class X, class Y, int l>
CuSeqILU0<M, X, Y, l>::CuSeqILU0(const M& A, field_type w)
GpuSeqILU0<M, X, Y, l>::GpuSeqILU0(const M& A, field_type w)
: m_underlyingMatrix(A)
, m_w(w)
, m_LU(CuSparseMatrix<field_type>::fromMatrix(detail::makeMatrixWithNonzeroDiagonal(A)))
@@ -70,13 +70,13 @@ CuSeqILU0<M, X, Y, l>::CuSeqILU0(const M& A, field_type w)
// Preparation hook of the preconditioner. The body is intentionally empty:
// neither x nor b is used, which is why both are marked [[maybe_unused]].
template <class M, class X, class Y, int l>
void
CuSeqILU0<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
GpuSeqILU0<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b)
{
}
template <class M, class X, class Y, int l>
void
CuSeqILU0<M, X, Y, l>::apply(X& v, const Y& d)
GpuSeqILU0<M, X, Y, l>::apply(X& v, const Y& d)
{
// We need to pass the solve routine a scalar to multiply.
@@ -133,20 +133,20 @@ CuSeqILU0<M, X, Y, l>::apply(X& v, const Y& d)
// Post-processing hook of the preconditioner. The body is intentionally empty:
// x is not used, which is why it is marked [[maybe_unused]].
template <class M, class X, class Y, int l>
void
CuSeqILU0<M, X, Y, l>::post([[maybe_unused]] X& x)
GpuSeqILU0<M, X, Y, l>::post([[maybe_unused]] X& x)
{
}
// Reports the solver category of this preconditioner.
// Always returns Dune::SolverCategory::sequential.
template <class M, class X, class Y, int l>
Dune::SolverCategory::Category
CuSeqILU0<M, X, Y, l>::category() const
GpuSeqILU0<M, X, Y, l>::category() const
{
return Dune::SolverCategory::sequential;
}
template <class M, class X, class Y, int l>
void
CuSeqILU0<M, X, Y, l>::update()
GpuSeqILU0<M, X, Y, l>::update()
{
m_LU.updateNonzeroValues(detail::makeMatrixWithNonzeroDiagonal(m_underlyingMatrix));
createILU();
@@ -154,7 +154,7 @@ CuSeqILU0<M, X, Y, l>::update()
template <class M, class X, class Y, int l>
void
CuSeqILU0<M, X, Y, l>::analyzeMatrix()
GpuSeqILU0<M, X, Y, l>::analyzeMatrix()
{
if (!m_buffer) {
@@ -226,7 +226,7 @@ CuSeqILU0<M, X, Y, l>::analyzeMatrix()
template <class M, class X, class Y, int l>
size_t
CuSeqILU0<M, X, Y, l>::findBufferSize()
GpuSeqILU0<M, X, Y, l>::findBufferSize()
{
// We have three calls that need buffers:
// 1) LU decomposition
@@ -290,7 +290,7 @@ CuSeqILU0<M, X, Y, l>::findBufferSize()
template <class M, class X, class Y, int l>
void
CuSeqILU0<M, X, Y, l>::createILU()
GpuSeqILU0<M, X, Y, l>::createILU()
{
OPM_ERROR_IF(!m_buffer, "Buffer not initialized. Call findBufferSize() then initialize with the appropiate size.");
OPM_ERROR_IF(!m_analysisDone, "Analyzis of matrix not done. Call analyzeMatrix() first.");
@@ -328,7 +328,7 @@ CuSeqILU0<M, X, Y, l>::createILU()
template <class M, class X, class Y, int l>
void
CuSeqILU0<M, X, Y, l>::updateILUConfiguration()
GpuSeqILU0<M, X, Y, l>::updateILUConfiguration()
{
auto bufferSize = findBufferSize();
if (!m_buffer || m_buffer->dim() < bufferSize) {
@@ -338,25 +338,25 @@ CuSeqILU0<M, X, Y, l>::updateILUConfiguration()
createILU();
}
} // namespace Opm::gpuistl
// Explicit template instantiations of the preconditioner class.
// The macro (renamed from INSTANTIATE_CUSEQILU0_DUNE to INSTANTIATE_GPUSEQILU0_DUNE
// in this hunk) instantiates the class for two matrix flavours with square blocks
// of size `blockdim` and scalar type `realtype`:
//   - Dune::BCRSMatrix<Dune::FieldMatrix<...>>
//   - Dune::BCRSMatrix<Opm::MatrixBlock<...>>
// both with CuVector<realtype> as X and Y.
// It is invoked below for double and float with block sizes 1 through 6.
#define INSTANTIATE_CUSEQILU0_DUNE(realtype, blockdim) \
template class ::Opm::gpuistl::CuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
#define INSTANTIATE_GPUSEQILU0_DUNE(realtype, blockdim) \
template class ::Opm::gpuistl::GpuSeqILU0<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>; \
template class ::Opm::gpuistl::CuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
template class ::Opm::gpuistl::GpuSeqILU0<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \
::Opm::gpuistl::CuVector<realtype>, \
::Opm::gpuistl::CuVector<realtype>>
INSTANTIATE_CUSEQILU0_DUNE(double, 1);
INSTANTIATE_CUSEQILU0_DUNE(double, 2);
INSTANTIATE_CUSEQILU0_DUNE(double, 3);
INSTANTIATE_CUSEQILU0_DUNE(double, 4);
INSTANTIATE_CUSEQILU0_DUNE(double, 5);
INSTANTIATE_CUSEQILU0_DUNE(double, 6);
INSTANTIATE_GPUSEQILU0_DUNE(double, 1);
INSTANTIATE_GPUSEQILU0_DUNE(double, 2);
INSTANTIATE_GPUSEQILU0_DUNE(double, 3);
INSTANTIATE_GPUSEQILU0_DUNE(double, 4);
INSTANTIATE_GPUSEQILU0_DUNE(double, 5);
INSTANTIATE_GPUSEQILU0_DUNE(double, 6);
INSTANTIATE_CUSEQILU0_DUNE(float, 1);
INSTANTIATE_CUSEQILU0_DUNE(float, 2);
INSTANTIATE_CUSEQILU0_DUNE(float, 3);
INSTANTIATE_CUSEQILU0_DUNE(float, 4);
INSTANTIATE_CUSEQILU0_DUNE(float, 5);
INSTANTIATE_CUSEQILU0_DUNE(float, 6);
INSTANTIATE_GPUSEQILU0_DUNE(float, 1);
INSTANTIATE_GPUSEQILU0_DUNE(float, 2);
INSTANTIATE_GPUSEQILU0_DUNE(float, 3);
INSTANTIATE_GPUSEQILU0_DUNE(float, 4);
INSTANTIATE_GPUSEQILU0_DUNE(float, 5);
INSTANTIATE_GPUSEQILU0_DUNE(float, 6);

View File

@@ -16,8 +16,8 @@
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_CUSEQILU0_HPP
#define OPM_CUSEQILU0_HPP
#ifndef OPM_GPUSEQILU0_HPP
#define OPM_GPUSEQILU0_HPP
#include <dune/istl/preconditioner.hh>
#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp>
@@ -46,7 +46,7 @@ namespace Opm::gpuistl
//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template
//! arguments in case of future additions.
template <class M, class X, class Y, int l = 1>
class CuSeqILU0 : public Dune::PreconditionerWithUpdate<X, Y>
class GpuSeqILU0 : public Dune::PreconditionerWithUpdate<X, Y>
{
public:
//! \brief The matrix type the preconditioner is for.
@@ -64,7 +64,7 @@ public:
//! \param A The matrix to operate on.
//! \param w The relaxation factor.
//!
CuSeqILU0(const M& A, field_type w);
GpuSeqILU0(const M& A, field_type w);
//! \brief Prepare the preconditioner.
//! \note Does nothing at the time being.

View File

@@ -18,7 +18,7 @@
*/
#include <config.h>
#define BOOST_TEST_MODULE TestCuSeqILU0
#define BOOST_TEST_MODULE TestGpuSeqILU0
#define BOOST_TEST_NO_MAIN
@@ -27,7 +27,7 @@
#include <dune/common/parallel/mpihelper.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/preconditioners.hh>
#include <opm/simulators/linalg/cuistl/CuSeqILU0.hpp>
#include <opm/simulators/linalg/cuistl/GpuSeqILU0.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>
#include <opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp>
@@ -63,7 +63,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
using M = Dune::FieldMatrix<T, 1, 1>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, 1>>;
using CuILU0 = Opm::gpuistl::CuSeqILU0<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -91,7 +91,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
auto cuILU = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
auto gpuILU = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, GpuILU0>(std::make_shared<GpuILU0>(B, 1.0));
// check for the standard basis {e_i}
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
@@ -101,7 +101,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
Vector outputVectorDune(N);
Vector outputVectorCuistl(N);
duneILU.apply(outputVectorDune, inputVector);
cuILU.apply(outputVectorCuistl, inputVector);
gpuILU.apply(outputVectorCuistl, inputVector);
for (int component = 0; component < N; ++component) {
BOOST_CHECK_CLOSE(outputVectorDune[component][0],
@@ -113,7 +113,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
// Now we check that we can update the matrix. We basically just negate B
B *= -1.0;
auto duneILUNew = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
cuILU.update();
gpuILU.update();
// check for the standard basis {e_i}
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
for (int i = 0; i < N; ++i) {
@@ -122,7 +122,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifference1D, T, NumericTypes)
Vector outputVectorDune(N);
Vector outputVectorCuistl(N);
duneILUNew.apply(outputVectorDune, inputVector);
cuILU.apply(outputVectorCuistl, inputVector);
gpuILU.apply(outputVectorCuistl, inputVector);
for (int component = 0; component < N; ++component) {
BOOST_CHECK_CLOSE(outputVectorDune[component][0],
@@ -158,7 +158,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
using M = Dune::FieldMatrix<T, 2, 2>;
using SpMatrix = Dune::BCRSMatrix<M>;
using Vector = Dune::BlockVector<Dune::FieldVector<T, 2>>;
using CuILU0 = Opm::gpuistl::CuSeqILU0<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
using GpuILU0 = Opm::gpuistl::GpuSeqILU0<SpMatrix, Opm::gpuistl::CuVector<T>, Opm::gpuistl::CuVector<T>>;
SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise);
for (auto row = B.createbegin(); row != B.createend(); ++row) {
@@ -181,7 +181,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
auto duneILU = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
auto cuILU = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, CuILU0>(std::make_shared<CuILU0>(B, 1.0));
auto gpuILU = Opm::gpuistl::PreconditionerAdapter<Vector, Vector, GpuILU0>(std::make_shared<GpuILU0>(B, 1.0));
// check for the standard basis {e_i}
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
@@ -191,7 +191,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
Vector outputVectorDune(N);
Vector outputVectorCuistl(N);
duneILU.apply(outputVectorDune, inputVector);
cuILU.apply(outputVectorCuistl, inputVector);
gpuILU.apply(outputVectorCuistl, inputVector);
for (int component = 0; component < N; ++component) {
BOOST_CHECK_CLOSE(outputVectorDune[component][0],
@@ -203,7 +203,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
// Now we check that we can update the matrix. We basically just negate B
B *= -1.0;
auto duneILUNew = Dune::SeqILU<SpMatrix, Vector, Vector>(B, 1.0);
cuILU.update();
gpuILU.update();
// check for the standard basis {e_i}
// (e_i=(0,...,0, 1 (i-th place), 0, ..., 0))
for (int i = 0; i < N; ++i) {
@@ -212,7 +212,7 @@ BOOST_AUTO_TEST_CASE_TEMPLATE(TestFiniteDifferenceBlock2, T, NumericTypes)
Vector outputVectorDune(N);
Vector outputVectorCuistl(N);
duneILUNew.apply(outputVectorDune, inputVector);
cuILU.apply(outputVectorCuistl, inputVector);
gpuILU.apply(outputVectorCuistl, inputVector);
for (int component = 0; component < N; ++component) {
BOOST_CHECK_CLOSE(outputVectorDune[component][0],