diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c0418d3d..01fe8e656 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -659,7 +659,10 @@ if(CUDA_FOUND) cuda_safe_call cuda_check_last_error cublas_handle + cujac cusparse_handle + cuSparse_matrix_operations + cuVector_operations cuvector cusparsematrix cuseqilu0 diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index ffedbce48..624d9d416 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -153,20 +153,24 @@ endif() if(CUDA_FOUND) # CUISTL SOURCE list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/CuBlasHandle.cpp) + list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.cu) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/CuSparseHandle.cpp) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuVector.cpp) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/vector_operations.cu) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuSparseMatrix.cpp) + list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuJac.cpp) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/CuSeqILU0.cpp) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/set_device.cpp) # CUISTL HEADERS list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cuda_safe_call.hpp) + list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp) list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp) list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp) list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/cuda_check_last_error.hpp) list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp) list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp) + list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuJac.hpp) list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuVector.hpp) list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/CuSparseMatrix.hpp) list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp) @@ -284,18 +288,21 @@ if(CUDA_FOUND) if(USE_BDA_BRIDGE) list(APPEND TEST_SOURCE_FILES tests/test_cusparseSolver.cpp) endif() - list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparse_safe_call.cpp) + list(APPEND TEST_SOURCE_FILES tests/cuistl/test_converttofloatadapter.cpp) + list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cublas_handle.cpp) list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cublas_safe_call.cpp) + list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparse_safe_call.cpp) list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuda_safe_call.cpp) list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuda_check_last_error.cpp) - list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cublas_handle.cpp) - list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparse_handle.cpp) - list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuvector.cpp) - list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparsematrix.cpp) - list(APPEND TEST_SOURCE_FILES tests/cuistl/test_safe_conversion.cpp) - list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuseqilu0.cpp) - list(APPEND TEST_SOURCE_FILES tests/cuistl/test_converttofloatadapter.cpp) + list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cujac.cpp) list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuowneroverlapcopy.cpp) + list(APPEND TEST_SOURCE_FILES 
tests/cuistl/test_cuseqilu0.cpp) + list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparse_handle.cpp) + list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuSparse_matrix_operations.cpp) + list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cusparsematrix.cpp) + list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuvector.cpp) + list(APPEND TEST_SOURCE_FILES tests/cuistl/test_cuVector_operations.cpp) + list(APPEND TEST_SOURCE_FILES tests/cuistl/test_safe_conversion.cpp) list(APPEND TEST_SOURCE_FILES tests/cuistl/test_solver_adapter.cpp) diff --git a/opm/simulators/linalg/PreconditionerFactory_impl.hpp b/opm/simulators/linalg/PreconditionerFactory_impl.hpp index 521f07caf..27c918f71 100644 --- a/opm/simulators/linalg/PreconditionerFactory_impl.hpp +++ b/opm/simulators/linalg/PreconditionerFactory_impl.hpp @@ -47,6 +47,7 @@ #include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp> #include <opm/simulators/linalg/cuistl/PreconditionerConvertFieldTypeAdapter.hpp> #include <opm/simulators/linalg/cuistl/CuBlockPreconditioner.hpp> +#include <opm/simulators/linalg/cuistl/CuJac.hpp> #endif @@ -239,6 +240,17 @@ struct StandardPreconditioners auto wrapped = std::make_shared<Opm::cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm); return wrapped; }); + + F::addCreator("CUJac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t, const C& comm) { + const double w = prm.get<double>("relaxation", 1.0); + using field_type = typename V::field_type; + using CuJac = typename Opm::cuistl::CuJac<M, Opm::cuistl::CuVector<field_type>, Opm::cuistl::CuVector<field_type>>; + auto cuJac = std::make_shared<CuJac>(op.getmat(), w); + + auto adapted = std::make_shared<Opm::cuistl::PreconditionerAdapter<V, V, CuJac>>(cuJac); + auto wrapped = std::make_shared<Opm::cuistl::CuBlockPreconditioner<V, V, Comm>>(adapted, comm); + return wrapped; + }); #endif } @@ -445,6 +457,13 @@ struct StandardPreconditioners<Operator,Dune::Amg::SequentialInformation> return converted; }); + + F::addCreator("CUJac", [](const O& op, const P& prm, const std::function<V()>&, std::size_t) { + const double w = prm.get<double>("relaxation", 1.0); + using field_type = typename V::field_type; + using CUJac = typename Opm::cuistl::CuJac<M, Opm::cuistl::CuVector<field_type>, Opm::cuistl::CuVector<field_type>>; + return std::make_shared<Opm::cuistl::PreconditionerAdapter<V, V, CUJac>>(std::make_shared<CUJac>(op.getmat(), w)); + }); #endif } }; diff --git a/opm/simulators/linalg/cuistl/CuJac.cpp b/opm/simulators/linalg/cuistl/CuJac.cpp new file mode 100644 index 000000000..9472d51f4 --- /dev/null +++ b/opm/simulators/linalg/cuistl/CuJac.cpp @@ -0,0 +1,148 @@ +/* + Copyright 2022-2023 SINTEF AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. 
+*/ +#include <cuda.h> +#include <cuda_runtime.h> +#include <cusparse.h> +#include <dune/common/fmatrix.hh> +#include <dune/common/fvector.hh> +#include <dune/istl/bcrsmatrix.hh> +#include <dune/istl/bvector.hh> +#include <fmt/core.h> +#include <opm/common/ErrorMacros.hpp> +#include <opm/simulators/linalg/cuistl/CuJac.hpp> +#include <opm/simulators/linalg/cuistl/CuVector.hpp> +#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp> +#include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp> +#include <opm/simulators/linalg/cuistl/detail/cublas_safe_call.hpp> +#include <opm/simulators/linalg/cuistl/detail/cublas_wrapper.hpp> +#include <opm/simulators/linalg/cuistl/detail/cusparse_constants.hpp> +#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp> +#include <opm/simulators/linalg/cuistl/detail/cusparse_safe_call.hpp> +#include <opm/simulators/linalg/cuistl/detail/cusparse_wrapper.hpp> +#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp> +#include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp> +#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp> +#include <opm/simulators/linalg/matrixblock.hh> + +namespace Opm::cuistl +{ + +template <class M, class X, class Y, int l> +CuJac<M, X, Y, l>::CuJac(const M& A, field_type w) + : m_cpuMatrix(A) + , m_relaxationFactor(w) + , m_gpuMatrix(CuSparseMatrix<field_type>::fromMatrix(A)) + , m_diagInvFlattened(m_gpuMatrix.N() * m_gpuMatrix.blockSize() * m_gpuMatrix.blockSize()) +{ + // Some sanity checks + OPM_ERROR_IF(A.N() != m_gpuMatrix.N(), + fmt::format("CuSparse matrix not same size as DUNE matrix. {} vs {}.", m_gpuMatrix.N(), A.N())); + OPM_ERROR_IF(A[0][0].N() != m_gpuMatrix.blockSize(), + fmt::format("CuSparse matrix not same blocksize as DUNE matrix. {} vs {}.", + m_gpuMatrix.blockSize(), + A[0][0].N())); + OPM_ERROR_IF(A.N() * A[0][0].N() != m_gpuMatrix.dim(), + fmt::format("CuSparse matrix not same dimension as DUNE matrix. {} vs {}.", + m_gpuMatrix.dim(), + A.N() * A[0][0].N())); + OPM_ERROR_IF(A.nonzeroes() != m_gpuMatrix.nonzeroes(), + fmt::format("CuSparse matrix not same number of non zeroes as DUNE matrix. {} vs {}. ", + m_gpuMatrix.nonzeroes(), + A.nonzeroes())); + + // Compute the inverted diagonal of A and store it in a vector format in m_diagInvFlattened + detail::invertDiagonalAndFlatten(m_gpuMatrix.getNonZeroValues().data(), + m_gpuMatrix.getRowIndices().data(), + m_gpuMatrix.getColumnIndices().data(), + m_gpuMatrix.N(), + m_gpuMatrix.blockSize(), + m_diagInvFlattened.data()); +} + +template <class M, class X, class Y, int l> +void +CuJac<M, X, Y, l>::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b) +{ +} + +template <class M, class X, class Y, int l> +void +CuJac<M, X, Y, l>::apply(X& v, const Y& d) +{ + // Jacobi preconditioner: x_{n+1} = x_n + w * (D^-1 * (b - Ax_n) ) + // Working with the defect d and the update v, we only need to set v = w*(D^-1)*d + + // Compute the MV product where the matrix is diagonal and therefore stored as a vector. + // The product is thus computed as a Hadamard product.
+ detail::weightedDiagMV(m_diagInvFlattened.data(), + m_gpuMatrix.N(), + m_gpuMatrix.blockSize(), + m_relaxationFactor, + d.data(), + v.data()); +} + +template <class M, class X, class Y, int l> +void +CuJac<M, X, Y, l>::post([[maybe_unused]] X& x) +{ +} + +template <class M, class X, class Y, int l> +Dune::SolverCategory::Category +CuJac<M, X, Y, l>::category() const +{ + return Dune::SolverCategory::sequential; +} + +template <class M, class X, class Y, int l> +void +CuJac<M, X, Y, l>::update() +{ + m_gpuMatrix.updateNonzeroValues(m_cpuMatrix); + detail::invertDiagonalAndFlatten(m_gpuMatrix.getNonZeroValues().data(), + m_gpuMatrix.getRowIndices().data(), + m_gpuMatrix.getColumnIndices().data(), + m_gpuMatrix.N(), + m_gpuMatrix.blockSize(), + m_diagInvFlattened.data()); +} + +} // namespace Opm::cuistl +#define INSTANTIATE_CUJAC_DUNE(realtype, blockdim) \ + template class ::Opm::cuistl::CuJac<Dune::BCRSMatrix<Dune::FieldMatrix<realtype, blockdim, blockdim>>, \ + ::Opm::cuistl::CuVector<realtype>, \ + ::Opm::cuistl::CuVector<realtype>>; \ + template class ::Opm::cuistl::CuJac<Dune::BCRSMatrix<Opm::MatrixBlock<realtype, blockdim, blockdim>>, \ + ::Opm::cuistl::CuVector<realtype>, \ + ::Opm::cuistl::CuVector<realtype>> + +INSTANTIATE_CUJAC_DUNE(double, 1); +INSTANTIATE_CUJAC_DUNE(double, 2); +INSTANTIATE_CUJAC_DUNE(double, 3); +INSTANTIATE_CUJAC_DUNE(double, 4); +INSTANTIATE_CUJAC_DUNE(double, 5); +INSTANTIATE_CUJAC_DUNE(double, 6); + +INSTANTIATE_CUJAC_DUNE(float, 1); +INSTANTIATE_CUJAC_DUNE(float, 2); +INSTANTIATE_CUJAC_DUNE(float, 3); +INSTANTIATE_CUJAC_DUNE(float, 4); +INSTANTIATE_CUJAC_DUNE(float, 5); +INSTANTIATE_CUJAC_DUNE(float, 6); diff --git a/opm/simulators/linalg/cuistl/CuJac.hpp b/opm/simulators/linalg/cuistl/CuJac.hpp new file mode 100644 index 000000000..31990dc11 --- /dev/null +++ b/opm/simulators/linalg/cuistl/CuJac.hpp @@ -0,0 +1,109 @@ +/* + Copyright 2022-2023 SINTEF AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ +#ifndef OPM_CUJAC_HPP +#define OPM_CUJAC_HPP + +#include <dune/istl/preconditioner.hh> +#include <opm/simulators/linalg/PreconditionerWithUpdate.hpp> +#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp> +#include <opm/simulators/linalg/cuistl/detail/CuMatrixDescription.hpp> +#include <opm/simulators/linalg/cuistl/detail/CuSparseHandle.hpp> +#include <opm/simulators/linalg/cuistl/detail/CuSparseResource.hpp> + + + +namespace Opm::cuistl +{ +//! \brief Jacobi preconditioner on the GPU. +//! +//! \note This is a fast but weak preconditioner +//! +//! \tparam M The matrix type to operate on +//! \tparam X Type of the update +//! \tparam Y Type of the defect +//! \tparam l Ignored. Just there to have the same number of template arguments +//! as other preconditioners. +//! +//! \note We assume X and Y are both CuVector<real_type>, but we leave them as template +//! arguments in case of future additions. 
+template <class M, class X, class Y, int l = 1> +class CuJac : public Dune::PreconditionerWithUpdate<X, Y> +{ +public: + //! \brief The matrix type the preconditioner is for. + using matrix_type = typename std::remove_const<M>::type; + //! \brief The domain type of the preconditioner. + using domain_type = X; + //! \brief The range type of the preconditioner. + using range_type = Y; + //! \brief The field type of the preconditioner. + using field_type = typename X::field_type; + + //! \brief Constructor. + //! + //! The constructor gets all parameters needed to operate the preconditioner. + //! \param A The matrix to operate on. + //! \param w The relaxation factor. + //! + CuJac(const M& A, field_type w); + + //! \brief Prepare the preconditioner. + //! \note Does nothing at the time being. + virtual void pre(X& x, Y& b) override; + + //! \brief Apply the preconditioner. + virtual void apply(X& v, const Y& d) override; + + //! \brief Post processing + //! \note Does nothing at the moment + virtual void post(X& x) override; + + //! Category of the preconditioner (see SolverCategory::Category) + virtual Dune::SolverCategory::Category category() const override; + + //! \brief Updates the matrix data. + virtual void update() override; + + + //! \returns false + static constexpr bool shouldCallPre() + { + return false; + } + + //! \returns false + static constexpr bool shouldCallPost() + { + return false; + } + + +private: + //! \brief Reference to the underlying matrix + const M& m_cpuMatrix; + //! \brief The relaxation factor to use. + const field_type m_relaxationFactor; + //! \brief The A matrix stored on the GPU + CuSparseMatrix<field_type> m_gpuMatrix; + //! \brief The diagonal of m_gpuMatrix inverted, and then flattened to fit in a vector + CuVector<field_type> m_diagInvFlattened; +}; +} // end namespace Opm::cuistl + +#endif diff --git a/opm/simulators/linalg/cuistl/CuVector.hpp b/opm/simulators/linalg/cuistl/CuVector.hpp index 1b46c6b1d..4ab8392f6 100644 --- a/opm/simulators/linalg/cuistl/CuVector.hpp +++ b/opm/simulators/linalg/cuistl/CuVector.hpp @@ -26,6 +26,7 @@ #include <opm/simulators/linalg/cuistl/detail/CuBlasHandle.hpp> #include <opm/simulators/linalg/cuistl/detail/safe_conversion.hpp> #include <vector> +#include <string> namespace Opm::cuistl @@ -361,6 +362,18 @@ public: */ void setZeroAtIndexSet(const CuVector<int>& indexSet); + + // Slow method that creates a space-separated string representation of a CuVector for debug purposes + std::string toDebugString() + { + std::vector<T> v = asStdVector(); + std::string res; + for (size_t i = 0; i < v.size(); ++i) { + res += std::to_string(v[i]); + if (i + 1 < v.size()) res += " "; + } + return res; + } + private: T* m_dataOnDevice = nullptr; @@ -374,5 +387,6 @@ private: void assertHasElements() const; }; + } // namespace Opm::cuistl #endif diff --git a/opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.cu b/opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.cu new file mode 100644 index 000000000..c31172204 --- /dev/null +++ b/opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.cu @@ -0,0 +1,174 @@ +/* + Copyright 2022-2023 SINTEF AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version.
+ + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ +#include <opm/common/ErrorMacros.hpp> +#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp> +#include <stdexcept> +namespace Opm::cuistl::detail +{ + +namespace +{ + // TODO: combine the following three kernels into one using template arguments so that + // no compile time optimization is lost while making the code more compact + template <class T> + __global__ void cuInvertDiagonalAndFlattenBlocksize1( + T* matNonZeroValues, int* rowIndices, int* colIndices, size_t numberOfRows, T* vec) + { + const auto row = blockDim.x * blockIdx.x + threadIdx.x; + const int blocksize = 1; + + if (row < numberOfRows) { + size_t nnzIdx = rowIndices[row]; + size_t nnzIdxLim = rowIndices[row + 1]; + + // search the row for the diagonal block; checking the bound first keeps the loop + // inside the row even if the diagonal block should be missing + while (nnzIdx < nnzIdxLim && colIndices[nnzIdx] != row) { + ++nnzIdx; + } + + // diagBlock points to the start of where the diagonal block is stored + T* diagBlock = &matNonZeroValues[blocksize * blocksize * nnzIdx]; + // vecBlock points to the start of the block element in the vector where the inverse of the diagonal block + // element should be stored + T* vecBlock = &vec[blocksize * blocksize * row]; + + vecBlock[0] = 1.0 / (diagBlock[0]); + } + } + + template <class T> + __global__ void cuInvertDiagonalAndFlattenBlocksize2( + T* matNonZeroValues, int* rowIndices, int* colIndices, size_t numberOfRows, T* vec) + { + const auto row = blockDim.x * blockIdx.x + threadIdx.x; + const int blocksize = 2; + + if (row < numberOfRows) { + size_t nnzIdx = rowIndices[row]; + size_t nnzIdxLim = rowIndices[row + 1]; + + // search the row for the diagonal block; checking the bound first keeps the loop + // inside the row even if the diagonal block should be missing + while (nnzIdx < nnzIdxLim && colIndices[nnzIdx] != row) { + ++nnzIdx; + } + + // diagBlock points to the start of where the diagonal block is stored + T* diagBlock = &matNonZeroValues[blocksize * blocksize * nnzIdx]; + // vecBlock points to the start of the block element in the vector where the inverse of the diagonal block + // element should be stored + T* vecBlock = &vec[blocksize * blocksize * row]; + + // based on Dune implementation + T detInv = 1.0 / (diagBlock[0] * diagBlock[3] - diagBlock[1] * diagBlock[2]); + vecBlock[0] = diagBlock[3] * detInv; + vecBlock[1] = -diagBlock[1] * detInv; + vecBlock[2] = -diagBlock[2] * detInv; + vecBlock[3] = diagBlock[0] * detInv; + } + } + template <class T> + __global__ void cuInvertDiagonalAndFlattenBlocksize3( + T* matNonZeroValues, int* rowIndices, int* colIndices, size_t numberOfRows, T* vec) + { + const auto row = blockDim.x * blockIdx.x + threadIdx.x; + const int blocksize = 3; + + if (row < numberOfRows) { + size_t nnzIdx = rowIndices[row]; + size_t nnzIdxLim = rowIndices[row + 1]; + + // search the row for the diagonal block; checking the bound first keeps the loop + // inside the row even if the diagonal block should be missing + while (nnzIdx < nnzIdxLim && colIndices[nnzIdx] != row) { + ++nnzIdx; + } + + // diagBlock points to the start of where the diagonal block is stored + T* diagBlock = &matNonZeroValues[blocksize * blocksize * nnzIdx]; + // vecBlock points to the start of the block element in the vector where the inverse of the diagonal block + // element should be stored + T* vecBlock = &vec[blocksize * blocksize * row]; + + // based on Dune implementation + T t4 = diagBlock[0] * diagBlock[4]; + T t6 = diagBlock[0] * diagBlock[5]; + T t8 = diagBlock[1] * diagBlock[3]; + T t10 = diagBlock[2] * diagBlock[3]; + T t12 = diagBlock[1] * diagBlock[6]; + T t14 = diagBlock[2] * diagBlock[6]; + + T t17 = 1.0 + / (t4 * diagBlock[8] - t6 * diagBlock[7] - t8 * diagBlock[8] + t10 * diagBlock[7] + t12 * diagBlock[5] + - t14 * diagBlock[4]); // t17 is 1/determinant + + vecBlock[0] = (diagBlock[4] * diagBlock[8] - diagBlock[5] * diagBlock[7]) * t17; + vecBlock[1] = -(diagBlock[1] * diagBlock[8] - diagBlock[2] * diagBlock[7]) * t17; + vecBlock[2] = (diagBlock[1] * diagBlock[5] - diagBlock[2] * diagBlock[4]) * t17; + vecBlock[3] = -(diagBlock[3] * diagBlock[8] - diagBlock[5] * diagBlock[6]) * t17; + vecBlock[4] = (diagBlock[0] * diagBlock[8] - t14) * t17; + vecBlock[5] = -(t6 - t10) * t17; + vecBlock[6] = (diagBlock[3] * diagBlock[7] - diagBlock[4] * diagBlock[6]) * t17; + vecBlock[7] = -(diagBlock[0] * diagBlock[7] - t12) * t17; + vecBlock[8] = (t4 - t8) * t17; + } + } + + constexpr inline size_t getThreads([[maybe_unused]] size_t numberOfRows) + { + return 1024; + } + + inline size_t getBlocks(size_t numberOfRows) + { + const auto threads = getThreads(numberOfRows); + return (numberOfRows + threads - 1) / threads; + } +} // namespace + +template <class T> +void +invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t numberOfRows, size_t blocksize, T* vec) +{ + switch (blocksize) { + case 1: + cuInvertDiagonalAndFlattenBlocksize1<<<getBlocks(numberOfRows), getThreads(numberOfRows)>>>( + mat, rowIndices, colIndices, numberOfRows, vec); + break; + case 2: + cuInvertDiagonalAndFlattenBlocksize2<<<getBlocks(numberOfRows), getThreads(numberOfRows)>>>( + mat, rowIndices, colIndices, numberOfRows, vec); + break; + case 3: + cuInvertDiagonalAndFlattenBlocksize3<<<getBlocks(numberOfRows), getThreads(numberOfRows)>>>( + mat, rowIndices, colIndices, numberOfRows, vec); + break; + default: + // TODO: Figure out why this case did not produce an error or any output in the output stream or the DBG file + // when it was forced to execute + OPM_THROW(std::invalid_argument, "Inverting diagonal is not implemented for blocksizes > 3"); + break; + } +} + +template void invertDiagonalAndFlatten(double*, int*, int*, size_t, size_t, double*); +template void invertDiagonalAndFlatten(float*, int*, int*, size_t, size_t, float*); + +} // namespace Opm::cuistl::detail diff --git a/opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp b/opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp new file mode 100644 index 000000000..59fd45d68 --- /dev/null +++ b/opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp @@ -0,0 +1,39 @@ +/* + Copyright 2022-2023 SINTEF AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ +#ifndef OPM_CUISTL_CUSPARSE_MATRIX_OPERATIONS_HPP +#define OPM_CUISTL_CUSPARSE_MATRIX_OPERATIONS_HPP +#include <cstddef> +#include <vector> +namespace Opm::cuistl::detail +{ + +/** + * @brief This function receives a sparse matrix, inverts its diagonal blocks, and stores the inverted blocks flattened in vec + * @param mat Pointer to GPU memory containing the non-zero values of the sparse matrix + * @param rowIndices Pointer to vector on GPU containing row indices compliant with the BSR format + * @param colIndices Pointer to vector on GPU containing col indices compliant with the BSR format + * @param numberOfRows Integer describing the number of rows in the matrix + * @param blocksize Integer describing the sidelength of a block in the sparse matrix + * @param[out] vec Pointer to the vector where the inverted diagonal blocks should be stored + */ +template <class T> +void invertDiagonalAndFlatten(T* mat, int* rowIndices, int* colIndices, size_t numberOfRows, size_t blocksize, T* vec); + +} // namespace Opm::cuistl::detail +#endif diff --git a/opm/simulators/linalg/cuistl/detail/vector_operations.cu b/opm/simulators/linalg/cuistl/detail/vector_operations.cu index c557c9b7a..18ccf2ac2 100644 --- a/opm/simulators/linalg/cuistl/detail/vector_operations.cu +++ b/opm/simulators/linalg/cuistl/detail/vector_operations.cu @@ -16,8 +16,10 @@ You should have received a copy of the GNU General Public License along with OPM. If not, see <http://www.gnu.org/licenses/>. */ +#include <opm/common/ErrorMacros.hpp> #include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp> // TODO: [perf] Get rid of thrust. +#include <stdexcept> #include <thrust/device_ptr.h> #include <thrust/reduce.h> namespace Opm::cuistl::detail @@ -45,6 +47,27 @@ namespace } } + template <class T, int blocksize> + __global__ void weightedDiagMV( + const T* squareBlockVector, const size_t numberOfElements, const T w, const T* globalSrcVec, T* globalDstVec) + { + const auto globalIndex = blockDim.x * blockIdx.x + threadIdx.x; + + if (globalIndex < numberOfElements) { + const T* localBlock = (squareBlockVector + (blocksize * blocksize * globalIndex)); + const T* localSrcVec = (globalSrcVec + (blocksize * globalIndex)); + T* localDstVec = (globalDstVec + (blocksize * globalIndex)); + + for (int i = 0; i < blocksize; ++i) { + T rowResult = 0.0; + for (int j = 0; j < blocksize; ++j) { + rowResult += localBlock[i * blocksize + j] * localSrcVec[j]; + } + localDstVec[i] = rowResult * w; + } + } + } + template <class T> __global__ void elementWiseMultiplyKernel(const T* a, const T* b, T* buffer, size_t numberOfElements, const int* indices) @@ -109,4 +132,36 @@ template double innerProductAtIndices(const double*, const double*, double* buff template float innerProductAtIndices(const float*, const float*, float* buffer, size_t, const int*); template int innerProductAtIndices(const int*, const int*, int* buffer, size_t, const int*); -} // namespace Opm::cuistl::impl + +template <class T> +void +weightedDiagMV(const T* squareBlockVector, + const size_t numberOfElements, + const size_t blocksize, + T relaxationFactor, + const T* srcVec, + T* dstVec) +{ + switch (blocksize) { + case 1: + weightedDiagMV<T, 1><<<getBlocks(numberOfElements), getThreads(numberOfElements)>>>( + squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec); + break; + case 2: + weightedDiagMV<T, 2><<<getBlocks(numberOfElements), getThreads(numberOfElements)>>>( + squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec); + break; + case 3: + weightedDiagMV<T, 3><<<getBlocks(numberOfElements), getThreads(numberOfElements)>>>( + squareBlockVector, numberOfElements, relaxationFactor, srcVec, dstVec); + break; + default: + OPM_THROW(std::invalid_argument, "blockvector Hadamard product not implemented for blocksize>3"); + break; + } +} + +template void weightedDiagMV(const double*, const size_t, const size_t, double, const double*, double*); +template void weightedDiagMV(const float*, const size_t, const size_t, float, const float*, float*); + +} // namespace Opm::cuistl::detail diff --git a/opm/simulators/linalg/cuistl/detail/vector_operations.hpp b/opm/simulators/linalg/cuistl/detail/vector_operations.hpp index 5ed31e8c6..0ea08155b 100644 --- a/opm/simulators/linalg/cuistl/detail/vector_operations.hpp +++ b/opm/simulators/linalg/cuistl/detail/vector_operations.hpp @@ -54,5 +54,25 @@ void setZeroAtIndexSet(T* deviceData, size_t numberOfElements, const int* indice */ template <class T> T innerProductAtIndices(const T* deviceA, const T* deviceB, T* buffer, size_t numberOfElements, const int* indices); + +/** + * @brief Compute the weighted matrix-vector product where the matrix is block-diagonal and stored as a vector of + * blocks, meaning we compute a block-wise Hadamard product. + * @param squareBlockVector A CuVector whose elements are NxN matrix blocks + * @param numberOfRows The number of blocks in the vector + * @param blocksize The sidelength of the square block elements in the vector + * @param relaxationFactor The weight applied to the product + * @param srcVec A pointer to the data of the CuVector we multiply the blockvector with + * @param[out] dstVec A pointer to the data of the CuVector we store the result in + * + * @note This is implemented as a faster way to multiply a diagonal matrix with a blockvector. We need only store the + * diagonal of the matrix and use this product. + */ +template <class T> +void weightedDiagMV(const T* squareBlockVector, + const size_t numberOfRows, + const size_t blocksize, + T relaxationFactor, + const T* srcVec, + T* dstVec); } // namespace Opm::cuistl::detail #endif diff --git a/tests/cuistl/test_cuSparse_matrix_operations.cpp b/tests/cuistl/test_cuSparse_matrix_operations.cpp new file mode 100644 index 000000000..eb7fd3917 --- /dev/null +++ b/tests/cuistl/test_cuSparse_matrix_operations.cpp @@ -0,0 +1,182 @@ +/* + Copyright 2022-2023 SINTEF AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>.
+*/ +#include <config.h> + +#define BOOST_TEST_MODULE TestCuSparseMatrixOperations +#include <boost/mpl/list.hpp> +#include <boost/test/unit_test.hpp> +#include <cuda_runtime.h> +#include <dune/istl/bcrsmatrix.hh> +#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp> +#include <opm/simulators/linalg/cuistl/CuVector.hpp> +#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp> +#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp> +#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp> + +using NumericTypes = boost::mpl::list<double, float>; + +BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith3By3Blocks, T, NumericTypes) +{ + const size_t blocksize = 3; + const size_t N = 2; + const int nonZeroes = 3; + using M = Dune::FieldMatrix<T, blocksize, blocksize>; + using SpMatrix = Dune::BCRSMatrix<M>; + /* + create this sparse matrix + | |1 2 3| | 1 0 0| | + | |5 2 3| | 0 1 0| | + | |2 1 1| | 0 0 1| | + | | + | |0 0 0| |-1 0 0| | + | |0 0 0| | 0 -1 0| | + | |0 0 0| | 0 0 -1| | + + The diagonal elements inverted, and put in a vector should look like this + | |-1/4 1/4 0| | + | | 1/4 -5/4 3| | + | | 1/4 3/4 -2| | + | | + | | -1 0 0| | + | | 0 -1 0| | + | | 0 0 -1| | + */ + + SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise); + for (auto row = B.createbegin(); row != B.createend(); ++row) { + row.insert(row.index()); + if (row.index() == 0) { + row.insert(row.index() + 1); + } + } + + B[0][0][0][0] = 1.0; + B[0][0][0][1] = 2.0; + B[0][0][0][2] = 3.0; + B[0][0][1][0] = 5.0; + B[0][0][1][1] = 2.0; + B[0][0][1][2] = 3.0; + B[0][0][2][0] = 2.0; + B[0][0][2][1] = 1.0; + B[0][0][2][2] = 1.0; + + B[0][1][0][0] = 1.0; + B[0][1][1][1] = 1.0; + B[0][1][2][2] = 1.0; + + B[1][1][0][0] = -1.0; + B[1][1][1][1] = -1.0; + B[1][1][2][2] = -1.0; + + Opm::cuistl::CuSparseMatrix<T> m = Opm::cuistl::CuSparseMatrix<T>::fromMatrix(B); + Opm::cuistl::CuVector<T> dInvDiag(blocksize * blocksize * N); + + Opm::cuistl::detail::invertDiagonalAndFlatten(m.getNonZeroValues().data(), + m.getRowIndices().data(), + m.getColumnIndices().data(), + N, + blocksize, + dInvDiag.data()); + + std::vector<T> expectedInvDiag {-1.0 / 4.0, + 1.0 / 4.0, + 0.0, + 1.0 / 4.0, + -5.0 / 4.0, + 3.0, + 1.0 / 4.0, + 3.0 / 4.0, + -2.0, + -1.0, + 0.0, + 0.0, + 0.0, + -1.0, + 0.0, + 0.0, + 0.0, + -1.0}; + std::vector<T> computedInvDiag = dInvDiag.asStdVector(); + + BOOST_REQUIRE_EQUAL(expectedInvDiag.size(), computedInvDiag.size()); + for (size_t i = 0; i < expectedInvDiag.size(); ++i) { + BOOST_CHECK_CLOSE(expectedInvDiag[i], computedInvDiag[i], 1e-7); + } +} + +BOOST_AUTO_TEST_CASE_TEMPLATE(FlattenAndInvertDiagonalWith2By2Blocks, T, NumericTypes) +{ + const size_t blocksize = 2; + const size_t N = 2; + const int nonZeroes = 3; + using M = Dune::FieldMatrix<T, blocksize, blocksize>; + using SpMatrix = Dune::BCRSMatrix<M>; + /* + create this sparse matrix + | | 1 2| | 1 0| | + | |1/2 2| | 0 1| | + | | + | | 0 0| |-1 0| | + | | 0 0| | 0 -1| | + + The diagonal elements inverted, and put in a vector should look like this + | | 2 -2| | + | |-1/2 1| | + | | + | | -1 0| | + | | 0 -1| | + */ + + SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise); + for (auto row = B.createbegin(); row != B.createend(); ++row) { + row.insert(row.index()); + if (row.index() == 0) { + row.insert(row.index() + 1); + } + } + + B[0][0][0][0] = 1.0; + B[0][0][0][1] = 2.0; + B[0][0][1][0] = 1.0 / 2.0; + B[0][0][1][1] = 2.0; + + B[0][1][0][0] = 1.0; + B[0][1][1][1] = 1.0; + + B[1][1][0][0] = -1.0; + B[1][1][1][1] = -1.0; + + Opm::cuistl::CuSparseMatrix<T> m = Opm::cuistl::CuSparseMatrix<T>::fromMatrix(B); + Opm::cuistl::CuVector<T> dInvDiag(blocksize * blocksize * N); + + Opm::cuistl::detail::invertDiagonalAndFlatten(m.getNonZeroValues().data(), + m.getRowIndices().data(), + m.getColumnIndices().data(), + N, + blocksize, + dInvDiag.data()); + + std::vector<T> expectedInvDiag {2.0, -2.0, -1.0 / 2.0, 1.0, -1.0, 0.0, 0.0, -1.0}; + std::vector<T> computedInvDiag = dInvDiag.asStdVector(); + + BOOST_REQUIRE_EQUAL(expectedInvDiag.size(), computedInvDiag.size()); + for (size_t i = 0; i < expectedInvDiag.size(); ++i) { + BOOST_CHECK_CLOSE(expectedInvDiag[i], computedInvDiag[i], 1e-7); + } +} diff --git a/tests/cuistl/test_cuVector_operations.cpp b/tests/cuistl/test_cuVector_operations.cpp new file mode 100644 index 000000000..c8a9783ce --- /dev/null +++ b/tests/cuistl/test_cuVector_operations.cpp @@ -0,0 +1,98 @@ +/* + Copyright 2022-2023 SINTEF AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. +*/ +#include <config.h> + +#define BOOST_TEST_MODULE TestCuVectorOperations +#include <boost/mpl/list.hpp> +#include <boost/test/unit_test.hpp> +#include <cuda_runtime.h> +#include <dune/istl/bcrsmatrix.hh> +#include <opm/simulators/linalg/cuistl/CuJac.hpp> +#include <opm/simulators/linalg/cuistl/CuVector.hpp> +#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp> +#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp> +#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp> + +using NumericTypes = boost::mpl::list<double, float>; + +BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf3By3BlockVectorAndVectorVector, T, NumericTypes) +{ + /* + Example in the test for multiplying by element a blockvector with a vector of vectors + | |1 2 3| | | |3| | | |10| | + | |5 2 3| | X | |2| | = | |22| | + | |2 1 2| | | |1| | | |10| | + */ + + const size_t blocksize = 3; + const size_t N = 1; + const T weight = 1.0; + + std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 5.0, 2.0, 3.0, 2.0, 1.0, 2.0}); + std::vector<T> hostVecVector({3.0, 2.0, 1.0}); + std::vector<T> hostDstVector({0, 0, 0}); + Opm::cuistl::CuVector<T> deviceBlockVector(hostBlockVector); + Opm::cuistl::CuVector<T> deviceVecVector(hostVecVector); + Opm::cuistl::CuVector<T> deviceDstVector(hostDstVector); + + Opm::cuistl::detail::weightedDiagMV( + deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data()); + + std::vector<T> expectedVec {10.0, 22.0, 10.0}; + std::vector<T> computedVec = deviceDstVector.asStdVector(); + + BOOST_REQUIRE_EQUAL(expectedVec.size(), computedVec.size()); + for (size_t i = 0; i < expectedVec.size(); i++) { + BOOST_CHECK_CLOSE(expectedVec[i], computedVec[i], 1e-7); + } +} + +BOOST_AUTO_TEST_CASE_TEMPLATE(ElementWiseMultiplicationOf2By2BlockVectorAndVectorVector, T, NumericTypes) +{ + /* + Example in the test for multiplying by element a blockvector with a 
vector of vectors + | |1 2| | | |1| | | | 3.5| | + 0.5 * | |3 4| | X | |3| | = | | 7.5| | + | | | | | | + | |4 3| | | |2| | | |10.0| | + | |2 1| | | |4| | | | 4.0| | + */ + + const size_t blocksize = 2; + const size_t N = 2; + const T weight = 0.5; + + std::vector<T> hostBlockVector({1.0, 2.0, 3.0, 4.0, 4.0, 3.0, 2.0, 1.0}); + std::vector<T> hostVecVector({1.0, 3.0, 2.0, 4.0}); + std::vector<T> hostDstVector({0, 0, 0, 0}); + Opm::cuistl::CuVector<T> deviceBlockVector(hostBlockVector); + Opm::cuistl::CuVector<T> deviceVecVector(hostVecVector); + Opm::cuistl::CuVector<T> deviceDstVector(hostDstVector); + + Opm::cuistl::detail::weightedDiagMV( + deviceBlockVector.data(), N, blocksize, weight, deviceVecVector.data(), deviceDstVector.data()); + + std::vector<T> expectedVec {3.5, 7.5, 10.0, 4.0}; + std::vector<T> computedVec = deviceDstVector.asStdVector(); + + BOOST_REQUIRE_EQUAL(expectedVec.size(), computedVec.size()); + for (size_t i = 0; i < expectedVec.size(); i++) { + BOOST_CHECK_CLOSE(expectedVec[i], computedVec[i], 1e-7); + } +} \ No newline at end of file diff --git a/tests/cuistl/test_cujac.cpp b/tests/cuistl/test_cujac.cpp new file mode 100644 index 000000000..7f093e02b --- /dev/null +++ b/tests/cuistl/test_cujac.cpp @@ -0,0 +1,148 @@ +/* + Copyright 2022-2023 SINTEF AS + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see <http://www.gnu.org/licenses/>. 
+*/ +#include <config.h> + +#define BOOST_TEST_MODULE TestCuJac +#include <boost/mpl/list.hpp> +#include <boost/test/unit_test.hpp> +#include <cuda_runtime.h> +#include <dune/istl/bcrsmatrix.hh> +#include <opm/simulators/linalg/cuistl/CuJac.hpp> +#include <opm/simulators/linalg/cuistl/CuSparseMatrix.hpp> +#include <opm/simulators/linalg/cuistl/CuVector.hpp> +#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp> +#include <opm/simulators/linalg/cuistl/detail/cusparse_matrix_operations.hpp> +#include <opm/simulators/linalg/cuistl/detail/fix_zero_diagonal.hpp> +#include <opm/simulators/linalg/cuistl/detail/vector_operations.hpp> + +using NumericTypes = boost::mpl::list<double, float>; + +BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize2, T, NumericTypes) +{ + /* + Test data to validate the Jacobi preconditioner; the expected result is x_1, and the relaxation factor is 0.5 + | |3 1| | 1 0| | | |2| | | | 0.5| | + | |2 1| | 0 1| | | |1| | | |-0.5| | + A = | | d = | | v = | | + | |0 0| |-1 0| | | |3| | | |-1.5| | + | |0 0| | 0 -1| | | |4| | | |-2.0| | + */ + const int N = 2; + const int blocksize = 2; + const int nonZeroes = 3; + using M = Dune::FieldMatrix<T, blocksize, blocksize>; + using SpMatrix = Dune::BCRSMatrix<M>; + using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>; + using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>; + + SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise); + for (auto row = B.createbegin(); row != B.createend(); ++row) { + row.insert(row.index()); + if (row.index() == 0) { + row.insert(row.index() + 1); + } + } + + B[0][0][0][0] = 3.0; + B[0][0][0][1] = 1.0; + B[0][0][1][0] = 2.0; + B[0][0][1][1] = 1.0; + + B[0][1][0][0] = 1.0; + B[0][1][1][1] = 1.0; + + B[1][1][0][0] = -1.0; + B[1][1][1][1] = -1.0; + + auto cujac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5)); + + Vector vVector(2); + Vector dVector(2); + dVector[0][0] = 2.0; + dVector[0][1] = 1.0; + dVector[1][0] = 3.0; + dVector[1][1] = 4.0; + + const T expectedAns[2][2] = {{1.0 / 2.0, -1.0 / 2.0}, {-3.0 / 2.0, -2.0}}; + + cujac.apply(vVector, dVector); + BOOST_CHECK_CLOSE(vVector[0][0], expectedAns[0][0], 1e-7); + BOOST_CHECK_CLOSE(vVector[0][1], expectedAns[0][1], 1e-7); + BOOST_CHECK_CLOSE(vVector[1][0], expectedAns[1][0], 1e-7); + BOOST_CHECK_CLOSE(vVector[1][1], expectedAns[1][1], 1e-7); +} + +BOOST_AUTO_TEST_CASE_TEMPLATE(CUJACApplyBlocksize1, T, NumericTypes) +{ + /* + Test data to validate the Jacobi preconditioner; the expected result is x_1, and the relaxation factor is 0.5 + | 3 1 1 0| |2| | 1/3| + | 2 1 0 1| |1| | 1/2| + A = | 0 0 -1 0| d = |3| v = |-3/2| + | 0 0 0 -1| |4| | -2| + */ + const int N = 4; + const int blocksize = 1; + const int nonZeroes = 8; + using M = Dune::FieldMatrix<T, blocksize, blocksize>; + using SpMatrix = Dune::BCRSMatrix<M>; + using Vector = Dune::BlockVector<Dune::FieldVector<T, blocksize>>; + using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<T>, Opm::cuistl::CuVector<T>>; + + SpMatrix B(N, N, nonZeroes, SpMatrix::row_wise); + for (auto row = B.createbegin(); row != B.createend(); ++row) { + row.insert(row.index()); + if (row.index() == 0) { + row.insert(row.index() + 1); + row.insert(row.index() + 2); + } + if (row.index() == 1) { + row.insert(row.index() - 1); + row.insert(row.index() + 2); + } + } + + B[0][0][0][0] = 3.0; + B[0][1][0][0] = 1.0; + B[0][2][0][0] = 1.0; + + B[1][0][0][0] = 2.0; + B[1][1][0][0] = 1.0; + B[1][3][0][0] = 1.0; + + B[2][2][0][0] = -1.0; + 
B[3][3][0][0] = -1.0; + + auto cujac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(B, 0.5)); + + Vector vVector(4); + Vector dVector(4); + dVector[0] = 2.0; + dVector[1] = 1.0; + dVector[2] = 3.0; + dVector[3] = 4.0; + + const T expectedAns[4] = {1.0 / 3.0, 1.0 / 2.0, -3.0 / 2.0, -2.0}; + + cujac.apply(vVector, dVector); + BOOST_CHECK_CLOSE(vVector[0], expectedAns[0], 1e-7); + BOOST_CHECK_CLOSE(vVector[1], expectedAns[1], 1e-7); + BOOST_CHECK_CLOSE(vVector[2], expectedAns[2], 1e-7); + BOOST_CHECK_CLOSE(vVector[3], expectedAns[3], 1e-7); +} \ No newline at end of file
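
Usage note (illustrative sketch, not part of the patch): the snippet below shows how the new GPU Jacobi preconditioner can be constructed and applied directly through the existing PreconditionerAdapter, mirroring tests/cuistl/test_cujac.cpp. The matrix values and the relaxation factor of 0.5 are taken from that test; the main() wrapper is only for illustration and assumes a CUDA-capable device is available at run time.

#include <dune/common/fmatrix.hh>
#include <dune/common/fvector.hh>
#include <dune/istl/bcrsmatrix.hh>
#include <dune/istl/bvector.hh>
#include <memory>
#include <opm/simulators/linalg/cuistl/CuJac.hpp>
#include <opm/simulators/linalg/cuistl/CuVector.hpp>
#include <opm/simulators/linalg/cuistl/PreconditionerAdapter.hpp>

int main()
{
    constexpr int blocksize = 2;
    using Block = Dune::FieldMatrix<double, blocksize, blocksize>;
    using SpMatrix = Dune::BCRSMatrix<Block>;
    using Vector = Dune::BlockVector<Dune::FieldVector<double, blocksize>>;
    using CuJac = Opm::cuistl::CuJac<SpMatrix, Opm::cuistl::CuVector<double>, Opm::cuistl::CuVector<double>>;

    // Assemble the same 2x2-block matrix as in test_cujac.cpp: two block rows,
    // three non-zero blocks (the second block row only has its diagonal block).
    SpMatrix A(2, 2, 3, SpMatrix::row_wise);
    for (auto row = A.createbegin(); row != A.createend(); ++row) {
        row.insert(row.index());
        if (row.index() == 0) {
            row.insert(row.index() + 1);
        }
    }
    A[0][0][0][0] = 3.0;
    A[0][0][0][1] = 1.0;
    A[0][0][1][0] = 2.0;
    A[0][0][1][1] = 1.0;
    A[0][1][0][0] = 1.0;
    A[0][1][1][1] = 1.0;
    A[1][1][0][0] = -1.0;
    A[1][1][1][1] = -1.0;

    // The adapter copies the defect to the GPU, computes v = w * D^{-1} * d with the
    // kernels added in this patch, and copies the update back into the Dune vector.
    auto jac = Opm::cuistl::PreconditionerAdapter<Vector, Vector, CuJac>(std::make_shared<CuJac>(A, 0.5));

    Vector v(2);
    Vector d(2);
    d[0][0] = 2.0;
    d[0][1] = 1.0;
    d[1][0] = 3.0;
    d[1][1] = 4.0;

    jac.apply(v, d); // expected: v = {0.5, -0.5, -1.5, -2.0}

    return 0;
}

Through the creators registered in PreconditionerFactory_impl.hpp, the same preconditioner can instead be selected from the linear solver configuration by setting the preconditioner type to "CUJac", with an optional "relaxation" parameter (default 1.0).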