diff --git a/CMakeLists.txt b/CMakeLists.txt index e78d09945..ca8fc7fc8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -549,7 +549,7 @@ if(CUDA_FOUND) cuda_check_last_error cublas_handle cujac - cudilu + GpuDILU cusparse_handle cuSparse_matrix_operations cuVector_operations diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index b7423ceb0..862090b64 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -217,7 +217,7 @@ if (HAVE_CUDA) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuView.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg detail/vector_operations.cu) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuSparseMatrix.cpp) - ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuDILU.cpp) + ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg GpuDILU.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg OpmCuILU0.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuJac.cpp) ADD_CUDA_OR_HIP_FILE(MAIN_SOURCE_FILES opm/simulators/linalg CuSeqILU0.cpp) @@ -237,7 +237,7 @@ if (HAVE_CUDA) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/DILUKernels.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/ILU0Kernels.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg detail/preconditionerKernels/JacKernels.hpp) - ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuDILU.hpp) + ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg GpuDILU.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg OpmCuILU0.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuJac.hpp) ADD_CUDA_OR_HIP_FILE(PUBLIC_HEADER_FILES opm/simulators/linalg CuVector.hpp) @@ -394,7 +394,7 @@ if (HAVE_CUDA) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cusparse_safe_call.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuda_safe_call.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuda_check_last_error.cpp) - ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cudilu.cpp) + ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_GpuDILU.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cujac.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuowneroverlapcopy.cpp) ADD_CUDA_OR_HIP_FILE(TEST_SOURCE_FILES tests test_cuseqilu0.cpp) diff --git a/opm/simulators/linalg/PreconditionerFactoryGPUIncludeWrapper.hpp b/opm/simulators/linalg/PreconditionerFactoryGPUIncludeWrapper.hpp index 455c5a444..a9291ecc1 100644 --- a/opm/simulators/linalg/PreconditionerFactoryGPUIncludeWrapper.hpp +++ b/opm/simulators/linalg/PreconditionerFactoryGPUIncludeWrapper.hpp @@ -23,7 +23,7 @@ #if HAVE_CUDA #if USE_HIP #include -#include +#include #include #include #include @@ -32,7 +32,7 @@ #include #else #include -#include +#include #include #include #include diff --git a/opm/simulators/linalg/PreconditionerFactory_impl.hpp b/opm/simulators/linalg/PreconditionerFactory_impl.hpp index 0ed82b1eb..c669ec037 100644 --- a/opm/simulators/linalg/PreconditionerFactory_impl.hpp +++ b/opm/simulators/linalg/PreconditionerFactory_impl.hpp @@ -350,10 +350,10 @@ struct StandardPreconditioners { const bool split_matrix = prm.get("split_matrix", true); const bool tune_gpu_kernels = prm.get("tune_gpu_kernels", true); using field_type = typename V::field_type; - using CuDILU = typename gpuistl::CuDILU, gpuistl::CuVector>; - auto cuDILU = std::make_shared(op.getmat(), split_matrix, tune_gpu_kernels); + using GpuDILU = typename gpuistl::GpuDILU, gpuistl::CuVector>; + auto cuDILU = std::make_shared(op.getmat(), split_matrix, tune_gpu_kernels); - auto adapted = std::make_shared>(cuDILU); + auto adapted = std::make_shared>(cuDILU); auto wrapped = std::make_shared>(adapted, comm); return wrapped; }); @@ -629,7 +629,7 @@ struct StandardPreconditioners { const bool split_matrix = prm.get("split_matrix", true); const bool tune_gpu_kernels = prm.get("tune_gpu_kernels", true); using field_type = typename V::field_type; - using CUDILU = typename gpuistl::CuDILU, gpuistl::CuVector>; + using CUDILU = typename gpuistl::GpuDILU, gpuistl::CuVector>; return std::make_shared>(std::make_shared(op.getmat(), split_matrix, tune_gpu_kernels)); }); @@ -639,11 +639,11 @@ struct StandardPreconditioners { using block_type = typename V::block_type; using VTo = Dune::BlockVector>; using matrix_type_to = typename Dune::BCRSMatrix>; - using CuDILU = typename gpuistl::CuDILU, gpuistl::CuVector>; - using Adapter = typename gpuistl::PreconditionerAdapter; + using GpuDILU = typename gpuistl::GpuDILU, gpuistl::CuVector>; + using Adapter = typename gpuistl::PreconditionerAdapter; using Converter = typename gpuistl::PreconditionerConvertFieldTypeAdapter; auto converted = std::make_shared(op.getmat()); - auto adapted = std::make_shared(std::make_shared(converted->getConvertedMatrix(), split_matrix, tune_gpu_kernels)); + auto adapted = std::make_shared(std::make_shared(converted->getConvertedMatrix(), split_matrix, tune_gpu_kernels)); converted->setUnderlyingPreconditioner(adapted); return converted; }); diff --git a/opm/simulators/linalg/cuistl/CuDILU.cpp b/opm/simulators/linalg/cuistl/GpuDILU.cpp similarity index 93% rename from opm/simulators/linalg/cuistl/CuDILU.cpp rename to opm/simulators/linalg/cuistl/GpuDILU.cpp index 77d4e049f..4e374cea1 100644 --- a/opm/simulators/linalg/cuistl/CuDILU.cpp +++ b/opm/simulators/linalg/cuistl/GpuDILU.cpp @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include @@ -41,7 +41,7 @@ namespace Opm::gpuistl { template -CuDILU::CuDILU(const M& A, bool splitMatrix, bool tuneKernels) +GpuDILU::GpuDILU(const M& A, bool splitMatrix, bool tuneKernels) : m_cpuMatrix(A) , m_levelSets(Opm::getMatrixRowColoring(m_cpuMatrix, Opm::ColoringType::LOWER)) , m_reorderedToNatural(detail::createReorderedToNatural(m_levelSets)) @@ -89,13 +89,13 @@ CuDILU::CuDILU(const M& A, bool splitMatrix, bool tuneKernels) template void -CuDILU::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b) +GpuDILU::pre([[maybe_unused]] X& x, [[maybe_unused]] Y& b) { } template void -CuDILU::apply(X& v, const Y& d) +GpuDILU::apply(X& v, const Y& d) { OPM_TIMEBLOCK(prec_apply); { @@ -105,7 +105,7 @@ CuDILU::apply(X& v, const Y& d) template void -CuDILU::apply(X& v, const Y& d, int lowerSolveThreadBlockSize, int upperSolveThreadBlockSize) +GpuDILU::apply(X& v, const Y& d, int lowerSolveThreadBlockSize, int upperSolveThreadBlockSize) { int levelStartIdx = 0; for (int level = 0; level < m_levelSets.size(); ++level) { @@ -172,20 +172,20 @@ CuDILU::apply(X& v, const Y& d, int lowerSolveThreadBlockSize, int u template void -CuDILU::post([[maybe_unused]] X& x) +GpuDILU::post([[maybe_unused]] X& x) { } template Dune::SolverCategory::Category -CuDILU::category() const +GpuDILU::category() const { return Dune::SolverCategory::sequential; } template void -CuDILU::update() +GpuDILU::update() { OPM_TIMEBLOCK(prec_update); { @@ -195,7 +195,7 @@ CuDILU::update() template void -CuDILU::update(int moveThreadBlockSize, int factorizationBlockSize) +GpuDILU::update(int moveThreadBlockSize, int factorizationBlockSize) { m_gpuMatrix.updateNonzeroValues(m_cpuMatrix, true); // send updated matrix to the gpu computeDiagAndMoveReorderedData(moveThreadBlockSize, factorizationBlockSize); @@ -203,7 +203,7 @@ CuDILU::update(int moveThreadBlockSize, int factorizationBlockSize) template void -CuDILU::computeDiagAndMoveReorderedData(int moveThreadBlockSize, int factorizationBlockSize) +GpuDILU::computeDiagAndMoveReorderedData(int moveThreadBlockSize, int factorizationBlockSize) { if (m_splitMatrix) { detail::copyMatDataToReorderedSplit( @@ -264,7 +264,7 @@ CuDILU::computeDiagAndMoveReorderedData(int moveThreadBlockSize, int template void -CuDILU::tuneThreadBlockSizes() +GpuDILU::tuneThreadBlockSizes() { // tune the thread-block size of the update function auto tuneMoveThreadBlockSizeInUpdate = [this](int moveThreadBlockSize){ @@ -295,10 +295,10 @@ CuDILU::tuneThreadBlockSizes() } // namespace Opm::gpuistl #define INSTANTIATE_CUDILU_DUNE(realtype, blockdim) \ - template class ::Opm::gpuistl::CuDILU>, \ + template class ::Opm::gpuistl::GpuDILU>, \ ::Opm::gpuistl::CuVector, \ ::Opm::gpuistl::CuVector>; \ - template class ::Opm::gpuistl::CuDILU>, \ + template class ::Opm::gpuistl::GpuDILU>, \ ::Opm::gpuistl::CuVector, \ ::Opm::gpuistl::CuVector> diff --git a/opm/simulators/linalg/cuistl/CuDILU.hpp b/opm/simulators/linalg/cuistl/GpuDILU.hpp similarity index 96% rename from opm/simulators/linalg/cuistl/CuDILU.hpp rename to opm/simulators/linalg/cuistl/GpuDILU.hpp index 7a72eb246..af26306c2 100644 --- a/opm/simulators/linalg/cuistl/CuDILU.hpp +++ b/opm/simulators/linalg/cuistl/GpuDILU.hpp @@ -16,8 +16,8 @@ You should have received a copy of the GNU General Public License along with OPM. If not, see . */ -#ifndef OPM_CUDILU_HPP -#define OPM_CUDILU_HPP +#ifndef OPM_GPUDILU_HPP +#define OPM_GPUDILU_HPP #include #include @@ -40,7 +40,7 @@ namespace Opm::gpuistl //! \note We assume X and Y are both CuVector, but we leave them as template //! arguments in case of future additions. template -class CuDILU : public Dune::PreconditionerWithUpdate +class GpuDILU : public Dune::PreconditionerWithUpdate { public: //! \brief The matrix type the preconditioner is for. @@ -60,7 +60,7 @@ public: //! \param A The matrix to operate on. //! \param w The relaxation factor. //! - explicit CuDILU(const M& A, bool splitMatrix, bool tuneKernels); + explicit GpuDILU(const M& A, bool splitMatrix, bool tuneKernels); //! \brief Prepare the preconditioner. //! \note Does nothing at the time being. diff --git a/tests/cuistl/test_cudilu.cpp b/tests/cuistl/test_GpuDILU.cpp similarity index 94% rename from tests/cuistl/test_cudilu.cpp rename to tests/cuistl/test_GpuDILU.cpp index 4d023eb9b..072e81a9f 100644 --- a/tests/cuistl/test_cudilu.cpp +++ b/tests/cuistl/test_GpuDILU.cpp @@ -18,14 +18,14 @@ */ #include -#define BOOST_TEST_MODULE TestCuDiluHelpers +#define BOOST_TEST_MODULE TestGpuDILU #include #include #include #include #include -#include +#include #include #include #include @@ -44,8 +44,8 @@ using Sp2x2BlockMatrix = Dune::BCRSMatrix; using CuMatrix = Opm::gpuistl::CuSparseMatrix; using CuIntVec = Opm::gpuistl::CuVector; using CuFloatingPointVec = Opm::gpuistl::CuVector; -using CuDilu1x1 = Opm::gpuistl::CuDILU; -using CuDilu2x2 = Opm::gpuistl::CuDILU; +using GpuDilu1x1 = Opm::gpuistl::GpuDILU; +using GpuDilu2x2 = Opm::gpuistl::GpuDILU; Sp1x1BlockMatrix get1x1BlockTestMatrix() @@ -211,7 +211,7 @@ BOOST_AUTO_TEST_CASE(TestDiluApply) // Initialize preconditioner objects Dune::MultithreadDILU cpudilu(matA); - auto gpudilu = CuDilu1x1(matA, true, true); + auto gpudilu = GpuDilu1x1(matA, true, true); // Use the apply gpudilu.apply(d_output, d_input); @@ -224,7 +224,7 @@ BOOST_AUTO_TEST_CASE(TestDiluApply) } auto cudilures = d_output.asStdVector(); - // check that CuDilu results matches that of CPU dilu + // check that GpuDilu results matches that of CPU dilu for (size_t i = 0; i < cudilures.size(); ++i) { BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7); } @@ -235,7 +235,7 @@ BOOST_AUTO_TEST_CASE(TestDiluApplyBlocked) // init matrix with 2x2 blocks Sp2x2BlockMatrix matA = get2x2BlockTestMatrix(); - auto gpudilu = CuDilu2x2(matA, true, true); + auto gpudilu = GpuDilu2x2(matA, true, true); Dune::MultithreadDILU cpudilu(matA); // create input/output buffers for the apply @@ -275,7 +275,7 @@ BOOST_AUTO_TEST_CASE(TestDiluInitAndUpdateLarge) { // create gpu dilu preconditioner Sp1x1BlockMatrix matA = get1x1BlockTestMatrix(); - auto gpudilu = CuDilu1x1(matA, true, true); + auto gpudilu = GpuDilu1x1(matA, true, true); matA[0][0][0][0] = 11.0; matA[0][1][0][0] = 12.0; @@ -325,7 +325,7 @@ BOOST_AUTO_TEST_CASE(TestDiluInitAndUpdateLarge) } auto cudilures = d_output.asStdVector(); - // check that CuDilu results matches that of CPU dilu + // check that GpuDilu results matches that of CPU dilu for (size_t i = 0; i < cudilures.size(); ++i) { BOOST_CHECK_CLOSE(cudilures[i], cpudilures[i], 1e-7); }