diff --git a/CMakeLists.txt b/CMakeLists.txt index d722402a7..39dff6180 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -221,9 +221,6 @@ if(amgcl_FOUND) # Hence we set AMGCL_INCLUDE_DIRS. get_property(AMGCL_INCLUDE_DIRS TARGET amgcl::amgcl PROPERTY INTERFACE_INCLUDE_DIRECTORIES) include_directories(SYSTEM ${AMGCL_INCLUDE_DIRS}) - if(CUDA_FOUND) - set_source_files_properties(opm/simulators/linalg/bda/amgclSolverBackend.cpp PROPERTIES LANGUAGE CUDA) - endif() endif() if(OpenCL_FOUND) diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index 202fc1cdc..d72a0b420 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -119,6 +119,9 @@ if(HAVE_AMGCL) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cpp) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/MultisegmentWellContribution.cpp) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/amgclSolverBackend.cpp) + if(CUDA_FOUND) + list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/amgclSolverBackend.cu) + endif() endif() if(MPI_FOUND) list(APPEND MAIN_SOURCE_FILES opm/simulators/utils/ParallelEclipseState.cpp diff --git a/opm/simulators/linalg/bda/amgclSolverBackend.cpp b/opm/simulators/linalg/bda/amgclSolverBackend.cpp index dfb39065f..9602f26c0 100644 --- a/opm/simulators/linalg/bda/amgclSolverBackend.cpp +++ b/opm/simulators/linalg/bda/amgclSolverBackend.cpp @@ -30,6 +30,10 @@ #include +#if HAVE_VEXCL +#include +#include +#endif namespace bda { @@ -74,10 +78,10 @@ void amgclSolverBackend::initialize(int N_, int nnz_, int dim) { if (file.is_open()) { // if file exists, read parameters from file boost::property_tree::read_json(file, prm); - backend_type_string = prm.get("backend_type"); // defaults to cpu if not specified + backend_type_string = prm.get("backend_type", "cpu"); // defaults to cpu if not specified out << "Using parameters from " << filename << ":\n"; } else { // otherwise use default parameters, same as Dune - prm.put("backend_type", "cpu"); + prm.put("backend_type", "cpu"); // put it in the tree so it gets printed prm.put("precond.class", "relaxation"); prm.put("precond.type", "ilu0"); prm.put("precond.damping", 0.9); @@ -85,6 +89,7 @@ void amgclSolverBackend::initialize(int N_, int nnz_, int dim) { prm.put("solver.tol", tolerance); prm.put("solver.maxiter", maxit); prm.put("solver.verbose", verbosity >= 2); + backend_type_string = prm.get("backend_type", "cpu"); out << "Using default amgcl parameters:\n"; } @@ -98,16 +103,11 @@ void amgclSolverBackend::initialize(int N_, int nnz_, int dim) { } else if (backend_type_string == "vexcl") { backend_type = Amgcl_backend_type::vexcl; } else { - OPM_THROW(std::logic_error, "Error unknown value for amgcl parameter 'backend_type'"); + OPM_THROW(std::logic_error, "Error unknown value for amgcl parameter 'backend_type', use [cpu|cuda|vexcl]"); } if (backend_type == Amgcl_backend_type::cuda) { -#if HAVE_CUDA - cudaDeviceProp prop; - cudaGetDeviceProperties(&prop, deviceID); - out << prop.name << std::endl; - cusparseCreate(&CUDA_bprm.cusparse_handle); -#else +#if !HAVE_CUDA OPM_THROW(std::logic_error, "Error amgcl is trying to use CUDA, but CUDA was not found by CMake"); #endif } @@ -211,33 +211,11 @@ void solve_vexcl( template void amgclSolverBackend::solve_system(double *b, BdaResult &res) { Timer t; - int iters = 0; - double error = 0.0; try { if (backend_type == Amgcl_backend_type::cuda) { // use CUDA #if HAVE_CUDA - // create matrix object - auto A = std::tie(N, A_rows, A_cols, A_vals); - - // create solver and construct preconditioner - // don't reuse this unless the preconditioner can be reused - CUDA_Solver solve(A, prm, CUDA_bprm); - - // print solver structure (once) - std::call_once(print_info, [&](){ - std::ostringstream out; - out << solve << std::endl; - OpmLog::info(out.str()); - }); - - thrust::device_vector B(b, b + N); - thrust::device_vector X(N, 0.0); - - // actually solve - std::tie(iters, error) = solve(B, X); - - thrust::copy(X.begin(), X.end(), x.begin()); + solve_cuda(b); #endif } else if (backend_type == Amgcl_backend_type::cpu) { // use builtin backend (CPU) // create matrix object diff --git a/opm/simulators/linalg/bda/amgclSolverBackend.cu b/opm/simulators/linalg/bda/amgclSolverBackend.cu new file mode 100644 index 000000000..a19901c01 --- /dev/null +++ b/opm/simulators/linalg/bda/amgclSolverBackend.cu @@ -0,0 +1,89 @@ +/* + Copyright 2021 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#include +#include + +#include + +#include +#include +#include + +/// This file is only compiled when both amgcl and CUDA are found by CMake + +namespace bda +{ + +using Opm::OpmLog; + + +template +void amgclSolverBackend::solve_cuda(double *b) { + typedef amgcl::backend::cuda CUDA_Backend; + typedef amgcl::make_solver, amgcl::runtime::solver::wrapper > CUDA_Solver; + + static typename CUDA_Backend::params CUDA_bprm; // amgcl backend parameters, only used for cusparseHandle + + // initialize cusparse handle for amgcl, cannot merge this call_once with 'print solver structure' + std::call_once(cuda_initialize, [&](){ + cudaDeviceProp prop; + cudaGetDeviceProperties(&prop, deviceID); + std::ostringstream out; + out << prop.name << std::endl; + OpmLog::info(out.str()); + cusparseCreate(&CUDA_bprm.cusparse_handle); + }); + + // create matrix object + auto A = std::tie(N, A_rows, A_cols, A_vals); + + // create solver and construct preconditioner + // don't reuse this unless the preconditioner can be reused + CUDA_Solver solve(A, prm, CUDA_bprm); + + // print solver structure (once) + std::call_once(print_info, [&](){ + std::ostringstream out; + out << solve << std::endl; + OpmLog::info(out.str()); + }); + + thrust::device_vector B(b, b + N); + thrust::device_vector X(N, 0.0); + + // actually solve + std::tie(iters, error) = solve(B, X); + + thrust::copy(X.begin(), X.end(), x.begin()); +} + + +#define INSTANTIATE_BDA_FUNCTIONS(n) \ +template void amgclSolverBackend::solve_cuda(double*); \ + +INSTANTIATE_BDA_FUNCTIONS(1); +INSTANTIATE_BDA_FUNCTIONS(2); +INSTANTIATE_BDA_FUNCTIONS(3); +INSTANTIATE_BDA_FUNCTIONS(4); + +#undef INSTANTIATE_BDA_FUNCTIONS + +} // namespace bda + diff --git a/opm/simulators/linalg/bda/amgclSolverBackend.hpp b/opm/simulators/linalg/bda/amgclSolverBackend.hpp index 54264e37b..b8bcf2e55 100644 --- a/opm/simulators/linalg/bda/amgclSolverBackend.hpp +++ b/opm/simulators/linalg/bda/amgclSolverBackend.hpp @@ -38,16 +38,6 @@ #include #include -#if HAVE_CUDA -#include -#include -#endif - -#if HAVE_VEXCL -#include -#include -#endif - namespace bda { @@ -69,11 +59,6 @@ class amgclSolverBackend : public BdaSolver using Base::tolerance; using Base::initialized; -#if HAVE_CUDA - typedef amgcl::backend::cuda CUDA_Backend; - typedef amgcl::make_solver, amgcl::runtime::solver::wrapper > CUDA_Solver; -#endif - typedef amgcl::static_matrix dmat_type; // matrix value type in double precision typedef amgcl::static_matrix dvec_type; // the corresponding vector value type typedef amgcl::backend::builtin CPU_Backend; @@ -97,8 +82,12 @@ private: Amgcl_backend_type backend_type = cpu; boost::property_tree::ptree prm; // amgcl parameters + int iters = 0; + double error = 0.0; + #if HAVE_CUDA - typename CUDA_Backend::params CUDA_bprm; // amgcl backend parameters, only used for cusparseHandle + std::once_flag cuda_initialize; + void solve_cuda(double *b); #endif /// Initialize GPU and allocate memory