mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
commit
8142788b58
@ -240,12 +240,6 @@ if(OpenCL_FOUND)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
find_package(rocalution)
|
||||
if(ROCALUTION_FOUND)
|
||||
set(HAVE_ROCALUTION 1)
|
||||
set(COMPILE_BDA_BRIDGE 1)
|
||||
endif()
|
||||
|
||||
|
||||
macro (config_hook)
|
||||
opm_need_version_of ("dune-common")
|
||||
@ -278,6 +272,27 @@ macro (fortran_hook)
|
||||
endmacro (fortran_hook)
|
||||
|
||||
macro (files_hook)
|
||||
if(hip_FOUND)
|
||||
get_filename_component(CXX_COMPILER ${CMAKE_CXX_COMPILER} NAME)
|
||||
if(hip_VERSION VERSION_LESS "5.3")
|
||||
if(ROCALUTION_FOUND AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
message(WARNING " Cannot use hipcc/clang for rocalution with rocm < 5.3\n Disabling rocalutionSolver")
|
||||
unset(ROCALUTION_FOUND)
|
||||
unset(HAVE_ROCALUTION)
|
||||
endif()
|
||||
else()
|
||||
if(rocsparse_FOUND AND rocblas_FOUND)
|
||||
set(HAVE_ROCSPARSE 1)
|
||||
set(COMPILE_BDA_BRIDGE 1)
|
||||
else()
|
||||
unset(HAVE_ROCSPARSE)
|
||||
endif()
|
||||
endif()
|
||||
if(ROCALUTION_FOUND)
|
||||
set(HAVE_ROCALUTION 1)
|
||||
set(COMPILE_BDA_BRIDGE 1)
|
||||
endif()
|
||||
endif()
|
||||
if(MPI_FOUND AND HDF5_FOUND AND NOT HDF5_IS_PARALLEL)
|
||||
message(WARNING "When building parallel OPM flow we need a "
|
||||
"parallel version of hdf5, but found only a serial one. "
|
||||
@ -531,6 +546,11 @@ if(ROCALUTION_FOUND)
|
||||
target_include_directories(opmsimulators PUBLIC ${rocalution_INCLUDE_DIR}/rocalution)
|
||||
endif()
|
||||
|
||||
if(rocsparse_FOUND AND rocblas_FOUND)
|
||||
target_link_libraries( opmsimulators PUBLIC roc::rocsparse )
|
||||
target_link_libraries( opmsimulators PUBLIC roc::rocblas )
|
||||
endif()
|
||||
|
||||
if(VexCL_FOUND)
|
||||
target_link_libraries( opmsimulators PUBLIC OPM::VexCL::OpenCL )
|
||||
endif()
|
||||
|
@ -161,6 +161,9 @@ endif()
|
||||
if(ROCALUTION_FOUND)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocalutionSolverBackend.cpp)
|
||||
endif()
|
||||
if(rocsparse_FOUND AND rocblas_FOUND)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocsparseSolverBackend.cpp)
|
||||
endif()
|
||||
if(COMPILE_BDA_BRIDGE)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp)
|
||||
endif()
|
||||
@ -226,6 +229,9 @@ endif()
|
||||
if(ROCALUTION_FOUND)
|
||||
list(APPEND TEST_SOURCE_FILES tests/test_rocalutionSolver.cpp)
|
||||
endif()
|
||||
if(rocsparse_FOUND AND rocblas_FOUND)
|
||||
list(APPEND TEST_SOURCE_FILES tests/test_rocsparseSolver.cpp)
|
||||
endif()
|
||||
if(HDF5_FOUND)
|
||||
list(APPEND TEST_SOURCE_FILES tests/test_HDF5File.cpp)
|
||||
list(APPEND TEST_SOURCE_FILES tests/test_HDF5Serializer.cpp)
|
||||
@ -337,6 +343,7 @@ list (APPEND PUBLIC_HEADER_FILES
|
||||
opm/simulators/linalg/bda/Matrix.hpp
|
||||
opm/simulators/linalg/bda/MultisegmentWellContribution.hpp
|
||||
opm/simulators/linalg/bda/rocalutionSolverBackend.hpp
|
||||
opm/simulators/linalg/bda/rocsparseSolverBackend.hpp
|
||||
opm/simulators/linalg/bda/WellContributions.hpp
|
||||
opm/simulators/linalg/amgcpr.hh
|
||||
opm/simulators/linalg/twolevelmethodcpr.hh
|
||||
|
@ -12,6 +12,7 @@ set (opm-simulators_CONFIG_VAR
|
||||
HAVE_AMGCL
|
||||
HAVE_VEXCL
|
||||
HAVE_ROCALUTION
|
||||
HAVE_ROCSPARSE
|
||||
HAVE_SUITESPARSE_UMFPACK_H
|
||||
HAVE_DUNE_ISTL
|
||||
DUNE_ISTL_WITH_CHECKING
|
||||
@ -44,6 +45,9 @@ set (opm-simulators_DEPS
|
||||
"SuperLU"
|
||||
# ROCALUTION from ROCM framework
|
||||
"rocalution"
|
||||
# packages from ROCm framework
|
||||
"rocblas"
|
||||
"rocsparse"
|
||||
# OPM dependency
|
||||
"opm-common REQUIRED"
|
||||
"opm-grid REQUIRED"
|
||||
|
@ -45,6 +45,10 @@
|
||||
#include <opm/simulators/linalg/bda/rocalutionSolverBackend.hpp>
|
||||
#endif
|
||||
|
||||
#if HAVE_ROCSPARSE
|
||||
#include <opm/simulators/linalg/bda/rocsparseSolverBackend.hpp>
|
||||
#endif
|
||||
|
||||
typedef Dune::InverseOperatorResult InverseOperatorResult;
|
||||
|
||||
namespace Opm
|
||||
@ -91,6 +95,13 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
|
||||
backend.reset(new Opm::Accelerator::rocalutionSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance));
|
||||
#else
|
||||
OPM_THROW(std::logic_error, "Error rocalutionSolver was chosen, but rocalution was not found by CMake");
|
||||
#endif
|
||||
} else if (accelerator_mode.compare("rocsparse") == 0) {
|
||||
#if HAVE_ROCSPARSE
|
||||
use_gpu = true; // should be replaced by a 'use_bridge' boolean
|
||||
backend.reset(new Opm::Accelerator::rocsparseSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance, platformID, deviceID));
|
||||
#else
|
||||
OPM_THROW(std::logic_error, "Error rocsparseSolver was chosen, but rocsparse/rocblas was not found by CMake");
|
||||
#endif
|
||||
} else if (accelerator_mode.compare("none") == 0) {
|
||||
use_gpu = false;
|
||||
|
@ -52,6 +52,12 @@ WellContributions::create(const std::string& accelerator_mode, bool useWellConn)
|
||||
OPM_THROW(std::runtime_error, "Cannot initialize well contributions: OpenCL is not enabled");
|
||||
#endif
|
||||
}
|
||||
else if(accelerator_mode.compare("rocsparse") == 0){
|
||||
if (!useWellConn) {
|
||||
OPM_THROW(std::logic_error, "Error rocsparse requires --matrix-add-well-contributions=true");
|
||||
}
|
||||
return std::make_unique<WellContributions>();
|
||||
}
|
||||
else if(accelerator_mode.compare("amgcl") == 0){
|
||||
if (!useWellConn) {
|
||||
OPM_THROW(std::logic_error, "Error amgcl requires --matrix-add-well-contributions=true");
|
||||
|
@ -26,11 +26,27 @@
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <dune/common/timer.hh>
|
||||
|
||||
// WellContributions are included via the solver
|
||||
// MultisegmentWellContribution includes the cuda runtime if found by CMake
|
||||
// this leads to inclusion of both amd_hip_vector_types.h and vector_types.h
|
||||
// which both define vector types like uchar2, short3 and double4.
|
||||
// Restore the value (if defined) afterwards.
|
||||
#ifdef HAVE_CUDA
|
||||
#define HIP_HAVE_CUDA_DEFINED HAVE_CUDA
|
||||
#endif
|
||||
|
||||
#undef HAVE_CUDA
|
||||
|
||||
#include <opm/simulators/linalg/bda/rocalutionSolverBackend.hpp>
|
||||
|
||||
#include <rocalution.hpp>
|
||||
#include <base/matrix_formats_ind.hpp> // check if blocks are interpreted as row-major or column-major
|
||||
|
||||
#ifdef HIP_HAVE_CUDA_DEFINED
|
||||
#define HAVE_CUDA HIP_HAVE_CUDA_DEFINED
|
||||
#undef HIP_HAVE_CUDA_DEFINED
|
||||
#endif
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
namespace Accelerator
|
||||
|
612
opm/simulators/linalg/bda/rocsparseSolverBackend.cpp
Normal file
612
opm/simulators/linalg/bda/rocsparseSolverBackend.cpp
Normal file
@ -0,0 +1,612 @@
|
||||
/*
|
||||
Copyright 2023 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <config.h>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <dune/common/timer.hh>
|
||||
|
||||
// WellContributions are included via the solver
|
||||
// MultisegmentWellContribution includes the cuda runtime if found by CMake
|
||||
// this leads to inclusion of both amd_hip_vector_types.h and vector_types.h
|
||||
// which both define vector types like uchar2, short3 and double4.
|
||||
// Restore the value (if defined) afterwards.
|
||||
#ifdef HAVE_CUDA
|
||||
#define HIP_HAVE_CUDA_DEFINED HAVE_CUDA
|
||||
#endif
|
||||
|
||||
#undef HAVE_CUDA
|
||||
|
||||
#include <opm/simulators/linalg/bda/rocsparseSolverBackend.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
||||
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <hip/hip_version.h>
|
||||
|
||||
#ifdef HIP_HAVE_CUDA_DEFINED
|
||||
#define HAVE_CUDA HIP_HAVE_CUDA_DEFINED
|
||||
#undef HIP_HAVE_CUDA_DEFINED
|
||||
#endif
|
||||
|
||||
#define HIP_CHECK(stat) \
|
||||
{ \
|
||||
if(stat != hipSuccess) \
|
||||
{ \
|
||||
OPM_THROW(std::logic_error, "HIP error"); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define ROCSPARSE_CHECK(stat) \
|
||||
{ \
|
||||
if(stat != rocsparse_status_success) \
|
||||
{ \
|
||||
OPM_THROW(std::logic_error, "rocsparse error"); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define ROCBLAS_CHECK(stat) \
|
||||
{ \
|
||||
if(stat != rocblas_status_success) \
|
||||
{ \
|
||||
OPM_THROW(std::logic_error, "rocblas error"); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
namespace Accelerator
|
||||
{
|
||||
|
||||
using Opm::OpmLog;
|
||||
using Dune::Timer;
|
||||
|
||||
template <unsigned int block_size>
|
||||
rocsparseSolverBackend<block_size>::rocsparseSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_) {
|
||||
hipDevice_t device;
|
||||
if(hipDeviceGet(&device, deviceID) != hipSuccess)
|
||||
{
|
||||
OPM_THROW(std::logic_error, "HIP Error: could not get device");
|
||||
}
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_create_handle(&handle));
|
||||
ROCBLAS_CHECK(rocblas_create_handle(&blas_handle));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_get_version(handle, &ver));
|
||||
ROCSPARSE_CHECK(rocsparse_get_git_rev(handle, rev));
|
||||
|
||||
std::ostringstream out;
|
||||
out << "rocSPARSE version: " << ver / 100000 << "." << ver / 100 % 1000 << "."
|
||||
<< ver % 100 << "-" << rev << "\n";
|
||||
OpmLog::info(out.str());
|
||||
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
ROCSPARSE_CHECK(rocsparse_set_stream(handle, stream));
|
||||
ROCBLAS_CHECK(rocblas_set_stream(blas_handle, stream));
|
||||
}
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
rocsparseSolverBackend<block_size>::~rocsparseSolverBackend() {
|
||||
hipError_t hipstatus = hipStreamSynchronize(stream);
|
||||
if(hipstatus != hipSuccess){
|
||||
OpmLog::error("Could not synchronize with hipStream");
|
||||
}
|
||||
hipstatus = hipStreamDestroy(stream);
|
||||
if(hipstatus != hipSuccess){
|
||||
OpmLog::error("Could not destroy hipStream");
|
||||
}
|
||||
rocsparse_status status1 = rocsparse_destroy_handle(handle);
|
||||
if(status1 != rocsparse_status_success){
|
||||
OpmLog::error("Could not destroy rocsparse handle");
|
||||
}
|
||||
rocblas_status status2 = rocblas_destroy_handle(blas_handle);
|
||||
if(status2 != rocblas_status_success){
|
||||
OpmLog::error("Could not destroy rocblas handle");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
void rocsparseSolverBackend<block_size>::gpu_pbicgstab([[maybe_unused]] WellContributions& wellContribs,
|
||||
BdaResult& res)
|
||||
{
|
||||
float it = 0.5;
|
||||
double rho, rhop, beta, alpha, nalpha, omega, nomega, tmp1, tmp2;
|
||||
double norm, norm_0;
|
||||
double zero = 0.0;
|
||||
double one = 1.0;
|
||||
double mone = -1.0;
|
||||
|
||||
Timer t_total, t_prec(false), t_spmv(false), t_rest(false);
|
||||
|
||||
// HIP_VERSION is defined as (HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH)
|
||||
#if HIP_VERSION >= 50400000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_M,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_x, &zero, d_r));
|
||||
#else
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_M,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
d_x, &zero, d_r));
|
||||
#endif
|
||||
|
||||
ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &mone, d_r, 1));
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_b, 1, d_r, 1));
|
||||
ROCBLAS_CHECK(rocblas_dcopy(blas_handle, N, d_r, 1, d_rw, 1));
|
||||
ROCBLAS_CHECK(rocblas_dcopy(blas_handle, N, d_r, 1, d_p, 1));
|
||||
ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm_0));
|
||||
|
||||
if (verbosity >= 2) {
|
||||
std::ostringstream out;
|
||||
out << std::scientific << "rocsparseSolver initial norm: " << norm_0;
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
|
||||
if (verbosity >= 3) {
|
||||
t_rest.start();
|
||||
}
|
||||
for (it = 0.5; it < maxit; it += 0.5) {
|
||||
rhop = rho;
|
||||
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_r, 1, &rho));
|
||||
|
||||
if (it > 1) {
|
||||
beta = (rho / rhop) * (alpha / omega);
|
||||
nomega = -omega;
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nomega, d_v, 1, d_p, 1));
|
||||
ROCBLAS_CHECK(rocblas_dscal(blas_handle, N, &beta, d_p, 1));
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &one, d_r, 1, d_p, 1));
|
||||
}
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
t_rest.stop();
|
||||
t_prec.start();
|
||||
}
|
||||
|
||||
// apply ilu0
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(handle, dir, \
|
||||
operation, Nb, nnzbs_prec, &one, \
|
||||
descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_p, d_t, rocsparse_solve_policy_auto, d_buffer));
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(handle, dir, \
|
||||
operation, Nb, nnzbs_prec, &one, \
|
||||
descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_t, d_pw, rocsparse_solve_policy_auto, d_buffer));
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
t_prec.stop();
|
||||
t_spmv.start();
|
||||
}
|
||||
|
||||
// spmv
|
||||
#if HIP_VERSION >= 50400000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_M,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_pw, &zero, d_v));
|
||||
#else
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_M,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
d_pw, &zero, d_v));
|
||||
#endif
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
t_spmv.stop();
|
||||
t_rest.start();
|
||||
}
|
||||
|
||||
// apply wellContributions
|
||||
|
||||
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_v, 1, &tmp1));
|
||||
alpha = rho / tmp1;
|
||||
nalpha = -alpha;
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nalpha, d_v, 1, d_r, 1));
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &alpha, d_pw, 1, d_x, 1));
|
||||
ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm));
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
t_rest.stop();
|
||||
}
|
||||
|
||||
if (norm < tolerance * norm_0) {
|
||||
break;
|
||||
}
|
||||
|
||||
it += 0.5;
|
||||
|
||||
// apply ilu0
|
||||
if (verbosity >= 3) {
|
||||
t_prec.start();
|
||||
}
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(handle, dir, \
|
||||
operation, Nb, nnzbs_prec, &one, \
|
||||
descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_r, d_t, rocsparse_solve_policy_auto, d_buffer));
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(handle, dir, \
|
||||
operation, Nb, nnzbs_prec, &one, \
|
||||
descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_t, d_s, rocsparse_solve_policy_auto, d_buffer));
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
t_prec.stop();
|
||||
t_spmv.start();
|
||||
}
|
||||
|
||||
// spmv
|
||||
#if HIP_VERSION >= 50400000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_M,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_s, &zero, d_t));
|
||||
#else
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_M,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
d_s, &zero, d_t));
|
||||
#endif
|
||||
if(verbosity >= 3){
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
t_spmv.stop();
|
||||
t_rest.start();
|
||||
}
|
||||
|
||||
// apply wellContributions
|
||||
|
||||
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_r, 1, &tmp1));
|
||||
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_t, 1, &tmp2));
|
||||
omega = tmp1 / tmp2;
|
||||
nomega = -omega;
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &omega, d_s, 1, d_x, 1));
|
||||
ROCBLAS_CHECK(rocblas_daxpy(blas_handle, N, &nomega, d_t, 1, d_r, 1));
|
||||
|
||||
ROCBLAS_CHECK(rocblas_dnrm2(blas_handle, N, d_r, 1, &norm));
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
t_rest.stop();
|
||||
}
|
||||
|
||||
if (norm < tolerance * norm_0) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (verbosity > 1) {
|
||||
std::ostringstream out;
|
||||
out << "it: " << it << std::scientific << ", norm: " << norm;
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
}
|
||||
|
||||
res.iterations = std::min(it, (float)maxit);
|
||||
res.reduction = norm / norm_0;
|
||||
res.conv_rate = static_cast<double>(pow(res.reduction, 1.0 / it));
|
||||
res.elapsed = t_total.stop();
|
||||
res.converged = (it != (maxit + 0.5));
|
||||
|
||||
if (verbosity >= 1) {
|
||||
std::ostringstream out;
|
||||
out << "=== converged: " << res.converged << ", conv_rate: " << res.conv_rate << ", time: " << res.elapsed << \
|
||||
", time per iteration: " << res.elapsed / it << ", iterations: " << it;
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
if (verbosity >= 3) {
|
||||
std::ostringstream out;
|
||||
out << "rocsparseSolver::prec_apply: " << t_prec.elapsed() << " s\n";
|
||||
out << "rocsparseSolver::spmv: " << t_spmv.elapsed() << " s\n";
|
||||
out << "rocsparseSolver::rest: " << t_rest.elapsed() << " s\n";
|
||||
out << "rocsparseSolver::total_solve: " << res.elapsed << " s\n";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
void rocsparseSolverBackend<block_size>::initialize(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix) {
|
||||
this->Nb = matrix->Nb;
|
||||
this->N = Nb * block_size;
|
||||
this->nnzb = matrix->nnzbs;
|
||||
this->nnz = nnzb * block_size * block_size;
|
||||
nnzbs_prec = nnzb;
|
||||
|
||||
if (jacMatrix) {
|
||||
useJacMatrix = true;
|
||||
nnzbs_prec = jacMatrix->nnzbs;
|
||||
}
|
||||
|
||||
std::ostringstream out;
|
||||
out << "Initializing GPU, matrix size: " << Nb << " blockrows, nnzb: " << nnzb << "\n";
|
||||
if (useJacMatrix) {
|
||||
out << "Blocks in ILU matrix: " << jacMatrix->nnzbs << "\n";
|
||||
}
|
||||
out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance << "\n";
|
||||
out << "PlatformID: " << platformID << ", deviceID: " << deviceID << "\n";
|
||||
OpmLog::info(out.str());
|
||||
out.str("");
|
||||
out.clear();
|
||||
|
||||
mat = matrix;
|
||||
jacMat = jacMatrix;
|
||||
|
||||
HIP_CHECK(hipMalloc((void**)&d_r, sizeof(double) * N));
|
||||
HIP_CHECK(hipMalloc((void**)&d_rw, sizeof(double) * N));
|
||||
HIP_CHECK(hipMalloc((void**)&d_p, sizeof(double) * N));
|
||||
HIP_CHECK(hipMalloc((void**)&d_pw, sizeof(double) * N));
|
||||
HIP_CHECK(hipMalloc((void**)&d_s, sizeof(double) * N));
|
||||
HIP_CHECK(hipMalloc((void**)&d_t, sizeof(double) * N));
|
||||
HIP_CHECK(hipMalloc((void**)&d_v, sizeof(double) * N));
|
||||
|
||||
HIP_CHECK(hipMalloc((void**)&d_Arows, sizeof(rocsparse_int) * (Nb + 1)));
|
||||
HIP_CHECK(hipMalloc((void**)&d_Acols, sizeof(rocsparse_int) * nnzb));
|
||||
HIP_CHECK(hipMalloc((void**)&d_Avals, sizeof(double) * nnz));
|
||||
HIP_CHECK(hipMalloc((void**)&d_x, sizeof(double) * N));
|
||||
HIP_CHECK(hipMalloc((void**)&d_b, sizeof(double) * N));
|
||||
|
||||
if (useJacMatrix) {
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mrows, sizeof(rocsparse_int) * (Nb + 1)));
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mcols, sizeof(rocsparse_int) * nnzbs_prec));
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(double) * nnzbs_prec * block_size * block_size));
|
||||
} else { // preconditioner matrix is same
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(double) * nnzbs_prec * block_size * block_size));
|
||||
d_Mcols = d_Acols;
|
||||
d_Mrows = d_Arows;
|
||||
}
|
||||
|
||||
initialized = true;
|
||||
} // end initialize()
|
||||
|
||||
template <unsigned int block_size>
|
||||
void rocsparseSolverBackend<block_size>::copy_system_to_gpu(double *b) {
|
||||
Timer t;
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(d_Arows, mat->rowPointers, sizeof(rocsparse_int) * (Nb + 1), hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_Acols, mat->colIndices, sizeof(rocsparse_int) * nnzb, hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_Avals, mat->nnzValues, sizeof(double) * nnz, hipMemcpyHostToDevice, stream));
|
||||
if (useJacMatrix) {
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mrows, jacMat->rowPointers, sizeof(rocsparse_int) * (Nb + 1), hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mcols, jacMat->colIndices, sizeof(rocsparse_int) * nnzbs_prec, hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues, sizeof(double) * nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(double) * nnz, hipMemcpyDeviceToDevice, stream));
|
||||
}
|
||||
HIP_CHECK(hipMemsetAsync(d_x, 0, sizeof(double) * N, stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_b, b, sizeof(double) * N, hipMemcpyHostToDevice, stream));
|
||||
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
std::ostringstream out;
|
||||
out << "rocsparseSolver::copy_system_to_gpu(): " << t.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
} // end copy_system_to_gpu()
|
||||
|
||||
// don't copy rowpointers and colindices, they stay the same
|
||||
template <unsigned int block_size>
|
||||
void rocsparseSolverBackend<block_size>::update_system_on_gpu(double *b) {
|
||||
Timer t;
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(d_Avals, mat->nnzValues, sizeof(double) * nnz, hipMemcpyHostToDevice, stream));
|
||||
if (useJacMatrix) {
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues, sizeof(double) * nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(double) * nnz, hipMemcpyDeviceToDevice, stream));
|
||||
}
|
||||
HIP_CHECK(hipMemsetAsync(d_x, 0, sizeof(double) * N, stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_b, b, sizeof(double) * N, hipMemcpyHostToDevice, stream));
|
||||
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
std::ostringstream out;
|
||||
out << "rocsparseSolver::update_system_on_gpu(): " << t.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
} // end update_system_on_gpu()
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool rocsparseSolverBackend<block_size>::analyze_matrix() {
|
||||
size_t d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
|
||||
Timer t;
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_set_pointer_mode(handle, rocsparse_pointer_mode_host));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_info(&ilu_info));
|
||||
#if HIP_VERSION >= 50400000
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_info(&spmv_info));
|
||||
#endif
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_A));
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_M));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_L));
|
||||
ROCSPARSE_CHECK(rocsparse_set_mat_fill_mode(descr_L, rocsparse_fill_mode_lower));
|
||||
ROCSPARSE_CHECK(rocsparse_set_mat_diag_type(descr_L, rocsparse_diag_type_unit));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_U));
|
||||
ROCSPARSE_CHECK(rocsparse_set_mat_fill_mode(descr_U, rocsparse_fill_mode_upper));
|
||||
ROCSPARSE_CHECK(rocsparse_set_mat_diag_type(descr_U, rocsparse_diag_type_non_unit));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrilu0_buffer_size(handle, dir, Nb, nnzbs_prec,
|
||||
descr_M, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_M));
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(handle, dir, operation, Nb, nnzbs_prec,
|
||||
descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_L));
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(handle, dir, operation, Nb, nnzbs_prec,
|
||||
descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_U));
|
||||
|
||||
d_bufferSize = std::max(d_bufferSize_M, std::max(d_bufferSize_L, d_bufferSize_U));
|
||||
|
||||
HIP_CHECK(hipMalloc((void**)&d_buffer, d_bufferSize));
|
||||
|
||||
// analysis of ilu LU decomposition
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrilu0_analysis(handle, dir, \
|
||||
Nb, nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, \
|
||||
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
|
||||
|
||||
int zero_position = 0;
|
||||
rocsparse_status status = rocsparse_bsrilu0_zero_pivot(handle, ilu_info, &zero_position);
|
||||
if (rocsparse_status_success != status) {
|
||||
printf("L has structural and/or numerical zero at L(%d,%d)\n", zero_position, zero_position);
|
||||
return false;
|
||||
}
|
||||
|
||||
// analysis of ilu apply
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(handle, dir, operation, \
|
||||
Nb, nnzbs_prec, descr_L, d_Mvals, d_Mrows, d_Mcols, \
|
||||
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(handle, dir, operation, \
|
||||
Nb, nnzbs_prec, descr_U, d_Mvals, d_Mrows, d_Mcols, \
|
||||
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
|
||||
|
||||
#if HIP_VERSION >= 50400000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex_analysis(handle, dir, operation,
|
||||
Nb, Nb, nnzb,
|
||||
descr_A, d_Avals, d_Arows, d_Acols,
|
||||
block_size, spmv_info));
|
||||
#endif
|
||||
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
std::ostringstream out;
|
||||
out << "rocsparseSolver::analyze_matrix(): " << t.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
|
||||
analysis_done = true;
|
||||
|
||||
return true;
|
||||
} // end analyze_matrix()
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
bool rocsparseSolverBackend<block_size>::create_preconditioner() {
|
||||
Timer t;
|
||||
|
||||
bool result = true;
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrilu0(handle, dir, Nb, nnzbs_prec, descr_M,
|
||||
d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, rocsparse_solve_policy_auto, d_buffer));
|
||||
|
||||
// Check for zero pivot
|
||||
int zero_position = 0;
|
||||
rocsparse_status status = rocsparse_bsrilu0_zero_pivot(handle, ilu_info, &zero_position);
|
||||
if(rocsparse_status_success != status)
|
||||
{
|
||||
printf("L has structural and/or numerical zero at L(%d,%d)\n", zero_position, zero_position);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
std::ostringstream out;
|
||||
out << "rocsparseSolver::create_preconditioner(): " << t.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
return result;
|
||||
} // end create_preconditioner()
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
void rocsparseSolverBackend<block_size>::solve_system(WellContributions &wellContribs, BdaResult &res) {
|
||||
Timer t;
|
||||
|
||||
// actually solve
|
||||
try {
|
||||
gpu_pbicgstab(wellContribs, res);
|
||||
} catch (const cl::Error& error) {
|
||||
std::ostringstream oss;
|
||||
oss << "rocsparseSolverBackend::solve_system error: " << error.what() << "(" << error.err() << ")\n";
|
||||
oss << getErrorString(error.err());
|
||||
// rethrow exception
|
||||
OPM_THROW(std::logic_error, oss.str());
|
||||
} catch (const std::logic_error& error) {
|
||||
// rethrow exception by OPM_THROW in the try{}, without this, a segfault occurs
|
||||
throw error;
|
||||
}
|
||||
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
std::ostringstream out;
|
||||
out << "rocsparseSolver::solve_system(): " << t.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
|
||||
} // end solve_system()
|
||||
|
||||
|
||||
// copy result to host memory
|
||||
// caller must be sure that x is a valid array
|
||||
template <unsigned int block_size>
|
||||
void rocsparseSolverBackend<block_size>::get_result(double *x) {
|
||||
Timer t;
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(x, d_x, sizeof(double) * N, hipMemcpyDeviceToHost, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream)); // always wait, caller might want to use x immediately
|
||||
|
||||
if (verbosity >= 3) {
|
||||
std::ostringstream out;
|
||||
out << "rocsparseSolver::get_result(): " << t.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
} // end get_result()
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
SolverStatus rocsparseSolverBackend<block_size>::solve_system(std::shared_ptr<BlockedMatrix> matrix,
|
||||
double *b,
|
||||
std::shared_ptr<BlockedMatrix> jacMatrix,
|
||||
WellContributions& wellContribs,
|
||||
BdaResult &res)
|
||||
{
|
||||
if (initialized == false) {
|
||||
initialize(matrix, jacMatrix);
|
||||
copy_system_to_gpu(b);
|
||||
if (analysis_done == false) {
|
||||
if (!analyze_matrix()) {
|
||||
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
|
||||
}
|
||||
}
|
||||
if (!create_preconditioner()) {
|
||||
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||
}
|
||||
} else {
|
||||
update_system_on_gpu(b);
|
||||
if (!create_preconditioner()) {
|
||||
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||
}
|
||||
}
|
||||
solve_system(wellContribs, res);
|
||||
|
||||
return SolverStatus::BDA_SOLVER_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||
template rocsparseSolverBackend<n>::rocsparseSolverBackend( \
|
||||
int, int, double, unsigned int, unsigned int);
|
||||
|
||||
INSTANTIATE_BDA_FUNCTIONS(1);
|
||||
INSTANTIATE_BDA_FUNCTIONS(2);
|
||||
INSTANTIATE_BDA_FUNCTIONS(3);
|
||||
INSTANTIATE_BDA_FUNCTIONS(4);
|
||||
INSTANTIATE_BDA_FUNCTIONS(5);
|
||||
INSTANTIATE_BDA_FUNCTIONS(6);
|
||||
|
||||
#undef INSTANTIATE_BDA_FUNCTIONS
|
||||
|
||||
} // namespace Accelerator
|
||||
} // namespace Opm
|
158
opm/simulators/linalg/bda/rocsparseSolverBackend.hpp
Normal file
158
opm/simulators/linalg/bda/rocsparseSolverBackend.hpp
Normal file
@ -0,0 +1,158 @@
|
||||
/*
|
||||
Copyright 2023 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef OPM_ROCSPARSESOLVER_BACKEND_HEADER_INCLUDED
|
||||
#define OPM_ROCSPARSESOLVER_BACKEND_HEADER_INCLUDED
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
|
||||
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
||||
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
|
||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||
|
||||
#include <rocblas/rocblas.h>
|
||||
#include <rocsparse/rocsparse.h>
|
||||
|
||||
#include <hip/hip_version.h>
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
namespace Accelerator
|
||||
{
|
||||
|
||||
/// This class implements a rocsparse-based ilu0-bicgstab solver on GPU
|
||||
template <unsigned int block_size>
|
||||
class rocsparseSolverBackend : public BdaSolver<block_size>
|
||||
{
|
||||
typedef BdaSolver<block_size> Base;
|
||||
|
||||
using Base::N;
|
||||
using Base::Nb;
|
||||
using Base::nnz;
|
||||
using Base::nnzb;
|
||||
using Base::verbosity;
|
||||
using Base::platformID;
|
||||
using Base::deviceID;
|
||||
using Base::maxit;
|
||||
using Base::tolerance;
|
||||
using Base::initialized;
|
||||
|
||||
private:
|
||||
|
||||
bool useJacMatrix = false;
|
||||
|
||||
bool analysis_done = false;
|
||||
std::shared_ptr<BlockedMatrix> mat = nullptr; // original matrix
|
||||
std::shared_ptr<BlockedMatrix> jacMat = nullptr; // matrix for preconditioner
|
||||
int nnzbs_prec = 0; // number of nnz blocks in preconditioner matrix M
|
||||
|
||||
rocsparse_direction dir = rocsparse_direction_row;
|
||||
rocsparse_operation operation = rocsparse_operation_none;
|
||||
rocsparse_handle handle;
|
||||
rocblas_handle blas_handle;
|
||||
rocsparse_mat_descr descr_A, descr_M, descr_L, descr_U;
|
||||
rocsparse_mat_info ilu_info;
|
||||
#if HIP_VERSION >= 50400000
|
||||
rocsparse_mat_info spmv_info;
|
||||
#endif
|
||||
hipStream_t stream;
|
||||
|
||||
rocsparse_int *d_Arows, *d_Mrows;
|
||||
rocsparse_int *d_Acols, *d_Mcols;
|
||||
double *d_Avals, *d_Mvals;
|
||||
double *d_x, *d_b, *d_r, *d_rw, *d_p; // vectors, used during linear solve
|
||||
double *d_pw, *d_s, *d_t, *d_v;
|
||||
void *d_buffer; // buffer space, used by rocsparse ilu0 analysis
|
||||
int ver;
|
||||
char rev[64];
|
||||
|
||||
|
||||
/// Solve linear system using ilu0-bicgstab
|
||||
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
||||
/// \param[inout] res summary of solver result
|
||||
void gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res);
|
||||
|
||||
/// Initialize GPU and allocate memory
|
||||
/// \param[in] matrix matrix A
|
||||
/// \param[in] jacMatrix matrix for preconditioner
|
||||
void initialize(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix);
|
||||
|
||||
/// Copy linear system to GPU
|
||||
/// \param[in] b input vector, contains N values
|
||||
void copy_system_to_gpu(double *b);
|
||||
|
||||
/// Update linear system to GPU
|
||||
/// \param[in] b input vector, contains N values
|
||||
void update_system_on_gpu(double *b);
|
||||
|
||||
/// Analyze sparsity pattern to extract parallelism
|
||||
/// \return true iff analysis was successful
|
||||
bool analyze_matrix();
|
||||
|
||||
/// Perform ilu0-decomposition
|
||||
/// \return true iff decomposition was successful
|
||||
bool create_preconditioner();
|
||||
|
||||
/// Solve linear system
|
||||
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
||||
/// \param[inout] res summary of solver result
|
||||
void solve_system(WellContributions &wellContribs, BdaResult &res);
|
||||
|
||||
public:
|
||||
/// Construct a openclSolver
|
||||
/// \param[in] linear_solver_verbosity verbosity of openclSolver
|
||||
/// \param[in] maxit maximum number of iterations for openclSolver
|
||||
/// \param[in] tolerance required relative tolerance for openclSolver
|
||||
/// \param[in] platformID the OpenCL platform to be used
|
||||
/// \param[in] deviceID the device to be used
|
||||
rocsparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID);
|
||||
|
||||
/// For the CPR coarse solver
|
||||
// rocsparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, ILUReorder opencl_ilu_reorder);
|
||||
|
||||
/// Destroy a openclSolver, and free memory
|
||||
~rocsparseSolverBackend();
|
||||
|
||||
/// Solve linear system, A*x = b, matrix A must be in blocked-CSR format
|
||||
/// \param[in] matrix matrix A
|
||||
/// \param[in] b input vector, contains N values
|
||||
/// \param[in] jacMatrix matrix for preconditioner
|
||||
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
||||
/// \param[inout] res summary of solver result
|
||||
/// \return status code
|
||||
SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b,
|
||||
std::shared_ptr<BlockedMatrix> jacMatrix, WellContributions& wellContribs, BdaResult &res) override;
|
||||
|
||||
/// Solve scalar linear system, for example a coarse system of an AMG preconditioner
|
||||
/// Data is already on the GPU
|
||||
// SolverStatus solve_system(BdaResult &res);
|
||||
|
||||
/// Get result after linear solve, and peform postprocessing if necessary
|
||||
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
||||
void get_result(double *x) override;
|
||||
|
||||
}; // end class rocsparseSolverBackend
|
||||
|
||||
} // namespace Accelerator
|
||||
} // namespace Opm
|
||||
|
||||
#endif
|
||||
|
||||
|
217
tests/test_rocsparseSolver.cpp
Normal file
217
tests/test_rocsparseSolver.cpp
Normal file
@ -0,0 +1,217 @@
|
||||
/*
|
||||
Copyright 2019 SINTEF Digital, Mathematics and Cybernetics.
|
||||
Copyright 2023 Equinor
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#include <stdexcept>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
|
||||
#define BOOST_TEST_MODULE OPM_test_rocsparseSolver
|
||||
#include <boost/test/unit_test.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
|
||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||
|
||||
#include <dune/common/fvector.hh>
|
||||
#include <dune/istl/bvector.hh>
|
||||
#include <dune/istl/bcrsmatrix.hh>
|
||||
#include <dune/istl/matrixmarket.hh>
|
||||
#include <dune/istl/solvers.hh>
|
||||
#include <dune/istl/preconditioners.hh>
|
||||
|
||||
#include <boost/property_tree/json_parser.hpp>
|
||||
#include <boost/property_tree/ptree.hpp>
|
||||
|
||||
class HIPInitException : public std::logic_error
|
||||
{
|
||||
public:
|
||||
HIPInitException(std::string msg) : logic_error(msg){};
|
||||
};
|
||||
|
||||
template <int bz>
|
||||
using Matrix = Dune::BCRSMatrix<Dune::FieldMatrix<double, bz, bz>>;
|
||||
template <int bz>
|
||||
using Vector = Dune::BlockVector<Dune::FieldVector<double, bz>>;
|
||||
|
||||
template <int bz>
|
||||
void readLinearSystem(const std::string& matrix_filename, const std::string& rhs_filename, Matrix<bz>& matrix, Vector<bz>& rhs)
|
||||
{
|
||||
{
|
||||
std::ifstream mfile(matrix_filename);
|
||||
if (!mfile) {
|
||||
throw std::runtime_error("Could not read matrix file");
|
||||
}
|
||||
readMatrixMarket(matrix, mfile);
|
||||
}
|
||||
{
|
||||
std::ifstream rhsfile(rhs_filename);
|
||||
if (!rhsfile) {
|
||||
throw std::runtime_error("Could not read rhs file");
|
||||
}
|
||||
readMatrixMarket(rhs, rhsfile);
|
||||
}
|
||||
}
|
||||
|
||||
template <int bz>
|
||||
Dune::BlockVector<Dune::FieldVector<double, bz>>
|
||||
getDuneSolution(Matrix<bz>& matrix, Vector<bz>& rhs)
|
||||
{
|
||||
Dune::InverseOperatorResult result;
|
||||
|
||||
Vector<bz> x(rhs.size());
|
||||
|
||||
typedef Dune::MatrixAdapter<Matrix<bz>,Vector<bz>,Vector<bz> > Operator;
|
||||
Operator fop(matrix);
|
||||
double relaxation = 0.9;
|
||||
Dune::SeqILU<Matrix<bz>,Vector<bz>,Vector<bz> > prec(matrix, relaxation);
|
||||
double reduction = 1e-2;
|
||||
int maxit = 10;
|
||||
int verbosity = 0;
|
||||
Dune::BiCGSTABSolver<Vector<bz> > solver(fop, prec, reduction, maxit, verbosity);
|
||||
solver.apply(x, rhs, result);
|
||||
return x;
|
||||
}
|
||||
|
||||
template <int bz>
|
||||
void
|
||||
createBridge(const boost::property_tree::ptree& prm, std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >& bridge)
|
||||
{
|
||||
const int linear_solver_verbosity = prm.get<int>("verbosity");
|
||||
const int maxit = prm.get<int>("maxiter");
|
||||
const double tolerance = prm.get<double>("tol");
|
||||
const bool opencl_ilu_parallel(true);
|
||||
const int platformID = 0;
|
||||
const int deviceID = 0;
|
||||
const std::string accelerator_mode("rocsparse");
|
||||
const std::string linsolver("ilu0");
|
||||
|
||||
try {
|
||||
bridge = std::make_unique<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >(accelerator_mode,
|
||||
linear_solver_verbosity,
|
||||
maxit,
|
||||
tolerance,
|
||||
platformID,
|
||||
deviceID,
|
||||
opencl_ilu_parallel,
|
||||
linsolver);
|
||||
} catch (const std::logic_error& error) {
|
||||
BOOST_WARN_MESSAGE(true, error.what());
|
||||
if (strstr(error.what(), "HIP Error: could not get device") != nullptr)
|
||||
throw HIPInitException(error.what());
|
||||
else
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
template <int bz>
|
||||
Dune::BlockVector<Dune::FieldVector<double, bz>>
|
||||
testRocsparseSolver(std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
|
||||
{
|
||||
Dune::InverseOperatorResult result;
|
||||
Vector<bz> x(rhs.size());
|
||||
auto wellContribs = Opm::WellContributions::create("rocsparse", true);
|
||||
auto mat2 = matrix; // deep copy to make sure nnz values are in contiguous memory
|
||||
// matrix created by readMatrixMarket() did not have contiguous memory
|
||||
bridge->solve_system(&mat2, &mat2, /*numJacobiBlocks=*/0, rhs, *wellContribs, result);
|
||||
bridge->get_result(x);
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
template <int bz>
|
||||
Dune::BlockVector<Dune::FieldVector<double, bz>>
|
||||
testRocsparseSolverJacobi(std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> >& bridge, Matrix<bz>& matrix, Vector<bz>& rhs)
|
||||
{
|
||||
Dune::InverseOperatorResult result;
|
||||
Vector<bz> x(rhs.size());
|
||||
auto wellContribs = Opm::WellContributions::create("rocsparse", true);
|
||||
auto mat2 = matrix; // deep copy to make sure nnz values are in contiguous memory
|
||||
// matrix created by readMatrixMarket() did not have contiguous memory
|
||||
auto mat3 = matrix; // another deep copy, to make sure Jacobi matrix memory is different
|
||||
// the sparsity pattern and values are actually the same
|
||||
bridge->solve_system(&mat2, &mat3, /*numJacobiBlocks=*/2, rhs, *wellContribs, result);
|
||||
bridge->get_result(x);
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
namespace pt = boost::property_tree;
|
||||
|
||||
void test3(const pt::ptree& prm)
|
||||
{
|
||||
const int bz = 3;
|
||||
Matrix<bz> matrix;
|
||||
Vector<bz> rhs;
|
||||
readLinearSystem("matr33.txt", "rhs3.txt", matrix, rhs);
|
||||
Vector<bz> rhs2 = rhs; // deep copy, getDuneSolution() changes values in rhs vector
|
||||
auto duneSolution = getDuneSolution<bz>(matrix, rhs);
|
||||
|
||||
// create bridge twice, because rocsparseSolver allocates memory for
|
||||
// the jacobi matrix if passed, during the first solve_system() call
|
||||
// if not present, no memory is allocated, and subsequent calls
|
||||
// with a jacobi matrix will cause nans
|
||||
{
|
||||
std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
|
||||
createBridge(prm, bridge); // create bridge with rocsparseSolver
|
||||
|
||||
// test rocsparseSolver without Jacobi matrix
|
||||
auto sol = testRocsparseSolver<bz>(bridge, matrix, rhs2);
|
||||
BOOST_REQUIRE_EQUAL(sol.size(), duneSolution.size());
|
||||
for (size_t i = 0; i < sol.size(); ++i) {
|
||||
for (int row = 0; row < bz; ++row) {
|
||||
BOOST_CHECK_CLOSE(sol[i][row], duneSolution[i][row], 1e-3);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
std::unique_ptr<Opm::BdaBridge<Matrix<bz>, Vector<bz>, bz> > bridge;
|
||||
createBridge(prm, bridge); // create bridge with rocsparseSolver
|
||||
|
||||
// test rocsparseSolver with Jacobi matrix
|
||||
auto solJacobi = testRocsparseSolverJacobi<bz>(bridge, matrix, rhs2);
|
||||
BOOST_REQUIRE_EQUAL(solJacobi.size(), duneSolution.size());
|
||||
for (size_t i = 0; i < solJacobi.size(); ++i) {
|
||||
for (int row = 0; row < bz; ++row) {
|
||||
BOOST_CHECK_CLOSE(solJacobi[i][row], duneSolution[i][row], 1e-3);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
BOOST_AUTO_TEST_CASE(TestRocsparseSolver)
|
||||
{
|
||||
pt::ptree prm;
|
||||
|
||||
// Read parameters.
|
||||
{
|
||||
std::ifstream file("options_flexiblesolver.json");
|
||||
pt::read_json(file, prm);
|
||||
}
|
||||
|
||||
try {
|
||||
// Test with 3x3 block solvers.
|
||||
test3(prm);
|
||||
} catch(const HIPInitException& ) {
|
||||
BOOST_WARN_MESSAGE(true, "Problem with initializing HIP, skipping test");
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user