Merge pull request #3346 from Tongdongq/amgcl-support

Amgcl support
This commit is contained in:
Markus Blatt 2021-07-21 15:02:25 +02:00 committed by GitHub
commit ea4e4a1520
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 714 additions and 34 deletions

View File

@ -213,6 +213,35 @@ if(OpenCL_FOUND)
endif()
endif()
find_package(amgcl)
if(amgcl_FOUND)
set(HAVE_AMGCL 1)
# Linking to target angcl::amgcl drags in OpenMP and -fopenmp as a compile
# flag. With that nvcc fails as it does not that flag.
# Hence we set AMGCL_INCLUDE_DIRS.
get_property(AMGCL_INCLUDE_DIRS TARGET amgcl::amgcl PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
include_directories(SYSTEM ${AMGCL_INCLUDE_DIRS})
endif()
if(OpenCL_FOUND)
find_package(VexCL)
if(VexCL_FOUND)
set(HAVE_VEXCL 1)
# generator expressions in vexcl do not seem to work and therefore
# we cannot use the imported target. Hence we exract the needed info
# from the targets
get_property(VEXCL_INCLUDE_DIRS TARGET VexCL::Common PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
get_property(VEXCL_LINK_LIBRARIES TARGET VexCL::Common PROPERTY INTERFACE_LINK_LIBRARIES)
get_property(VEXCL_COMPILE_DEFINITIONS TARGET VexCL::OpenCL PROPERTY INTERFACE_COMPILE_DEFINITIONS)
set(VEXCL_LINK_LIBRARIES "${VEXCL_LINK_LIBRARIES};OpenCL::OpenCL")
add_library(OPM::VexCL::OpenCL INTERFACE IMPORTED)
set_target_properties(OPM::VexCL::OpenCL PROPERTIES
INTERFACE_COMPILE_DEFINITIONS "${VEXCL_COMPILE_DEFINITIONS}"
INTERFACE_LINK_LIBRARIES "${VEXCL_LINK_LIBRARIES}")
target_include_directories(OPM::VexCL::OpenCL SYSTEM INTERFACE "${VEXCL_INCLUDE_DIRS}")
endif()
endif()
# read the list of components from this file (in the project directory);
# it should set various lists with the names of the files to include
include (CMakeLists_files.cmake)
@ -528,6 +557,9 @@ if(OpenCL_FOUND)
target_link_libraries( opmsimulators PUBLIC ${OpenCL_LIBRARIES} )
endif()
if(VexCL_FOUND)
target_link_libraries( opmsimulators PUBLIC OPM::VexCL::OpenCL )
endif()
if(HAVE_FPGA)
add_dependencies(opmsimulators FPGA_library)
ExternalProject_Get_Property(FPGA_library binary_dir)

View File

@ -114,7 +114,15 @@ if(HAVE_FPGA)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/MultisegmentWellContribution.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cpp)
endif()
if(HAVE_AMGCL)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/MultisegmentWellContribution.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/amgclSolverBackend.cpp)
if(CUDA_FOUND)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/amgclSolverBackend.cu)
endif()
endif()
if(MPI_FOUND)
list(APPEND MAIN_SOURCE_FILES opm/simulators/utils/ParallelEclipseState.cpp
opm/simulators/utils/ParallelSerialization.cpp)
@ -233,6 +241,7 @@ list (APPEND PUBLIC_HEADER_FILES
opm/simulators/aquifers/AquiferNumerical.hpp
opm/simulators/aquifers/BlackoilAquiferModel.hpp
opm/simulators/aquifers/BlackoilAquiferModel_impl.hpp
opm/simulators/linalg/bda/amgclSolverBackend.hpp
opm/simulators/linalg/bda/BdaBridge.hpp
opm/simulators/linalg/bda/BdaResult.hpp
opm/simulators/linalg/bda/BdaSolver.hpp

View File

@ -8,6 +8,8 @@ set (opm-simulators_CONFIG_VAR
HAVE_CUDA
HAVE_OPENCL
HAVE_FPGA
HAVE_AMGCL
HAVE_VEXCL
HAVE_SUITESPARSE_UMFPACK_H
HAVE_DUNE_ISTL
DUNE_ISTL_VERSION_MAJOR

View File

@ -314,7 +314,7 @@ namespace Opm
EWOMS_REGISTER_PARAM(TypeTag, int, CprMaxEllIter, "MaxIterations of the elliptic pressure part of the cpr solver");
EWOMS_REGISTER_PARAM(TypeTag, int, CprReuseSetup, "Reuse preconditioner setup. Valid options are 0: recreate the preconditioner for every linear solve, 1: recreate once every timestep, 2: recreate if last linear solve took more than 10 iterations, 3: never recreate");
EWOMS_REGISTER_PARAM(TypeTag, std::string, Linsolver, "Configuration of solver. Valid options are: ilu0 (default), cpr (an alias for cpr_trueimpes), cpr_quasiimpes, cpr_trueimpes or amg. Alternatively, you can request a configuration to be read from a JSON file by giving the filename here, ending with '.json.'");
EWOMS_REGISTER_PARAM(TypeTag, std::string, AcceleratorMode, "Use GPU (cusparseSolver or openclSolver) or FPGA (fpgaSolver) as the linear solver, usage: '--accelerator-mode=[none|cusparse|opencl|fpga]'");
EWOMS_REGISTER_PARAM(TypeTag, std::string, AcceleratorMode, "Use GPU (cusparseSolver or openclSolver) or FPGA (fpgaSolver) as the linear solver, usage: '--accelerator-mode=[none|cusparse|opencl|fpga|amgcl]'");
EWOMS_REGISTER_PARAM(TypeTag, int, BdaDeviceId, "Choose device ID for cusparseSolver or openclSolver, use 'nvidia-smi' or 'clinfo' to determine valid IDs");
EWOMS_REGISTER_PARAM(TypeTag, int, OpenclPlatformId, "Choose platform ID for openclSolver, use 'clinfo' to determine valid platform IDs");
EWOMS_REGISTER_PARAM(TypeTag, std::string, OpenclIluReorder, "Choose the reordering strategy for ILU for openclSolver and fpgaSolver, usage: '--opencl-ilu-reorder=[level_scheduling|graph_coloring], level_scheduling behaves like Dune and cusparse, graph_coloring is more aggressive and likely to be faster, but is random-based and generally increases the number of linear solves and linear iterations significantly.");

View File

@ -35,7 +35,7 @@
#include <opm/simulators/linalg/setupPropertyTree.hpp>
#if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA
#if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA || HAVE_AMGCL
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
#endif
@ -93,7 +93,7 @@ namespace Opm
using ElementMapper = GetPropType<TypeTag, Properties::ElementMapper>;
constexpr static std::size_t pressureIndex = GetPropType<TypeTag, Properties::Indices>::pressureSwitchIdx;
#if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA
#if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA || HAVE_AMGCL
static const unsigned int block_size = Matrix::block_type::rows;
std::unique_ptr<BdaBridge<Matrix, Vector, block_size>> bdaBridge;
#endif
@ -130,7 +130,7 @@ namespace Opm
EWOMS_PARAM_IS_SET(TypeTag, int, LinearSolverMaxIter),
EWOMS_PARAM_IS_SET(TypeTag, int, CprMaxEllIter));
#if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA
#if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA || HAVE_AMGCL
{
std::string accelerator_mode = EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode);
if ((simulator_.vanguard().grid().comm().size() > 1) && (accelerator_mode != "none")) {
@ -150,7 +150,7 @@ namespace Opm
}
#else
if (EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode) != "none") {
OPM_THROW(std::logic_error,"Cannot use accelerated solver since neither CUDA nor OpenCL were found by cmake and FPGA was not enabled");
OPM_THROW(std::logic_error,"Cannot use accelerated solver since CUDA, OpenCL and amgcl were not found by cmake and FPGA was not enabled");
}
#endif
extractParallelGridInformationToISTL(simulator_.vanguard().grid(), parallelInformation_);
@ -257,17 +257,20 @@ namespace Opm
// Use GPU if: available, chosen by user, and successful.
// Use FPGA if: support compiled, chosen by user, and successful.
#if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA
#if HAVE_CUDA || HAVE_OPENCL || HAVE_FPGA || HAVE_AMGCL
bool use_gpu = bdaBridge->getUseGpu();
bool use_fpga = bdaBridge->getUseFpga();
if (use_gpu || use_fpga) {
const std::string accelerator_mode = EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode);
WellContributions wellContribs(accelerator_mode);
WellContributions wellContribs(accelerator_mode, useWellConn_);
bdaBridge->initWellContributions(wellContribs);
// the WellContributions can only be applied separately with CUDA or OpenCL, not with an FPGA or amgcl
#if HAVE_CUDA || HAVE_OPENCL
if (!useWellConn_) {
simulator_.problem().wellModel().getWellContributions(wellContribs);
}
#endif
// Const_cast needed since the CUDA stuff overwrites values for better matrix condition..
bdaBridge->solve_system(const_cast<Matrix*>(&getMatrix()), *rhs_, wellContribs, result);

View File

@ -38,8 +38,9 @@
#include <opm/simulators/linalg/bda/FPGASolverBackend.hpp>
#endif
#define PRINT_TIMERS_BRIDGE 0
#if HAVE_AMGCL
#include <opm/simulators/linalg/bda/amgclSolverBackend.hpp>
#endif
typedef Dune::InverseOperatorResult InverseOperatorResult;
@ -59,7 +60,7 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
[[maybe_unused]] unsigned int platformID,
unsigned int deviceID,
[[maybe_unused]] std::string opencl_ilu_reorder)
: accelerator_mode(accelerator_mode_)
: verbosity(linear_solver_verbosity), accelerator_mode(accelerator_mode_)
{
if (accelerator_mode.compare("cusparse") == 0) {
#if HAVE_CUDA
@ -103,12 +104,19 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
backend.reset(new bda::FpgaSolverBackend<block_size>(fpga_bitstream, linear_solver_verbosity, maxit, tolerance, ilu_reorder));
#else
OPM_THROW(std::logic_error, "Error fpgaSolver was chosen, but FPGA was not enabled by CMake");
#endif
} else if (accelerator_mode.compare("amgcl") == 0) {
#if HAVE_AMGCL
use_gpu = true; // should be replaced by a 'use_bridge' boolean
backend.reset(new bda::amgclSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance, platformID, deviceID));
#else
OPM_THROW(std::logic_error, "Error amgclSolver was chosen, but amgcl was not found by CMake");
#endif
} else if (accelerator_mode.compare("none") == 0) {
use_gpu = false;
use_fpga = false;
} else {
OPM_THROW(std::logic_error, "Error unknown value for parameter 'AcceleratorMode', should be passed like '--accelerator-mode=[none|cusparse|opencl|fpga]");
OPM_THROW(std::logic_error, "Error unknown value for parameter 'AcceleratorMode', should be passed like '--accelerator-mode=[none|cusparse|opencl|fpga|amgcl]");
}
}
@ -197,40 +205,30 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system(BridgeMatri
const int nnz = nnzb * dim * dim;
if (dim != 3) {
OpmLog::warning("cusparseSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program");
use_gpu = false;
OpmLog::warning("BdaSolver only accepts blocksize = 3 at this time, will use Dune for the remainder of the program");
use_gpu = use_fpga = false;
return;
}
if (h_rows.capacity() == 0) {
h_rows.reserve(Nb+1);
h_cols.reserve(nnzb);
#if PRINT_TIMERS_BRIDGE
Dune::Timer t;
#endif
getSparsityPattern(*mat, h_rows, h_cols);
#if PRINT_TIMERS_BRIDGE
std::ostringstream out;
out << "getSparsityPattern() took: " << t.stop() << " s";
OpmLog::info(out.str());
#endif
}
#if PRINT_TIMERS_BRIDGE
Dune::Timer t_zeros;
int numZeros = checkZeroDiagonal(*mat);
std::ostringstream out;
out << "Checking zeros took: " << t_zeros.stop() << " s, found " << numZeros << " zeros";
OpmLog::info(out.str());
#else
checkZeroDiagonal(*mat);
#endif
if (verbosity >= 2) {
std::ostringstream out;
out << "Checking zeros took: " << t_zeros.stop() << " s, found " << numZeros << " zeros";
OpmLog::info(out.str());
}
/////////////////////////
// actually solve
// assume that underlying data (nonzeroes) from mat (Dune::BCRSMatrix) are contiguous, if this is not the case, cusparseSolver is expected to perform undefined behaviour
// assume that underlying data (nonzeroes) from mat (Dune::BCRSMatrix) are contiguous, if this is not the case, the chosen BdaSolver is expected to perform undefined behaviour
SolverStatus status = backend->solve_system(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])), wellContribs, result);
switch(status) {
case SolverStatus::BDA_SOLVER_SUCCESS:

View File

@ -45,6 +45,7 @@ template <class BridgeMatrix, class BridgeVector, int block_size>
class BdaBridge
{
private:
int verbosity = 0;
bool use_gpu = false;
bool use_fpga = false;
std::string accelerator_mode;

View File

@ -28,7 +28,7 @@
namespace Opm
{
WellContributions::WellContributions(std::string accelerator_mode){
WellContributions::WellContributions(std::string accelerator_mode, bool useWellConn){
if(accelerator_mode.compare("cusparse") == 0){
cuda_gpu = true;
}
@ -38,6 +38,11 @@ WellContributions::WellContributions(std::string accelerator_mode){
else if(accelerator_mode.compare("fpga") == 0){
// unused for FPGA, but must be defined to avoid error
}
else if(accelerator_mode.compare("amgcl") == 0){
if (!useWellConn) {
OPM_THROW(std::logic_error, "Error amgcl requires --matrix-add-well-contributions=true");
}
}
else{
OPM_THROW(std::logic_error, "Invalid accelerator mode");
}

View File

@ -173,7 +173,9 @@ public:
void alloc();
/// Create a new WellContributions
WellContributions(std::string accelerator_mode);
/// \param[in] accelerator_mode string indicating which solverBackend is used
/// \param[in] useWellConn true iff wellcontributions are added to the matrix
WellContributions(std::string accelerator_mode, bool useWellConn);
/// Destroy a WellContributions, and free memory
~WellContributions();

View File

@ -0,0 +1,383 @@
/*
Copyright 2020 Equinor ASA
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <sstream>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>
#include <dune/common/timer.hh>
#include <opm/material/common/Unused.hpp>
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/bda/amgclSolverBackend.hpp>
#include <boost/property_tree/json_parser.hpp>
#if HAVE_VEXCL
#include <amgcl/backend/vexcl.hpp>
#include <amgcl/backend/vexcl_static_matrix.hpp>
#endif
namespace bda
{
using Opm::OpmLog;
using Dune::Timer;
template <unsigned int block_size>
amgclSolverBackend<block_size>::amgclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_) {}
template <unsigned int block_size>
amgclSolverBackend<block_size>::~amgclSolverBackend() {}
template <unsigned int block_size>
void amgclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim) {
this->N = N_;
this->nnz = nnz_;
this->nnzb = nnz_ / block_size / block_size;
Nb = (N + dim - 1) / dim;
std::ostringstream out;
out << "Initializing amgclSolverBackend, matrix size: " << N << " blocks, nnzb: " << nnzb << "\n";
out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance << "\n";
out << "DeviceID: " << deviceID << "\n";
OpmLog::info(out.str());
out.str("");
out.clear();
A_vals.resize(nnz);
A_cols.resize(nnz);
A_rows.resize(N + 1);
rhs.resize(N);
x.resize(N);
// try to read amgcl parameters via json file
std::string filename = "amgcl_options.json";
std::ifstream file(filename);
std::string backend_type_string;
if (file.is_open()) { // if file exists, read parameters from file
try {
boost::property_tree::read_json(file, prm);
} catch (boost::property_tree::json_parser::json_parser_error e) {
OPM_THROW(std::logic_error, "Error cannot parse json file '" + filename + "'");
}
// the prm.get reads data from the file, with default values if not specified
// the prm.put puts the data in the property_tree, so it gets printed
backend_type_string = prm.get("backend_type", "cpu");
prm.put("backend_type", backend_type_string);
std::string t1 = prm.get("precond.class", "relaxation");
prm.put("precond.class", t1);
t1 = prm.get("precond.type", "ilu0");
prm.put("precond.type", t1);
double t2 = prm.get("precond.damping", 0.9);
prm.put("precond.damping", t2);
t1 = prm.get("solver.type", "bicgstab");
prm.put("solver.type", t1);
t2 = prm.get("solver.tol", tolerance);
prm.put("solver.tol", t2);
int t3 = prm.get("solver.maxiter", maxit);
prm.put("solver.maxiter", t3);
bool t4 = prm.get("solver.verbose", verbosity >= 2);
prm.put("solver.verbose", t4);
out << "Using parameters from " << filename << " (with default values for omitted parameters):\n";
} else { // otherwise use default parameters, same as Dune
prm.put("backend_type", "cpu"); // put it in the tree so it gets printed
prm.put("precond.class", "relaxation");
prm.put("precond.type", "ilu0");
prm.put("precond.damping", 0.9);
prm.put("solver.type", "bicgstab");
prm.put("solver.tol", tolerance);
prm.put("solver.maxiter", maxit);
prm.put("solver.verbose", verbosity >= 2);
backend_type_string = prm.get("backend_type", "cpu");
out << "Using default amgcl parameters:\n";
}
boost::property_tree::write_json(out, prm); // print amgcl parameters
prm.erase("backend_type"); // delete custom parameter, otherwise amgcl prints a warning
if (backend_type_string == "cpu") {
backend_type = Amgcl_backend_type::cpu;
} else if (backend_type_string == "cuda") {
backend_type = Amgcl_backend_type::cuda;
} else if (backend_type_string == "vexcl") {
backend_type = Amgcl_backend_type::vexcl;
} else {
OPM_THROW(std::logic_error, "Error unknown value for amgcl parameter 'backend_type', use [cpu|cuda|vexcl]");
}
if (backend_type == Amgcl_backend_type::cuda) {
#if !HAVE_CUDA
OPM_THROW(std::logic_error, "Error amgcl is trying to use CUDA, but CUDA was not found by CMake");
#endif
}
if (backend_type == Amgcl_backend_type::vexcl) {
#if !HAVE_VEXCL
OPM_THROW(std::logic_error, "Error amgcl is trying to use VexCL, but VexCL was not found by CMake");
#endif
}
OpmLog::info(out.str());
initialized = true;
} // end initialize()
template <unsigned int block_size>
void amgclSolverBackend<block_size>::convert_sparsity_pattern(int *rows, int *cols) {
Timer t;
const unsigned int bs = block_size;
int idx = 0; // indicates the unblocked write index
A_rows[0] = 0;
for (int row = 0; row < Nb; ++row) {
int rowStart = rows[row];
int rowEnd = rows[row+1];
for (unsigned r = 0; r < bs; ++r) {
for (int ij = rowStart; ij < rowEnd; ++ij) {
for (unsigned c = 0; c < bs; ++c) {
A_cols[idx] = cols[ij] * bs + c;
idx++;
}
}
A_rows[row*bs + r + 1] = idx;
}
}
if (verbosity >= 3) {
std::ostringstream out;
out << "amgclSolverBackend::convert_sparsity_pattern(): " << t.stop() << " s";
OpmLog::info(out.str());
}
} // end convert_sparsity_pattern()
template <unsigned int block_size>
void amgclSolverBackend<block_size>::convert_data(double *vals, int *rows) {
Timer t;
const unsigned int bs = block_size;
int idx = 0; // indicates the unblocked write index
for (int row = 0; row < Nb; ++row) {
int rowStart = rows[row];
int rowEnd = rows[row+1];
for (unsigned r = 0; r < bs; ++r) {
for (int ij = rowStart; ij < rowEnd; ++ij) {
for (unsigned c = 0; c < bs; ++c) {
A_vals[idx] = vals[ij*bs*bs + r*bs + c];
idx++;
}
}
}
}
if (verbosity >= 3) {
std::ostringstream out;
out << "amgclSolverBackend::convert_data(): " << t.stop() << " s";
OpmLog::info(out.str());
}
} // end convert_data()
#if HAVE_VEXCL
void initialize_vexcl(std::vector<cl::CommandQueue>& ctx, unsigned int platformID, unsigned int deviceID) {
std::vector<cl::Platform> platforms;
std::vector<cl::Device> devices;
cl::Platform::get(&platforms);
if (platforms.size() <= platformID) {
OPM_THROW(std::logic_error, "Error chosen too high OpenCL platform ID");
}
std::string platform_name, device_name;
platforms[platformID].getInfo(CL_PLATFORM_NAME, &platform_name);
platforms[platformID].getDevices(CL_DEVICE_TYPE_ALL, &devices);
if (devices.size() <= deviceID){
OPM_THROW(std::logic_error, "Error chosen too high OpenCL device ID");
}
devices[deviceID].getInfo(CL_DEVICE_NAME, &device_name);
cl::Context c(devices[deviceID]);
cl::CommandQueue q(c, devices[deviceID]);
ctx.push_back(q);
std::ostringstream out;
out << "Using VexCL on " << device_name << " (" << platform_name << ")\n";
OpmLog::info(out.str());
}
template <typename vexcl_matrix_type, typename vexcl_vector_type, unsigned int block_size>
void solve_vexcl(
const auto& A,
const boost::property_tree::ptree prm,
const std::vector<cl::CommandQueue>& ctx,
double *b,
std::vector<double>& x,
const int N,
int& iters,
double& error)
{
typedef amgcl::backend::vexcl<vexcl_matrix_type> Backend;
typedef amgcl::make_solver<amgcl::runtime::preconditioner<Backend>, amgcl::runtime::solver::wrapper<Backend> > Solver;
typename Solver::backend_params bprm;
bprm.q = ctx; // set vexcl context
Solver solve(A, prm, bprm); // create solver
auto b_ptr = reinterpret_cast<vexcl_vector_type*>(b);
auto x_ptr = reinterpret_cast<vexcl_vector_type*>(x.data());
vex::vector<vexcl_vector_type> B(ctx, N / block_size, b_ptr);
vex::vector<vexcl_vector_type> X(ctx, N / block_size, x_ptr);
std::tie(iters, error) = solve(B, X); // actually perform solve
vex::copy(X, x_ptr);
}
#endif
template <unsigned int block_size>
void amgclSolverBackend<block_size>::solve_system(double *b, BdaResult &res) {
Timer t;
try {
if (backend_type == Amgcl_backend_type::cuda) { // use CUDA
#if HAVE_CUDA
solve_cuda(b);
#endif
} else if (backend_type == Amgcl_backend_type::cpu) { // use builtin backend (CPU)
// create matrix object
auto Atmp = std::tie(N, A_rows, A_cols, A_vals);
auto A = amgcl::adapter::block_matrix<dmat_type>(Atmp);
// create solver and construct preconditioner
// don't reuse this unless the preconditioner can be reused
CPU_Solver solve(A, prm);
// print solver structure (once)
std::call_once(print_info, [&](){
std::ostringstream out;
out << solve << std::endl;
OpmLog::info(out.str());
});
// reset x vector
std::fill(x.begin(), x.end(), 0.0);
// create blocked vectors
auto b_ptr = reinterpret_cast<dvec_type*>(b);
auto x_ptr = reinterpret_cast<dvec_type*>(x.data());
auto B = amgcl::make_iterator_range(b_ptr, b_ptr + N / block_size);
auto X = amgcl::make_iterator_range(x_ptr, x_ptr + N / block_size);
// actually solve
std::tie(iters, error) = solve(B, X);
} else if (backend_type == Amgcl_backend_type::vexcl) {
#if HAVE_VEXCL
static std::vector<cl::CommandQueue> ctx; // using CommandQueue directly instead of vex::Context
std::call_once(vexcl_initialize, [&](){
initialize_vexcl(ctx, platformID, deviceID);
});
if constexpr(block_size == 1){
auto A = std::tie(N, A_rows, A_cols, A_vals);
solve_vexcl<double, double, block_size>(A, prm, ctx, b, x, N, iters, error);
} else {
// allow vexcl to use blocked matrices
vex::scoped_program_header h1(ctx, amgcl::backend::vexcl_static_matrix_declaration<double, block_size>());
auto Atmp = std::tie(N, A_rows, A_cols, A_vals);
auto A = amgcl::adapter::block_matrix<dmat_type>(Atmp);
solve_vexcl<dmat_type, dvec_type, block_size>(A, prm, ctx, b, x, N, iters, error);
}
#endif
}
} catch (const std::exception& ex) {
std::cerr << "Caught exception: " << ex.what() << std::endl;
throw ex;
}
double time_elapsed = t.stop();
res.iterations = iters;
res.reduction = 0.0;
res.elapsed = time_elapsed;
res.converged = (iters != maxit);
if (verbosity >= 1) {
std::ostringstream out;
out << "=== converged: " << res.converged << ", time: " << res.elapsed << \
", time per iteration: " << res.elapsed / iters << ", iterations: " << iters;
OpmLog::info(out.str());
}
if (verbosity >= 3) {
std::ostringstream out;
out << "amgclSolverBackend::solve_system(): " << time_elapsed << " s";
OpmLog::info(out.str());
}
} // end solve_system()
// copy result to host memory
// caller must be sure that x is a valid array
template <unsigned int block_size>
void amgclSolverBackend<block_size>::get_result(double *x_) {
Timer t;
std::copy(x.begin(), x.end(), x_);
if (verbosity >= 3) {
std::ostringstream out;
out << "amgclSolverBackend::get_result(): " << t.stop() << " s";
OpmLog::info(out.str());
}
} // end get_result()
template <unsigned int block_size>
SolverStatus amgclSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs OPM_UNUSED, BdaResult &res) {
if (initialized == false) {
initialize(N_, nnz_, dim);
convert_sparsity_pattern(rows, cols);
}
convert_data(vals, rows);
solve_system(b, res);
return SolverStatus::BDA_SOLVER_SUCCESS;
}
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template amgclSolverBackend<n>::amgclSolverBackend(int, int, double, unsigned int, unsigned int); \
INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);
INSTANTIATE_BDA_FUNCTIONS(3);
INSTANTIATE_BDA_FUNCTIONS(4);
#undef INSTANTIATE_BDA_FUNCTIONS
} // namespace bda

View File

@ -0,0 +1,89 @@
/*
Copyright 2021 Equinor ASA
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <sstream>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <amgcl/backend/cuda.hpp>
#include <amgcl/relaxation/cusparse_ilu0.hpp>
#include <opm/simulators/linalg/bda/amgclSolverBackend.hpp>
/// This file is only compiled when both amgcl and CUDA are found by CMake
namespace bda
{
using Opm::OpmLog;
template <unsigned int block_size>
void amgclSolverBackend<block_size>::solve_cuda(double *b) {
typedef amgcl::backend::cuda<double> CUDA_Backend;
typedef amgcl::make_solver<amgcl::runtime::preconditioner<CUDA_Backend>, amgcl::runtime::solver::wrapper<CUDA_Backend> > CUDA_Solver;
static typename CUDA_Backend::params CUDA_bprm; // amgcl backend parameters, only used for cusparseHandle
// initialize cusparse handle for amgcl, cannot merge this call_once with 'print solver structure'
std::call_once(cuda_initialize, [&](){
cudaDeviceProp prop;
cudaGetDeviceProperties(&prop, deviceID);
std::ostringstream out;
out << prop.name << std::endl;
OpmLog::info(out.str());
cusparseCreate(&CUDA_bprm.cusparse_handle);
});
// create matrix object
auto A = std::tie(N, A_rows, A_cols, A_vals);
// create solver and construct preconditioner
// don't reuse this unless the preconditioner can be reused
CUDA_Solver solve(A, prm, CUDA_bprm);
// print solver structure (once)
std::call_once(print_info, [&](){
std::ostringstream out;
out << solve << std::endl;
OpmLog::info(out.str());
});
thrust::device_vector<double> B(b, b + N);
thrust::device_vector<double> X(N, 0.0);
// actually solve
std::tie(iters, error) = solve(B, X);
thrust::copy(X.begin(), X.end(), x.begin());
}
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template void amgclSolverBackend<n>::solve_cuda(double*); \
INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);
INSTANTIATE_BDA_FUNCTIONS(3);
INSTANTIATE_BDA_FUNCTIONS(4);
#undef INSTANTIATE_BDA_FUNCTIONS
} // namespace bda

View File

@ -0,0 +1,156 @@
/*
Copyright 2020 Equinor ASA
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef OPM_AMGCLSOLVER_BACKEND_HEADER_INCLUDED
#define OPM_AMGCLSOLVER_BACKEND_HEADER_INCLUDED
#include <mutex>
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
#include <boost/property_tree/ptree.hpp>
#include <amgcl/amg.hpp>
#include <amgcl/backend/builtin.hpp>
#include <amgcl/adapter/crs_tuple.hpp>
#include <amgcl/make_block_solver.hpp>
#include <amgcl/relaxation/as_preconditioner.hpp>
#include <amgcl/relaxation/ilu0.hpp>
#include <amgcl/solver/bicgstab.hpp>
#include <amgcl/preconditioner/runtime.hpp>
#include <amgcl/value_type/static_matrix.hpp>
namespace bda
{
/// This class does not implement a solver, but converts the BCSR format to normal CSR and uses amgcl for solving
/// Note amgcl also implements blocked solvers, but looks like it needs unblocked input data
template <unsigned int block_size>
class amgclSolverBackend : public BdaSolver<block_size>
{
typedef BdaSolver<block_size> Base;
using Base::N;
using Base::Nb;
using Base::nnz;
using Base::nnzb;
using Base::verbosity;
using Base::platformID;
using Base::deviceID;
using Base::maxit;
using Base::tolerance;
using Base::initialized;
typedef amgcl::static_matrix<double, block_size, block_size> dmat_type; // matrix value type in double precision
typedef amgcl::static_matrix<double, block_size, 1> dvec_type; // the corresponding vector value type
typedef amgcl::backend::builtin<dmat_type> CPU_Backend;
typedef amgcl::make_solver<amgcl::runtime::preconditioner<CPU_Backend>, amgcl::runtime::solver::wrapper<CPU_Backend> > CPU_Solver;
private:
// amgcl can use different backends, this lets the user choose
enum Amgcl_backend_type {
cpu,
cuda,
vexcl
};
// store matrix in CSR format
std::vector<unsigned> A_rows, A_cols;
std::vector<double> A_vals, rhs;
std::vector<double> x;
std::once_flag print_info;
Amgcl_backend_type backend_type = cpu;
boost::property_tree::ptree prm; // amgcl parameters
int iters = 0;
double error = 0.0;
#if HAVE_CUDA
std::once_flag cuda_initialize;
void solve_cuda(double *b);
#endif
#if HAVE_VEXCL
std::once_flag vexcl_initialize;
#endif
/// Initialize host memory and determine amgcl parameters
/// \param[in] N number of nonzeroes, divide by dim*dim to get number of blocks
/// \param[in] nnz number of nonzeroes, divide by dim*dim to get number of blocks
/// \param[in] dim size of block
void initialize(int N, int nnz, int dim);
/// Convert the BCSR sparsity pattern to a CSR one
/// \param[in] rows array of rowPointers, contains N/dim+1 values
/// \param[in] cols array of columnIndices, contains nnz values
void convert_sparsity_pattern(int *rows, int *cols);
/// Convert the BCSR nonzero data to a CSR format
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
/// \param[in] rows array of rowPointers, contains N/dim+1 values
void convert_data(double *vals, int *rows);
/// Solve linear system
/// \param[in] b pointer to b vector
/// \param[inout] res summary of solver result
void solve_system(double *b, BdaResult &res);
public:
/// Construct a openclSolver
/// \param[in] linear_solver_verbosity verbosity of openclSolver
/// \param[in] maxit maximum number of iterations for openclSolver
/// \param[in] tolerance required relative tolerance for openclSolver
/// \param[in] platformID the OpenCL platform to be used
/// \param[in] deviceID the device to be used
amgclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID);
/// Destroy a openclSolver, and free memory
~amgclSolverBackend();
/// Solve linear system, A*x = b, matrix A must be in blocked-CSR format
/// \param[in] N number of rows, divide by dim to get number of blockrows
/// \param[in] nnz number of nonzeroes, divide by dim*dim to get number of blocks
/// \param[in] nnz_prec number of nonzeroes of matrix for ILU0, divide by dim*dim to get number of blocks
/// \param[in] dim size of block
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
/// \param[in] rows array of rowPointers, contains N/dim+1 values
/// \param[in] cols array of columnIndices, contains nnz values
/// \param[in] vals_prec array of nonzeroes for preconditioner
/// \param[in] rows_prec array of rowPointers for preconditioner
/// \param[in] cols_prec array of columnIndices for preconditioner
/// \param[in] b input vector, contains N values
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
/// \return status code
SolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
/// Get result after linear solve, and peform postprocessing if necessary
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
void get_result(double *x) override;
}; // end class amgclSolverBackend
} // namespace bda
#endif

View File

@ -80,7 +80,7 @@ testCusparseSolver(const boost::property_tree::ptree& prm, const std::string& ma
Dune::InverseOperatorResult result;
Vector x(rhs.size());
Opm::WellContributions wellContribs("cusparse");
Opm::WellContributions wellContribs("cusparse", false);
std::unique_ptr<Opm::BdaBridge<Matrix, Vector, bz> > bridge;
try {
bridge = std::make_unique<Opm::BdaBridge<Matrix, Vector, bz> >(gpu_mode, fpga_bitstream, linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_reorder);

View File

@ -79,7 +79,7 @@ testOpenclSolver(const boost::property_tree::ptree& prm, const std::string& matr
Dune::InverseOperatorResult result;
Vector x(rhs.size());
Opm::WellContributions wellContribs("opencl");
Opm::WellContributions wellContribs("opencl", false);
std::unique_ptr<Opm::BdaBridge<Matrix, Vector, bz> > bridge;
try {
bridge = std::make_unique<Opm::BdaBridge<Matrix, Vector, bz> >(gpu_mode, fpga_bitstream, linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_reorder);