2020-06-22 11:26:49 -05:00
|
|
|
/*
|
|
|
|
Copyright 2020 Equinor ASA
|
|
|
|
|
|
|
|
This file is part of the Open Porous Media project (OPM).
|
|
|
|
|
|
|
|
OPM is free software: you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
OPM is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef OPM_OPENCLSOLVER_BACKEND_HEADER_INCLUDED
|
|
|
|
#define OPM_OPENCLSOLVER_BACKEND_HEADER_INCLUDED
|
|
|
|
|
2022-02-01 09:51:32 -06:00
|
|
|
#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
|
2020-06-22 11:26:49 -05:00
|
|
|
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
|
|
|
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
|
|
|
|
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
2021-11-30 06:20:09 -06:00
|
|
|
|
|
|
|
#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>
|
2020-06-22 11:26:49 -05:00
|
|
|
|
2021-10-25 04:08:06 -05:00
|
|
|
namespace Opm
|
|
|
|
{
|
|
|
|
namespace Accelerator
|
2020-06-22 11:26:49 -05:00
|
|
|
{
|
|
|
|
|
|
|
|
/// This class implements a opencl-based ilu0-bicgstab solver on GPU
|
2020-06-24 13:09:14 -05:00
|
|
|
template <unsigned int block_size>
|
|
|
|
class openclSolverBackend : public BdaSolver<block_size>
|
2020-06-22 11:26:49 -05:00
|
|
|
{
|
2020-06-24 13:09:14 -05:00
|
|
|
typedef BdaSolver<block_size> Base;
|
|
|
|
|
|
|
|
using Base::N;
|
|
|
|
using Base::Nb;
|
|
|
|
using Base::nnz;
|
|
|
|
using Base::nnzb;
|
|
|
|
using Base::verbosity;
|
2020-07-01 12:43:22 -05:00
|
|
|
using Base::platformID;
|
|
|
|
using Base::deviceID;
|
2020-06-24 13:09:14 -05:00
|
|
|
using Base::maxit;
|
|
|
|
using Base::tolerance;
|
|
|
|
using Base::initialized;
|
|
|
|
|
2020-06-22 11:26:49 -05:00
|
|
|
private:
|
2022-09-22 03:15:23 -05:00
|
|
|
double *h_b = nullptr; // b vector, on host
|
2021-12-01 05:00:20 -06:00
|
|
|
std::vector<double> vals_contiguous; // only used if COPY_ROW_BY_ROW is true in openclSolverBackend.cpp
|
2020-06-22 11:26:49 -05:00
|
|
|
|
|
|
|
// OpenCL variables must be reusable, they are initialized in initialize()
|
2022-09-27 08:55:26 -05:00
|
|
|
cl::Buffer d_Avals, d_Acols, d_Arows; // matrix in BSR format on GPU
|
2020-06-22 11:26:49 -05:00
|
|
|
cl::Buffer d_x, d_b, d_rb, d_r, d_rw, d_p; // vectors, used during linear solve
|
|
|
|
cl::Buffer d_pw, d_s, d_t, d_v; // vectors, used during linear solve
|
|
|
|
cl::Buffer d_tmp; // used as tmp GPU buffer for dot() and norm()
|
|
|
|
|
2020-11-11 11:34:10 -06:00
|
|
|
std::vector<cl::Device> devices;
|
2020-06-22 11:26:49 -05:00
|
|
|
|
2022-02-23 09:28:37 -06:00
|
|
|
bool useJacMatrix = false;
|
2022-04-21 10:18:54 -05:00
|
|
|
|
2021-11-30 06:20:09 -06:00
|
|
|
std::unique_ptr<Preconditioner<block_size> > prec;
|
2021-11-25 06:40:30 -06:00
|
|
|
// can perform blocked ILU0 and AMG on pressure component
|
2021-12-01 04:43:30 -06:00
|
|
|
bool is_root; // allow for nested solvers, the root solver is called by BdaBridge
|
2020-11-11 11:34:10 -06:00
|
|
|
bool analysis_done = false;
|
2022-02-07 06:39:38 -06:00
|
|
|
std::shared_ptr<BlockedMatrix> mat = nullptr; // original matrix
|
|
|
|
std::shared_ptr<BlockedMatrix> jacMat = nullptr; // matrix for preconditioner
|
2022-09-27 08:55:26 -05:00
|
|
|
bool opencl_ilu_parallel; // parallelize ILU operations (with level_scheduling)
|
2021-03-03 07:04:06 -06:00
|
|
|
std::vector<cl::Event> events;
|
|
|
|
cl_int err;
|
2020-06-22 11:26:49 -05:00
|
|
|
|
|
|
|
/// Divide A by B, and round up: return (int)ceil(A/B)
|
|
|
|
/// \param[in] A dividend
|
|
|
|
/// \param[in] B divisor
|
|
|
|
/// \return rounded division result
|
|
|
|
unsigned int ceilDivision(const unsigned int A, const unsigned int B);
|
|
|
|
|
|
|
|
/// Calculate dot product between in1 and in2, partial sums are stored in out, which are summed on CPU
|
|
|
|
/// \param[in] in1 input vector 1
|
|
|
|
/// \param[in] in2 input vector 2
|
|
|
|
/// \param[out] out output vector containing partial sums
|
|
|
|
/// \return dot product
|
|
|
|
double dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out);
|
|
|
|
|
|
|
|
/// Calculate the norm of in, partial sums are stored in out, which are summed on the CPU
|
|
|
|
/// Equal to Dune::DenseVector::two_norm()
|
|
|
|
/// \param[in] in input vector
|
|
|
|
/// \param[out] out output vector containing partial sums
|
|
|
|
/// \return norm
|
|
|
|
double norm_w(cl::Buffer in, cl::Buffer out);
|
|
|
|
|
|
|
|
/// Perform axpy: out += a * in
|
|
|
|
/// \param[in] in input vector
|
|
|
|
/// \param[in] a scalar value to multiply input vector
|
|
|
|
/// \param[inout] out output vector
|
|
|
|
void axpy_w(cl::Buffer in, const double a, cl::Buffer out);
|
|
|
|
|
2021-09-24 05:59:22 -05:00
|
|
|
/// Perform scale: vec *= a
|
|
|
|
/// \param[inout] vec vector to scale
|
|
|
|
/// \param[in] a scalar value to multiply vector
|
|
|
|
void scale_w(cl::Buffer vec, const double a);
|
|
|
|
|
2020-06-22 11:26:49 -05:00
|
|
|
/// Custom function that combines scale, axpy and add functions in bicgstab
|
|
|
|
/// p = (p - omega * v) * beta + r
|
|
|
|
/// \param[inout] p output vector
|
|
|
|
/// \param[in] v input vector
|
|
|
|
/// \param[in] r input vector
|
|
|
|
/// \param[in] omega scalar value
|
|
|
|
/// \param[in] beta scalar value
|
|
|
|
void custom_w(cl::Buffer p, cl::Buffer v, cl::Buffer r, const double omega, const double beta);
|
|
|
|
|
|
|
|
/// Sparse matrix-vector multiply, spmv
|
|
|
|
/// b = A * x
|
|
|
|
/// Matrix A, must be in BCRS format
|
|
|
|
/// \param[in] vals nnzs of matrix A
|
|
|
|
/// \param[in] cols columnindices of matrix A
|
|
|
|
/// \param[in] rows rowpointers of matrix A
|
|
|
|
/// \param[in] x input vector
|
|
|
|
/// \param[out] b output vector
|
|
|
|
void spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b);
|
|
|
|
|
|
|
|
/// Solve linear system using ilu0-bicgstab
|
|
|
|
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
|
|
|
/// \param[inout] res summary of solver result
|
2020-11-11 11:34:10 -06:00
|
|
|
void gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res);
|
2020-06-22 11:26:49 -05:00
|
|
|
|
|
|
|
/// Initialize GPU and allocate memory
|
2022-02-07 06:39:38 -06:00
|
|
|
/// \param[in] matrix matrix A
|
|
|
|
/// \param[in] jacMatrix matrix for preconditioner
|
2022-02-23 09:28:37 -06:00
|
|
|
void initialize(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix);
|
2022-04-21 10:18:32 -05:00
|
|
|
|
2020-06-22 11:26:49 -05:00
|
|
|
/// Clean memory
|
|
|
|
void finalize();
|
|
|
|
|
|
|
|
/// Copy linear system to GPU
|
2020-11-11 11:34:10 -06:00
|
|
|
void copy_system_to_gpu();
|
2020-09-07 11:58:48 -05:00
|
|
|
|
2022-09-27 08:55:26 -05:00
|
|
|
/// Reassign pointers, in case the addresses of the Dune variables have changed
|
2020-06-22 11:26:49 -05:00
|
|
|
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
|
|
|
|
/// \param[in] b input vectors, contains N values
|
2022-09-27 08:55:26 -05:00
|
|
|
void update_system(double *vals, double *b);
|
2020-06-22 11:26:49 -05:00
|
|
|
|
|
|
|
/// Update linear system on GPU, don't copy rowpointers and colindices, they stay the same
|
2020-11-11 11:34:10 -06:00
|
|
|
void update_system_on_gpu();
|
2020-06-22 11:26:49 -05:00
|
|
|
|
2021-11-30 06:20:09 -06:00
|
|
|
/// Analyze sparsity pattern to extract parallelism
|
2020-06-22 11:26:49 -05:00
|
|
|
/// \return true iff analysis was successful
|
2021-11-30 06:20:09 -06:00
|
|
|
bool analyze_matrix();
|
2020-06-22 11:26:49 -05:00
|
|
|
|
2022-09-27 08:55:26 -05:00
|
|
|
/// Create the preconditioner, only done once per linear solve
|
2020-06-22 11:26:49 -05:00
|
|
|
/// \return true iff decomposition was successful
|
|
|
|
bool create_preconditioner();
|
|
|
|
|
|
|
|
/// Solve linear system
|
|
|
|
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
2022-09-27 08:55:26 -05:00
|
|
|
/// could be empty
|
2020-06-22 11:26:49 -05:00
|
|
|
/// \param[inout] res summary of solver result
|
2020-11-11 11:34:10 -06:00
|
|
|
void solve_system(WellContributions &wellContribs, BdaResult &res);
|
2020-06-22 11:26:49 -05:00
|
|
|
|
|
|
|
public:
|
2020-11-11 11:34:10 -06:00
|
|
|
std::shared_ptr<cl::Context> context;
|
|
|
|
std::shared_ptr<cl::CommandQueue> queue;
|
2020-06-22 11:26:49 -05:00
|
|
|
|
|
|
|
/// Construct a openclSolver
|
|
|
|
/// \param[in] linear_solver_verbosity verbosity of openclSolver
|
|
|
|
/// \param[in] maxit maximum number of iterations for openclSolver
|
|
|
|
/// \param[in] tolerance required relative tolerance for openclSolver
|
2020-07-01 12:43:22 -05:00
|
|
|
/// \param[in] platformID the OpenCL platform to be used
|
|
|
|
/// \param[in] deviceID the device to be used
|
2022-09-27 08:55:26 -05:00
|
|
|
/// \param[in] opencl_ilu_parallel whether to parallelize the ILU decomposition and application in OpenCL with level_scheduling
|
2022-09-21 01:57:43 -05:00
|
|
|
/// \param[in] linsolver indicating the preconditioner, equal to the --linear-solver cmdline argument
|
2022-02-02 05:06:40 -06:00
|
|
|
/// only ilu0, cpr_quasiimpes and isai are supported
|
2021-12-01 04:43:30 -06:00
|
|
|
openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID,
|
2022-09-27 08:54:19 -05:00
|
|
|
bool opencl_ilu_parallel, std::string linsolver);
|
2021-12-01 04:43:30 -06:00
|
|
|
|
|
|
|
/// For the CPR coarse solver
|
2022-09-27 08:54:19 -05:00
|
|
|
openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, bool opencl_ilu_parallel);
|
2020-06-22 11:26:49 -05:00
|
|
|
|
|
|
|
/// Solve linear system, A*x = b, matrix A must be in blocked-CSR format
|
2022-02-07 06:39:38 -06:00
|
|
|
/// \param[in] matrix matrix A
|
2020-06-22 11:26:49 -05:00
|
|
|
/// \param[in] b input vector, contains N values
|
2022-02-23 09:28:37 -06:00
|
|
|
/// \param[in] jacMatrix matrix for preconditioner
|
2020-06-22 11:26:49 -05:00
|
|
|
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
|
|
|
/// \param[inout] res summary of solver result
|
|
|
|
/// \return status code
|
2022-02-23 09:28:37 -06:00
|
|
|
SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b,
|
|
|
|
std::shared_ptr<BlockedMatrix> jacMatrix, WellContributions& wellContribs, BdaResult &res) override;
|
2022-04-21 10:18:32 -05:00
|
|
|
|
2021-12-01 04:43:30 -06:00
|
|
|
/// Solve scalar linear system, for example a coarse system of an AMG preconditioner
|
|
|
|
/// Data is already on the GPU
|
|
|
|
// SolverStatus solve_system(BdaResult &res);
|
|
|
|
|
2020-06-22 11:26:49 -05:00
|
|
|
/// Get result after linear solve, and peform postprocessing if necessary
|
|
|
|
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
|
|
|
void get_result(double *x) override;
|
|
|
|
|
2021-12-01 04:43:30 -06:00
|
|
|
/// Set OpenCL objects
|
|
|
|
/// This class either creates them based on platformID and deviceID or receives them through this function
|
|
|
|
/// \param[in] context the opencl context to be used
|
|
|
|
/// \param[in] queue the opencl queue to be used
|
|
|
|
void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue);
|
2022-04-21 10:18:32 -05:00
|
|
|
|
2020-06-22 11:26:49 -05:00
|
|
|
}; // end class openclSolverBackend
|
|
|
|
|
2021-10-25 04:08:06 -05:00
|
|
|
} // namespace Accelerator
|
|
|
|
} // namespace Opm
|
2020-06-22 11:26:49 -05:00
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|