opm-simulators/opm/simulators/linalg/bda/opencl/openclSolverBackend.hpp

/*
  Copyright 2020 Equinor ASA

  This file is part of the Open Porous Media project (OPM).

  OPM is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  OPM is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef OPM_OPENCLSOLVER_BACKEND_HEADER_INCLUDED
#define OPM_OPENCLSOLVER_BACKEND_HEADER_INCLUDED

#include <opm/simulators/linalg/bda/opencl/opencl.hpp>
#include <opm/simulators/linalg/bda/BdaResult.hpp>
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
#include <opm/simulators/linalg/bda/ILUReorder.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>

#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>

namespace Opm
{
namespace Accelerator
{

/// This class implements a opencl-based ilu0-bicgstab solver on GPU
template <unsigned int block_size>
class openclSolverBackend : public BdaSolver<block_size>
{
    typedef BdaSolver<block_size> Base;

    using Base::N;
    using Base::Nb;
    using Base::nnz;
    using Base::nnzb;
    using Base::verbosity;
    using Base::platformID;
    using Base::deviceID;
    using Base::maxit;
    using Base::tolerance;
    using Base::initialized;

private:
    double *rb = nullptr;                 // reordered b vector, if the matrix is reordered, rb is newly allocated, otherwise it just points to b
    std::vector<double> vals_contiguous;  // only used if COPY_ROW_BY_ROW is true in openclSolverBackend.cpp

    // OpenCL variables must be reusable, they are initialized in initialize()
    cl::Buffer d_Avals, d_Acols, d_Arows;        // (reordered) matrix in BSR format on GPU
    cl::Buffer d_x, d_b, d_rb, d_r, d_rw, d_p;   // vectors, used during linear solve
    cl::Buffer d_pw, d_s, d_t, d_v;              // vectors, used during linear solve
    cl::Buffer d_tmp;                            // used as tmp GPU buffer for dot() and norm()
    cl::Buffer d_toOrder;                        // only used when reordering is used

    std::vector<cl::Device> devices;

    int jac_nnz;
    int jac_nnzb;
    bool blockJacVersion = false;

    std::unique_ptr<Preconditioner<block_size> > prec;
                                                                  // can perform blocked ILU0 and AMG on pressure component
    bool is_root;                                                 // allow for nested solvers, the root solver is called by BdaBridge
    int *toOrder = nullptr, *fromOrder = nullptr;                 // BILU0 reorders rows of the matrix via these mappings
    bool analysis_done = false;
    std::shared_ptr<BlockedMatrix> mat = nullptr;                 // original matrix
    std::shared_ptr<BlockedMatrix> jacMat = nullptr;              // matrix for preconditioner
    BlockedMatrix *rmat = nullptr;                                // reordered matrix (or original if no reordering), used for spmv
    ILUReorder opencl_ilu_reorder;                                // reordering strategy
    std::vector<cl::Event> events;
    cl_int err;

    /// Divide A by B, and round up: return (int)ceil(A/B)
    /// \param[in] A    dividend
    /// \param[in] B    divisor
    /// \return         rounded division result
    unsigned int ceilDivision(const unsigned int A, const unsigned int B);

    /// Calculate dot product between in1 and in2, partial sums are stored in out, which are summed on CPU
    /// \param[in] in1           input vector 1
    /// \param[in] in2           input vector 2
    /// \param[out] out          output vector containing partial sums
    /// \return                  dot product
    double dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out);

    /// Calculate the norm of in, partial sums are stored in out, which are summed on the CPU
    /// Equal to Dune::DenseVector::two_norm()
    /// \param[in] in          input vector
    /// \param[out] out        output vector containing partial sums
    /// \return                norm
    double norm_w(cl::Buffer in, cl::Buffer out);

    /// Perform axpy: out += a * in
    /// \param[in] in         input vector
    /// \param[in] a          scalar value to multiply input vector
    /// \param[inout] out     output vector
    void axpy_w(cl::Buffer in, const double a, cl::Buffer out);

    /// Perform scale: vec *= a
    /// \param[inout] vec     vector to scale
    /// \param[in] a          scalar value to multiply vector
    void scale_w(cl::Buffer vec, const double a);

    /// Custom function that combines scale, axpy and add functions in bicgstab
    /// p = (p - omega * v) * beta + r
    /// \param[inout] p      output vector
    /// \param[in] v         input vector
    /// \param[in] r         input vector
    /// \param[in] omega     scalar value
    /// \param[in] beta      scalar value
    void custom_w(cl::Buffer p, cl::Buffer v, cl::Buffer r, const double omega, const double beta);

    /// Sparse matrix-vector multiply, spmv
    /// b = A * x
    /// Matrix A, must be in BCRS format
    /// \param[in] vals     nnzs of matrix A
    /// \param[in] cols     columnindices of matrix A
    /// \param[in] rows     rowpointers of matrix A
    /// \param[in] x        input vector
    /// \param[out] b       output vector
    void spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b);

    /// Solve linear system using ilu0-bicgstab
    /// \param[in] wellContribs   WellContributions, to apply them separately, instead of adding them to matrix A
    /// \param[inout] res         summary of solver result
    void gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res);

    /// Initialize GPU and allocate memory
    /// \param[in] matrix     matrix A
    /// \param[in] jacMatrix  matrix for preconditioner
    void initialize(std::shared_ptr<BlockedMatrix> matrix);
    void initialize2(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix);

    /// Clean memory
    void finalize();

    /// Copy linear system to GPU
    void copy_system_to_gpu();

    /// Reorder the linear system so it corresponds with the coloring
    /// \param[in] vals           array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
    /// \param[in] b              input vectors, contains N values
    /// \param[out] wellContribs  WellContributions, to set reordering
    void update_system(double *vals, double *b, WellContributions &wellContribs);

    /// Update linear system on GPU, don't copy rowpointers and colindices, they stay the same
    void update_system_on_gpu();

    /// Analyze sparsity pattern to extract parallelism
    /// \return true iff analysis was successful
    bool analyze_matrix();

    /// Perform ilu0-decomposition
    /// \return true iff decomposition was successful
    bool create_preconditioner();

    /// Solve linear system
    /// \param[in] wellContribs   WellContributions, to apply them separately, instead of adding them to matrix A
    /// \param[inout] res         summary of solver result
    void solve_system(WellContributions &wellContribs, BdaResult &res);

public:
    std::shared_ptr<cl::Context> context;
    std::shared_ptr<cl::CommandQueue> queue;

    /// Construct a openclSolver
    /// \param[in] linear_solver_verbosity    verbosity of openclSolver
    /// \param[in] maxit                      maximum number of iterations for openclSolver
    /// \param[in] tolerance                  required relative tolerance for openclSolver
    /// \param[in] platformID                 the OpenCL platform to be used
    /// \param[in] deviceID                   the device to be used
    /// \param[in] opencl_ilu_reorder         select either level_scheduling or graph_coloring, see Reorder.hpp for explanation
    /// \param[in] linsolver                  indicating the preconditioner, equal to the --linsolver cmdline argument
    ///                                       only ilu0, cpr_quasiimpes and isai are supported
    openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID,
        ILUReorder opencl_ilu_reorder, std::string linsolver);

    /// For the CPR coarse solver
    openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, ILUReorder opencl_ilu_reorder);

    /// Destroy a openclSolver, and free memory
    ~openclSolverBackend();

    /// Solve linear system, A*x = b, matrix A must be in blocked-CSR format
    /// \param[in] matrix         matrix A
    /// \param[in] b              input vector, contains N values
    /// \param[in] wellContribs   WellContributions, to apply them separately, instead of adding them to matrix A
    /// \param[inout] res         summary of solver result
    /// \return                   status code
    SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b, WellContributions& wellContribs, BdaResult &res) override;

    SolverStatus solve_system2(std::shared_ptr<BlockedMatrix> matrix, double *b,
                               std::shared_ptr<BlockedMatrix> jacMatrix,
                               WellContributions& wellContribs, BdaResult &res) override;

    /// Solve scalar linear system, for example a coarse system of an AMG preconditioner
    /// Data is already on the GPU
    // SolverStatus solve_system(BdaResult &res);

    /// Get result after linear solve, and peform postprocessing if necessary
    /// \param[inout] x          resulting x vector, caller must guarantee that x points to a valid array
    void get_result(double *x) override;

    /// Set OpenCL objects
    /// This class either creates them based on platformID and deviceID or receives them through this function
    /// \param[in] context   the opencl context to be used
    /// \param[in] queue     the opencl queue to be used
    void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue);
    
}; // end class openclSolverBackend

} // namespace Accelerator
} // namespace Opm

#endif
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00			`/*`
			`Copyright 2020 Equinor ASA`

			`This file is part of the Open Porous Media project (OPM).`

			`OPM is free software: you can redistribute it and/or modify`
			`it under the terms of the GNU General Public License as published by`
			`the Free Software Foundation, either version 3 of the License, or`
			`(at your option) any later version.`

			`OPM is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`GNU General Public License for more details.`

			`You should have received a copy of the GNU General Public License`
			`along with OPM. If not, see <http://www.gnu.org/licenses/>.`
			`*/`

			`#ifndef OPM_OPENCLSOLVER_BACKEND_HEADER_INCLUDED`
			`#define OPM_OPENCLSOLVER_BACKEND_HEADER_INCLUDED`

Move opencl files to separate folder 2022-02-01 09:51:32 -06:00			`#include <opm/simulators/linalg/bda/opencl/opencl.hpp>`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00			`#include <opm/simulators/linalg/bda/BdaResult.hpp>`
			`#include <opm/simulators/linalg/bda/BdaSolver.hpp>`
Renamed parameter to OpenclIluReorder and moved error checking to BdaBridge constructor. 2020-10-16 08:05:02 -05:00			`#include <opm/simulators/linalg/bda/ILUReorder.hpp>`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00			`#include <opm/simulators/linalg/bda/WellContributions.hpp>`
Add Preconditioner, superclass of BILU0 and CPR 2021-11-30 06:20:09 -06:00
			`#include <opm/simulators/linalg/bda/opencl/Preconditioner.hpp>`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00
Rename namespace bda to Opm::Accelerator 2021-10-25 04:08:06 -05:00			`namespace Opm`
			`{`
			`namespace Accelerator`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00			`{`

			`/// This class implements a opencl-based ilu0-bicgstab solver on GPU`
Added block_size template to BdaSolvers and BILU0 2020-06-24 13:09:14 -05:00			`template <unsigned int block_size>`
			`class openclSolverBackend : public BdaSolver<block_size>`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00			`{`
Added block_size template to BdaSolvers and BILU0 2020-06-24 13:09:14 -05:00			`typedef BdaSolver<block_size> Base;`

			`using Base::N;`
			`using Base::Nb;`
			`using Base::nnz;`
			`using Base::nnzb;`
			`using Base::verbosity;`
Changed platformID and deviceID to commandline parameter. Make sure Flow exits cleanly upon invalid platform- or deviceID. 2020-07-01 12:43:22 -05:00			`using Base::platformID;`
			`using Base::deviceID;`
Added block_size template to BdaSolvers and BILU0 2020-06-24 13:09:14 -05:00			`using Base::maxit;`
			`using Base::tolerance;`
			`using Base::initialized;`

Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00			`private:`
Reordering is now actually skipped when NONE is chosen. Also made reordering optional for openclSolver. 2021-01-11 09:51:05 -06:00			`double *rb = nullptr; // reordered b vector, if the matrix is reordered, rb is newly allocated, otherwise it just points to b`
Change C to C++ Also bugfix wrong size when COPY_ROW_BY_ROW is active 2021-12-01 05:00:20 -06:00			`std::vector<double> vals_contiguous; // only used if COPY_ROW_BY_ROW is true in openclSolverBackend.cpp`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00
			`// OpenCL variables must be reusable, they are initialized in initialize()`
			`cl::Buffer d_Avals, d_Acols, d_Arows; // (reordered) matrix in BSR format on GPU`
			`cl::Buffer d_x, d_b, d_rb, d_r, d_rw, d_p; // vectors, used during linear solve`
			`cl::Buffer d_pw, d_s, d_t, d_v; // vectors, used during linear solve`
			`cl::Buffer d_tmp; // used as tmp GPU buffer for dot() and norm()`
Reordering is now actually skipped when NONE is chosen. Also made reordering optional for openclSolver. 2021-01-11 09:51:05 -06:00			`cl::Buffer d_toOrder; // only used when reordering is used`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00
Removed WellContributionsOCLContainer class (rebase) 2020-11-11 11:34:10 -06:00			`std::vector<cl::Device> devices;`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00
add block-jacobi partitioner option. Add block-jacobi matrix for use in OpenCL preconditioner Rebased 2022-04-21 10:18:32 -05:00			`int jac_nnz;`
			`int jac_nnzb;`
reenable normal opencl Rebased 2022-04-21 10:18:54 -05:00			`bool blockJacVersion = false;`

Add Preconditioner, superclass of BILU0 and CPR 2021-11-30 06:20:09 -06:00			`std::unique_ptr<Preconditioner<block_size> > prec;`
Combine BILU0 and CPR preconditioner 2021-11-25 06:40:30 -06:00			`// can perform blocked ILU0 and AMG on pressure component`
Add CPR preconditioner for openclSolver and change raw pointers to vector for Matrix 2021-12-01 04:43:30 -06:00			`bool is_root; // allow for nested solvers, the root solver is called by BdaBridge`
Fixed kernel; fixed time step chopping; 2020-09-07 11:58:48 -05:00			`int toOrder = nullptr, fromOrder = nullptr; // BILU0 reorders rows of the matrix via these mappings`
Removed WellContributionsOCLContainer class (rebase) 2020-11-11 11:34:10 -06:00			`bool analysis_done = false;`
Pass BlockedMatrix to BdaSolvers 2022-02-07 06:39:38 -06:00			`std::shared_ptr<BlockedMatrix> mat = nullptr; // original matrix`
			`std::shared_ptr<BlockedMatrix> jacMat = nullptr; // matrix for preconditioner`
Changed block_size template to variable for BlockedMatrix 2021-11-12 09:11:53 -06:00			`BlockedMatrix *rmat = nullptr; // reordered matrix (or original if no reordering), used for spmv`
Reordering is now actually skipped when NONE is chosen. Also made reordering optional for openclSolver. 2021-01-11 09:51:05 -06:00			`ILUReorder opencl_ilu_reorder; // reordering strategy`
Add more waiting for opencl functions, using shortened opencl kernel params in WellContributions 2021-03-03 07:04:06 -06:00			`std::vector<cl::Event> events;`
			`cl_int err;`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00
			`/// Divide A by B, and round up: return (int)ceil(A/B)`
			`/// \param[in] A dividend`
			`/// \param[in] B divisor`
			`/// \return rounded division result`
			`unsigned int ceilDivision(const unsigned int A, const unsigned int B);`

			`/// Calculate dot product between in1 and in2, partial sums are stored in out, which are summed on CPU`
			`/// \param[in] in1 input vector 1`
			`/// \param[in] in2 input vector 2`
			`/// \param[out] out output vector containing partial sums`
			`/// \return dot product`
			`double dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out);`

			`/// Calculate the norm of in, partial sums are stored in out, which are summed on the CPU`
			`/// Equal to Dune::DenseVector::two_norm()`
			`/// \param[in] in input vector`
			`/// \param[out] out output vector containing partial sums`
			`/// \return norm`
			`double norm_w(cl::Buffer in, cl::Buffer out);`

			`/// Perform axpy: out += a * in`
			`/// \param[in] in input vector`
			`/// \param[in] a scalar value to multiply input vector`
			`/// \param[inout] out output vector`
			`void axpy_w(cl::Buffer in, const double a, cl::Buffer out);`

Create and use scale kernel for relaxation of ILU 2021-09-24 05:59:22 -05:00			`/// Perform scale: vec *= a`
			`/// \param[inout] vec vector to scale`
			`/// \param[in] a scalar value to multiply vector`
			`void scale_w(cl::Buffer vec, const double a);`

Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00			`/// Custom function that combines scale, axpy and add functions in bicgstab`
			`/// p = (p - omega * v) * beta + r`
			`/// \param[inout] p output vector`
			`/// \param[in] v input vector`
			`/// \param[in] r input vector`
			`/// \param[in] omega scalar value`
			`/// \param[in] beta scalar value`
			`void custom_w(cl::Buffer p, cl::Buffer v, cl::Buffer r, const double omega, const double beta);`

			`/// Sparse matrix-vector multiply, spmv`
			`/// b = A * x`
			`/// Matrix A, must be in BCRS format`
			`/// \param[in] vals nnzs of matrix A`
			`/// \param[in] cols columnindices of matrix A`
			`/// \param[in] rows rowpointers of matrix A`
			`/// \param[in] x input vector`
			`/// \param[out] b output vector`
			`void spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b);`

			`/// Solve linear system using ilu0-bicgstab`
			`/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A`
			`/// \param[inout] res summary of solver result`
Removed WellContributionsOCLContainer class (rebase) 2020-11-11 11:34:10 -06:00			`void gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res);`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00
			`/// Initialize GPU and allocate memory`
Pass BlockedMatrix to BdaSolvers 2022-02-07 06:39:38 -06:00			`/// \param[in] matrix matrix A`
			`/// \param[in] jacMatrix matrix for preconditioner`
			`void initialize(std::shared_ptr<BlockedMatrix> matrix);`
			`void initialize2(std::shared_ptr<BlockedMatrix> matrix, std::shared_ptr<BlockedMatrix> jacMatrix);`
add block-jacobi partitioner option. Add block-jacobi matrix for use in OpenCL preconditioner Rebased 2022-04-21 10:18:32 -05:00
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00			`/// Clean memory`
			`void finalize();`

			`/// Copy linear system to GPU`
Removed WellContributionsOCLContainer class (rebase) 2020-11-11 11:34:10 -06:00			`void copy_system_to_gpu();`
Fixed kernel; fixed time step chopping; 2020-09-07 11:58:48 -05:00
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00			`/// Reorder the linear system so it corresponds with the coloring`
			`/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values`
			`/// \param[in] b input vectors, contains N values`
Made reordering optional for WellContributions Removed unnecessary alloc 2021-01-13 04:54:40 -06:00			`/// \param[out] wellContribs WellContributions, to set reordering`
			`void update_system(double vals, double b, WellContributions &wellContribs);`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00
			`/// Update linear system on GPU, don't copy rowpointers and colindices, they stay the same`
Removed WellContributionsOCLContainer class (rebase) 2020-11-11 11:34:10 -06:00			`void update_system_on_gpu();`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00
Add Preconditioner, superclass of BILU0 and CPR 2021-11-30 06:20:09 -06:00			`/// Analyze sparsity pattern to extract parallelism`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00			`/// \return true iff analysis was successful`
Add Preconditioner, superclass of BILU0 and CPR 2021-11-30 06:20:09 -06:00			`bool analyze_matrix();`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00
			`/// Perform ilu0-decomposition`
			`/// \return true iff decomposition was successful`
			`bool create_preconditioner();`

			`/// Solve linear system`
			`/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A`
			`/// \param[inout] res summary of solver result`
Removed WellContributionsOCLContainer class (rebase) 2020-11-11 11:34:10 -06:00			`void solve_system(WellContributions &wellContribs, BdaResult &res);`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00
			`public:`
Removed WellContributionsOCLContainer class (rebase) 2020-11-11 11:34:10 -06:00			`std::shared_ptr<cl::Context> context;`
			`std::shared_ptr<cl::CommandQueue> queue;`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00
			`/// Construct a openclSolver`
			`/// \param[in] linear_solver_verbosity verbosity of openclSolver`
			`/// \param[in] maxit maximum number of iterations for openclSolver`
			`/// \param[in] tolerance required relative tolerance for openclSolver`
Changed platformID and deviceID to commandline parameter. Make sure Flow exits cleanly upon invalid platform- or deviceID. 2020-07-01 12:43:22 -05:00			`/// \param[in] platformID the OpenCL platform to be used`
			`/// \param[in] deviceID the device to be used`
Reduced header inclusion and updated comments 2022-02-02 05:06:40 -06:00			`/// \param[in] opencl_ilu_reorder select either level_scheduling or graph_coloring, see Reorder.hpp for explanation`
Add CPR preconditioner for openclSolver and change raw pointers to vector for Matrix 2021-12-01 04:43:30 -06:00			`/// \param[in] linsolver indicating the preconditioner, equal to the --linsolver cmdline argument`
Reduced header inclusion and updated comments 2022-02-02 05:06:40 -06:00			`/// only ilu0, cpr_quasiimpes and isai are supported`
Add CPR preconditioner for openclSolver and change raw pointers to vector for Matrix 2021-12-01 04:43:30 -06:00			`openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID,`
			`ILUReorder opencl_ilu_reorder, std::string linsolver);`

			`/// For the CPR coarse solver`
			`openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, ILUReorder opencl_ilu_reorder);`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00
			`/// Destroy a openclSolver, and free memory`
			`~openclSolverBackend();`

			`/// Solve linear system, A*x = b, matrix A must be in blocked-CSR format`
Pass BlockedMatrix to BdaSolvers 2022-02-07 06:39:38 -06:00			`/// \param[in] matrix matrix A`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00			`/// \param[in] b input vector, contains N values`
			`/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A`
			`/// \param[inout] res summary of solver result`
			`/// \return status code`
Pass BlockedMatrix to BdaSolvers 2022-02-07 06:39:38 -06:00			`SolverStatus solve_system(std::shared_ptr<BlockedMatrix> matrix, double *b, WellContributions& wellContribs, BdaResult &res) override;`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00
Pass BlockedMatrix to BdaSolvers 2022-02-07 06:39:38 -06:00			`SolverStatus solve_system2(std::shared_ptr<BlockedMatrix> matrix, double *b,`
			`std::shared_ptr<BlockedMatrix> jacMatrix,`
			`WellContributions& wellContribs, BdaResult &res) override;`
add block-jacobi partitioner option. Add block-jacobi matrix for use in OpenCL preconditioner Rebased 2022-04-21 10:18:32 -05:00
Add CPR preconditioner for openclSolver and change raw pointers to vector for Matrix 2021-12-01 04:43:30 -06:00			`/// Solve scalar linear system, for example a coarse system of an AMG preconditioner`
			`/// Data is already on the GPU`
			`// SolverStatus solve_system(BdaResult &res);`

Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00			`/// Get result after linear solve, and peform postprocessing if necessary`
			`/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array`
			`void get_result(double *x) override;`

Add CPR preconditioner for openclSolver and change raw pointers to vector for Matrix 2021-12-01 04:43:30 -06:00			`/// Set OpenCL objects`
			`/// This class either creates them based on platformID and deviceID or receives them through this function`
			`/// \param[in] context the opencl context to be used`
			`/// \param[in] queue the opencl queue to be used`
			`void setOpencl(std::shared_ptr<cl::Context>& context, std::shared_ptr<cl::CommandQueue>& queue);`
add block-jacobi partitioner option. Add block-jacobi matrix for use in OpenCL preconditioner Rebased 2022-04-21 10:18:32 -05:00
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00			`}; // end class openclSolverBackend`

Rename namespace bda to Opm::Accelerator 2021-10-25 04:08:06 -05:00			`} // namespace Accelerator`
			`} // namespace Opm`
Added openclSolver Usage: --gpu-mode=[cusparse\|opencl\|none] on command line 2020-06-22 11:26:49 -05:00
			`#endif`