First commit

2025-02-25 18:55:30 -06:00 · 2020-09-24 16:34:46 -03:00
parent c43648076f
commit 1ba81cece2
10 changed files with 317 additions and 258 deletions
--- a/opm/simulators/linalg/bda/openclSolverBackend.hpp
+++ b/opm/simulators/linalg/bda/openclSolverBackend.hpp
@@ -21,11 +21,10 @@
 #define OPM_OPENCLSOLVER_BACKEND_HEADER_INCLUDED

 #include <opm/simulators/linalg/bda/opencl.hpp>
-
 #include <opm/simulators/linalg/bda/BdaResult.hpp>
 #include <opm/simulators/linalg/bda/BdaSolver.hpp>
 #include <opm/simulators/linalg/bda/WellContributions.hpp>
-
+#include <opm/simulators/linalg/bda/WellContributionsOCLContainer.hpp>
 #include <opm/simulators/linalg/bda/BILU0.hpp>

 namespace bda
@@ -35,9 +34,9 @@ namespace bda
 template <unsigned int block_size>
 class openclSolverBackend : public BdaSolver<block_size>
 {
-
    typedef BdaSolver<block_size> Base;
    typedef BILU0<block_size> Preconditioner;
+    typedef WellContributionsOCLContainer WContainer;

    using Base::N;
    using Base::Nb;
@@ -51,7 +50,6 @@ class openclSolverBackend : public BdaSolver<block_size>
    using Base::initialized;

 private:
-
    double *rb = nullptr;                 // reordered b vector, the matrix is reordered, so b must also be
    double *vals_contiguous = nullptr;    // only used if COPY_ROW_BY_ROW is true in openclSolverBackend.cpp

@@ -64,13 +62,6 @@ private:
    cl::Buffer d_tmp;                            // used as tmp GPU buffer for dot() and norm()
    double *tmp = nullptr;                       // used as tmp CPU buffer for dot() and norm()

-    unsigned int num_blocks, dim_weqs, dim_wells, num_std_wells;
-    unsigned int *h_val_pointers;
-    int *h_Ccols, *h_Bcols;
-    double *h_Cnnzs, *h_Dnnzs, *h_Bnnzs;
-    cl::Buffer d_Cnnzs, d_Dnnzs, d_Bnnzs;
-    cl::Buffer d_Ccols, d_Bcols, d_val_pointers;
-
    // shared pointers are also passed to other objects
    cl::Program program;
    std::shared_ptr<cl::Context> context;
@@ -82,9 +73,13 @@ private:
    std::unique_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg> > spmv_blocked_k;
    std::shared_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> > ILU_apply1_k;
    std::shared_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> > ILU_apply2_k;
-    std::shared_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::Buffer&, cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg> > add_well_contributions_k;
+    std::shared_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&,
+                                    cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&,
+                                    const unsigned int, const unsigned int, cl::Buffer&,
+                                    cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg> > stdwell_apply_k;

    Preconditioner *prec = nullptr;                               // only supported preconditioner is BILU0
+    WContainer *wcontainer = nullptr;
    int *toOrder = nullptr, *fromOrder = nullptr;                 // BILU0 reorders rows of the matrix via these mappings
    std::unique_ptr<BlockedMatrix<block_size> > mat = nullptr;    // original matrix 
    BlockedMatrix<block_size> *rmat = nullptr;                    // reordered matrix, used for spmv
@@ -148,17 +143,13 @@ private:
    /// \param[in] vals           array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
    /// \param[in] rows           array of rowPointers, contains N/dim+1 values
    /// \param[in] cols           array of columnIndices, contains nnz values
-    void initialize(int N, int nnz, int dim, double *vals, int *rows, int *cols);
-
-    void initialize_wellContribs(WellContributions& wellContribs);
+    void initialize(int N, int nnz, int dim, double *vals, int *rows, int *cols, WellContributions &wellContribs);

    /// Clean memory
    void finalize();

    /// Copy linear system to GPU
-    void copy_system_to_gpu();
-
-    void copy_wells_to_gpu(WellContributions& wellContribs);
+    void copy_system_to_gpu(WellContributions &wellContribs);

    /// Reorder the linear system so it corresponds with the coloring
    /// \param[in] vals           array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
@@ -166,7 +157,7 @@ private:
    void update_system(double *vals, double *b);

    /// Update linear system on GPU, don't copy rowpointers and colindices, they stay the same
-    void update_system_on_gpu();
+    void update_system_on_gpu(WellContributions &wellContribs);

    /// Analyse sparsity pattern to extract parallelism
    /// \return true iff analysis was successful
@@ -179,7 +170,7 @@ private:
    /// Solve linear system
    /// \param[in] wellContribs   WellContributions, to apply them separately, instead of adding them to matrix A
    /// \param[inout] res         summary of solver result
-    void solve_system(BdaResult &res);
+    void solve_system(WellContributions &wellContribs, BdaResult &res);

 public: