Fixed errors

Jose Eduardo Bueno 2020-09-24 18:07:10 -03:00
parent 1ba81cece2
commit ab57d5ecd2
6 changed files with 30 additions and 29 deletions

View File

@@ -63,22 +63,23 @@ public:
B
};
unsigned int dim; // number of columns in blocks in B and C, equal to StandardWell::numEq
unsigned int dim_wells; // number of rows in blocks in B and C, equal to StandardWell::numStaticWellEq
#if HAVE_OPENCL
std::vector<double> h_Cnnzs_ocl, h_Dnnzs_ocl, h_Bnnzs_ocl;
std::vector<int> h_Ccols_ocl, h_Bcols_ocl;
std::vector<unsigned int> h_val_pointers_ocl;
std::vector<double> h_x_ocl, h_y_ocl;
#endif
private:
unsigned int dim; // number of columns in blocks in B and C, equal to StandardWell::numEq
unsigned int dim_wells; // number of rows in blocks in B and C, equal to StandardWell::numStaticWellEq
unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
unsigned int N; // number of rows (not blockrows) in vectors x and y
std::vector<MultisegmentWellContribution*> multisegments;
int *toOrder = nullptr;
bool reorder = false;
#endif
private:
unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
unsigned int N; // number of rows (not blockrows) in vectors x and y
std::vector<MultisegmentWellContribution*> multisegments;
bool opencl_gpu = false;
bool cuda_gpu = false;
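
A minimal sketch (not part of this commit) of the flat block layout that the dim/dim_wells comments above describe, consistent with the indexing Cnnzs[bb*dim*dim_wells + j*dim + c] that appears further down in this diff; the helper name is hypothetical:

#include <cstddef>
#include <vector>

// Each block of B and C is dim_wells rows by dim columns, stored contiguously
// and row-major, so block bb, row j, column c sits at this flat offset.
inline double blockEntry(const std::vector<double>& nnzs,
                         std::size_t bb, unsigned int j, unsigned int c,
                         unsigned int dim, unsigned int dim_wells)
{
    return nnzs[bb * dim * dim_wells + j * dim + c];
}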

View File

@@ -24,19 +24,18 @@
#include <dune/common/timer.hh>
#include <opm/simulators/linalg/bda/WellContributionsOCLContainer.hpp>
#include<iostream>
namespace bda
{
using Opm::OpmLog;
using Dune::Timer;
void WellContributionsOCLContainer::initBuffers(WellContributions &wellContribs)
{
void WellContributionsOCLContainer::init(Opm::WellContributions &wellContribs, int Nb_){
Nb = Nb_;
dim = wellContribs.dim;
dim_wells = wellContribs.dim_wells;
num_std_wells = wellContribs.h_val_pointers_ocl.size() - 1;
toOrder.insert(toOrder.end(), wellContribs.toOrder, wellContribs.toOrder + wellContribs.h_Ccols_ocl.size());
s.Cnnzs = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * wellContribs.h_Cnnzs_ocl.size());
s.Dnnzs = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * wellContribs.h_Dnnzs_ocl.size());
@@ -44,10 +43,12 @@ namespace bda
s.Ccols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * wellContribs.h_Ccols_ocl.size());
s.Bcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * wellContribs.h_Bcols_ocl.size());
s.val_pointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size());
s.toOrder = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * toOrder.size());
s.toOrder = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Nb);
}
void WellcontributionsOCLContainer::copy_to_gpu(WellContributions &wellContribs){
void WellContributionsOCLContainer::copy_to_gpu(Opm::WellContributions &wellContribs){
toOrder.insert(toOrder.end(), wellContribs.toOrder, wellContribs.toOrder + Nb);
cl::Event event;
queue->enqueueWriteBuffer(s.Cnnzs, CL_TRUE, 0, sizeof(double) * wellContribs.h_Cnnzs_ocl.size(), wellContribs.h_Cnnzs_ocl.data());
queue->enqueueWriteBuffer(s.Dnnzs, CL_TRUE, 0, sizeof(double) * wellContribs.h_Dnnzs_ocl.size(), wellContribs.h_Dnnzs_ocl.data());
@@ -59,7 +60,7 @@ namespace bda
event.wait();
}
void WellcontributionsOCLContainer::update_on_gpu(WellContributions &wellContribs){
void WellContributionsOCLContainer::update_on_gpu(Opm::WellContributions &wellContribs){
cl::Event event;
queue->enqueueWriteBuffer(s.Cnnzs, CL_TRUE, 0, sizeof(double) * wellContribs.h_Cnnzs_ocl.size(), wellContribs.h_Cnnzs_ocl.data());
queue->enqueueWriteBuffer(s.Dnnzs, CL_TRUE, 0, sizeof(double) * wellContribs.h_Dnnzs_ocl.size(), wellContribs.h_Dnnzs_ocl.data());
@@ -90,7 +91,7 @@ namespace bda
cl::Event event;
event = (*stdwell_apply)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)),
s.Cnnzs, s.Dnnzs, s.Bnnzs, s.Ccols, s.Bcols, s.toOrder,x, y, dim, dim_wells, s.val_pointers,
s.Cnnzs, s.Dnnzs, s.Bnnzs, s.Ccols, s.Bcols, x, y, s.toOrder, dim, dim_wells, s.val_pointers,
cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
}
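
To situate the renamed entry points, here is a hedged sketch (not from the diff) of how the solver backend is expected to drive this container after the change, using the methods declared in the header below; the wrapper function is illustrative only, and the setKernel()/setKernelParameters() calls plus error handling are omitted:

#include <opm/simulators/linalg/bda/WellContributionsOCLContainer.hpp>

// Illustrative only: context, queue, wellContribs, Nb, d_pw and d_v are assumed
// to be set up by the caller, as in openclSolverBackend. The backend also calls
// wellContribs.setReordering(toOrder, true) beforehand so wellContribs.toOrder is valid.
void applyWellsSketch(cl::Context& context, cl::CommandQueue& queue,
                      Opm::WellContributions& wellContribs, int Nb,
                      cl::Buffer& d_pw, cl::Buffer& d_v)
{
    bda::WellContributionsOCLContainer wcontainer;
    wcontainer.setOpenCLContext(&context);
    wcontainer.setOpenCLQueue(&queue);
    wcontainer.init(wellContribs, Nb);     // size the cl::Buffers; s.toOrder now holds Nb ints
    wcontainer.copy_to_gpu(wellContribs);  // upload the values and the toOrder map
    wcontainer.apply(d_pw, d_v);           // add the standard-well contributions to d_v
}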

View File

@@ -31,6 +31,7 @@ namespace bda
unsigned int dim, dim_wells;
unsigned int num_std_wells = 0;
unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
int Nb;
std::vector<int> toOrder;
typedef struct {
@@ -50,16 +51,15 @@ namespace bda
void applyStdWells(cl::Buffer& x, cl::Buffer& y);
public:
WellContributionsOCLContainer();
~WellContributionsOCLContainer();
WellContributionsOCLContainer() {};
~WellContributionsOCLContainer() {};
void apply(cl::Buffer& x, cl::Buffer& y);
void initBuffers(WellContributions &wellContribs);
void copy_to_gpu(WellContributions &wellContribs);
void update_on_gpu(WellContributions &wellContribs);
void init(Opm::WellContributions &wellContribs, int Nb);
void copy_to_gpu(Opm::WellContributions &wellContribs);
void update_on_gpu(Opm::WellContributions &wellContribs);
void setOpenCLContext(cl::Context *context);
void setOpenCLQueue(cl::CommandQueue *queue);
void setKernelParameters(const unsigned int work_group_size, const unsigned int total_work_items, const unsigned int lmem_per_work_group);
void setKernel(cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&,
cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&,
const unsigned int, const unsigned int, cl::Buffer&,

View File

@@ -436,7 +436,7 @@ namespace bda
for (unsigned int j = 0; j < dim_wells; ++j){
temp += Cnnzs[bb*dim*dim_wells + j*dim + c]*z2[j];
}
colIdx = toOrder[Ccols[bb]];
int colIdx = toOrder[Ccols[bb]];
y[colIdx*dim + c] -= temp;
}
}
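
For context, the hunk above declares colIdx inside the loop body; the surrounding loop scatters one block of C, transposed and multiplied with z2, into the reordered result vector. A hedged sketch of that operation in isolation, reusing the names from the hunk (the surrounding declarations are assumed):

// Sketch, not part of the commit: for one block bb of C this computes
// y -= C_bb^T * z2, written to the block row that toOrder maps Ccols[bb] to.
for (unsigned int c = 0; c < dim; ++c) {
    double temp = 0.0;
    for (unsigned int j = 0; j < dim_wells; ++j) {
        temp += Cnnzs[bb * dim * dim_wells + j * dim + c] * z2[j];   // (C_bb^T z2)[c]
    }
    int colIdx = toOrder[Ccols[bb]];   // translate the natural block index
    y[colIdx * dim + c] -= temp;       // subtract into the reordered vector
}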

View File

@@ -236,12 +236,12 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(BdaResult& res) {
// v = A * pw
t_spmv.start();
wcontainer->apply(d_pw, d_v);
spmv_blocked_w(d_Avals, d_Acols, d_Arows, d_pw, d_v);
t_spmv.stop();
// apply wellContributions
t_well.start();
stdwell_w(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_pw, d_v, d_val_pointers);
wcontainer->apply(d_pw, d_v);
t_well.stop();
t_rest.start();
@@ -496,7 +496,7 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
d_Acols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * nnzb);
d_Arows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Nb + 1));
wcontainer->initBuffers(wellContribs);
wcontainer->init(wellContribs, Nb);
// queue.enqueueNDRangeKernel() is a blocking/synchronous call, at least for NVIDIA
// cl::make_kernel<> myKernel(); myKernel(args, arg1, arg2); is also blocking
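
A hedged sketch (not from the diff) of the invocation pattern these comments refer to, with a hypothetical kernel name; program, queue, total_work_items, work_group_size, d_x and N are assumed to exist as in this backend. The functor call enqueues the kernel and returns a cl::Event, so the extra wait is harmless even where the enqueue already behaves synchronously:

cl::make_kernel<cl::Buffer&, const unsigned int> myKernel(program, "example_kernel");
cl::Event event = myKernel(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)),
                           d_x, N);
event.wait();   // keeps the timing in this backend well defined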
@@ -566,6 +566,7 @@ void openclSolverBackend<block_size>::copy_system_to_gpu(WellContributions &well
queue->enqueueFillBuffer(d_x, 0, 0, sizeof(double) * N, nullptr, &event);
event.wait();
wellContribs.setReordering(toOrder, true);
wcontainer->copy_to_gpu(wellContribs);
if (verbosity > 2) {

View File

@@ -129,8 +129,6 @@ private:
/// \param[out] b output vector
void spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b);
void stdwell_w(cl::Buffer Cnnzs, cl::Buffer Dnnzs, cl::Buffer Bnnzs, cl::Buffer Ccols, cl::Buffer Bcols, cl::Buffer x, cl::Buffer y, cl::Buffer val_pointers);
/// Solve linear system using ilu0-bicgstab
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
@@ -170,7 +168,7 @@ private:
/// Solve linear system
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
void solve_system(WellContributions &wellContribs, BdaResult &res);
void solve_system(BdaResult &res);
public: