Simplified filling of WellContributions object. Added comments

2025-02-25 18:55:30 -06:00 · 2020-03-19 11:08:11 +01:00
parent fdcf46792a
commit 8223cd1db8
5 changed files with 123 additions and 99 deletions
--- a/opm/simulators/linalg/bda/WellContributions.cu
+++ b/opm/simulators/linalg/bda/WellContributions.cu
@@ -130,86 +130,92 @@ namespace Opm
    }


-        void WellContributions::alloc(){
-            cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells);
-            cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_wells * dim_wells * dim_wells);
-            cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells);
-            cudaMalloc((void**)&d_Ccols, sizeof(int) * num_blocks);
-            cudaMalloc((void**)&d_Bcols, sizeof(int) * num_blocks);
-            val_pointers = new unsigned int[num_wells + 1];
-            cudaMalloc((void**)&d_val_pointers, sizeof(int) * (num_wells + 1));
-            cudaCheckLastError("apply_gpu malloc failed");
-            allocated = true;
+    void WellContributions::alloc(){
+        cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells);
+        cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_wells * dim_wells * dim_wells);
+        cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells);
+        cudaMalloc((void**)&d_Ccols, sizeof(int) * num_blocks);
+        cudaMalloc((void**)&d_Bcols, sizeof(int) * num_blocks);
+        val_pointers = new unsigned int[num_wells + 1];
+        cudaMalloc((void**)&d_val_pointers, sizeof(int) * (num_wells + 1));
+        cudaCheckLastError("apply_gpu malloc failed");
+        allocated = true;
+    }
+
+    WellContributions::~WellContributions()
+    {
+        cudaFree(d_Cnnzs);
+        cudaFree(d_Dnnzs);
+        cudaFree(d_Bnnzs);
+        cudaFree(d_Ccols);
+        cudaFree(d_Bcols);
+        delete[] val_pointers;
+        cudaFree(d_val_pointers);
+    }
+
+
+    // Apply the WellContributions, similar to StandardWell::apply()
+    // y -= (C^T *(D^-1*(   B*x)))
+    void WellContributions::apply(double *d_x, double *d_y)
+    {
+        int smem_size = 2 * sizeof(double) * dim_wells;
+        apply_well_contributions<<<num_wells, 32, smem_size, stream>>>(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_x, d_y, dim, dim_wells, d_val_pointers);
+    }
+
+
+    void WellContributions::addMatrix(int idx, int *colIndices, double *values, unsigned int val_size)
+    {
+        if (!allocated) {
+            OPM_THROW(std::logic_error,"Error cannot add wellcontribution before allocating memory in WellContributions");
        }
-
-        WellContributions::~WellContributions()
-        {
-            cudaFree(d_Cnnzs);
-            cudaFree(d_Dnnzs);
-            cudaFree(d_Bnnzs);
-            cudaFree(d_Ccols);
-            cudaFree(d_Bcols);
-            delete[] val_pointers;
-            cudaFree(d_val_pointers);
-        }
-
-
-        // Apply the WellContributions, similar to StandardWell::apply()
-        // y -= (C^T *(D^-1*(   B*x)))
-        void WellContributions::apply(double *d_x, double *d_y)
-        {
-            int smem_size = 2 * sizeof(double) * dim_wells;
-            apply_well_contributions<<<num_wells, 32, smem_size, stream>>>(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_x, d_y, dim, dim_wells, d_val_pointers);
-        }
-
-
-        void WellContributions::addMatrix(int idx, int *colIndices, double *values, unsigned int val_size)
-        {
-            switch (idx) {
-            case 0:
-                cudaMemcpy(d_Cnnzs + num_blocks_so_far * dim * dim_wells, values, sizeof(double) * val_size * dim * dim_wells, cudaMemcpyHostToDevice);
-                cudaMemcpy(d_Ccols + num_blocks_so_far, colIndices, sizeof(int) * val_size, cudaMemcpyHostToDevice);
-                break;
-            case 1:
-                cudaMemcpy(d_Dnnzs + num_wells_so_far * dim_wells * dim_wells, values, sizeof(double) * dim_wells * dim_wells, cudaMemcpyHostToDevice);
-                break;
-            case 2:
-                cudaMemcpy(d_Bnnzs + num_blocks_so_far * dim * dim_wells, values, sizeof(double) * val_size * dim * dim_wells, cudaMemcpyHostToDevice);
-                cudaMemcpy(d_Bcols + num_blocks_so_far, colIndices, sizeof(int) * val_size, cudaMemcpyHostToDevice);
-                val_pointers[num_wells_so_far] = num_blocks_so_far;
-                if(num_wells_so_far == num_wells - 1){
-                    val_pointers[num_wells] = num_blocks;
-                }
-                cudaMemcpy(d_val_pointers, val_pointers, sizeof(int) * (num_wells+1), cudaMemcpyHostToDevice);
-                break;
-            default:
-                OPM_THROW(std::logic_error,"Error unsupported matrix ID for WellContributions::addMatrix()");
-            }
-            cudaCheckLastError("WellContributions::addMatrix() failed");
-            if(idx == 2){
-                num_blocks_so_far += val_size;
-            }
-            if(idx == 2){
-                num_wells_so_far++;
+        switch (idx) {
+        case 0:
+            cudaMemcpy(d_Cnnzs + num_blocks_so_far * dim * dim_wells, values, sizeof(double) * val_size * dim * dim_wells, cudaMemcpyHostToDevice);
+            cudaMemcpy(d_Ccols + num_blocks_so_far, colIndices, sizeof(int) * val_size, cudaMemcpyHostToDevice);
+            break;
+        case 1:
+            cudaMemcpy(d_Dnnzs + num_wells_so_far * dim_wells * dim_wells, values, sizeof(double) * dim_wells * dim_wells, cudaMemcpyHostToDevice);
+            break;
+        case 2:
+            cudaMemcpy(d_Bnnzs + num_blocks_so_far * dim * dim_wells, values, sizeof(double) * val_size * dim * dim_wells, cudaMemcpyHostToDevice);
+            cudaMemcpy(d_Bcols + num_blocks_so_far, colIndices, sizeof(int) * val_size, cudaMemcpyHostToDevice);
+            val_pointers[num_wells_so_far] = num_blocks_so_far;
+            if(num_wells_so_far == num_wells - 1){
+                val_pointers[num_wells] = num_blocks;
            }
+            cudaMemcpy(d_val_pointers, val_pointers, sizeof(int) * (num_wells+1), cudaMemcpyHostToDevice);
+            break;
+        default:
+            OPM_THROW(std::logic_error,"Error unsupported matrix ID for WellContributions::addMatrix()");
        }
-
-        void WellContributions::setCudaStream(cudaStream_t stream_)
-        {
-            this->stream = stream_;
+        cudaCheckLastError("WellContributions::addMatrix() failed");
+        if(idx == 2){
+            num_blocks_so_far += val_size;
        }
-
-
-        void WellContributions::addSizes(unsigned int nnz, unsigned int numEq, unsigned int numWellEq)
-        {
-            if(allocated){
-                OPM_THROW(std::logic_error,"Error cannot add more sizes after allocated in WellContributions");
-            }
-            num_blocks += nnz;
-            dim = numEq;
-            dim_wells = numWellEq;
-            num_wells++;
+        if(idx == 2){
+            num_wells_so_far++;
        }
+    }
+
+    void WellContributions::setCudaStream(cudaStream_t stream_)
+    {
+        this->stream = stream_;
+    }
+
+    void WellContributions::setBlockSize(unsigned int dim_, unsigned int dim_wells_)
+    {
+        dim = dim_;
+        dim_wells = dim_wells_;
+    }
+
+    void WellContributions::addNumBlocks(unsigned int nnz)
+    {
+        if (allocated) {
+            OPM_THROW(std::logic_error,"Error cannot add more sizes after allocated in WellContributions");
+        }
+        num_blocks += nnz;
+        num_wells++;
+    }

 } //namespace Opm

--- a/opm/simulators/linalg/bda/WellContributions.hpp
+++ b/opm/simulators/linalg/bda/WellContributions.hpp
@@ -35,17 +35,27 @@ namespace Opm

    /// This class serves to eliminate the need to include the WellContributions into the matrix (with --matrix-add-well-contributions=true) for the cusparseSolver
    /// If the --matrix-add-well-contributions commandline parameter is true, this class should not be used
+    /// A StandardWell uses C, D and B and performs y -= (C^T * (D^-1 * (B*x)))
+    /// B and C are vectors, disguised as matrices and contain blocks of StandardWell::numEq by StandardWell::numStaticWellEq
+    /// D is a block, disguised as matrix, the square block has size StandardWell::numStaticWellEq. D is actually stored as D^-1
+    /// B*x and D*B*x are a vector with numStaticWellEq doubles
+    /// C*D*B*x is a blocked matrix with a symmetric sparsity pattern, contains square blocks with size numEq. For every columnindex i, j in StandardWell::duneB_, there is a block on (i, j) in C*D*B*x.
+    ///
+    /// This class is used in 3 phases:
+    /// - get total size of all wellcontributions that must be stored here
+    /// - allocate memory
+    /// - copy data of wellcontributions
    class WellContributions
    {

    private:
-        unsigned int num_blocks = 0;    // total number of blocks in all wells
-        unsigned int dim;
-        unsigned int dim_wells;
-        unsigned int num_wells = 0;
-        unsigned int num_blocks_so_far = 0;
-        unsigned int num_wells_so_far = 0;
-        unsigned int *val_pointers = nullptr;     // val_pointers[wellID] == index of first block for this well in Ccols and Bcols
+        unsigned int num_blocks = 0;             // total number of blocks in all wells
+        unsigned int dim;                        // number of columns of blocks in B and C, equal to StandardWell::numEq
+        unsigned int dim_wells;                  // number of rows of blocks in B and C, equal to StandardWell::numStaticWellEq
+        unsigned int num_wells = 0;              // number of wellcontributions in this object
+        unsigned int num_blocks_so_far = 0;      // keep track of where next data is written
+        unsigned int num_wells_so_far = 0;       // keep track of where next data is written
+        unsigned int *val_pointers = nullptr;    // val_pointers[wellID] == index of first block for this well in Ccols and Bcols
        bool allocated = false;

        double *d_Cnnzs = nullptr;
@@ -69,26 +79,35 @@ namespace Opm
        ~WellContributions();

        /// Apply all wellcontributions in this object
+        /// performs y -= (C^T * (D^-1 * (B*x))) for StandardWell
+        /// \param[in] x          vector x
+        /// \param[inout] y       vector y
        void apply(double *x, double *y);

        /// Allocate memory for the wellcontributions
        void alloc();

-        /// Indicate how large the next wellcontributions are, this function cannot be called after alloc_all() is called
-        void addSizes(unsigned int nnz, unsigned int numEq, unsigned int numWellEq);
+        /// Indicate how large the blocks of the wellcontributions (C and B) are
+        /// \param[in] dim         number of columns
+        /// \param[in] dim_wells   number of rows
+        void setBlockSize(unsigned int dim, unsigned int dim_wells);

-        /// Store a matrix in this object, in blocked csr format
+        /// Indicate how large the next wellcontribution is, this function cannot be called after alloc() is called
+        /// \param[in] numBlocks   number of blocks in C and B of next wellcontribution
+        void addNumBlocks(unsigned int numBlocks);
+
+        /// Store a matrix in this object, in blocked csr format, can only be called after alloc() is called
+        /// \param[in] idx         indicate if C, D or B is sent
+        /// \param[in] colIndices  columnindices of blocks in C or B, ignored for D
+        /// \param[in] values      array of nonzeroes
+        /// \param[in] val_size    number of blocks in C or B, ignored for D
        void addMatrix(int idx, int *colIndices, double *values, unsigned int val_size);

        /// Return the number of wells added to this object
+        /// \return the number of wells added to this object
        unsigned int getNumWells(){
            return num_wells;
        }
-
-        /// WellContributions can be applied on CPU or GPU
-        /// This function sets the static variable, so each WellContributions is applied on the correct hardware
-        static void setMode(bool use_gpu);
-
    };

 } //namespace Opm
--- a/opm/simulators/wells/BlackoilWellModel_impl.hpp
+++ b/opm/simulators/wells/BlackoilWellModel_impl.hpp
@@ -924,12 +924,13 @@ namespace Opm {
    BlackoilWellModel<TypeTag>::
    getWellContributions(WellContributions& wellContribs) const
    {
+        wellContribs.setBlockSize(StandardWell<TypeTag>::numEq, StandardWell<TypeTag>::numStaticWellEq);
        for(unsigned int i = 0; i < well_container_.size(); i++){
            auto& well = well_container_[i];
            std::shared_ptr<StandardWell<TypeTag> > derived = std::dynamic_pointer_cast<StandardWell<TypeTag> >(well);
-            unsigned int nnz, numEq, numWellEq;
-            derived->getWellSizes(nnz, numEq, numWellEq);
-            wellContribs.addSizes(nnz, numEq, numWellEq);
+            unsigned int numBlocks;
+            derived->getNumBlocks(numBlocks);
+            wellContribs.addNumBlocks(numBlocks);
        }
        wellContribs.alloc();
        for(unsigned int i = 0; i < well_container_.size(); i++){
--- a/opm/simulators/wells/StandardWell.hpp
+++ b/opm/simulators/wells/StandardWell.hpp
@@ -188,8 +188,8 @@ namespace Opm
        /// add the contribution (C, D^-1, B matrices) of this Well to the WellContributions object
        void addWellContribution(WellContributions& wellContribs) const;

-        /// get the sizes of the C, D^-1 and B matrices, used to allocate memory in a WellContributions object
-        void getWellSizes(unsigned int& _nnzs, unsigned int& _numEq, unsigned int& _numWellEq) const;
+        /// get the number of blocks of the C and B matrices, used to allocate memory in a WellContributions object
+        void getNumBlocks(unsigned int& _nnzs) const;
 #endif

        /// using the solution x to recover the solution xw for wells and applying
--- a/opm/simulators/wells/StandardWell_impl.hpp
+++ b/opm/simulators/wells/StandardWell_impl.hpp
@@ -2789,11 +2789,9 @@ namespace Opm
    template<typename TypeTag>
    void
    StandardWell<TypeTag>::
-    getWellSizes(unsigned int& _nnzs, unsigned int& _numEq, unsigned int& _numWellEq) const
+    getNumBlocks(unsigned int& numBlocks) const
    {
-        _nnzs = duneB_.nonzeroes();
-        _numEq = numEq;
-        _numWellEq = numStaticWellEq;
+        numBlocks = duneB_.nonzeroes();
    }
 #endif