Simplified filling of WellContributions object. Added comments

2025-02-25 18:55:30 -06:00 · 2020-03-19 11:08:11 +01:00
parent fdcf46792a
commit 8223cd1db8
5 changed files with 123 additions and 99 deletions
--- a/opm/simulators/linalg/bda/WellContributions.cu
+++ b/opm/simulators/linalg/bda/WellContributions.cu
@@ -130,86 +130,92 @@ namespace Opm
    }
-        void WellContributions::alloc(){
+    void WellContributions::alloc(){
-            cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells);
+        cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells);
-            cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_wells * dim_wells * dim_wells);
+        cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_wells * dim_wells * dim_wells);
-            cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells);
+        cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells);
-            cudaMalloc((void**)&d_Ccols, sizeof(int) * num_blocks);
+        cudaMalloc((void**)&d_Ccols, sizeof(int) * num_blocks);
-            cudaMalloc((void**)&d_Bcols, sizeof(int) * num_blocks);
+        cudaMalloc((void**)&d_Bcols, sizeof(int) * num_blocks);
-            val_pointers = new unsigned int[num_wells + 1];
+        val_pointers = new unsigned int[num_wells + 1];
-            cudaMalloc((void**)&d_val_pointers, sizeof(int) * (num_wells + 1));
+        cudaMalloc((void**)&d_val_pointers, sizeof(int) * (num_wells + 1));
-            cudaCheckLastError("apply_gpu malloc failed");
+        cudaCheckLastError("apply_gpu malloc failed");
-            allocated = true;
+        allocated = true;
    }
    /// Release everything that alloc() acquired:
    /// the cudaMalloc'ed buffers and the new[]-allocated val_pointers array.
    WellContributions::~WellContributions()
    {
        // buffers obtained through cudaMalloc in alloc(), except d_val_pointers
        void *nnz_and_col_buffers[] = {d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols};
        for (void *buffer : nnz_and_col_buffers) {
            cudaFree(buffer);
        }

        // host copy of the per-well block offsets, followed by its cudaMalloc'ed mirror
        delete[] val_pointers;
        cudaFree(d_val_pointers);
    }
    // Apply the WellContributions, similar to StandardWell::apply()
    // y -= (C^T *(D^-1*(   B*x)))
    void WellContributions::apply(double *d_x, double *d_y)
    {
        int smem_size = 2 * sizeof(double) * dim_wells;
        apply_well_contributions<<<num_wells, 32, smem_size, stream>>>(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_x, d_y, dim, dim_wells, d_val_pointers);
    }
    void WellContributions::addMatrix(int idx, int *colIndices, double *values, unsigned int val_size)
    {
        if (!allocated) {
            OPM_THROW(std::logic_error,"Error cannot add wellcontribution before allocating memory in WellContributions");
        }
-
+        switch (idx) {
-        WellContributions::~WellContributions()
+        case 0:
-        {
+            cudaMemcpy(d_Cnnzs + num_blocks_so_far * dim * dim_wells, values, sizeof(double) * val_size * dim * dim_wells, cudaMemcpyHostToDevice);
-            cudaFree(d_Cnnzs);
+            cudaMemcpy(d_Ccols + num_blocks_so_far, colIndices, sizeof(int) * val_size, cudaMemcpyHostToDevice);
-            cudaFree(d_Dnnzs);
+            break;
-            cudaFree(d_Bnnzs);
+        case 1:
-            cudaFree(d_Ccols);
+            cudaMemcpy(d_Dnnzs + num_wells_so_far * dim_wells * dim_wells, values, sizeof(double) * dim_wells * dim_wells, cudaMemcpyHostToDevice);
-            cudaFree(d_Bcols);
+            break;
-            delete[] val_pointers;
+        case 2:
-            cudaFree(d_val_pointers);
+            cudaMemcpy(d_Bnnzs + num_blocks_so_far * dim * dim_wells, values, sizeof(double) * val_size * dim * dim_wells, cudaMemcpyHostToDevice);
-        }
+            cudaMemcpy(d_Bcols + num_blocks_so_far, colIndices, sizeof(int) * val_size, cudaMemcpyHostToDevice);
-
+            val_pointers[num_wells_so_far] = num_blocks_so_far;
-
+            if(num_wells_so_far == num_wells - 1){
-        // Apply the WellContributions, similar to StandardWell::apply()
+                val_pointers[num_wells] = num_blocks;
        // y -= (C^T *(D^-1*(   B*x)))
        void WellContributions::apply(double *d_x, double *d_y)
        {
            int smem_size = 2 * sizeof(double) * dim_wells;
            apply_well_contributions<<<num_wells, 32, smem_size, stream>>>(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_x, d_y, dim, dim_wells, d_val_pointers);
        }
        void WellContributions::addMatrix(int idx, int *colIndices, double *values, unsigned int val_size)
        {
            switch (idx) {
            case 0:
                cudaMemcpy(d_Cnnzs + num_blocks_so_far * dim * dim_wells, values, sizeof(double) * val_size * dim * dim_wells, cudaMemcpyHostToDevice);
                cudaMemcpy(d_Ccols + num_blocks_so_far, colIndices, sizeof(int) * val_size, cudaMemcpyHostToDevice);
                break;
            case 1:
                cudaMemcpy(d_Dnnzs + num_wells_so_far * dim_wells * dim_wells, values, sizeof(double) * dim_wells * dim_wells, cudaMemcpyHostToDevice);
                break;
            case 2:
                cudaMemcpy(d_Bnnzs + num_blocks_so_far * dim * dim_wells, values, sizeof(double) * val_size * dim * dim_wells, cudaMemcpyHostToDevice);
                cudaMemcpy(d_Bcols + num_blocks_so_far, colIndices, sizeof(int) * val_size, cudaMemcpyHostToDevice);
                val_pointers[num_wells_so_far] = num_blocks_so_far;
                if(num_wells_so_far == num_wells - 1){
                    val_pointers[num_wells] = num_blocks;
                }
                cudaMemcpy(d_val_pointers, val_pointers, sizeof(int) * (num_wells+1), cudaMemcpyHostToDevice);
                break;
            default:
                OPM_THROW(std::logic_error,"Error unsupported matrix ID for WellContributions::addMatrix()");
            }
            cudaCheckLastError("WellContributions::addMatrix() failed");
            if(idx == 2){
                num_blocks_so_far += val_size;
            }
            if(idx == 2){
                num_wells_so_far++;
            }
            cudaMemcpy(d_val_pointers, val_pointers, sizeof(int) * (num_wells+1), cudaMemcpyHostToDevice);
            break;
        default:
            OPM_THROW(std::logic_error,"Error unsupported matrix ID for WellContributions::addMatrix()");
        }
-
+        cudaCheckLastError("WellContributions::addMatrix() failed");
-        void WellContributions::setCudaStream(cudaStream_t stream_)
+        if(idx == 2){
-        {
+            num_blocks_so_far += val_size;
            this->stream = stream_;
        }
-
+        if(idx == 2){
-
+            num_wells_so_far++;
        void WellContributions::addSizes(unsigned int nnz, unsigned int numEq, unsigned int numWellEq)
        {
            if(allocated){
                OPM_THROW(std::logic_error,"Error cannot add more sizes after allocated in WellContributions");
            }
            num_blocks += nnz;
            dim = numEq;
            dim_wells = numWellEq;
            num_wells++;
        }
    }
    /// Remember the CUDA stream that apply() passes to its kernel launch
    /// \param[in] stream_   stream to store in this object
    void WellContributions::setCudaStream(cudaStream_t stream_)
    {
        stream = stream_;
    }
    void WellContributions::setBlockSize(unsigned int dim_, unsigned int dim_wells_)
    {
        dim = dim_;
        dim_wells = dim_wells_;
    }
    /// Register one more wellcontribution: grows the total block count by nnz
    /// and the well count by one. Throws std::logic_error once alloc() has run.
    /// \param[in] nnz   number of blocks to add to the running total
    void WellContributions::addNumBlocks(unsigned int nnz)
    {
        // sizes are frozen as soon as the memory has been allocated
        if (allocated) {
            OPM_THROW(std::logic_error,"Error cannot add more sizes after allocated in WellContributions");
        }

        ++num_wells;
        num_blocks = num_blocks + nnz;
    }
 } //namespace Opm
--- a/opm/simulators/linalg/bda/WellContributions.hpp
+++ b/opm/simulators/linalg/bda/WellContributions.hpp
@@ -35,17 +35,27 @@ namespace Opm
    /// This class serves to eliminate the need to include the WellContributions into the matrix (with --matrix-add-well-contributions=true) for the cusparseSolver
    /// If the --matrix-add-well-contributions commandline parameter is true, this class should not be used
    /// A StandardWell uses C, D and B and performs y -= (C^T * (D^-1 * (B*x)))
    /// B and C are vectors, disguised as matrices and contain blocks of StandardWell::numEq by StandardWell::numStaticWellEq
    /// D is a block, disguised as matrix, the square block has size StandardWell::numStaticWellEq. D is actually stored as D^-1
    /// B*x and D^-1*(B*x) are vectors with numStaticWellEq doubles
    /// C^T*D^-1*B is a blocked matrix with a symmetric sparsity pattern, contains square blocks with size numEq. For every columnindex i, j in StandardWell::duneB_, there is a block on (i, j) in C^T*D^-1*B.
    ///
    /// This class is used in 3 phases:
    /// - get total size of all wellcontributions that must be stored here
    /// - allocate memory
    /// - copy data of wellcontributions
    class WellContributions
    {
    private:
-        unsigned int num_blocks = 0;    // total number of blocks in all wells
+        unsigned int num_blocks = 0;             // total number of blocks in all wells
-        unsigned int dim;
+        unsigned int dim;                        // number of columns of blocks in B and C, equal to StandardWell::numEq
-        unsigned int dim_wells;
+        unsigned int dim_wells;                  // number of rows of blocks in B and C, equal to StandardWell::numStaticWellEq
-        unsigned int num_wells = 0;
+        unsigned int num_wells = 0;              // number of wellcontributions in this object
-        unsigned int num_blocks_so_far = 0;
+        unsigned int num_blocks_so_far = 0;      // keep track of where next data is written
-        unsigned int num_wells_so_far = 0;
+        unsigned int num_wells_so_far = 0;       // keep track of where next data is written
-        unsigned int *val_pointers = nullptr;     // val_pointers[wellID] == index of first block for this well in Ccols and Bcols
+        unsigned int *val_pointers = nullptr;    // val_pointers[wellID] == index of first block for this well in Ccols and Bcols
        bool allocated = false;
        double *d_Cnnzs = nullptr;
@@ -69,26 +79,35 @@ namespace Opm
        ~WellContributions();
        /// Apply all wellcontributions in this object
        /// performs y -= (C^T * (D^-1 * (B*x))) for StandardWell
        /// \param[in] x          vector x
        /// \param[inout] y       vector y
        void apply(double *x, double *y);
        /// Allocate memory for the wellcontributions
        void alloc();
-        /// Indicate how large the next wellcontributions are, this function cannot be called after alloc_all() is called
+        /// Indicate how large the blocks of the wellcontributions (C and B) are
-        void addSizes(unsigned int nnz, unsigned int numEq, unsigned int numWellEq);
+        /// \param[in] dim         number of columns
        /// \param[in] dim_wells   number of rows
        void setBlockSize(unsigned int dim, unsigned int dim_wells);
-        /// Store a matrix in this object, in blocked csr format
+        /// Indicate how large the next wellcontribution is, this function cannot be called after alloc() is called
        /// \param[in] numBlocks   number of blocks in C and B of next wellcontribution
        void addNumBlocks(unsigned int numBlocks);
        /// Store a matrix in this object, in blocked csr format, can only be called after alloc() is called
        /// \param[in] idx         indicate if C, D or B is sent
        /// \param[in] colIndices  columnindices of blocks in C or B, ignored for D
        /// \param[in] values      array of nonzeroes
        /// \param[in] val_size    number of blocks in C or B, ignored for D
        void addMatrix(int idx, int *colIndices, double *values, unsigned int val_size);
        /// Return the number of wells added to this object
        /// \return the number of wells added to this object
        unsigned int getNumWells(){
            return num_wells;
        }
        /// WellContributions can be applied on CPU or GPU
        /// This function sets the static variable, so each WellContributions is applied on the correct hardware
        static void setMode(bool use_gpu);
    };
 } //namespace Opm
--- a/opm/simulators/wells/BlackoilWellModel_impl.hpp
+++ b/opm/simulators/wells/BlackoilWellModel_impl.hpp
@@ -924,12 +924,13 @@ namespace Opm {
    BlackoilWellModel<TypeTag>::
    getWellContributions(WellContributions& wellContribs) const
    {
        wellContribs.setBlockSize(StandardWell<TypeTag>::numEq, StandardWell<TypeTag>::numStaticWellEq);
        for(unsigned int i = 0; i < well_container_.size(); i++){
            auto& well = well_container_[i];
            std::shared_ptr<StandardWell<TypeTag> > derived = std::dynamic_pointer_cast<StandardWell<TypeTag> >(well);
-            unsigned int nnz, numEq, numWellEq;
+            unsigned int numBlocks;
-            derived->getWellSizes(nnz, numEq, numWellEq);
+            derived->getNumBlocks(numBlocks);
-            wellContribs.addSizes(nnz, numEq, numWellEq);
+            wellContribs.addNumBlocks(numBlocks);
        }
        wellContribs.alloc();
        for(unsigned int i = 0; i < well_container_.size(); i++){
--- a/opm/simulators/wells/StandardWell.hpp
+++ b/opm/simulators/wells/StandardWell.hpp
@@ -188,8 +188,8 @@ namespace Opm
        /// add the contribution (C, D^-1, B matrices) of this Well to the WellContributions object
        void addWellContribution(WellContributions& wellContribs) const;
-        /// get the sizes of the C, D^-1 and B matrices, used to allocate memory in a WellContributions object
+        /// get the number of blocks of the C and B matrices, used to allocate memory in a WellContributions object
-        void getWellSizes(unsigned int& _nnzs, unsigned int& _numEq, unsigned int& _numWellEq) const;
+        void getNumBlocks(unsigned int& _nnzs) const;
 #endif
        /// using the solution x to recover the solution xw for wells and applying
--- a/opm/simulators/wells/StandardWell_impl.hpp
+++ b/opm/simulators/wells/StandardWell_impl.hpp
@@ -2789,11 +2789,9 @@ namespace Opm
    template<typename TypeTag>
    void
    StandardWell<TypeTag>::
-    getWellSizes(unsigned int& _nnzs, unsigned int& _numEq, unsigned int& _numWellEq) const
+    getNumBlocks(unsigned int& numBlocks) const
    {
-        _nnzs = duneB_.nonzeroes();
+        numBlocks = duneB_.nonzeroes();
-        _numEq = numEq;
-        _numWellEq = numStaticWellEq;
    }
 #endif