From 04ee2be3488ace54ab0c33cd4d26a1ce48b03d21 Mon Sep 17 00:00:00 2001 From: "T.D. (Tongdong) Qiu" Date: Fri, 15 May 2020 16:00:09 +0200 Subject: [PATCH] cusparseSolver can now handle MultisegmentWells, they are actually applied on CPU via SuiteSparse/UMFPACK --- CMakeLists_files.cmake | 2 + .../bda/MultisegmentWellContribution.cpp | 138 ++++++++++++++++++ .../bda/MultisegmentWellContribution.hpp | 114 +++++++++++++++ .../linalg/bda/WellContributions.cu | 86 +++++++---- .../linalg/bda/WellContributions.hpp | 55 +++++-- opm/simulators/linalg/bda/cuda_header.hpp | 1 + .../linalg/bda/cusparseSolverBackend.hpp | 6 +- .../wells/BlackoilWellModel_impl.hpp | 26 +++- opm/simulators/wells/MultisegmentWell.hpp | 5 + .../wells/MultisegmentWell_impl.hpp | 60 ++++++++ 10 files changed, 445 insertions(+), 48 deletions(-) create mode 100644 opm/simulators/linalg/bda/MultisegmentWellContribution.cpp create mode 100644 opm/simulators/linalg/bda/MultisegmentWellContribution.hpp diff --git a/CMakeLists_files.cmake b/CMakeLists_files.cmake index fc3fb8f2a..95993509a 100644 --- a/CMakeLists_files.cmake +++ b/CMakeLists_files.cmake @@ -44,6 +44,7 @@ list (APPEND MAIN_SOURCE_FILES if(CUDA_FOUND) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cusparseSolverBackend.cu) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cu) + list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/MultisegmentWellContribution.cpp) list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp) endif() if(MPI_FOUND) @@ -144,6 +145,7 @@ list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/bda/BdaResult.hpp opm/simulators/linalg/bda/cuda_header.hpp opm/simulators/linalg/bda/cusparseSolverBackend.hpp + opm/simulators/linalg/bda/MultisegmentWellContribution.hpp opm/simulators/linalg/bda/WellContributions.hpp opm/simulators/linalg/BlackoilAmg.hpp opm/simulators/linalg/amgcpr.hh diff --git a/opm/simulators/linalg/bda/MultisegmentWellContribution.cpp b/opm/simulators/linalg/bda/MultisegmentWellContribution.cpp new file mode 100644 index 000000000..deacb033e --- /dev/null +++ b/opm/simulators/linalg/bda/MultisegmentWellContribution.cpp @@ -0,0 +1,138 @@ +/* + Copyright 2020 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + + +#include +#include +#include // CMake +#include + +#include +#include + +#if HAVE_UMFPACK +#include +#include +#endif // HAVE_UMFPACK + +#include + +namespace Opm +{ + + MultisegmentWellContribution::MultisegmentWellContribution(unsigned int dim_, unsigned int dim_wells_, + unsigned int Nb_, unsigned int Mb_, + unsigned int BnumBlocks_, double *Bvalues, unsigned int *BcolIndices, unsigned int *BrowPointers, + unsigned int DnumBlocks_, double *Dvalues, int *DcolPointers, int *DrowIndices, + double *Cvalues) + : dim(dim_), dim_wells(dim_wells_), N(Nb_*dim), Nb(Nb_), M(Mb_*dim_wells), Mb(Mb_), DnumBlocks(DnumBlocks_), BnumBlocks(BnumBlocks_) + { + Cvals.insert(Cvals.end(), Cvalues, Cvalues + BnumBlocks*dim*dim_wells); + Dvals.insert(Dvals.end(), Dvalues, Dvalues + DnumBlocks*dim_wells*dim_wells); + Bvals.insert(Bvals.end(), Bvalues, Bvalues + BnumBlocks*dim*dim_wells); + Ccols.insert(Ccols.end(), BcolIndices, BcolIndices + BnumBlocks); + Bcols.insert(Bcols.end(), BcolIndices, BcolIndices + BnumBlocks); + Crows.insert(Crows.end(), BrowPointers, BrowPointers + M + 1); + Brows.insert(Brows.end(), BrowPointers, BrowPointers + M + 1); + + Dcols.insert(Dcols.end(), DcolPointers, DcolPointers + M + 1); + Drows.insert(Drows.end(), DrowIndices, DrowIndices + DnumBlocks*dim_wells*dim_wells); + + z1.resize(Mb * dim_wells); + z2.resize(Mb * dim_wells); + + // allocate pinned memory on host + cudaMallocHost(&h_x, sizeof(double) * N); + cudaMallocHost(&h_y, sizeof(double) * N); + + umfpack_di_symbolic(M, M, Dcols.data(), Drows.data(), Dvals.data(), &UMFPACK_Symbolic, nullptr, nullptr); + umfpack_di_numeric(Dcols.data(), Drows.data(), Dvals.data(), UMFPACK_Symbolic, &UMFPACK_Numeric, nullptr, nullptr); + } + + MultisegmentWellContribution::~MultisegmentWellContribution() + { + cudaFreeHost(h_x); + cudaFreeHost(h_y); + + umfpack_di_free_symbolic(&UMFPACK_Symbolic); + umfpack_di_free_numeric(&UMFPACK_Numeric); + } + + + // Apply the MultisegmentWellContribution, similar to MultisegmentWell::apply() + // y -= (C^T * (D^-1 * (B * x))) + void MultisegmentWellContribution::apply(double *d_x, double *d_y) + { + // copy vectors x and y from GPU to CPU + cudaMemcpyAsync(h_x, d_x, sizeof(double) * N, cudaMemcpyDeviceToHost, stream); + cudaMemcpyAsync(h_y, d_y, sizeof(double) * N, cudaMemcpyDeviceToHost, stream); + cudaStreamSynchronize(stream); + + // reset z1 and z2 + std::fill(z1.begin(), z1.end(), 0.0); + std::fill(z2.begin(), z2.end(), 0.0); + + // z1 = B * x + for (unsigned int row = 0; row < Mb; ++row) { + // for every block in the row + for (unsigned int blockID = Brows[row]; blockID < Brows[row+1]; ++blockID) { + unsigned int colIdx = Bcols[blockID]; + for (unsigned int j = 0; j < dim_wells; ++j) { + double temp = 0.0; + for (unsigned int k = 0; k < dim; ++k) { + temp += Bvals[blockID * dim * dim_wells + j * dim + k] * h_x[colIdx * dim + k]; + } + z1[row * dim_wells + j] += temp; + } + } + } + + // z2 = D^-1 * (B * x) + // umfpack + umfpack_di_solve(UMFPACK_A, Dcols.data(), Drows.data(), Dvals.data(), z2.data(), z1.data(), UMFPACK_Numeric, nullptr, nullptr); + + // y -= (C^T * z2) + // y -= (C^T * (D^-1 * (B * x))) + for (unsigned int row = 0; row < Mb; ++row) { + // for every block in the row + for (unsigned int blockID = Brows[row]; blockID < Brows[row+1]; ++blockID) { + unsigned int colIdx = Bcols[blockID]; + for (unsigned int j = 0; j < dim; ++j) { + double temp = 0.0; + for (unsigned int k = 0; k < dim_wells; ++k) { + temp += Cvals[blockID * dim * dim_wells + j + k * dim] * z2[row * dim_wells + k]; + } + h_y[colIdx * dim + j] -= temp; + } + } + } + + // copy vector y from CPU to GPU + cudaMemcpyAsync(d_y, h_y, sizeof(double) * N, cudaMemcpyHostToDevice, stream); + cudaStreamSynchronize(stream); + } + + void MultisegmentWellContribution::setCudaStream(cudaStream_t stream_) + { + stream = stream_; + } + + +} //namespace Opm + diff --git a/opm/simulators/linalg/bda/MultisegmentWellContribution.hpp b/opm/simulators/linalg/bda/MultisegmentWellContribution.hpp new file mode 100644 index 000000000..0a040d6aa --- /dev/null +++ b/opm/simulators/linalg/bda/MultisegmentWellContribution.hpp @@ -0,0 +1,114 @@ +/* + Copyright 2020 Equinor ASA + + This file is part of the Open Porous Media project (OPM). + + OPM is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + OPM is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with OPM. If not, see . +*/ + +#ifndef MULTISEGMENTWELLCONTRIBUTION_HEADER_INCLUDED +#define MULTISEGMENTWELLCONTRIBUTION_HEADER_INCLUDED + +#include + +#include + +#include + +namespace Opm +{ + + /// This class serves to duplicate the functionality of the MultisegmentWell + /// A MultisegmentWell uses C, D and B and performs y -= (C^T * (D^-1 * (B*x))) + /// B and C are matrices, with M rows and N columns, where N is the size of the matrix. They contain blocks of MultisegmentWell::numEq by MultisegmentWell::numWellEq. + /// D is a MxM matrix, the square blocks have size MultisegmentWell::numWellEq. + /// B*x and D*B*x are a vector with M*numWellEq doubles + /// C*D*B*x is a vector with N*numEq doubles. + /// + /// This class is used in 3 phases: + /// - get total size of all wellcontributions that must be stored here + /// - allocate memory + /// - copy data of wellcontributions + class MultisegmentWellContribution + { + + private: + unsigned int dim; // number of columns of blocks in B and C, equal to MultisegmentWell::numEq + unsigned int dim_wells; // number of rows of blocks in B and C, equal to MultisegmentWell::numWellEq + unsigned int N; // number of rows in vectors x and y, N == dim*Nb + unsigned int Nb; // number of blockrows in x and y + unsigned int M; // number of rows, M == dim_wells*Mb + unsigned int Mb; // number of blockrows in C, D and B + + cudaStream_t stream; + double *h_x = nullptr, *h_y = nullptr; // CUDA pinned memory for GPU memcpy + + // C and B are stored in BCRS format, D is stored in CSC format (Dune::UMFPack). + unsigned int DnumBlocks; + unsigned int BnumBlocks; + std::vector Cvals; + std::vector Dvals; + std::vector Bvals; + std::vector Ccols; + std::vector Dcols; // Columnpointers, contains M+1 entries + std::vector Bcols; + std::vector Crows; + std::vector Drows; // Rowindicies, contains DnumBlocks*dim*dim_wells entries + std::vector Brows; + std::vector z1; // z1 = B * x + std::vector z2; // z2 = D^-1 * B * x + void *UMFPACK_Symbolic, *UMFPACK_Numeric; + + + public: + + /// Set a cudaStream to be used + /// \param[in] stream the cudaStream that is used + void setCudaStream(cudaStream_t stream); + + /// Create a new MultisegmentWellContribution + /// Matrices C and B are passed in Blocked CSR, matrix D in CSC + /// \param[in] dim size of blocks in vectors x and y, equal to MultisegmentWell::numEq + /// \param[in] dim_wells size of blocks of C, B and D, equal to MultisegmentWell::numWellEq + /// \param[in] Nb number of blocks in vectors x and y + /// \param[in] Mb number of blockrows in C, B and D + /// \param[in] BnumBlocks number of blocks in C and B + /// \param[in] Bvalues nonzero values of matrix B + /// \param[in] BcolIndices columnindices of blocks of matrix B + /// \param[in] BrowPointers rowpointers of matrix B + /// \param[in] DnumBlocks number of blocks in D + /// \param[in] Dvalues nonzero values of matrix D + /// \param[in] DcolPointers columnpointers of matrix D + /// \param[in] DrowIndices rowindices of matrix D + /// \param[in] Cvalues nonzero values of matrix C + MultisegmentWellContribution(unsigned int dim, unsigned int dim_wells, + unsigned int Nb, unsigned int Mb, + unsigned int BnumBlocks, double *Bvalues, unsigned int *BcolIndices, unsigned int *BrowPointers, + unsigned int DnumBlocks, double *Dvalues, int *DcolPointers, int *DrowIndices, + double *Cvalues); + + /// Destroy a MultisegmentWellContribution, and free memory + ~MultisegmentWellContribution(); + + /// Apply the MultisegmentWellContribution on GPU + /// performs y -= (C^T * (D^-1 * (B*x))) for MultisegmentWell + /// \param[in] d_x vector x, must be on GPU + /// \param[inout] d_y vector y, must be on GPU + void apply(double *d_x, double *d_y); + + }; + +} //namespace Opm + +#endif diff --git a/opm/simulators/linalg/bda/WellContributions.cu b/opm/simulators/linalg/bda/WellContributions.cu index ac79bb21c..d3ee09a05 100644 --- a/opm/simulators/linalg/bda/WellContributions.cu +++ b/opm/simulators/linalg/bda/WellContributions.cu @@ -128,27 +128,39 @@ namespace Opm } - void WellContributions::alloc(){ - cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells); - cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_wells * dim_wells * dim_wells); - cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells); - cudaMalloc((void**)&d_Ccols, sizeof(int) * num_blocks); - cudaMalloc((void**)&d_Bcols, sizeof(int) * num_blocks); - val_pointers = new unsigned int[num_wells + 1]; - cudaMalloc((void**)&d_val_pointers, sizeof(int) * (num_wells + 1)); - cudaCheckLastError("apply_gpu malloc failed"); - allocated = true; + void WellContributions::alloc() + { + if (num_std_wells > 0) { + cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells); + cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_std_wells * dim_wells * dim_wells); + cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells); + cudaMalloc((void**)&d_Ccols, sizeof(int) * num_blocks); + cudaMalloc((void**)&d_Bcols, sizeof(int) * num_blocks); + val_pointers = new unsigned int[num_std_wells + 1]; + cudaMalloc((void**)&d_val_pointers, sizeof(int) * (num_std_wells + 1)); + cudaCheckLastError("apply_gpu malloc failed"); + allocated = true; + } } WellContributions::~WellContributions() { - cudaFree(d_Cnnzs); - cudaFree(d_Dnnzs); - cudaFree(d_Bnnzs); - cudaFree(d_Ccols); - cudaFree(d_Bcols); - delete[] val_pointers; - cudaFree(d_val_pointers); + // delete MultisegmentWellContributions + for (auto ms : multisegments) { + delete ms; + } + multisegments.clear(); + + // delete data for StandardWell + if (num_std_wells > 0) { + cudaFree(d_Cnnzs); + cudaFree(d_Dnnzs); + cudaFree(d_Bnnzs); + cudaFree(d_Ccols); + cudaFree(d_Bcols); + delete[] val_pointers; + cudaFree(d_val_pointers); + } } @@ -156,8 +168,16 @@ namespace Opm // y -= (C^T *(D^-1*( B*x))) void WellContributions::apply(double *d_x, double *d_y) { - int smem_size = 2 * sizeof(double) * dim_wells; - apply_well_contributions<<>>(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_x, d_y, dim, dim_wells, d_val_pointers); + // apply MultisegmentWells + for(MultisegmentWellContribution *well : multisegments){ + well->apply(d_x, d_y); + } + + // apply StandardWells + if (num_std_wells > 0) { + int smem_size = 2 * sizeof(double) * dim_wells; + apply_well_contributions<<>>(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_x, d_y, dim, dim_wells, d_val_pointers); + } } @@ -173,16 +193,16 @@ namespace Opm cudaMemcpy(d_Ccols + num_blocks_so_far, colIndices, sizeof(int) * val_size, cudaMemcpyHostToDevice); break; case MatrixType::D: - cudaMemcpy(d_Dnnzs + num_wells_so_far * dim_wells * dim_wells, values, sizeof(double) * dim_wells * dim_wells, cudaMemcpyHostToDevice); + cudaMemcpy(d_Dnnzs + num_std_wells_so_far * dim_wells * dim_wells, values, sizeof(double) * dim_wells * dim_wells, cudaMemcpyHostToDevice); break; case MatrixType::B: cudaMemcpy(d_Bnnzs + num_blocks_so_far * dim * dim_wells, values, sizeof(double) * val_size * dim * dim_wells, cudaMemcpyHostToDevice); cudaMemcpy(d_Bcols + num_blocks_so_far, colIndices, sizeof(int) * val_size, cudaMemcpyHostToDevice); - val_pointers[num_wells_so_far] = num_blocks_so_far; - if(num_wells_so_far == num_wells - 1){ - val_pointers[num_wells] = num_blocks; + val_pointers[num_std_wells_so_far] = num_blocks_so_far; + if(num_std_wells_so_far == num_std_wells - 1){ + val_pointers[num_std_wells] = num_blocks; } - cudaMemcpy(d_val_pointers, val_pointers, sizeof(int) * (num_wells+1), cudaMemcpyHostToDevice); + cudaMemcpy(d_val_pointers, val_pointers, sizeof(int) * (num_std_wells+1), cudaMemcpyHostToDevice); break; default: OPM_THROW(std::logic_error,"Error unsupported matrix ID for WellContributions::addMatrix()"); @@ -190,13 +210,16 @@ namespace Opm cudaCheckLastError("WellContributions::addMatrix() failed"); if (MatrixType::B == type) { num_blocks_so_far += val_size; - num_wells_so_far++; + num_std_wells_so_far++; } } void WellContributions::setCudaStream(cudaStream_t stream_) { this->stream = stream_; + for(MultisegmentWellContribution *well : multisegments){ + well->setCudaStream(stream_); + } } void WellContributions::setBlockSize(unsigned int dim_, unsigned int dim_wells_) @@ -211,7 +234,18 @@ namespace Opm OPM_THROW(std::logic_error,"Error cannot add more sizes after allocated in WellContributions"); } num_blocks += nnz; - num_wells++; + num_std_wells++; + } + + void WellContributions::addMultisegmentWellContribution(unsigned int dim, unsigned int dim_wells, + unsigned int Nb, unsigned int Mb, + unsigned int BnumBlocks, double *Bvalues, unsigned int *BcolIndices, unsigned int *BrowPointers, + unsigned int DnumBlocks, double *Dvalues, int *DcolPointers, int *DrowIndices, + double *Cvalues) + { + MultisegmentWellContribution *well = new MultisegmentWellContribution(dim, dim_wells, Nb, Mb, BnumBlocks, Bvalues, BcolIndices, BrowPointers, DnumBlocks, Dvalues, DcolPointers, DrowIndices, Cvalues); + multisegments.emplace_back(well); + ++num_ms_wells; } } //namespace Opm diff --git a/opm/simulators/linalg/bda/WellContributions.hpp b/opm/simulators/linalg/bda/WellContributions.hpp index 74f7efc09..1ab270ca8 100644 --- a/opm/simulators/linalg/bda/WellContributions.hpp +++ b/opm/simulators/linalg/bda/WellContributions.hpp @@ -28,6 +28,8 @@ #include +#include + #include namespace Opm @@ -35,6 +37,10 @@ namespace Opm /// This class serves to eliminate the need to include the WellContributions into the matrix (with --matrix-add-well-contributions=true) for the cusparseSolver /// If the --matrix-add-well-contributions commandline parameter is true, this class should not be used + /// So far, StandardWell and MultisegmentWell are supported + /// A single instance (or pointer) of this class is passed to the cusparseSolver. + /// For StandardWell, this class contains all the data and handles the computation. For MultisegmentWell, the vector 'multisegments' contains all the data. For more information, check the MultisegmentWellContribution class. + /// A StandardWell uses C, D and B and performs y -= (C^T * (D^-1 * (B*x))) /// B and C are vectors, disguised as matrices and contain blocks of StandardWell::numEq by StandardWell::numStaticWellEq /// D is a block, disguised as matrix, the square block has size StandardWell::numStaticWellEq. D is actually stored as D^-1 @@ -50,14 +56,18 @@ namespace Opm private: unsigned int num_blocks = 0; // total number of blocks in all wells - unsigned int dim; // number of columns of blocks in B and C, equal to StandardWell::numEq - unsigned int dim_wells; // number of rows of blocks in B and C, equal to StandardWell::numStaticWellEq - unsigned int num_wells = 0; // number of wellcontributions in this object + unsigned int dim; // number of columns in blocks in B and C, equal to StandardWell::numEq + unsigned int dim_wells; // number of rows in blocks in B and C, equal to StandardWell::numStaticWellEq + unsigned int num_std_wells = 0; // number of StandardWells in this object + unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size() unsigned int num_blocks_so_far = 0; // keep track of where next data is written - unsigned int num_wells_so_far = 0; // keep track of where next data is written + unsigned int num_std_wells_so_far = 0; // keep track of where next data is written unsigned int *val_pointers = nullptr; // val_pointers[wellID] == index of first block for this well in Ccols and Bcols bool allocated = false; + std::vector multisegments; + cudaStream_t stream; + // data for StandardWells, could remain nullptrs if not used double *d_Cnnzs = nullptr; double *d_Dnnzs = nullptr; double *d_Bnnzs = nullptr; @@ -66,7 +76,7 @@ namespace Opm double *d_z1 = nullptr; double *d_z2 = nullptr; unsigned int *d_val_pointers = nullptr; - cudaStream_t stream; + public: /// StandardWell has C, D and B matrices that need to be copied @@ -86,22 +96,22 @@ namespace Opm /// Destroy a WellContributions, and free memory ~WellContributions(); - /// Apply all wellcontributions in this object - /// performs y -= (C^T * (D^-1 * (B*x))) for StandardWell + /// Apply all Wells in this object + /// performs y -= (C^T * (D^-1 * (B*x))) for all Wells /// \param[in] x vector x /// \param[inout] y vector y void apply(double *x, double *y); - /// Allocate memory for the wellcontributions + /// Allocate memory for the StandardWells void alloc(); - /// Indicate how large the blocks of the wellcontributions (C and B) are + /// Indicate how large the blocks of the StandardWell (C and B) are /// \param[in] dim number of columns /// \param[in] dim_wells number of rows void setBlockSize(unsigned int dim, unsigned int dim_wells); - /// Indicate how large the next wellcontribution is, this function cannot be called after alloc() is called - /// \param[in] numBlocks number of blocks in C and B of next wellcontribution + /// Indicate how large the next StandardWell is, this function cannot be called after alloc() is called + /// \param[in] numBlocks number of blocks in C and B of next StandardWell void addNumBlocks(unsigned int numBlocks); /// Store a matrix in this object, in blocked csr format, can only be called after alloc() is called @@ -111,10 +121,31 @@ namespace Opm /// \param[in] val_size number of blocks in C or B, ignored for D void addMatrix(MatrixType type, int *colIndices, double *values, unsigned int val_size); + /// Add a MultisegmentWellContribution, actually creates an object on heap that is destroyed in the destructor + /// Matrices C and B are passed in Blocked CSR, matrix D in CSC + /// \param[in] dim size of blocks in vectors x and y, equal to MultisegmentWell::numEq + /// \param[in] dim_wells size of blocks of C, B and D, equal to MultisegmentWell::numWellEq + /// \param[in] Nb number of blocks in vectors x and y + /// \param[in] Mb number of blockrows in C, B and D + /// \param[in] BnumBlocks number of blocks in C and B + /// \param[in] Bvalues nonzero values of matrix B + /// \param[in] BcolIndices columnindices of blocks of matrix B + /// \param[in] BrowPointers rowpointers of matrix B + /// \param[in] DnumBlocks number of blocks in D + /// \param[in] Dvalues nonzero values of matrix D + /// \param[in] DcolPointers columnpointers of matrix D + /// \param[in] DrowIndices rowindices of matrix D + /// \param[in] Cvalues nonzero values of matrix C + void addMultisegmentWellContribution(unsigned int dim, unsigned int dim_wells, + unsigned int Nb, unsigned int Mb, + unsigned int BnumBlocks, double *Bvalues, unsigned int *BcolIndices, unsigned int *BrowPointers, + unsigned int DnumBlocks, double *Dvalues, int *DcolPointers, int *DrowIndices, + double *Cvalues); + /// Return the number of wells added to this object /// \return the number of wells added to this object unsigned int getNumWells(){ - return num_wells; + return num_std_wells + num_ms_wells; } }; diff --git a/opm/simulators/linalg/bda/cuda_header.hpp b/opm/simulators/linalg/bda/cuda_header.hpp index 7d1f722d4..4ca6e3b77 100644 --- a/opm/simulators/linalg/bda/cuda_header.hpp +++ b/opm/simulators/linalg/bda/cuda_header.hpp @@ -20,6 +20,7 @@ #ifndef CUDA_HEADER_HEADER_INCLUDED #define CUDA_HEADER_HEADER_INCLUDED +#include #include /// Runtime error checking of CUDA functions diff --git a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp index b8648ef15..601efa811 100644 --- a/opm/simulators/linalg/bda/cusparseSolverBackend.hpp +++ b/opm/simulators/linalg/bda/cusparseSolverBackend.hpp @@ -74,9 +74,9 @@ private: void gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res); /// Initialize GPU and allocate memory - /// \param[in] N number of nonzeroes, divide by dim*dim to get number of blocks - /// \param[in] nnz number of nonzeroes, divide by dim*dim to get number of blocks - /// \param[in] dim size of block + /// \param[in] N number of nonzeroes, divide by dim*dim to get number of blocks + /// \param[in] nnz number of nonzeroes, divide by dim*dim to get number of blocks + /// \param[in] dim size of block void initialize(int N, int nnz, int dim); /// Clean memory diff --git a/opm/simulators/wells/BlackoilWellModel_impl.hpp b/opm/simulators/wells/BlackoilWellModel_impl.hpp index 5dc7d4043..a78de1b52 100644 --- a/opm/simulators/wells/BlackoilWellModel_impl.hpp +++ b/opm/simulators/wells/BlackoilWellModel_impl.hpp @@ -907,22 +907,34 @@ namespace Opm { BlackoilWellModel:: getWellContributions(WellContributions& wellContribs) const { + // prepare for StandardWells wellContribs.setBlockSize(StandardWell::numEq, StandardWell::numStaticWellEq); for(unsigned int i = 0; i < well_container_.size(); i++){ auto& well = well_container_[i]; std::shared_ptr > derived = std::dynamic_pointer_cast >(well); - unsigned int numBlocks; - derived->getNumBlocks(numBlocks); - wellContribs.addNumBlocks(numBlocks); + if (derived) { + unsigned int numBlocks; + derived->getNumBlocks(numBlocks); + wellContribs.addNumBlocks(numBlocks); + } } + + // allocate memory for data from StandardWells wellContribs.alloc(); + for(unsigned int i = 0; i < well_container_.size(); i++){ auto& well = well_container_[i]; - std::shared_ptr > derived = std::dynamic_pointer_cast >(well); - if (derived) { - derived->addWellContribution(wellContribs); + // maybe WellInterface could implement addWellContribution() + auto derived_std = std::dynamic_pointer_cast >(well); + if (derived_std) { + derived_std->addWellContribution(wellContribs); } else { - OpmLog::warning("Warning only StandardWell is supported by WellContributions for GPU"); + auto derived_ms = std::dynamic_pointer_cast >(well); + if (derived_ms) { + derived_ms->addWellContribution(wellContribs); + } else { + OpmLog::warning("Warning unknown type of well"); + } } } } diff --git a/opm/simulators/wells/MultisegmentWell.hpp b/opm/simulators/wells/MultisegmentWell.hpp index 8644ae505..9cc9b12f9 100644 --- a/opm/simulators/wells/MultisegmentWell.hpp +++ b/opm/simulators/wells/MultisegmentWell.hpp @@ -135,6 +135,11 @@ namespace Opm /// r = r - C D^-1 Rw virtual void apply(BVector& r) const override; +#if HAVE_CUDA + /// add the contribution (C, D, B matrices) of this Well to the WellContributions object + void addWellContribution(WellContributions& wellContribs) const; +#endif + /// using the solution x to recover the solution xw for wells and applying /// xw to update Well State virtual void recoverWellSolutionAndUpdateWellState(const BVector& x, diff --git a/opm/simulators/wells/MultisegmentWell_impl.hpp b/opm/simulators/wells/MultisegmentWell_impl.hpp index 5062641cd..b037e4cee 100644 --- a/opm/simulators/wells/MultisegmentWell_impl.hpp +++ b/opm/simulators/wells/MultisegmentWell_impl.hpp @@ -639,6 +639,66 @@ namespace Opm +#if HAVE_CUDA + template + void + MultisegmentWell:: + addWellContribution(WellContributions& wellContribs) const + { + unsigned int Nb = duneB_.M(); // number of blockrows in matrix A + unsigned int Mb = duneB_.N(); // number of blockrows in duneB_, duneC_ and duneD_ + unsigned int BnumBlocks = duneB_.nonzeroes(); + unsigned int DnumBlocks = duneD_.nonzeroes(); + + // duneC + std::vector Ccols; + std::vector Cvals; + Ccols.reserve(BnumBlocks); + Cvals.reserve(BnumBlocks * numEq * numWellEq); + for (auto rowC = duneC_.begin(); rowC != duneC_.end(); ++rowC) { + for (auto colC = rowC->begin(), endC = rowC->end(); colC != endC; ++colC) { + Ccols.emplace_back(colC.index()); + for (int i = 0; i < numWellEq; ++i) { + for (int j = 0; j < numEq; ++j) { + Cvals.emplace_back((*colC)[i][j]); + } + } + } + } + + // duneD + Dune::UMFPack umfpackMatrix(duneD_, 0); + double *Dvals = umfpackMatrix.getInternalMatrix().getValues(); + int *Dcols = umfpackMatrix.getInternalMatrix().getColStart(); + int *Drows = umfpackMatrix.getInternalMatrix().getRowIndex(); + + // duneB + std::vector Bcols; + std::vector Brows; + std::vector Bvals; + Bcols.reserve(BnumBlocks); + Brows.reserve(Mb+1); + Bvals.reserve(BnumBlocks * numEq * numWellEq); + Brows.emplace_back(0); + unsigned int sumBlocks = 0; + for (auto rowB = duneB_.begin(); rowB != duneB_.end(); ++rowB) { + int sizeRow = 0; + for (auto colB = rowB->begin(), endB = rowB->end(); colB != endB; ++colB) { + Bcols.emplace_back(colB.index()); + for (int i = 0; i < numWellEq; ++i) { + for (int j = 0; j < numEq; ++j) { + Bvals.emplace_back((*colB)[i][j]); + } + } + sizeRow++; + } + sumBlocks += sizeRow; + Brows.emplace_back(sumBlocks); + } + + wellContribs.addMultisegmentWellContribution(numEq, numWellEq, Nb, Mb, BnumBlocks, Bvals.data(), Bcols.data(), Brows.data(), DnumBlocks, Dvals, Dcols, Drows, Cvals.data()); + } +#endif template