cusparseSolver can now handle MultisegmentWells; their contributions are applied on the CPU via SuiteSparse/UMFPACK

T.D. (Tongdong) Qiu 2020-05-15 16:00:09 +02:00
parent 1f177b33e7
commit 04ee2be348
10 changed files with 445 additions and 48 deletions
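
For orientation, the operation each MultisegmentWellContribution performs during the linear solve (documented in the new files below) is

    y -= C^T * (D^-1 * (B * x))

where x and y are reservoir vectors of length Nb*dim, B*x and D^-1*(B*x) are well-segment vectors of length Mb*dim_wells, and the solve with D reuses a UMFPACK factorization prepared once per well. The two block multiplications and the UMFPACK solve run on the CPU; x and y are staged between GPU and host through pinned memory.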

View File

@ -44,6 +44,7 @@ list (APPEND MAIN_SOURCE_FILES
if(CUDA_FOUND)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cusparseSolverBackend.cu)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cu)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/MultisegmentWellContribution.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp)
endif()
if(MPI_FOUND)
@ -144,6 +145,7 @@ list (APPEND PUBLIC_HEADER_FILES
opm/simulators/linalg/bda/BdaResult.hpp
opm/simulators/linalg/bda/cuda_header.hpp
opm/simulators/linalg/bda/cusparseSolverBackend.hpp
opm/simulators/linalg/bda/MultisegmentWellContribution.hpp
opm/simulators/linalg/bda/WellContributions.hpp
opm/simulators/linalg/BlackoilAmg.hpp
opm/simulators/linalg/amgcpr.hh

View File

@ -0,0 +1,138 @@
/*
Copyright 2020 Equinor ASA
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <cstdlib>
#include <cstring>
#include <config.h> // CMake
#include <fstream>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>
#if HAVE_UMFPACK
#include <dune/istl/umfpack.hh>
#include <opm/simulators/linalg/bda/cuda_header.hpp>
#endif // HAVE_UMFPACK
#include <opm/simulators/linalg/bda/MultisegmentWellContribution.hpp>
namespace Opm
{
MultisegmentWellContribution::MultisegmentWellContribution(unsigned int dim_, unsigned int dim_wells_,
unsigned int Nb_, unsigned int Mb_,
unsigned int BnumBlocks_, double *Bvalues, unsigned int *BcolIndices, unsigned int *BrowPointers,
unsigned int DnumBlocks_, double *Dvalues, int *DcolPointers, int *DrowIndices,
double *Cvalues)
: dim(dim_), dim_wells(dim_wells_), N(Nb_*dim), Nb(Nb_), M(Mb_*dim_wells), Mb(Mb_), DnumBlocks(DnumBlocks_), BnumBlocks(BnumBlocks_)
{
Cvals.insert(Cvals.end(), Cvalues, Cvalues + BnumBlocks*dim*dim_wells);
Dvals.insert(Dvals.end(), Dvalues, Dvalues + DnumBlocks*dim_wells*dim_wells);
Bvals.insert(Bvals.end(), Bvalues, Bvalues + BnumBlocks*dim*dim_wells);
Ccols.insert(Ccols.end(), BcolIndices, BcolIndices + BnumBlocks);
Bcols.insert(Bcols.end(), BcolIndices, BcolIndices + BnumBlocks);
Crows.insert(Crows.end(), BrowPointers, BrowPointers + Mb + 1); // BrowPointers holds Mb+1 entries (one per blockrow plus the end pointer)
Brows.insert(Brows.end(), BrowPointers, BrowPointers + Mb + 1);
Dcols.insert(Dcols.end(), DcolPointers, DcolPointers + M + 1);
Drows.insert(Drows.end(), DrowIndices, DrowIndices + DnumBlocks*dim_wells*dim_wells);
z1.resize(Mb * dim_wells);
z2.resize(Mb * dim_wells);
// allocate pinned memory on host
cudaMallocHost(&h_x, sizeof(double) * N);
cudaMallocHost(&h_y, sizeof(double) * N);
umfpack_di_symbolic(M, M, Dcols.data(), Drows.data(), Dvals.data(), &UMFPACK_Symbolic, nullptr, nullptr);
umfpack_di_numeric(Dcols.data(), Drows.data(), Dvals.data(), UMFPACK_Symbolic, &UMFPACK_Numeric, nullptr, nullptr);
}
MultisegmentWellContribution::~MultisegmentWellContribution()
{
cudaFreeHost(h_x);
cudaFreeHost(h_y);
umfpack_di_free_symbolic(&UMFPACK_Symbolic);
umfpack_di_free_numeric(&UMFPACK_Numeric);
}
// Apply the MultisegmentWellContribution, similar to MultisegmentWell::apply()
// y -= (C^T * (D^-1 * (B * x)))
void MultisegmentWellContribution::apply(double *d_x, double *d_y)
{
// copy vectors x and y from GPU to CPU
cudaMemcpyAsync(h_x, d_x, sizeof(double) * N, cudaMemcpyDeviceToHost, stream);
cudaMemcpyAsync(h_y, d_y, sizeof(double) * N, cudaMemcpyDeviceToHost, stream);
cudaStreamSynchronize(stream);
// reset z1 and z2
std::fill(z1.begin(), z1.end(), 0.0);
std::fill(z2.begin(), z2.end(), 0.0);
// z1 = B * x
for (unsigned int row = 0; row < Mb; ++row) {
// for every block in the row
for (unsigned int blockID = Brows[row]; blockID < Brows[row+1]; ++blockID) {
unsigned int colIdx = Bcols[blockID];
for (unsigned int j = 0; j < dim_wells; ++j) {
double temp = 0.0;
for (unsigned int k = 0; k < dim; ++k) {
temp += Bvals[blockID * dim * dim_wells + j * dim + k] * h_x[colIdx * dim + k];
}
z1[row * dim_wells + j] += temp;
}
}
}
// z2 = D^-1 * (B * x)
// umfpack
umfpack_di_solve(UMFPACK_A, Dcols.data(), Drows.data(), Dvals.data(), z2.data(), z1.data(), UMFPACK_Numeric, nullptr, nullptr);
// y -= (C^T * z2)
// y -= (C^T * (D^-1 * (B * x)))
for (unsigned int row = 0; row < Mb; ++row) {
// for every block in the row
for (unsigned int blockID = Brows[row]; blockID < Brows[row+1]; ++blockID) {
unsigned int colIdx = Bcols[blockID];
for (unsigned int j = 0; j < dim; ++j) {
double temp = 0.0;
for (unsigned int k = 0; k < dim_wells; ++k) {
temp += Cvals[blockID * dim * dim_wells + j + k * dim] * z2[row * dim_wells + k];
}
h_y[colIdx * dim + j] -= temp;
}
}
}
// copy vector y from CPU to GPU
cudaMemcpyAsync(d_y, h_y, sizeof(double) * N, cudaMemcpyHostToDevice, stream);
cudaStreamSynchronize(stream);
}
void MultisegmentWellContribution::setCudaStream(cudaStream_t stream_)
{
stream = stream_;
}
} //namespace Opm
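
The constructor, apply() and destructor above split the standard UMFPACK sequence (symbolic analysis, numeric factorization, solve, free) across the object's lifetime, with D held in compressed-sparse-column form. The following standalone sketch (not part of this commit; a hypothetical 2x2 system chosen only to show the call order and the CSC arrays) runs the same sequence in one function:

// Minimal UMFPACK sketch: solve A*x = b for A = [2 1; 0 3] stored column-wise (CSC).
#include <cstdio>
#include <umfpack.h>

int main()
{
    int Ap[] = {0, 1, 3};            // column pointers, n_col + 1 entries (cf. Dcols)
    int Ai[] = {0, 0, 1};            // row indices of the nonzeros (cf. Drows)
    double Ax[] = {2.0, 1.0, 3.0};   // nonzero values (cf. Dvals)
    double b[] = {5.0, 6.0};         // right-hand side (cf. z1)
    double x[2];                     // solution (cf. z2)
    void *Symbolic = nullptr, *Numeric = nullptr;
    umfpack_di_symbolic(2, 2, Ap, Ai, Ax, &Symbolic, nullptr, nullptr);       // done once in the constructor
    umfpack_di_numeric(Ap, Ai, Ax, Symbolic, &Numeric, nullptr, nullptr);     // done once in the constructor
    umfpack_di_solve(UMFPACK_A, Ap, Ai, Ax, x, b, Numeric, nullptr, nullptr); // done per apply()
    umfpack_di_free_symbolic(&Symbolic);                                      // done in the destructor
    umfpack_di_free_numeric(&Numeric);                                        // done in the destructor
    std::printf("x = [%g, %g]\n", x[0], x[1]);                                // expect [1.5, 2]
    return 0;
}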

View File

@ -0,0 +1,114 @@
/*
Copyright 2020 Equinor ASA
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef MULTISEGMENTWELLCONTRIBUTION_HEADER_INCLUDED
#define MULTISEGMENTWELLCONTRIBUTION_HEADER_INCLUDED
#include <config.h>
#include <vector>
#include <cuda_runtime.h>
namespace Opm
{
/// This class serves to duplicate the functionality of the MultisegmentWell
/// A MultisegmentWell uses C, D and B and performs y -= (C^T * (D^-1 * (B*x)))
/// B and C are matrices with M blockrows and N blockcolumns, where N is the number of blockrows in the reservoir matrix. They contain blocks of MultisegmentWell::numEq by MultisegmentWell::numWellEq.
/// D is an MxM matrix; its square blocks have size MultisegmentWell::numWellEq.
/// B*x and D^-1*(B*x) are vectors with M*numWellEq doubles,
/// C^T*(D^-1*(B*x)) is a vector with N*numEq doubles.
///
/// This class is used in 3 phases:
/// - get total size of all wellcontributions that must be stored here
/// - allocate memory
/// - copy data of wellcontributions
class MultisegmentWellContribution
{
private:
unsigned int dim; // number of columns of blocks in B and C, equal to MultisegmentWell::numEq
unsigned int dim_wells; // number of rows of blocks in B and C, equal to MultisegmentWell::numWellEq
unsigned int N; // number of rows in vectors x and y, N == dim*Nb
unsigned int Nb; // number of blockrows in x and y
unsigned int M; // number of rows, M == dim_wells*Mb
unsigned int Mb; // number of blockrows in C, D and B
cudaStream_t stream;
double *h_x = nullptr, *h_y = nullptr; // CUDA pinned memory for GPU memcpy
// C and B are stored in BCRS format, D is stored in CSC format (Dune::UMFPack).
unsigned int DnumBlocks;
unsigned int BnumBlocks;
std::vector<double> Cvals;
std::vector<double> Dvals;
std::vector<double> Bvals;
std::vector<unsigned int> Ccols;
std::vector<int> Dcols; // Column pointers, contains M+1 entries
std::vector<unsigned int> Bcols;
std::vector<unsigned int> Crows;
std::vector<int> Drows; // Row indices, contains DnumBlocks*dim_wells*dim_wells entries
std::vector<unsigned int> Brows;
std::vector<double> z1; // z1 = B * x
std::vector<double> z2; // z2 = D^-1 * B * x
void *UMFPACK_Symbolic, *UMFPACK_Numeric;
public:
/// Set a cudaStream to be used
/// \param[in] stream the cudaStream that is used
void setCudaStream(cudaStream_t stream);
/// Create a new MultisegmentWellContribution
/// Matrices C and B are passed in Blocked CSR, matrix D in CSC
/// \param[in] dim size of blocks in vectors x and y, equal to MultisegmentWell::numEq
/// \param[in] dim_wells size of blocks of C, B and D, equal to MultisegmentWell::numWellEq
/// \param[in] Nb number of blocks in vectors x and y
/// \param[in] Mb number of blockrows in C, B and D
/// \param[in] BnumBlocks number of blocks in C and B
/// \param[in] Bvalues nonzero values of matrix B
/// \param[in] BcolIndices column indices of blocks of matrix B
/// \param[in] BrowPointers row pointers of matrix B
/// \param[in] DnumBlocks number of blocks in D
/// \param[in] Dvalues nonzero values of matrix D
/// \param[in] DcolPointers column pointers of matrix D
/// \param[in] DrowIndices row indices of matrix D
/// \param[in] Cvalues nonzero values of matrix C
MultisegmentWellContribution(unsigned int dim, unsigned int dim_wells,
unsigned int Nb, unsigned int Mb,
unsigned int BnumBlocks, double *Bvalues, unsigned int *BcolIndices, unsigned int *BrowPointers,
unsigned int DnumBlocks, double *Dvalues, int *DcolPointers, int *DrowIndices,
double *Cvalues);
/// Destroy a MultisegmentWellContribution, and free memory
~MultisegmentWellContribution();
/// Apply the MultisegmentWellContribution on GPU
/// performs y -= (C^T * (D^-1 * (B*x))) for MultisegmentWell
/// \param[in] d_x vector x, must be on GPU
/// \param[inout] d_y vector y, must be on GPU
void apply(double *d_x, double *d_y);
};
} //namespace Opm
#endif
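
To make the storage convention above concrete, here is a small hypothetical illustration (not part of this commit) of how one scalar entry of B is addressed in the flat Bvals/Bcols/Brows arrays; dim = 3 and dim_wells = 4 are example values only:

// Each block of B (and C) is stored with dim_wells rows and dim columns, row-major,
// so entry (j,k) of block 'blockID' lives at Bvals[blockID*dim*dim_wells + j*dim + k].
// Bcols[blockID] gives its block column, and Brows[row]..Brows[row+1] delimit the
// blocks belonging to blockrow 'row', as used in apply() above.
#include <cstdio>

int main()
{
    const unsigned dim = 3;        // MultisegmentWell::numEq (example value)
    const unsigned dim_wells = 4;  // MultisegmentWell::numWellEq (example value)
    const unsigned blockID = 2, j = 1, k = 2;
    std::printf("B block %u, entry (%u,%u) -> Bvals[%u]\n",
                blockID, j, k, blockID * dim * dim_wells + j * dim + k); // Bvals[29]
    return 0;
}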

View File

@ -128,27 +128,39 @@ namespace Opm
}
void WellContributions::alloc(){
cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells);
cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_wells * dim_wells * dim_wells);
cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells);
cudaMalloc((void**)&d_Ccols, sizeof(int) * num_blocks);
cudaMalloc((void**)&d_Bcols, sizeof(int) * num_blocks);
val_pointers = new unsigned int[num_wells + 1];
cudaMalloc((void**)&d_val_pointers, sizeof(int) * (num_wells + 1));
cudaCheckLastError("apply_gpu malloc failed");
allocated = true;
void WellContributions::alloc()
{
if (num_std_wells > 0) {
cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells);
cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_std_wells * dim_wells * dim_wells);
cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells);
cudaMalloc((void**)&d_Ccols, sizeof(int) * num_blocks);
cudaMalloc((void**)&d_Bcols, sizeof(int) * num_blocks);
val_pointers = new unsigned int[num_std_wells + 1];
cudaMalloc((void**)&d_val_pointers, sizeof(int) * (num_std_wells + 1));
cudaCheckLastError("apply_gpu malloc failed");
allocated = true;
}
}
WellContributions::~WellContributions()
{
cudaFree(d_Cnnzs);
cudaFree(d_Dnnzs);
cudaFree(d_Bnnzs);
cudaFree(d_Ccols);
cudaFree(d_Bcols);
delete[] val_pointers;
cudaFree(d_val_pointers);
// delete MultisegmentWellContributions
for (auto ms : multisegments) {
delete ms;
}
multisegments.clear();
// delete data for StandardWell
if (num_std_wells > 0) {
cudaFree(d_Cnnzs);
cudaFree(d_Dnnzs);
cudaFree(d_Bnnzs);
cudaFree(d_Ccols);
cudaFree(d_Bcols);
delete[] val_pointers;
cudaFree(d_val_pointers);
}
}
@ -156,8 +168,16 @@ namespace Opm
// y -= (C^T *(D^-1*( B*x)))
void WellContributions::apply(double *d_x, double *d_y)
{
int smem_size = 2 * sizeof(double) * dim_wells;
apply_well_contributions<<<num_wells, 32, smem_size, stream>>>(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_x, d_y, dim, dim_wells, d_val_pointers);
// apply MultisegmentWells
for(MultisegmentWellContribution *well : multisegments){
well->apply(d_x, d_y);
}
// apply StandardWells
if (num_std_wells > 0) {
int smem_size = 2 * sizeof(double) * dim_wells;
apply_well_contributions<<<num_std_wells, 32, smem_size, stream>>>(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_x, d_y, dim, dim_wells, d_val_pointers);
}
}
@ -173,16 +193,16 @@ namespace Opm
cudaMemcpy(d_Ccols + num_blocks_so_far, colIndices, sizeof(int) * val_size, cudaMemcpyHostToDevice);
break;
case MatrixType::D:
cudaMemcpy(d_Dnnzs + num_wells_so_far * dim_wells * dim_wells, values, sizeof(double) * dim_wells * dim_wells, cudaMemcpyHostToDevice);
cudaMemcpy(d_Dnnzs + num_std_wells_so_far * dim_wells * dim_wells, values, sizeof(double) * dim_wells * dim_wells, cudaMemcpyHostToDevice);
break;
case MatrixType::B:
cudaMemcpy(d_Bnnzs + num_blocks_so_far * dim * dim_wells, values, sizeof(double) * val_size * dim * dim_wells, cudaMemcpyHostToDevice);
cudaMemcpy(d_Bcols + num_blocks_so_far, colIndices, sizeof(int) * val_size, cudaMemcpyHostToDevice);
val_pointers[num_wells_so_far] = num_blocks_so_far;
if(num_wells_so_far == num_wells - 1){
val_pointers[num_wells] = num_blocks;
val_pointers[num_std_wells_so_far] = num_blocks_so_far;
if(num_std_wells_so_far == num_std_wells - 1){
val_pointers[num_std_wells] = num_blocks;
}
cudaMemcpy(d_val_pointers, val_pointers, sizeof(int) * (num_wells+1), cudaMemcpyHostToDevice);
cudaMemcpy(d_val_pointers, val_pointers, sizeof(int) * (num_std_wells+1), cudaMemcpyHostToDevice);
break;
default:
OPM_THROW(std::logic_error,"Error unsupported matrix ID for WellContributions::addMatrix()");
@ -190,13 +210,16 @@ namespace Opm
cudaCheckLastError("WellContributions::addMatrix() failed");
if (MatrixType::B == type) {
num_blocks_so_far += val_size;
num_wells_so_far++;
num_std_wells_so_far++;
}
}
void WellContributions::setCudaStream(cudaStream_t stream_)
{
this->stream = stream_;
for(MultisegmentWellContribution *well : multisegments){
well->setCudaStream(stream_);
}
}
void WellContributions::setBlockSize(unsigned int dim_, unsigned int dim_wells_)
@ -211,7 +234,18 @@ namespace Opm
OPM_THROW(std::logic_error,"Error cannot add more sizes after allocated in WellContributions");
}
num_blocks += nnz;
num_wells++;
num_std_wells++;
}
void WellContributions::addMultisegmentWellContribution(unsigned int dim, unsigned int dim_wells,
unsigned int Nb, unsigned int Mb,
unsigned int BnumBlocks, double *Bvalues, unsigned int *BcolIndices, unsigned int *BrowPointers,
unsigned int DnumBlocks, double *Dvalues, int *DcolPointers, int *DrowIndices,
double *Cvalues)
{
MultisegmentWellContribution *well = new MultisegmentWellContribution(dim, dim_wells, Nb, Mb, BnumBlocks, Bvalues, BcolIndices, BrowPointers, DnumBlocks, Dvalues, DcolPointers, DrowIndices, Cvalues);
multisegments.emplace_back(well);
++num_ms_wells;
}
} //namespace Opm
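
As a small worked example of the launch configuration in apply() above (numbers purely illustrative): with dim_wells = 4, each StandardWell is handled by one CUDA block of 32 threads using 2 * sizeof(double) * 4 = 64 bytes of dynamic shared memory, so a model with 10 standard wells launches a 10-block grid; any MultisegmentWellContributions are applied on the CPU before that kernel is launched.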

View File

@ -28,6 +28,8 @@
#include <vector>
#include <opm/simulators/linalg/bda/MultisegmentWellContribution.hpp>
#include <cuda_runtime.h>
namespace Opm
@ -35,6 +37,10 @@ namespace Opm
/// This class serves to eliminate the need to include the WellContributions into the matrix (with --matrix-add-well-contributions=true) for the cusparseSolver
/// If the --matrix-add-well-contributions commandline parameter is true, this class should not be used
/// So far, StandardWell and MultisegmentWell are supported
/// A single instance (or pointer) of this class is passed to the cusparseSolver.
/// For StandardWell, this class contains all the data and handles the computation. For MultisegmentWell, the vector 'multisegments' contains all the data. For more information, check the MultisegmentWellContribution class.
/// A StandardWell uses C, D and B and performs y -= (C^T * (D^-1 * (B*x)))
/// B and C are vectors, disguised as matrices and contain blocks of StandardWell::numEq by StandardWell::numStaticWellEq
/// D is a block, disguised as matrix, the square block has size StandardWell::numStaticWellEq. D is actually stored as D^-1
@ -50,14 +56,18 @@ namespace Opm
private:
unsigned int num_blocks = 0; // total number of blocks in all wells
unsigned int dim; // number of columns of blocks in B and C, equal to StandardWell::numEq
unsigned int dim_wells; // number of rows of blocks in B and C, equal to StandardWell::numStaticWellEq
unsigned int num_wells = 0; // number of wellcontributions in this object
unsigned int dim; // number of columns in blocks in B and C, equal to StandardWell::numEq
unsigned int dim_wells; // number of rows in blocks in B and C, equal to StandardWell::numStaticWellEq
unsigned int num_std_wells = 0; // number of StandardWells in this object
unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
unsigned int num_blocks_so_far = 0; // keep track of where next data is written
unsigned int num_wells_so_far = 0; // keep track of where next data is written
unsigned int num_std_wells_so_far = 0; // keep track of where next data is written
unsigned int *val_pointers = nullptr; // val_pointers[wellID] == index of first block for this well in Ccols and Bcols
bool allocated = false;
std::vector<MultisegmentWellContribution*> multisegments;
cudaStream_t stream;
// data for StandardWells, could remain nullptrs if not used
double *d_Cnnzs = nullptr;
double *d_Dnnzs = nullptr;
double *d_Bnnzs = nullptr;
@ -66,7 +76,7 @@ namespace Opm
double *d_z1 = nullptr;
double *d_z2 = nullptr;
unsigned int *d_val_pointers = nullptr;
cudaStream_t stream;
public:
/// StandardWell has C, D and B matrices that need to be copied
@ -86,22 +96,22 @@ namespace Opm
/// Destroy a WellContributions, and free memory
~WellContributions();
/// Apply all wellcontributions in this object
/// performs y -= (C^T * (D^-1 * (B*x))) for StandardWell
/// Apply all Wells in this object
/// performs y -= (C^T * (D^-1 * (B*x))) for all Wells
/// \param[in] x vector x
/// \param[inout] y vector y
void apply(double *x, double *y);
/// Allocate memory for the wellcontributions
/// Allocate memory for the StandardWells
void alloc();
/// Indicate how large the blocks of the wellcontributions (C and B) are
/// Indicate how large the blocks of the StandardWell (C and B) are
/// \param[in] dim number of columns
/// \param[in] dim_wells number of rows
void setBlockSize(unsigned int dim, unsigned int dim_wells);
/// Indicate how large the next wellcontribution is, this function cannot be called after alloc() is called
/// \param[in] numBlocks number of blocks in C and B of next wellcontribution
/// Indicate how large the next StandardWell is, this function cannot be called after alloc() is called
/// \param[in] numBlocks number of blocks in C and B of next StandardWell
void addNumBlocks(unsigned int numBlocks);
/// Store a matrix in this object, in blocked csr format, can only be called after alloc() is called
@ -111,10 +121,31 @@ namespace Opm
/// \param[in] val_size number of blocks in C or B, ignored for D
void addMatrix(MatrixType type, int *colIndices, double *values, unsigned int val_size);
/// Add a MultisegmentWellContribution, actually creates an object on heap that is destroyed in the destructor
/// Matrices C and B are passed in Blocked CSR, matrix D in CSC
/// \param[in] dim size of blocks in vectors x and y, equal to MultisegmentWell::numEq
/// \param[in] dim_wells size of blocks of C, B and D, equal to MultisegmentWell::numWellEq
/// \param[in] Nb number of blocks in vectors x and y
/// \param[in] Mb number of blockrows in C, B and D
/// \param[in] BnumBlocks number of blocks in C and B
/// \param[in] Bvalues nonzero values of matrix B
/// \param[in] BcolIndices column indices of blocks of matrix B
/// \param[in] BrowPointers row pointers of matrix B
/// \param[in] DnumBlocks number of blocks in D
/// \param[in] Dvalues nonzero values of matrix D
/// \param[in] DcolPointers column pointers of matrix D
/// \param[in] DrowIndices row indices of matrix D
/// \param[in] Cvalues nonzero values of matrix C
void addMultisegmentWellContribution(unsigned int dim, unsigned int dim_wells,
unsigned int Nb, unsigned int Mb,
unsigned int BnumBlocks, double *Bvalues, unsigned int *BcolIndices, unsigned int *BrowPointers,
unsigned int DnumBlocks, double *Dvalues, int *DcolPointers, int *DrowIndices,
double *Cvalues);
/// Return the number of wells added to this object
/// \return the number of wells added to this object
unsigned int getNumWells(){
return num_wells;
return num_std_wells + num_ms_wells;
}
};
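
A hypothetical sketch (not part of this commit) of the call order used to fill a WellContributions object for one StandardWell; the function name, block sizes and values are placeholders, and it assumes the MatrixType enum is nested in WellContributions as used in WellContributions.cu above:

#include <vector>
#include <cuda_runtime.h>
#include <opm/simulators/linalg/bda/WellContributions.hpp>

void fillWellContribs(cudaStream_t stream)   // hypothetical helper
{
    using WC = Opm::WellContributions;
    WC wellContribs;

    // phase 1: announce sizes (BlackoilWellModel::getWellContributions does this per well)
    wellContribs.setBlockSize(3, 4);   // StandardWell::numEq, StandardWell::numStaticWellEq (example values)
    wellContribs.addNumBlocks(2);      // this StandardWell has 2 blocks in B and C

    // phase 2: allocate the GPU buffers for the StandardWell data
    wellContribs.alloc();

    // phase 3: copy the data; B must come last, since it advances the internal counters
    std::vector<int> cols = {10, 11};                                                    // placeholder block columns
    std::vector<double> Cvals(2 * 3 * 4, 0.0), Bvals(2 * 3 * 4, 0.0), Dvals(4 * 4, 0.0); // placeholder values
    wellContribs.addMatrix(WC::MatrixType::C, cols.data(), Cvals.data(), 2);
    wellContribs.addMatrix(WC::MatrixType::D, cols.data(), Dvals.data(), 1);
    wellContribs.addMatrix(WC::MatrixType::B, cols.data(), Bvals.data(), 2);

    // MultisegmentWells are added via addMultisegmentWellContribution(...) instead,
    // see MultisegmentWell_impl.hpp further down.

    // cusparseSolver attaches its stream and calls apply(d_x, d_y) with device
    // pointers during the sparse matrix-vector products of the linear solve.
    wellContribs.setCudaStream(stream);
}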

View File

@ -20,6 +20,7 @@
#ifndef CUDA_HEADER_HEADER_INCLUDED
#define CUDA_HEADER_HEADER_INCLUDED
#include <cuda_runtime.h>
#include <iostream>
/// Runtime error checking of CUDA functions

View File

@ -74,9 +74,9 @@ private:
void gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res);
/// Initialize GPU and allocate memory
/// \param[in] N number of nonzeroes, divide by dim*dim to get number of blocks
/// \param[in] nnz number of nonzeroes, divide by dim*dim to get number of blocks
/// \param[in] dim size of block
/// \param[in] N number of nonzeroes, divide by dim*dim to get number of blocks
/// \param[in] nnz number of nonzeroes, divide by dim*dim to get number of blocks
/// \param[in] dim size of block
void initialize(int N, int nnz, int dim);
/// Clean memory

View File

@ -907,22 +907,34 @@ namespace Opm {
BlackoilWellModel<TypeTag>::
getWellContributions(WellContributions& wellContribs) const
{
// prepare for StandardWells
wellContribs.setBlockSize(StandardWell<TypeTag>::numEq, StandardWell<TypeTag>::numStaticWellEq);
for(unsigned int i = 0; i < well_container_.size(); i++){
auto& well = well_container_[i];
std::shared_ptr<StandardWell<TypeTag> > derived = std::dynamic_pointer_cast<StandardWell<TypeTag> >(well);
unsigned int numBlocks;
derived->getNumBlocks(numBlocks);
wellContribs.addNumBlocks(numBlocks);
if (derived) {
unsigned int numBlocks;
derived->getNumBlocks(numBlocks);
wellContribs.addNumBlocks(numBlocks);
}
}
// allocate memory for data from StandardWells
wellContribs.alloc();
for(unsigned int i = 0; i < well_container_.size(); i++){
auto& well = well_container_[i];
std::shared_ptr<StandardWell<TypeTag> > derived = std::dynamic_pointer_cast<StandardWell<TypeTag> >(well);
if (derived) {
derived->addWellContribution(wellContribs);
// maybe WellInterface could implement addWellContribution()
auto derived_std = std::dynamic_pointer_cast<StandardWell<TypeTag> >(well);
if (derived_std) {
derived_std->addWellContribution(wellContribs);
} else {
OpmLog::warning("Warning only StandardWell is supported by WellContributions for GPU");
auto derived_ms = std::dynamic_pointer_cast<MultisegmentWell<TypeTag> >(well);
if (derived_ms) {
derived_ms->addWellContribution(wellContribs);
} else {
OpmLog::warning("Warning unknown type of well");
}
}
}
}

View File

@ -135,6 +135,11 @@ namespace Opm
/// r = r - C D^-1 Rw
virtual void apply(BVector& r) const override;
#if HAVE_CUDA
/// add the contribution (C, D, B matrices) of this Well to the WellContributions object
void addWellContribution(WellContributions& wellContribs) const;
#endif
/// using the solution x to recover the solution xw for wells and applying
/// xw to update Well State
virtual void recoverWellSolutionAndUpdateWellState(const BVector& x,

View File

@ -639,6 +639,66 @@ namespace Opm
#if HAVE_CUDA
template<typename TypeTag>
void
MultisegmentWell<TypeTag>::
addWellContribution(WellContributions& wellContribs) const
{
unsigned int Nb = duneB_.M(); // number of blockrows in matrix A
unsigned int Mb = duneB_.N(); // number of blockrows in duneB_, duneC_ and duneD_
unsigned int BnumBlocks = duneB_.nonzeroes();
unsigned int DnumBlocks = duneD_.nonzeroes();
// duneC
std::vector<unsigned int> Ccols;
std::vector<double> Cvals;
Ccols.reserve(BnumBlocks);
Cvals.reserve(BnumBlocks * numEq * numWellEq);
for (auto rowC = duneC_.begin(); rowC != duneC_.end(); ++rowC) {
for (auto colC = rowC->begin(), endC = rowC->end(); colC != endC; ++colC) {
Ccols.emplace_back(colC.index());
for (int i = 0; i < numWellEq; ++i) {
for (int j = 0; j < numEq; ++j) {
Cvals.emplace_back((*colC)[i][j]);
}
}
}
}
// duneD
Dune::UMFPack<DiagMatWell> umfpackMatrix(duneD_, 0);
double *Dvals = umfpackMatrix.getInternalMatrix().getValues();
int *Dcols = umfpackMatrix.getInternalMatrix().getColStart();
int *Drows = umfpackMatrix.getInternalMatrix().getRowIndex();
// duneB
std::vector<unsigned int> Bcols;
std::vector<unsigned int> Brows;
std::vector<double> Bvals;
Bcols.reserve(BnumBlocks);
Brows.reserve(Mb+1);
Bvals.reserve(BnumBlocks * numEq * numWellEq);
Brows.emplace_back(0);
unsigned int sumBlocks = 0;
for (auto rowB = duneB_.begin(); rowB != duneB_.end(); ++rowB) {
int sizeRow = 0;
for (auto colB = rowB->begin(), endB = rowB->end(); colB != endB; ++colB) {
Bcols.emplace_back(colB.index());
for (int i = 0; i < numWellEq; ++i) {
for (int j = 0; j < numEq; ++j) {
Bvals.emplace_back((*colB)[i][j]);
}
}
sizeRow++;
}
sumBlocks += sizeRow;
Brows.emplace_back(sumBlocks);
}
wellContribs.addMultisegmentWellContribution(numEq, numWellEq, Nb, Mb, BnumBlocks, Bvals.data(), Bcols.data(), Brows.data(), DnumBlocks, Dvals, Dcols, Drows, Cvals.data());
}
#endif
template <typename TypeTag>