mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
cusparseSolver can now handle MultisegmentWells, they are actually applied on CPU via SuiteSparse/UMFPACK
This commit is contained in:
parent
1f177b33e7
commit
04ee2be348
@ -44,6 +44,7 @@ list (APPEND MAIN_SOURCE_FILES
|
||||
if(CUDA_FOUND)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cusparseSolverBackend.cu)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cu)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/MultisegmentWellContribution.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp)
|
||||
endif()
|
||||
if(MPI_FOUND)
|
||||
@ -144,6 +145,7 @@ list (APPEND PUBLIC_HEADER_FILES
|
||||
opm/simulators/linalg/bda/BdaResult.hpp
|
||||
opm/simulators/linalg/bda/cuda_header.hpp
|
||||
opm/simulators/linalg/bda/cusparseSolverBackend.hpp
|
||||
opm/simulators/linalg/bda/MultisegmentWellContribution.hpp
|
||||
opm/simulators/linalg/bda/WellContributions.hpp
|
||||
opm/simulators/linalg/BlackoilAmg.hpp
|
||||
opm/simulators/linalg/amgcpr.hh
|
||||
|
138
opm/simulators/linalg/bda/MultisegmentWellContribution.cpp
Normal file
138
opm/simulators/linalg/bda/MultisegmentWellContribution.cpp
Normal file
@ -0,0 +1,138 @@
|
||||
/*
|
||||
Copyright 2020 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <config.h> // CMake
|
||||
#include <fstream>
|
||||
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
|
||||
#if HAVE_UMFPACK
|
||||
#include <dune/istl/umfpack.hh>
|
||||
#include <opm/simulators/linalg/bda/cuda_header.hpp>
|
||||
#endif // HAVE_UMFPACK
|
||||
|
||||
#include <opm/simulators/linalg/bda/MultisegmentWellContribution.hpp>
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
MultisegmentWellContribution::MultisegmentWellContribution(unsigned int dim_, unsigned int dim_wells_,
|
||||
unsigned int Nb_, unsigned int Mb_,
|
||||
unsigned int BnumBlocks_, double *Bvalues, unsigned int *BcolIndices, unsigned int *BrowPointers,
|
||||
unsigned int DnumBlocks_, double *Dvalues, int *DcolPointers, int *DrowIndices,
|
||||
double *Cvalues)
|
||||
: dim(dim_), dim_wells(dim_wells_), N(Nb_*dim), Nb(Nb_), M(Mb_*dim_wells), Mb(Mb_), DnumBlocks(DnumBlocks_), BnumBlocks(BnumBlocks_)
|
||||
{
|
||||
Cvals.insert(Cvals.end(), Cvalues, Cvalues + BnumBlocks*dim*dim_wells);
|
||||
Dvals.insert(Dvals.end(), Dvalues, Dvalues + DnumBlocks*dim_wells*dim_wells);
|
||||
Bvals.insert(Bvals.end(), Bvalues, Bvalues + BnumBlocks*dim*dim_wells);
|
||||
Ccols.insert(Ccols.end(), BcolIndices, BcolIndices + BnumBlocks);
|
||||
Bcols.insert(Bcols.end(), BcolIndices, BcolIndices + BnumBlocks);
|
||||
Crows.insert(Crows.end(), BrowPointers, BrowPointers + M + 1);
|
||||
Brows.insert(Brows.end(), BrowPointers, BrowPointers + M + 1);
|
||||
|
||||
Dcols.insert(Dcols.end(), DcolPointers, DcolPointers + M + 1);
|
||||
Drows.insert(Drows.end(), DrowIndices, DrowIndices + DnumBlocks*dim_wells*dim_wells);
|
||||
|
||||
z1.resize(Mb * dim_wells);
|
||||
z2.resize(Mb * dim_wells);
|
||||
|
||||
// allocate pinned memory on host
|
||||
cudaMallocHost(&h_x, sizeof(double) * N);
|
||||
cudaMallocHost(&h_y, sizeof(double) * N);
|
||||
|
||||
umfpack_di_symbolic(M, M, Dcols.data(), Drows.data(), Dvals.data(), &UMFPACK_Symbolic, nullptr, nullptr);
|
||||
umfpack_di_numeric(Dcols.data(), Drows.data(), Dvals.data(), UMFPACK_Symbolic, &UMFPACK_Numeric, nullptr, nullptr);
|
||||
}
|
||||
|
||||
MultisegmentWellContribution::~MultisegmentWellContribution()
|
||||
{
|
||||
cudaFreeHost(h_x);
|
||||
cudaFreeHost(h_y);
|
||||
|
||||
umfpack_di_free_symbolic(&UMFPACK_Symbolic);
|
||||
umfpack_di_free_numeric(&UMFPACK_Numeric);
|
||||
}
|
||||
|
||||
|
||||
// Apply the MultisegmentWellContribution, similar to MultisegmentWell::apply()
|
||||
// y -= (C^T * (D^-1 * (B * x)))
|
||||
void MultisegmentWellContribution::apply(double *d_x, double *d_y)
|
||||
{
|
||||
// copy vectors x and y from GPU to CPU
|
||||
cudaMemcpyAsync(h_x, d_x, sizeof(double) * N, cudaMemcpyDeviceToHost, stream);
|
||||
cudaMemcpyAsync(h_y, d_y, sizeof(double) * N, cudaMemcpyDeviceToHost, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
|
||||
// reset z1 and z2
|
||||
std::fill(z1.begin(), z1.end(), 0.0);
|
||||
std::fill(z2.begin(), z2.end(), 0.0);
|
||||
|
||||
// z1 = B * x
|
||||
for (unsigned int row = 0; row < Mb; ++row) {
|
||||
// for every block in the row
|
||||
for (unsigned int blockID = Brows[row]; blockID < Brows[row+1]; ++blockID) {
|
||||
unsigned int colIdx = Bcols[blockID];
|
||||
for (unsigned int j = 0; j < dim_wells; ++j) {
|
||||
double temp = 0.0;
|
||||
for (unsigned int k = 0; k < dim; ++k) {
|
||||
temp += Bvals[blockID * dim * dim_wells + j * dim + k] * h_x[colIdx * dim + k];
|
||||
}
|
||||
z1[row * dim_wells + j] += temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// z2 = D^-1 * (B * x)
|
||||
// umfpack
|
||||
umfpack_di_solve(UMFPACK_A, Dcols.data(), Drows.data(), Dvals.data(), z2.data(), z1.data(), UMFPACK_Numeric, nullptr, nullptr);
|
||||
|
||||
// y -= (C^T * z2)
|
||||
// y -= (C^T * (D^-1 * (B * x)))
|
||||
for (unsigned int row = 0; row < Mb; ++row) {
|
||||
// for every block in the row
|
||||
for (unsigned int blockID = Brows[row]; blockID < Brows[row+1]; ++blockID) {
|
||||
unsigned int colIdx = Bcols[blockID];
|
||||
for (unsigned int j = 0; j < dim; ++j) {
|
||||
double temp = 0.0;
|
||||
for (unsigned int k = 0; k < dim_wells; ++k) {
|
||||
temp += Cvals[blockID * dim * dim_wells + j + k * dim] * z2[row * dim_wells + k];
|
||||
}
|
||||
h_y[colIdx * dim + j] -= temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// copy vector y from CPU to GPU
|
||||
cudaMemcpyAsync(d_y, h_y, sizeof(double) * N, cudaMemcpyHostToDevice, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
}
|
||||
|
||||
void MultisegmentWellContribution::setCudaStream(cudaStream_t stream_)
|
||||
{
|
||||
stream = stream_;
|
||||
}
|
||||
|
||||
|
||||
} //namespace Opm
|
||||
|
114
opm/simulators/linalg/bda/MultisegmentWellContribution.hpp
Normal file
114
opm/simulators/linalg/bda/MultisegmentWellContribution.hpp
Normal file
@ -0,0 +1,114 @@
|
||||
/*
|
||||
Copyright 2020 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef MULTISEGMENTWELLCONTRIBUTION_HEADER_INCLUDED
|
||||
#define MULTISEGMENTWELLCONTRIBUTION_HEADER_INCLUDED
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
/// This class serves to duplicate the functionality of the MultisegmentWell
|
||||
/// A MultisegmentWell uses C, D and B and performs y -= (C^T * (D^-1 * (B*x)))
|
||||
/// B and C are matrices, with M rows and N columns, where N is the size of the matrix. They contain blocks of MultisegmentWell::numEq by MultisegmentWell::numWellEq.
|
||||
/// D is a MxM matrix, the square blocks have size MultisegmentWell::numWellEq.
|
||||
/// B*x and D*B*x are a vector with M*numWellEq doubles
|
||||
/// C*D*B*x is a vector with N*numEq doubles.
|
||||
///
|
||||
/// This class is used in 3 phases:
|
||||
/// - get total size of all wellcontributions that must be stored here
|
||||
/// - allocate memory
|
||||
/// - copy data of wellcontributions
|
||||
class MultisegmentWellContribution
|
||||
{
|
||||
|
||||
private:
|
||||
unsigned int dim; // number of columns of blocks in B and C, equal to MultisegmentWell::numEq
|
||||
unsigned int dim_wells; // number of rows of blocks in B and C, equal to MultisegmentWell::numWellEq
|
||||
unsigned int N; // number of rows in vectors x and y, N == dim*Nb
|
||||
unsigned int Nb; // number of blockrows in x and y
|
||||
unsigned int M; // number of rows, M == dim_wells*Mb
|
||||
unsigned int Mb; // number of blockrows in C, D and B
|
||||
|
||||
cudaStream_t stream;
|
||||
double *h_x = nullptr, *h_y = nullptr; // CUDA pinned memory for GPU memcpy
|
||||
|
||||
// C and B are stored in BCRS format, D is stored in CSC format (Dune::UMFPack).
|
||||
unsigned int DnumBlocks;
|
||||
unsigned int BnumBlocks;
|
||||
std::vector<double> Cvals;
|
||||
std::vector<double> Dvals;
|
||||
std::vector<double> Bvals;
|
||||
std::vector<unsigned int> Ccols;
|
||||
std::vector<int> Dcols; // Columnpointers, contains M+1 entries
|
||||
std::vector<unsigned int> Bcols;
|
||||
std::vector<unsigned int> Crows;
|
||||
std::vector<int> Drows; // Rowindicies, contains DnumBlocks*dim*dim_wells entries
|
||||
std::vector<unsigned int> Brows;
|
||||
std::vector<double> z1; // z1 = B * x
|
||||
std::vector<double> z2; // z2 = D^-1 * B * x
|
||||
void *UMFPACK_Symbolic, *UMFPACK_Numeric;
|
||||
|
||||
|
||||
public:
|
||||
|
||||
/// Set a cudaStream to be used
|
||||
/// \param[in] stream the cudaStream that is used
|
||||
void setCudaStream(cudaStream_t stream);
|
||||
|
||||
/// Create a new MultisegmentWellContribution
|
||||
/// Matrices C and B are passed in Blocked CSR, matrix D in CSC
|
||||
/// \param[in] dim size of blocks in vectors x and y, equal to MultisegmentWell::numEq
|
||||
/// \param[in] dim_wells size of blocks of C, B and D, equal to MultisegmentWell::numWellEq
|
||||
/// \param[in] Nb number of blocks in vectors x and y
|
||||
/// \param[in] Mb number of blockrows in C, B and D
|
||||
/// \param[in] BnumBlocks number of blocks in C and B
|
||||
/// \param[in] Bvalues nonzero values of matrix B
|
||||
/// \param[in] BcolIndices columnindices of blocks of matrix B
|
||||
/// \param[in] BrowPointers rowpointers of matrix B
|
||||
/// \param[in] DnumBlocks number of blocks in D
|
||||
/// \param[in] Dvalues nonzero values of matrix D
|
||||
/// \param[in] DcolPointers columnpointers of matrix D
|
||||
/// \param[in] DrowIndices rowindices of matrix D
|
||||
/// \param[in] Cvalues nonzero values of matrix C
|
||||
MultisegmentWellContribution(unsigned int dim, unsigned int dim_wells,
|
||||
unsigned int Nb, unsigned int Mb,
|
||||
unsigned int BnumBlocks, double *Bvalues, unsigned int *BcolIndices, unsigned int *BrowPointers,
|
||||
unsigned int DnumBlocks, double *Dvalues, int *DcolPointers, int *DrowIndices,
|
||||
double *Cvalues);
|
||||
|
||||
/// Destroy a MultisegmentWellContribution, and free memory
|
||||
~MultisegmentWellContribution();
|
||||
|
||||
/// Apply the MultisegmentWellContribution on GPU
|
||||
/// performs y -= (C^T * (D^-1 * (B*x))) for MultisegmentWell
|
||||
/// \param[in] d_x vector x, must be on GPU
|
||||
/// \param[inout] d_y vector y, must be on GPU
|
||||
void apply(double *d_x, double *d_y);
|
||||
|
||||
};
|
||||
|
||||
} //namespace Opm
|
||||
|
||||
#endif
|
@ -128,27 +128,39 @@ namespace Opm
|
||||
}
|
||||
|
||||
|
||||
void WellContributions::alloc(){
|
||||
cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_wells * dim_wells * dim_wells);
|
||||
cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
cudaMalloc((void**)&d_Ccols, sizeof(int) * num_blocks);
|
||||
cudaMalloc((void**)&d_Bcols, sizeof(int) * num_blocks);
|
||||
val_pointers = new unsigned int[num_wells + 1];
|
||||
cudaMalloc((void**)&d_val_pointers, sizeof(int) * (num_wells + 1));
|
||||
cudaCheckLastError("apply_gpu malloc failed");
|
||||
allocated = true;
|
||||
void WellContributions::alloc()
|
||||
{
|
||||
if (num_std_wells > 0) {
|
||||
cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_std_wells * dim_wells * dim_wells);
|
||||
cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
cudaMalloc((void**)&d_Ccols, sizeof(int) * num_blocks);
|
||||
cudaMalloc((void**)&d_Bcols, sizeof(int) * num_blocks);
|
||||
val_pointers = new unsigned int[num_std_wells + 1];
|
||||
cudaMalloc((void**)&d_val_pointers, sizeof(int) * (num_std_wells + 1));
|
||||
cudaCheckLastError("apply_gpu malloc failed");
|
||||
allocated = true;
|
||||
}
|
||||
}
|
||||
|
||||
WellContributions::~WellContributions()
|
||||
{
|
||||
cudaFree(d_Cnnzs);
|
||||
cudaFree(d_Dnnzs);
|
||||
cudaFree(d_Bnnzs);
|
||||
cudaFree(d_Ccols);
|
||||
cudaFree(d_Bcols);
|
||||
delete[] val_pointers;
|
||||
cudaFree(d_val_pointers);
|
||||
// delete MultisegmentWellContributions
|
||||
for (auto ms : multisegments) {
|
||||
delete ms;
|
||||
}
|
||||
multisegments.clear();
|
||||
|
||||
// delete data for StandardWell
|
||||
if (num_std_wells > 0) {
|
||||
cudaFree(d_Cnnzs);
|
||||
cudaFree(d_Dnnzs);
|
||||
cudaFree(d_Bnnzs);
|
||||
cudaFree(d_Ccols);
|
||||
cudaFree(d_Bcols);
|
||||
delete[] val_pointers;
|
||||
cudaFree(d_val_pointers);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -156,8 +168,16 @@ namespace Opm
|
||||
// y -= (C^T *(D^-1*( B*x)))
|
||||
void WellContributions::apply(double *d_x, double *d_y)
|
||||
{
|
||||
int smem_size = 2 * sizeof(double) * dim_wells;
|
||||
apply_well_contributions<<<num_wells, 32, smem_size, stream>>>(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_x, d_y, dim, dim_wells, d_val_pointers);
|
||||
// apply MultisegmentWells
|
||||
for(MultisegmentWellContribution *well : multisegments){
|
||||
well->apply(d_x, d_y);
|
||||
}
|
||||
|
||||
// apply StandardWells
|
||||
if (num_std_wells > 0) {
|
||||
int smem_size = 2 * sizeof(double) * dim_wells;
|
||||
apply_well_contributions<<<num_std_wells, 32, smem_size, stream>>>(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_x, d_y, dim, dim_wells, d_val_pointers);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -173,16 +193,16 @@ namespace Opm
|
||||
cudaMemcpy(d_Ccols + num_blocks_so_far, colIndices, sizeof(int) * val_size, cudaMemcpyHostToDevice);
|
||||
break;
|
||||
case MatrixType::D:
|
||||
cudaMemcpy(d_Dnnzs + num_wells_so_far * dim_wells * dim_wells, values, sizeof(double) * dim_wells * dim_wells, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(d_Dnnzs + num_std_wells_so_far * dim_wells * dim_wells, values, sizeof(double) * dim_wells * dim_wells, cudaMemcpyHostToDevice);
|
||||
break;
|
||||
case MatrixType::B:
|
||||
cudaMemcpy(d_Bnnzs + num_blocks_so_far * dim * dim_wells, values, sizeof(double) * val_size * dim * dim_wells, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(d_Bcols + num_blocks_so_far, colIndices, sizeof(int) * val_size, cudaMemcpyHostToDevice);
|
||||
val_pointers[num_wells_so_far] = num_blocks_so_far;
|
||||
if(num_wells_so_far == num_wells - 1){
|
||||
val_pointers[num_wells] = num_blocks;
|
||||
val_pointers[num_std_wells_so_far] = num_blocks_so_far;
|
||||
if(num_std_wells_so_far == num_std_wells - 1){
|
||||
val_pointers[num_std_wells] = num_blocks;
|
||||
}
|
||||
cudaMemcpy(d_val_pointers, val_pointers, sizeof(int) * (num_wells+1), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(d_val_pointers, val_pointers, sizeof(int) * (num_std_wells+1), cudaMemcpyHostToDevice);
|
||||
break;
|
||||
default:
|
||||
OPM_THROW(std::logic_error,"Error unsupported matrix ID for WellContributions::addMatrix()");
|
||||
@ -190,13 +210,16 @@ namespace Opm
|
||||
cudaCheckLastError("WellContributions::addMatrix() failed");
|
||||
if (MatrixType::B == type) {
|
||||
num_blocks_so_far += val_size;
|
||||
num_wells_so_far++;
|
||||
num_std_wells_so_far++;
|
||||
}
|
||||
}
|
||||
|
||||
void WellContributions::setCudaStream(cudaStream_t stream_)
|
||||
{
|
||||
this->stream = stream_;
|
||||
for(MultisegmentWellContribution *well : multisegments){
|
||||
well->setCudaStream(stream_);
|
||||
}
|
||||
}
|
||||
|
||||
void WellContributions::setBlockSize(unsigned int dim_, unsigned int dim_wells_)
|
||||
@ -211,7 +234,18 @@ namespace Opm
|
||||
OPM_THROW(std::logic_error,"Error cannot add more sizes after allocated in WellContributions");
|
||||
}
|
||||
num_blocks += nnz;
|
||||
num_wells++;
|
||||
num_std_wells++;
|
||||
}
|
||||
|
||||
void WellContributions::addMultisegmentWellContribution(unsigned int dim, unsigned int dim_wells,
|
||||
unsigned int Nb, unsigned int Mb,
|
||||
unsigned int BnumBlocks, double *Bvalues, unsigned int *BcolIndices, unsigned int *BrowPointers,
|
||||
unsigned int DnumBlocks, double *Dvalues, int *DcolPointers, int *DrowIndices,
|
||||
double *Cvalues)
|
||||
{
|
||||
MultisegmentWellContribution *well = new MultisegmentWellContribution(dim, dim_wells, Nb, Mb, BnumBlocks, Bvalues, BcolIndices, BrowPointers, DnumBlocks, Dvalues, DcolPointers, DrowIndices, Cvalues);
|
||||
multisegments.emplace_back(well);
|
||||
++num_ms_wells;
|
||||
}
|
||||
|
||||
} //namespace Opm
|
||||
|
@ -28,6 +28,8 @@
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <opm/simulators/linalg/bda/MultisegmentWellContribution.hpp>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
namespace Opm
|
||||
@ -35,6 +37,10 @@ namespace Opm
|
||||
|
||||
/// This class serves to eliminate the need to include the WellContributions into the matrix (with --matrix-add-well-contributions=true) for the cusparseSolver
|
||||
/// If the --matrix-add-well-contributions commandline parameter is true, this class should not be used
|
||||
/// So far, StandardWell and MultisegmentWell are supported
|
||||
/// A single instance (or pointer) of this class is passed to the cusparseSolver.
|
||||
/// For StandardWell, this class contains all the data and handles the computation. For MultisegmentWell, the vector 'multisegments' contains all the data. For more information, check the MultisegmentWellContribution class.
|
||||
|
||||
/// A StandardWell uses C, D and B and performs y -= (C^T * (D^-1 * (B*x)))
|
||||
/// B and C are vectors, disguised as matrices and contain blocks of StandardWell::numEq by StandardWell::numStaticWellEq
|
||||
/// D is a block, disguised as matrix, the square block has size StandardWell::numStaticWellEq. D is actually stored as D^-1
|
||||
@ -50,14 +56,18 @@ namespace Opm
|
||||
|
||||
private:
|
||||
unsigned int num_blocks = 0; // total number of blocks in all wells
|
||||
unsigned int dim; // number of columns of blocks in B and C, equal to StandardWell::numEq
|
||||
unsigned int dim_wells; // number of rows of blocks in B and C, equal to StandardWell::numStaticWellEq
|
||||
unsigned int num_wells = 0; // number of wellcontributions in this object
|
||||
unsigned int dim; // number of columns in blocks in B and C, equal to StandardWell::numEq
|
||||
unsigned int dim_wells; // number of rows in blocks in B and C, equal to StandardWell::numStaticWellEq
|
||||
unsigned int num_std_wells = 0; // number of StandardWells in this object
|
||||
unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
|
||||
unsigned int num_blocks_so_far = 0; // keep track of where next data is written
|
||||
unsigned int num_wells_so_far = 0; // keep track of where next data is written
|
||||
unsigned int num_std_wells_so_far = 0; // keep track of where next data is written
|
||||
unsigned int *val_pointers = nullptr; // val_pointers[wellID] == index of first block for this well in Ccols and Bcols
|
||||
bool allocated = false;
|
||||
std::vector<MultisegmentWellContribution*> multisegments;
|
||||
cudaStream_t stream;
|
||||
|
||||
// data for StandardWells, could remain nullptrs if not used
|
||||
double *d_Cnnzs = nullptr;
|
||||
double *d_Dnnzs = nullptr;
|
||||
double *d_Bnnzs = nullptr;
|
||||
@ -66,7 +76,7 @@ namespace Opm
|
||||
double *d_z1 = nullptr;
|
||||
double *d_z2 = nullptr;
|
||||
unsigned int *d_val_pointers = nullptr;
|
||||
cudaStream_t stream;
|
||||
|
||||
public:
|
||||
|
||||
/// StandardWell has C, D and B matrices that need to be copied
|
||||
@ -86,22 +96,22 @@ namespace Opm
|
||||
/// Destroy a WellContributions, and free memory
|
||||
~WellContributions();
|
||||
|
||||
/// Apply all wellcontributions in this object
|
||||
/// performs y -= (C^T * (D^-1 * (B*x))) for StandardWell
|
||||
/// Apply all Wells in this object
|
||||
/// performs y -= (C^T * (D^-1 * (B*x))) for all Wells
|
||||
/// \param[in] x vector x
|
||||
/// \param[inout] y vector y
|
||||
void apply(double *x, double *y);
|
||||
|
||||
/// Allocate memory for the wellcontributions
|
||||
/// Allocate memory for the StandardWells
|
||||
void alloc();
|
||||
|
||||
/// Indicate how large the blocks of the wellcontributions (C and B) are
|
||||
/// Indicate how large the blocks of the StandardWell (C and B) are
|
||||
/// \param[in] dim number of columns
|
||||
/// \param[in] dim_wells number of rows
|
||||
void setBlockSize(unsigned int dim, unsigned int dim_wells);
|
||||
|
||||
/// Indicate how large the next wellcontribution is, this function cannot be called after alloc() is called
|
||||
/// \param[in] numBlocks number of blocks in C and B of next wellcontribution
|
||||
/// Indicate how large the next StandardWell is, this function cannot be called after alloc() is called
|
||||
/// \param[in] numBlocks number of blocks in C and B of next StandardWell
|
||||
void addNumBlocks(unsigned int numBlocks);
|
||||
|
||||
/// Store a matrix in this object, in blocked csr format, can only be called after alloc() is called
|
||||
@ -111,10 +121,31 @@ namespace Opm
|
||||
/// \param[in] val_size number of blocks in C or B, ignored for D
|
||||
void addMatrix(MatrixType type, int *colIndices, double *values, unsigned int val_size);
|
||||
|
||||
/// Add a MultisegmentWellContribution, actually creates an object on heap that is destroyed in the destructor
|
||||
/// Matrices C and B are passed in Blocked CSR, matrix D in CSC
|
||||
/// \param[in] dim size of blocks in vectors x and y, equal to MultisegmentWell::numEq
|
||||
/// \param[in] dim_wells size of blocks of C, B and D, equal to MultisegmentWell::numWellEq
|
||||
/// \param[in] Nb number of blocks in vectors x and y
|
||||
/// \param[in] Mb number of blockrows in C, B and D
|
||||
/// \param[in] BnumBlocks number of blocks in C and B
|
||||
/// \param[in] Bvalues nonzero values of matrix B
|
||||
/// \param[in] BcolIndices columnindices of blocks of matrix B
|
||||
/// \param[in] BrowPointers rowpointers of matrix B
|
||||
/// \param[in] DnumBlocks number of blocks in D
|
||||
/// \param[in] Dvalues nonzero values of matrix D
|
||||
/// \param[in] DcolPointers columnpointers of matrix D
|
||||
/// \param[in] DrowIndices rowindices of matrix D
|
||||
/// \param[in] Cvalues nonzero values of matrix C
|
||||
void addMultisegmentWellContribution(unsigned int dim, unsigned int dim_wells,
|
||||
unsigned int Nb, unsigned int Mb,
|
||||
unsigned int BnumBlocks, double *Bvalues, unsigned int *BcolIndices, unsigned int *BrowPointers,
|
||||
unsigned int DnumBlocks, double *Dvalues, int *DcolPointers, int *DrowIndices,
|
||||
double *Cvalues);
|
||||
|
||||
/// Return the number of wells added to this object
|
||||
/// \return the number of wells added to this object
|
||||
unsigned int getNumWells(){
|
||||
return num_wells;
|
||||
return num_std_wells + num_ms_wells;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
#ifndef CUDA_HEADER_HEADER_INCLUDED
|
||||
#define CUDA_HEADER_HEADER_INCLUDED
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <iostream>
|
||||
|
||||
/// Runtime error checking of CUDA functions
|
||||
|
@ -74,9 +74,9 @@ private:
|
||||
void gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res);
|
||||
|
||||
/// Initialize GPU and allocate memory
|
||||
/// \param[in] N number of nonzeroes, divide by dim*dim to get number of blocks
|
||||
/// \param[in] nnz number of nonzeroes, divide by dim*dim to get number of blocks
|
||||
/// \param[in] dim size of block
|
||||
/// \param[in] N number of nonzeroes, divide by dim*dim to get number of blocks
|
||||
/// \param[in] nnz number of nonzeroes, divide by dim*dim to get number of blocks
|
||||
/// \param[in] dim size of block
|
||||
void initialize(int N, int nnz, int dim);
|
||||
|
||||
/// Clean memory
|
||||
|
@ -907,22 +907,34 @@ namespace Opm {
|
||||
BlackoilWellModel<TypeTag>::
|
||||
getWellContributions(WellContributions& wellContribs) const
|
||||
{
|
||||
// prepare for StandardWells
|
||||
wellContribs.setBlockSize(StandardWell<TypeTag>::numEq, StandardWell<TypeTag>::numStaticWellEq);
|
||||
for(unsigned int i = 0; i < well_container_.size(); i++){
|
||||
auto& well = well_container_[i];
|
||||
std::shared_ptr<StandardWell<TypeTag> > derived = std::dynamic_pointer_cast<StandardWell<TypeTag> >(well);
|
||||
unsigned int numBlocks;
|
||||
derived->getNumBlocks(numBlocks);
|
||||
wellContribs.addNumBlocks(numBlocks);
|
||||
if (derived) {
|
||||
unsigned int numBlocks;
|
||||
derived->getNumBlocks(numBlocks);
|
||||
wellContribs.addNumBlocks(numBlocks);
|
||||
}
|
||||
}
|
||||
|
||||
// allocate memory for data from StandardWells
|
||||
wellContribs.alloc();
|
||||
|
||||
for(unsigned int i = 0; i < well_container_.size(); i++){
|
||||
auto& well = well_container_[i];
|
||||
std::shared_ptr<StandardWell<TypeTag> > derived = std::dynamic_pointer_cast<StandardWell<TypeTag> >(well);
|
||||
if (derived) {
|
||||
derived->addWellContribution(wellContribs);
|
||||
// maybe WellInterface could implement addWellContribution()
|
||||
auto derived_std = std::dynamic_pointer_cast<StandardWell<TypeTag> >(well);
|
||||
if (derived_std) {
|
||||
derived_std->addWellContribution(wellContribs);
|
||||
} else {
|
||||
OpmLog::warning("Warning only StandardWell is supported by WellContributions for GPU");
|
||||
auto derived_ms = std::dynamic_pointer_cast<MultisegmentWell<TypeTag> >(well);
|
||||
if (derived_ms) {
|
||||
derived_ms->addWellContribution(wellContribs);
|
||||
} else {
|
||||
OpmLog::warning("Warning unknown type of well");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -135,6 +135,11 @@ namespace Opm
|
||||
/// r = r - C D^-1 Rw
|
||||
virtual void apply(BVector& r) const override;
|
||||
|
||||
#if HAVE_CUDA
|
||||
/// add the contribution (C, D, B matrices) of this Well to the WellContributions object
|
||||
void addWellContribution(WellContributions& wellContribs) const;
|
||||
#endif
|
||||
|
||||
/// using the solution x to recover the solution xw for wells and applying
|
||||
/// xw to update Well State
|
||||
virtual void recoverWellSolutionAndUpdateWellState(const BVector& x,
|
||||
|
@ -639,6 +639,66 @@ namespace Opm
|
||||
|
||||
|
||||
|
||||
#if HAVE_CUDA
|
||||
template<typename TypeTag>
|
||||
void
|
||||
MultisegmentWell<TypeTag>::
|
||||
addWellContribution(WellContributions& wellContribs) const
|
||||
{
|
||||
unsigned int Nb = duneB_.M(); // number of blockrows in matrix A
|
||||
unsigned int Mb = duneB_.N(); // number of blockrows in duneB_, duneC_ and duneD_
|
||||
unsigned int BnumBlocks = duneB_.nonzeroes();
|
||||
unsigned int DnumBlocks = duneD_.nonzeroes();
|
||||
|
||||
// duneC
|
||||
std::vector<unsigned int> Ccols;
|
||||
std::vector<double> Cvals;
|
||||
Ccols.reserve(BnumBlocks);
|
||||
Cvals.reserve(BnumBlocks * numEq * numWellEq);
|
||||
for (auto rowC = duneC_.begin(); rowC != duneC_.end(); ++rowC) {
|
||||
for (auto colC = rowC->begin(), endC = rowC->end(); colC != endC; ++colC) {
|
||||
Ccols.emplace_back(colC.index());
|
||||
for (int i = 0; i < numWellEq; ++i) {
|
||||
for (int j = 0; j < numEq; ++j) {
|
||||
Cvals.emplace_back((*colC)[i][j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// duneD
|
||||
Dune::UMFPack<DiagMatWell> umfpackMatrix(duneD_, 0);
|
||||
double *Dvals = umfpackMatrix.getInternalMatrix().getValues();
|
||||
int *Dcols = umfpackMatrix.getInternalMatrix().getColStart();
|
||||
int *Drows = umfpackMatrix.getInternalMatrix().getRowIndex();
|
||||
|
||||
// duneB
|
||||
std::vector<unsigned int> Bcols;
|
||||
std::vector<unsigned int> Brows;
|
||||
std::vector<double> Bvals;
|
||||
Bcols.reserve(BnumBlocks);
|
||||
Brows.reserve(Mb+1);
|
||||
Bvals.reserve(BnumBlocks * numEq * numWellEq);
|
||||
Brows.emplace_back(0);
|
||||
unsigned int sumBlocks = 0;
|
||||
for (auto rowB = duneB_.begin(); rowB != duneB_.end(); ++rowB) {
|
||||
int sizeRow = 0;
|
||||
for (auto colB = rowB->begin(), endB = rowB->end(); colB != endB; ++colB) {
|
||||
Bcols.emplace_back(colB.index());
|
||||
for (int i = 0; i < numWellEq; ++i) {
|
||||
for (int j = 0; j < numEq; ++j) {
|
||||
Bvals.emplace_back((*colB)[i][j]);
|
||||
}
|
||||
}
|
||||
sizeRow++;
|
||||
}
|
||||
sumBlocks += sizeRow;
|
||||
Brows.emplace_back(sumBlocks);
|
||||
}
|
||||
|
||||
wellContribs.addMultisegmentWellContribution(numEq, numWellEq, Nb, Mb, BnumBlocks, Bvals.data(), Bcols.data(), Brows.data(), DnumBlocks, Dvals, Dcols, Drows, Cvals.data());
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
template <typename TypeTag>
|
||||
|
Loading…
Reference in New Issue
Block a user