Merge pull request #2821 from ducbueno/add-mswells

Reintroduced multisegment wells to OpenCL backend
Markus Blatt 2020-10-01 21:52:25 +02:00 committed by GitHub
commit e8c030be17
5 changed files with 62 additions and 45 deletions

opm/simulators/linalg/bda/WellContributions.cpp

@@ -43,13 +43,13 @@ WellContributions::WellContributions(std::string gpu_mode){
 WellContributions::~WellContributions()
 {
-#if HAVE_CUDA
     // delete MultisegmentWellContributions
     for (auto ms : multisegments) {
         delete ms;
     }
     multisegments.clear();
+#if HAVE_CUDA
     if(cuda_gpu){
         freeCudaMemory(); // should come before 'delete[] h_x'
     }
@@ -147,15 +147,4 @@ void WellContributions::addMultisegmentWellContribution(unsigned int dim_, unsig
     ++num_ms_wells;
 }
-
-void WellContributions::setReordering(int *toOrder_, bool reorder_)
-{
-    this->toOrder = toOrder_;
-    this->reorder = reorder_;
-
-    for (auto& ms : multisegments) {
-        ms->setReordering(toOrder_, reorder_);
-    }
-}
-
 } //namespace Opm
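The destructor hunk above moves the cleanup of the MultisegmentWellContribution pointers out of the #if HAVE_CUDA guard: the wells are owned by WellContributions in every GPU build, so an OpenCL-only build must free them too. A minimal sketch of the pattern, with Well and Contributions as hypothetical stand-ins for the real classes:

    #include <vector>

    struct Well {};  // stand-in for Opm::MultisegmentWellContribution

    class Contributions {
        std::vector<Well*> wells;  // raw owning pointers, as in WellContributions
    public:
        ~Contributions() {
            for (auto* w : wells) {  // always delete, independent of the backend
                delete w;
            }
            wells.clear();
    #if HAVE_CUDA
            // only the CUDA-specific buffers stay behind the compile-time guard
    #endif
        }
    };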

opm/simulators/linalg/bda/WellContributions.hpp

@@ -72,25 +72,21 @@ public:
     unsigned int dim; // number of columns in blocks in B and C, equal to StandardWell::numEq
     unsigned int dim_wells; // number of rows in blocks in B and C, equal to StandardWell::numStaticWellEq
+    std::vector<MultisegmentWellContribution*> multisegments;

 #if HAVE_OPENCL
     std::vector<double> h_Cnnzs_ocl, h_Dnnzs_ocl, h_Bnnzs_ocl;
     std::vector<int> h_Ccols_ocl, h_Bcols_ocl;
     std::vector<unsigned int> h_val_pointers_ocl;
-    std::vector<double> h_x_ocl, h_y_ocl;
-
-    int *toOrder = nullptr;
-    bool reorder = false;
 #endif

 private:
-    unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
-    unsigned int N; // number of rows (not blockrows) in vectors x and y
-    std::vector<MultisegmentWellContribution*> multisegments;
     bool opencl_gpu = false;
     bool cuda_gpu = false;
+    unsigned int N; // number of rows (not blockrows) in vectors x and y
+    unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()

 #if HAVE_CUDA
     bool allocated = false;
     unsigned int num_blocks = 0; // total number of blocks in all wells
@@ -127,10 +123,6 @@ private:
 #endif

 public:
-    //#if HAVE_OPENCL
-    //    void applyMSWell(cl::Buffer& d_x, cl::Buffer& d_y);
-    //#endif
-
 #if HAVE_CUDA
     /// Set a cudaStream to be used
     /// \param[in] stream the cudaStream that is used to launch the kernel in
@@ -194,12 +186,6 @@ public:
                                          unsigned int DnumBlocks, double *Dvalues,
                                          UMFPackIndex *DcolPointers, UMFPackIndex *DrowIndices,
                                          std::vector<double> &Cvalues);
-
-    /// If the rows of the matrix are reordered, the columnindices of the matrixdata are incorrect
-    /// Those indices need to be mapped via toOrder
-    /// \param[in] toOrder array with mappings
-    /// \param[in] reorder whether the columnindices need to be reordered or not
-    void setReordering(int *toOrder, bool reorder);
 };

 } //namespace Opm
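Making multisegments a public member (while dropping the toOrder/reorder state and setReordering()) is what lets the OpenCL container take the wells over directly, instead of routing reordering through WellContributions. A sketch of that hand-over, with hypothetical minimal types in place of Opm::WellContributions and bda::WellContributionsOCLContainer:

    #include <cstddef>
    #include <utility>
    #include <vector>

    struct MSWell {};  // stand-in for Opm::MultisegmentWellContribution

    struct Producer {                        // role of WellContributions
        std::vector<MSWell*> multisegments;  // public, so it can be adopted
    };

    struct Container {                       // role of WellContributionsOCLContainer
        std::vector<MSWell*> multisegments;
        std::size_t num_ms_wells = 0;

        void adopt(Producer& p) {
            if (!p.multisegments.empty()) {
                // steal the pointers; p.multisegments is left empty, so the
                // producer's destructor has nothing left to delete
                multisegments = std::move(p.multisegments);
                num_ms_wells = multisegments.size();
            }
        }
    };

Because std::move leaves the source vector empty, the unconditional delete loop in ~WellContributions() and the container's own destructor never free the same pointer twice.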

opm/simulators/linalg/bda/WellContributionsOCLContainer.cpp

@@ -24,14 +24,14 @@
 #include <dune/common/timer.hh>
 #include <opm/simulators/linalg/bda/WellContributionsOCLContainer.hpp>
-#include<iostream>

 namespace bda
 {
     using Opm::OpmLog;
     using Dune::Timer;

-    void WellContributionsOCLContainer::init(Opm::WellContributions &wellContribs, int Nb_){
+    void WellContributionsOCLContainer::init(Opm::WellContributions &wellContribs, int N_, int Nb_){
+        N = N_;
         Nb = Nb_;
         dim = wellContribs.dim;
         dim_wells = wellContribs.dim_wells;
@@ -48,9 +48,6 @@ namespace bda
             s.val_pointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size());
             s.toOrder = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Nb);
         }
-        else{
-            num_std_wells = 0;
-        }
     }

     void WellContributionsOCLContainer::reinit(Opm::WellContributions &wellContribs){
@@ -65,10 +62,10 @@ namespace bda
         s.val_pointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size());
     }

-    void WellContributionsOCLContainer::copy_to_gpu(Opm::WellContributions &wellContribs){
-        if(num_std_wells > 0){
-            toOrder.insert(toOrder.end(), wellContribs.toOrder, wellContribs.toOrder + Nb);
+    void WellContributionsOCLContainer::copy_to_gpu(Opm::WellContributions &wellContribs, int *toOrder_){
+        toOrder.insert(toOrder.end(), toOrder_, toOrder_ + Nb);
+        if(num_std_wells > 0){
             cl::Event event;
             std::vector<cl::Event> events(7);
             queue->enqueueWriteBuffer(s.Cnnzs, CL_FALSE, 0, sizeof(double) * wellContribs.h_Cnnzs_ocl.size(), wellContribs.h_Cnnzs_ocl.data(), nullptr, &events[0]);
@@ -80,6 +77,13 @@ namespace bda
             queue->enqueueWriteBuffer(s.toOrder, CL_FALSE, 0, sizeof(int) * toOrder.size(), toOrder.data(), nullptr, &events[6]);
             event.waitForEvents(events);
         }
+
+        if(!wellContribs.multisegments.empty()){
+            multisegments = std::move(wellContribs.multisegments);
+            num_ms_wells = multisegments.size();
+            x_msw.reserve(N);
+            y_msw.reserve(N);
+        }
     }

     void WellContributionsOCLContainer::update_on_gpu(Opm::WellContributions &wellContribs){
@@ -98,6 +102,10 @@ namespace bda
             queue->enqueueWriteBuffer(s.val_pointers, CL_FALSE, 0, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size(), wellContribs.h_val_pointers_ocl.data(), nullptr, &events[5]);
             event.waitForEvents(events);
         }
+
+        if(!wellContribs.multisegments.empty()){
+            multisegments = std::move(wellContribs.multisegments);
+        }
     }

     void WellContributionsOCLContainer::setOpenCLContext(cl::Context *context_){
@@ -127,13 +135,42 @@ namespace bda
                               cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
     }

+    void WellContributionsOCLContainer::applyMSWells(cl::Buffer& x, cl::Buffer& y) {
+        cl::Event event;
+        std::vector<cl::Event> events(2);
+
+        // copy vectors x and y from GPU to CPU
+        queue->enqueueReadBuffer(x, CL_FALSE, 0, sizeof(double) * N, x_msw.data(), nullptr, &events[0]);
+        queue->enqueueReadBuffer(y, CL_FALSE, 0, sizeof(double) * N, y_msw.data(), nullptr, &events[1]);
+        event.waitForEvents(events);
+
+        // actually apply MultisegmentWells
+        for(Opm::MultisegmentWellContribution *well: multisegments){
+            well->setReordering(toOrder.data(), true);
+            well->apply(x_msw.data(), y_msw.data());
+        }
+
+        // copy vector y from CPU to GPU
+        queue->enqueueWriteBuffer(y, CL_FALSE, 0, sizeof(double) * N, y_msw.data(), nullptr, &event);
+        event.wait();
+    }
+
     void WellContributionsOCLContainer::apply(cl::Buffer& x, cl::Buffer& y){
         if(num_std_wells > 0){
             applyStdWells(x, y);
         }
+
+        if(num_ms_wells > 0){
+            applyMSWells(x, y);
+        }
     }
+
+    WellContributionsOCLContainer::~WellContributionsOCLContainer(){
+        toOrder.clear();
+
+        if(num_ms_wells > 0){
+            for (auto ms : multisegments) {
+                delete ms;
+            }
+        }
+    }
 } // end namespace bda
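applyMSWells() is the heart of the reintroduction: there is no OpenCL kernel for multisegment wells yet, so x and y are staged through host memory, each well applies its contribution (roughly y -= C^T * D^-1 * B * x) on the CPU, and the updated y is written back. A condensed, blocking sketch of that round trip; the real code overlaps the two reads with events, the header name assumes the standard OpenCL C++ bindings, and apply_wells_on_host is a hypothetical stand-in for the loop over multisegments:

    #include <cstddef>
    #include <vector>
    #include <CL/cl2.hpp>

    // hypothetical placeholder for calling well->apply(x, y) on each well
    void apply_wells_on_host(std::vector<double>& x, std::vector<double>& y)
    {
        (void)x; (void)y;  // the real loop updates y from x well by well
    }

    void apply_ms_wells(cl::CommandQueue& queue, cl::Buffer& x, cl::Buffer& y,
                        std::size_t N)
    {
        std::vector<double> x_host(N), y_host(N);

        // GPU -> CPU: blocking reads for brevity
        queue.enqueueReadBuffer(x, CL_TRUE, 0, sizeof(double) * N, x_host.data());
        queue.enqueueReadBuffer(y, CL_TRUE, 0, sizeof(double) * N, y_host.data());

        // apply the multisegment well contributions on the CPU
        apply_wells_on_host(x_host, y_host);

        // CPU -> GPU: only y changed, so only y goes back
        queue.enqueueWriteBuffer(y, CL_TRUE, 0, sizeof(double) * N, y_host.data());
    }

One detail worth noting in copy_to_gpu() above: x_msw and y_msw are only reserve()d before their storage is filled through data(); a resize(N) would also make size() reflect the N doubles actually written.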

opm/simulators/linalg/bda/WellContributionsOCLContainer.hpp

@@ -22,18 +22,22 @@
 #include <opm/simulators/linalg/bda/opencl.hpp>
 #include <opm/simulators/linalg/bda/WellContributions.hpp>
+#include <opm/simulators/linalg/bda/MultisegmentWellContribution.hpp>

 namespace bda
 {
     class WellContributionsOCLContainer
     {
     private:
+        int N, Nb;
         unsigned int dim, dim_wells;
         unsigned int num_blocks = 0;
         unsigned int num_std_wells = 0;
+        unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
-        int Nb;
         std::vector<int> toOrder;
+        std::vector<double> x_msw, y_msw;
+        std::vector<Opm::MultisegmentWellContribution*> multisegments;

         typedef struct {
             cl::Buffer Cnnzs, Dnnzs, Bnnzs;
@@ -51,14 +55,16 @@ namespace bda
         void reinit(Opm::WellContributions &wellContribs);

         void applyStdWells(cl::Buffer& x, cl::Buffer& y);
+        void applyMSWells(cl::Buffer& x, cl::Buffer& y);

     public:
         WellContributionsOCLContainer() {};
+        ~WellContributionsOCLContainer();
         WellContributionsOCLContainer(const WellContributionsOCLContainer&) = delete;

         void apply(cl::Buffer& x, cl::Buffer& y);
-        void init(Opm::WellContributions &wellContribs, int Nb);
-        void copy_to_gpu(Opm::WellContributions &wellContribs);
+        void init(Opm::WellContributions &wellContribs, int N, int Nb);
+        void copy_to_gpu(Opm::WellContributions &wellContribs, int *toOrder_);
         void update_on_gpu(Opm::WellContributions &wellContribs);
         void setOpenCLContext(cl::Context *context);
         void setOpenCLQueue(cl::CommandQueue *queue);
opm/simulators/linalg/bda/openclSolverBackend.cpp

@@ -496,7 +496,7 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
     d_Acols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * nnzb);
     d_Arows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Nb + 1));

-    wcontainer->init(wellContribs, Nb);
+    wcontainer->init(wellContribs, N, Nb);

     // queue.enqueueNDRangeKernel() is a blocking/synchronous call, at least for NVIDIA
     // cl::make_kernel<> myKernel(); myKernel(args, arg1, arg2); is also blocking
@@ -566,8 +566,7 @@ void openclSolverBackend<block_size>::copy_system_to_gpu(WellContributions &well
     queue->enqueueFillBuffer(d_x, 0, 0, sizeof(double) * N, nullptr, &event);
     event.wait();

-    wellContribs.setReordering(toOrder, true);
-    wcontainer->copy_to_gpu(wellContribs);
+    wcontainer->copy_to_gpu(wellContribs, toOrder);

    if (verbosity > 2) {
        std::ostringstream out;
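The toOrder array handed to copy_to_gpu() here is the row permutation that the removed WellContributions::setReordering() documented: after the matrix rows are reordered, indices stored by the wells still refer to the original numbering and must be mapped before indexing into the reordered vectors. A one-entry-wide sketch of that lookup; read_entry is illustrative, and the mapping direction is inferred from the removed doc comment:

    #include <vector>

    // map an index through the reordering before reading from the permuted
    // vector; with reorder == false the original numbering is used directly
    double read_entry(const std::vector<double>& x,
                      const std::vector<int>& toOrder,
                      int original_index, bool reorder)
    {
        const int idx = reorder ? toOrder[original_index] : original_index;
        return x[idx];
    }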