mirror of
https://github.com/OPM/opm-simulators.git
synced 2024-11-23 01:36:25 -06:00
Merge pull request #2821 from ducbueno/add-mswells
Reintroduced multisegment wells to OpenCL backend
This commit is contained in:
commit
e8c030be17
@ -43,13 +43,13 @@ WellContributions::WellContributions(std::string gpu_mode){
|
||||
|
||||
WellContributions::~WellContributions()
|
||||
{
|
||||
#if HAVE_CUDA
|
||||
// delete MultisegmentWellContributions
|
||||
for (auto ms : multisegments) {
|
||||
delete ms;
|
||||
}
|
||||
multisegments.clear();
|
||||
|
||||
#if HAVE_CUDA
|
||||
if(cuda_gpu){
|
||||
freeCudaMemory(); // should come before 'delete[] h_x'
|
||||
}
|
||||
@ -147,15 +147,4 @@ void WellContributions::addMultisegmentWellContribution(unsigned int dim_, unsig
|
||||
++num_ms_wells;
|
||||
}
|
||||
|
||||
|
||||
void WellContributions::setReordering(int *toOrder_, bool reorder_)
|
||||
{
|
||||
this->toOrder = toOrder_;
|
||||
this->reorder = reorder_;
|
||||
for (auto& ms : multisegments) {
|
||||
ms->setReordering(toOrder_, reorder_);
|
||||
}
|
||||
}
|
||||
|
||||
} //namespace Opm
|
||||
|
||||
|
@ -72,25 +72,21 @@ public:
|
||||
|
||||
unsigned int dim; // number of columns in blocks in B and C, equal to StandardWell::numEq
|
||||
unsigned int dim_wells; // number of rows in blocks in B and C, equal to StandardWell::numStaticWellEq
|
||||
std::vector<MultisegmentWellContribution*> multisegments;
|
||||
|
||||
#if HAVE_OPENCL
|
||||
std::vector<double> h_Cnnzs_ocl, h_Dnnzs_ocl, h_Bnnzs_ocl;
|
||||
std::vector<int> h_Ccols_ocl, h_Bcols_ocl;
|
||||
std::vector<unsigned int> h_val_pointers_ocl;
|
||||
std::vector<double> h_x_ocl, h_y_ocl;
|
||||
|
||||
int *toOrder = nullptr;
|
||||
bool reorder = false;
|
||||
#endif
|
||||
|
||||
private:
|
||||
unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
|
||||
unsigned int N; // number of rows (not blockrows) in vectors x and y
|
||||
std::vector<MultisegmentWellContribution*> multisegments;
|
||||
|
||||
bool opencl_gpu = false;
|
||||
bool cuda_gpu = false;
|
||||
|
||||
unsigned int N; // number of rows (not blockrows) in vectors x and y
|
||||
unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
|
||||
|
||||
#if HAVE_CUDA
|
||||
bool allocated = false;
|
||||
unsigned int num_blocks = 0; // total number of blocks in all wells
|
||||
@ -127,10 +123,6 @@ private:
|
||||
#endif
|
||||
|
||||
public:
|
||||
//#if HAVE_OPENCL
|
||||
// void applyMSWell(cl::Buffer& d_x, cl::Buffer& d_y);
|
||||
//#endif
|
||||
|
||||
#if HAVE_CUDA
|
||||
/// Set a cudaStream to be used
|
||||
/// \param[in] stream the cudaStream that is used to launch the kernel in
|
||||
@ -194,12 +186,6 @@ public:
|
||||
unsigned int DnumBlocks, double *Dvalues,
|
||||
UMFPackIndex *DcolPointers, UMFPackIndex *DrowIndices,
|
||||
std::vector<double> &Cvalues);
|
||||
|
||||
/// If the rows of the matrix are reordered, the columnindices of the matrixdata are incorrect
|
||||
/// Those indices need to be mapped via toOrder
|
||||
/// \param[in] toOrder array with mappings
|
||||
/// \param[in] reorder whether the columnindices need to be reordered or not
|
||||
void setReordering(int *toOrder, bool reorder);
|
||||
};
|
||||
|
||||
} //namespace Opm
|
||||
|
@ -24,14 +24,14 @@
|
||||
#include <dune/common/timer.hh>
|
||||
|
||||
#include <opm/simulators/linalg/bda/WellContributionsOCLContainer.hpp>
|
||||
#include<iostream>
|
||||
|
||||
namespace bda
|
||||
{
|
||||
using Opm::OpmLog;
|
||||
using Dune::Timer;
|
||||
|
||||
void WellContributionsOCLContainer::init(Opm::WellContributions &wellContribs, int Nb_){
|
||||
void WellContributionsOCLContainer::init(Opm::WellContributions &wellContribs, int N_, int Nb_){
|
||||
N = N_;
|
||||
Nb = Nb_;
|
||||
dim = wellContribs.dim;
|
||||
dim_wells = wellContribs.dim_wells;
|
||||
@ -48,9 +48,6 @@ namespace bda
|
||||
s.val_pointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size());
|
||||
s.toOrder = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Nb);
|
||||
}
|
||||
else{
|
||||
num_std_wells = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void WellContributionsOCLContainer::reinit(Opm::WellContributions &wellContribs){
|
||||
@ -65,10 +62,10 @@ namespace bda
|
||||
s.val_pointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size());
|
||||
}
|
||||
|
||||
void WellContributionsOCLContainer::copy_to_gpu(Opm::WellContributions &wellContribs){
|
||||
if(num_std_wells > 0){
|
||||
toOrder.insert(toOrder.end(), wellContribs.toOrder, wellContribs.toOrder + Nb);
|
||||
void WellContributionsOCLContainer::copy_to_gpu(Opm::WellContributions &wellContribs, int *toOrder_){
|
||||
toOrder.insert(toOrder.end(), toOrder_, toOrder_ + Nb);
|
||||
|
||||
if(num_std_wells > 0){
|
||||
cl::Event event;
|
||||
std::vector<cl::Event> events(7);
|
||||
queue->enqueueWriteBuffer(s.Cnnzs, CL_FALSE, 0, sizeof(double) * wellContribs.h_Cnnzs_ocl.size(), wellContribs.h_Cnnzs_ocl.data(), nullptr, &events[0]);
|
||||
@ -80,6 +77,13 @@ namespace bda
|
||||
queue->enqueueWriteBuffer(s.toOrder, CL_FALSE, 0, sizeof(int) * toOrder.size(), toOrder.data(), nullptr, &events[6]);
|
||||
event.waitForEvents(events);
|
||||
}
|
||||
|
||||
if(!wellContribs.multisegments.empty()){
|
||||
multisegments = std::move(wellContribs.multisegments);
|
||||
num_ms_wells = multisegments.size();
|
||||
x_msw.reserve(N);
|
||||
y_msw.reserve(N);
|
||||
}
|
||||
}
|
||||
|
||||
void WellContributionsOCLContainer::update_on_gpu(Opm::WellContributions &wellContribs){
|
||||
@ -98,6 +102,10 @@ namespace bda
|
||||
queue->enqueueWriteBuffer(s.val_pointers, CL_FALSE, 0, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size(), wellContribs.h_val_pointers_ocl.data(), nullptr, &events[5]);
|
||||
event.waitForEvents(events);
|
||||
}
|
||||
|
||||
if(!wellContribs.multisegments.empty()){
|
||||
multisegments = std::move(wellContribs.multisegments);
|
||||
}
|
||||
}
|
||||
|
||||
void WellContributionsOCLContainer::setOpenCLContext(cl::Context *context_){
|
||||
@ -127,13 +135,42 @@ namespace bda
|
||||
cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
|
||||
}
|
||||
|
||||
|
||||
/// Apply the multisegment wells on the CPU: x and y are read back from the
/// device, every MultisegmentWellContribution is applied to the host copies,
/// and the resulting y is written back to the device.
/// \param[in]    x   device buffer read into the host staging vector x_msw
/// \param[inout] y   device buffer read into y_msw and overwritten with the result
void WellContributionsOCLContainer::applyMSWells(cl::Buffer& x, cl::Buffer& y) {
    // The staging vectors are only reserve()d elsewhere, which leaves their
    // size at 0; accessing N elements through data() is then undefined
    // behaviour. Make sure they really hold N elements before using them.
    const std::size_t num_rows = static_cast<std::size_t>(N);
    if (x_msw.size() != num_rows) {
        x_msw.resize(num_rows);
    }
    if (y_msw.size() != num_rows) {
        y_msw.resize(num_rows);
    }

    cl::Event event;
    std::vector<cl::Event> events(2);

    // copy vectors x and y from GPU to CPU (non-blocking reads, waited on below)
    queue->enqueueReadBuffer(x, CL_FALSE, 0, sizeof(double) * N, x_msw.data(), nullptr, &events[0]);
    queue->enqueueReadBuffer(y, CL_FALSE, 0, sizeof(double) * N, y_msw.data(), nullptr, &events[1]);
    event.waitForEvents(events);

    // actually apply MultisegmentWells
    for (Opm::MultisegmentWellContribution *well : multisegments) {
        // each well gets the reorder mapping before it is applied
        well->setReordering(toOrder.data(), true);
        well->apply(x_msw.data(), y_msw.data());
    }

    // copy vector y from CPU to GPU and wait until the write has finished
    queue->enqueueWriteBuffer(y, CL_FALSE, 0, sizeof(double) * N, y_msw.data(), nullptr, &event);
    event.wait();
}
|
||||
|
||||
/// Apply the well contributions to the device vectors x and y.
/// Standard wells are applied first, then multisegment wells; either step is
/// skipped when the corresponding well count is zero.
/// \param[in]    x   device buffer passed to applyStdWells / applyMSWells
/// \param[inout] y   device buffer passed to applyStdWells / applyMSWells
void WellContributionsOCLContainer::apply(cl::Buffer& x, cl::Buffer& y){
    const bool has_std_wells = (num_std_wells != 0);
    const bool has_ms_wells = (num_ms_wells != 0);

    if (has_std_wells) {
        applyStdWells(x, y);
    }
    if (has_ms_wells) {
        applyMSWells(x, y);
    }
}
|
||||
|
||||
/// Release the MultisegmentWellContribution objects held in 'multisegments'.
WellContributionsOCLContainer::~WellContributionsOCLContainer(){
    // Delete unconditionally instead of checking num_ms_wells: update_on_gpu()
    // can take over pointers without updating that counter, and iterating an
    // empty vector is a no-op anyway.
    for (Opm::MultisegmentWellContribution *well : multisegments) {
        delete well;
    }
    multisegments.clear();
    // toOrder, x_msw and y_msw are std::vectors and clean up after themselves
}
|
||||
} // end namespace bda
|
||||
|
@ -22,18 +22,22 @@
|
||||
|
||||
#include <opm/simulators/linalg/bda/opencl.hpp>
|
||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||
#include <opm/simulators/linalg/bda/MultisegmentWellContribution.hpp>
|
||||
|
||||
namespace bda
|
||||
{
|
||||
class WellContributionsOCLContainer
|
||||
{
|
||||
private:
|
||||
int N, Nb;
|
||||
unsigned int dim, dim_wells;
|
||||
unsigned int num_blocks = 0;
|
||||
unsigned int num_std_wells = 0;
|
||||
unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
|
||||
int Nb;
|
||||
|
||||
std::vector<int> toOrder;
|
||||
std::vector<double> x_msw, y_msw;
|
||||
std::vector<Opm::MultisegmentWellContribution*> multisegments;
|
||||
|
||||
typedef struct {
|
||||
cl::Buffer Cnnzs, Dnnzs, Bnnzs;
|
||||
@ -51,14 +55,16 @@ namespace bda
|
||||
|
||||
void reinit(Opm::WellContributions &wellContribs);
|
||||
void applyStdWells(cl::Buffer& x, cl::Buffer& y);
|
||||
void applyMSWells(cl::Buffer& x, cl::Buffer& y);
|
||||
|
||||
public:
|
||||
WellContributionsOCLContainer() {};
|
||||
~WellContributionsOCLContainer();
|
||||
WellContributionsOCLContainer(const WellContributionsOCLContainer&) = delete;
|
||||
|
||||
void apply(cl::Buffer& x, cl::Buffer& y);
|
||||
void init(Opm::WellContributions &wellContribs, int Nb);
|
||||
void copy_to_gpu(Opm::WellContributions &wellContribs);
|
||||
void init(Opm::WellContributions &wellContribs, int N, int Nb);
|
||||
void copy_to_gpu(Opm::WellContributions &wellContribs, int *toOrder_);
|
||||
void update_on_gpu(Opm::WellContributions &wellContribs);
|
||||
void setOpenCLContext(cl::Context *context);
|
||||
void setOpenCLQueue(cl::CommandQueue *queue);
|
||||
|
@ -496,7 +496,7 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
||||
d_Acols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * nnzb);
|
||||
d_Arows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Nb + 1));
|
||||
|
||||
wcontainer->init(wellContribs, Nb);
|
||||
wcontainer->init(wellContribs, N, Nb);
|
||||
|
||||
// queue.enqueueNDRangeKernel() is a blocking/synchronous call, at least for NVIDIA
|
||||
// cl::make_kernel<> myKernel(); myKernel(args, arg1, arg2); is also blocking
|
||||
@ -566,8 +566,7 @@ void openclSolverBackend<block_size>::copy_system_to_gpu(WellContributions &well
|
||||
queue->enqueueFillBuffer(d_x, 0, 0, sizeof(double) * N, nullptr, &event);
|
||||
event.wait();
|
||||
|
||||
wellContribs.setReordering(toOrder, true);
|
||||
wcontainer->copy_to_gpu(wellContribs);
|
||||
wcontainer->copy_to_gpu(wellContribs, toOrder);
|
||||
|
||||
if (verbosity > 2) {
|
||||
std::ostringstream out;
|
||||
|
Loading…
Reference in New Issue
Block a user