mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Merge pull request #2821 from ducbueno/add-mswells
Reintroduced multisegment wells to OpenCL backend
This commit is contained in:
commit
e8c030be17
@ -43,13 +43,13 @@ WellContributions::WellContributions(std::string gpu_mode){
|
|||||||
|
|
||||||
WellContributions::~WellContributions()
|
WellContributions::~WellContributions()
|
||||||
{
|
{
|
||||||
|
#if HAVE_CUDA
|
||||||
// delete MultisegmentWellContributions
|
// delete MultisegmentWellContributions
|
||||||
for (auto ms : multisegments) {
|
for (auto ms : multisegments) {
|
||||||
delete ms;
|
delete ms;
|
||||||
}
|
}
|
||||||
multisegments.clear();
|
multisegments.clear();
|
||||||
|
|
||||||
#if HAVE_CUDA
|
|
||||||
if(cuda_gpu){
|
if(cuda_gpu){
|
||||||
freeCudaMemory(); // should come before 'delete[] h_x'
|
freeCudaMemory(); // should come before 'delete[] h_x'
|
||||||
}
|
}
|
||||||
@ -147,15 +147,4 @@ void WellContributions::addMultisegmentWellContribution(unsigned int dim_, unsig
|
|||||||
++num_ms_wells;
|
++num_ms_wells;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void WellContributions::setReordering(int *toOrder_, bool reorder_)
|
|
||||||
{
|
|
||||||
this->toOrder = toOrder_;
|
|
||||||
this->reorder = reorder_;
|
|
||||||
for (auto& ms : multisegments) {
|
|
||||||
ms->setReordering(toOrder_, reorder_);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} //namespace Opm
|
} //namespace Opm
|
||||||
|
|
||||||
|
@ -72,25 +72,21 @@ public:
|
|||||||
|
|
||||||
unsigned int dim; // number of columns in blocks in B and C, equal to StandardWell::numEq
|
unsigned int dim; // number of columns in blocks in B and C, equal to StandardWell::numEq
|
||||||
unsigned int dim_wells; // number of rows in blocks in B and C, equal to StandardWell::numStaticWellEq
|
unsigned int dim_wells; // number of rows in blocks in B and C, equal to StandardWell::numStaticWellEq
|
||||||
|
std::vector<MultisegmentWellContribution*> multisegments;
|
||||||
|
|
||||||
#if HAVE_OPENCL
|
#if HAVE_OPENCL
|
||||||
std::vector<double> h_Cnnzs_ocl, h_Dnnzs_ocl, h_Bnnzs_ocl;
|
std::vector<double> h_Cnnzs_ocl, h_Dnnzs_ocl, h_Bnnzs_ocl;
|
||||||
std::vector<int> h_Ccols_ocl, h_Bcols_ocl;
|
std::vector<int> h_Ccols_ocl, h_Bcols_ocl;
|
||||||
std::vector<unsigned int> h_val_pointers_ocl;
|
std::vector<unsigned int> h_val_pointers_ocl;
|
||||||
std::vector<double> h_x_ocl, h_y_ocl;
|
|
||||||
|
|
||||||
int *toOrder = nullptr;
|
|
||||||
bool reorder = false;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
private:
|
private:
|
||||||
unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
|
|
||||||
unsigned int N; // number of rows (not blockrows) in vectors x and y
|
|
||||||
std::vector<MultisegmentWellContribution*> multisegments;
|
|
||||||
|
|
||||||
bool opencl_gpu = false;
|
bool opencl_gpu = false;
|
||||||
bool cuda_gpu = false;
|
bool cuda_gpu = false;
|
||||||
|
|
||||||
|
unsigned int N; // number of rows (not blockrows) in vectors x and y
|
||||||
|
unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
|
||||||
|
|
||||||
#if HAVE_CUDA
|
#if HAVE_CUDA
|
||||||
bool allocated = false;
|
bool allocated = false;
|
||||||
unsigned int num_blocks = 0; // total number of blocks in all wells
|
unsigned int num_blocks = 0; // total number of blocks in all wells
|
||||||
@ -127,10 +123,6 @@ private:
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
public:
|
public:
|
||||||
//#if HAVE_OPENCL
|
|
||||||
// void applyMSWell(cl::Buffer& d_x, cl::Buffer& d_y);
|
|
||||||
//#endif
|
|
||||||
|
|
||||||
#if HAVE_CUDA
|
#if HAVE_CUDA
|
||||||
/// Set a cudaStream to be used
|
/// Set a cudaStream to be used
|
||||||
/// \param[in] stream the cudaStream that is used to launch the kernel in
|
/// \param[in] stream the cudaStream that is used to launch the kernel in
|
||||||
@ -194,12 +186,6 @@ public:
|
|||||||
unsigned int DnumBlocks, double *Dvalues,
|
unsigned int DnumBlocks, double *Dvalues,
|
||||||
UMFPackIndex *DcolPointers, UMFPackIndex *DrowIndices,
|
UMFPackIndex *DcolPointers, UMFPackIndex *DrowIndices,
|
||||||
std::vector<double> &Cvalues);
|
std::vector<double> &Cvalues);
|
||||||
|
|
||||||
/// If the rows of the matrix are reordered, the columnindices of the matrixdata are incorrect
|
|
||||||
/// Those indices need to be mapped via toOrder
|
|
||||||
/// \param[in] toOrder array with mappings
|
|
||||||
/// \param[in] reorder whether the columnindices need to be reordered or not
|
|
||||||
void setReordering(int *toOrder, bool reorder);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} //namespace Opm
|
} //namespace Opm
|
||||||
|
@ -24,14 +24,14 @@
|
|||||||
#include <dune/common/timer.hh>
|
#include <dune/common/timer.hh>
|
||||||
|
|
||||||
#include <opm/simulators/linalg/bda/WellContributionsOCLContainer.hpp>
|
#include <opm/simulators/linalg/bda/WellContributionsOCLContainer.hpp>
|
||||||
#include<iostream>
|
|
||||||
|
|
||||||
namespace bda
|
namespace bda
|
||||||
{
|
{
|
||||||
using Opm::OpmLog;
|
using Opm::OpmLog;
|
||||||
using Dune::Timer;
|
using Dune::Timer;
|
||||||
|
|
||||||
void WellContributionsOCLContainer::init(Opm::WellContributions &wellContribs, int Nb_){
|
void WellContributionsOCLContainer::init(Opm::WellContributions &wellContribs, int N_, int Nb_){
|
||||||
|
N = N_;
|
||||||
Nb = Nb_;
|
Nb = Nb_;
|
||||||
dim = wellContribs.dim;
|
dim = wellContribs.dim;
|
||||||
dim_wells = wellContribs.dim_wells;
|
dim_wells = wellContribs.dim_wells;
|
||||||
@ -48,9 +48,6 @@ namespace bda
|
|||||||
s.val_pointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size());
|
s.val_pointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size());
|
||||||
s.toOrder = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Nb);
|
s.toOrder = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Nb);
|
||||||
}
|
}
|
||||||
else{
|
|
||||||
num_std_wells = 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void WellContributionsOCLContainer::reinit(Opm::WellContributions &wellContribs){
|
void WellContributionsOCLContainer::reinit(Opm::WellContributions &wellContribs){
|
||||||
@ -65,10 +62,10 @@ namespace bda
|
|||||||
s.val_pointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size());
|
s.val_pointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
void WellContributionsOCLContainer::copy_to_gpu(Opm::WellContributions &wellContribs){
|
void WellContributionsOCLContainer::copy_to_gpu(Opm::WellContributions &wellContribs, int *toOrder_){
|
||||||
if(num_std_wells > 0){
|
toOrder.insert(toOrder.end(), toOrder_, toOrder_ + Nb);
|
||||||
toOrder.insert(toOrder.end(), wellContribs.toOrder, wellContribs.toOrder + Nb);
|
|
||||||
|
|
||||||
|
if(num_std_wells > 0){
|
||||||
cl::Event event;
|
cl::Event event;
|
||||||
std::vector<cl::Event> events(7);
|
std::vector<cl::Event> events(7);
|
||||||
queue->enqueueWriteBuffer(s.Cnnzs, CL_FALSE, 0, sizeof(double) * wellContribs.h_Cnnzs_ocl.size(), wellContribs.h_Cnnzs_ocl.data(), nullptr, &events[0]);
|
queue->enqueueWriteBuffer(s.Cnnzs, CL_FALSE, 0, sizeof(double) * wellContribs.h_Cnnzs_ocl.size(), wellContribs.h_Cnnzs_ocl.data(), nullptr, &events[0]);
|
||||||
@ -80,6 +77,13 @@ namespace bda
|
|||||||
queue->enqueueWriteBuffer(s.toOrder, CL_FALSE, 0, sizeof(int) * toOrder.size(), toOrder.data(), nullptr, &events[6]);
|
queue->enqueueWriteBuffer(s.toOrder, CL_FALSE, 0, sizeof(int) * toOrder.size(), toOrder.data(), nullptr, &events[6]);
|
||||||
event.waitForEvents(events);
|
event.waitForEvents(events);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(!wellContribs.multisegments.empty()){
|
||||||
|
multisegments = std::move(wellContribs.multisegments);
|
||||||
|
num_ms_wells = multisegments.size();
|
||||||
|
x_msw.reserve(N);
|
||||||
|
y_msw.reserve(N);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void WellContributionsOCLContainer::update_on_gpu(Opm::WellContributions &wellContribs){
|
void WellContributionsOCLContainer::update_on_gpu(Opm::WellContributions &wellContribs){
|
||||||
@ -98,6 +102,10 @@ namespace bda
|
|||||||
queue->enqueueWriteBuffer(s.val_pointers, CL_FALSE, 0, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size(), wellContribs.h_val_pointers_ocl.data(), nullptr, &events[5]);
|
queue->enqueueWriteBuffer(s.val_pointers, CL_FALSE, 0, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size(), wellContribs.h_val_pointers_ocl.data(), nullptr, &events[5]);
|
||||||
event.waitForEvents(events);
|
event.waitForEvents(events);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(!wellContribs.multisegments.empty()){
|
||||||
|
multisegments = std::move(wellContribs.multisegments);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void WellContributionsOCLContainer::setOpenCLContext(cl::Context *context_){
|
void WellContributionsOCLContainer::setOpenCLContext(cl::Context *context_){
|
||||||
@ -127,13 +135,42 @@ namespace bda
|
|||||||
cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
|
cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void WellContributionsOCLContainer::applyMSWells(cl::Buffer& x, cl::Buffer& y) {
|
||||||
|
cl::Event event;
|
||||||
|
std::vector<cl::Event> events(2);
|
||||||
|
|
||||||
|
// copy vectors x and y from GPU to CPU
|
||||||
|
queue->enqueueReadBuffer(x, CL_FALSE, 0, sizeof(double) * N, x_msw.data(), nullptr, &events[0]);
|
||||||
|
queue->enqueueReadBuffer(y, CL_FALSE, 0, sizeof(double) * N, y_msw.data(), nullptr, &events[1]);
|
||||||
|
event.waitForEvents(events);
|
||||||
|
|
||||||
|
// actually apply MultisegmentWells
|
||||||
|
for(Opm::MultisegmentWellContribution *well: multisegments){
|
||||||
|
well->setReordering(toOrder.data(), true);
|
||||||
|
well->apply(x_msw.data(), y_msw.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
// copy vector y from CPU to GPU
|
||||||
|
queue->enqueueWriteBuffer(y, CL_FALSE, 0, sizeof(double) * N, y_msw.data(), nullptr, &event);
|
||||||
|
event.wait();
|
||||||
|
}
|
||||||
|
|
||||||
void WellContributionsOCLContainer::apply(cl::Buffer& x, cl::Buffer& y){
|
void WellContributionsOCLContainer::apply(cl::Buffer& x, cl::Buffer& y){
|
||||||
if(num_std_wells > 0){
|
if(num_std_wells > 0){
|
||||||
applyStdWells(x, y);
|
applyStdWells(x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(num_ms_wells > 0){
|
||||||
|
applyMSWells(x, y);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
WellContributionsOCLContainer::~WellContributionsOCLContainer(){
|
WellContributionsOCLContainer::~WellContributionsOCLContainer(){
|
||||||
toOrder.clear();
|
if(num_ms_wells > 0){
|
||||||
|
for (auto ms : multisegments) {
|
||||||
|
delete ms;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} // end namespace bda
|
} // end namespace bda
|
||||||
|
@ -22,18 +22,22 @@
|
|||||||
|
|
||||||
#include <opm/simulators/linalg/bda/opencl.hpp>
|
#include <opm/simulators/linalg/bda/opencl.hpp>
|
||||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||||
|
#include <opm/simulators/linalg/bda/MultisegmentWellContribution.hpp>
|
||||||
|
|
||||||
namespace bda
|
namespace bda
|
||||||
{
|
{
|
||||||
class WellContributionsOCLContainer
|
class WellContributionsOCLContainer
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
|
int N, Nb;
|
||||||
unsigned int dim, dim_wells;
|
unsigned int dim, dim_wells;
|
||||||
unsigned int num_blocks = 0;
|
unsigned int num_blocks = 0;
|
||||||
unsigned int num_std_wells = 0;
|
unsigned int num_std_wells = 0;
|
||||||
unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
|
unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
|
||||||
int Nb;
|
|
||||||
std::vector<int> toOrder;
|
std::vector<int> toOrder;
|
||||||
|
std::vector<double> x_msw, y_msw;
|
||||||
|
std::vector<Opm::MultisegmentWellContribution*> multisegments;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
cl::Buffer Cnnzs, Dnnzs, Bnnzs;
|
cl::Buffer Cnnzs, Dnnzs, Bnnzs;
|
||||||
@ -51,14 +55,16 @@ namespace bda
|
|||||||
|
|
||||||
void reinit(Opm::WellContributions &wellContribs);
|
void reinit(Opm::WellContributions &wellContribs);
|
||||||
void applyStdWells(cl::Buffer& x, cl::Buffer& y);
|
void applyStdWells(cl::Buffer& x, cl::Buffer& y);
|
||||||
|
void applyMSWells(cl::Buffer& x, cl::Buffer& y);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
WellContributionsOCLContainer() {};
|
WellContributionsOCLContainer() {};
|
||||||
~WellContributionsOCLContainer();
|
~WellContributionsOCLContainer();
|
||||||
|
WellContributionsOCLContainer(const WellContributionsOCLContainer&) = delete;
|
||||||
|
|
||||||
void apply(cl::Buffer& x, cl::Buffer& y);
|
void apply(cl::Buffer& x, cl::Buffer& y);
|
||||||
void init(Opm::WellContributions &wellContribs, int Nb);
|
void init(Opm::WellContributions &wellContribs, int N, int Nb);
|
||||||
void copy_to_gpu(Opm::WellContributions &wellContribs);
|
void copy_to_gpu(Opm::WellContributions &wellContribs, int *toOrder_);
|
||||||
void update_on_gpu(Opm::WellContributions &wellContribs);
|
void update_on_gpu(Opm::WellContributions &wellContribs);
|
||||||
void setOpenCLContext(cl::Context *context);
|
void setOpenCLContext(cl::Context *context);
|
||||||
void setOpenCLQueue(cl::CommandQueue *queue);
|
void setOpenCLQueue(cl::CommandQueue *queue);
|
||||||
|
@ -496,7 +496,7 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
|||||||
d_Acols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * nnzb);
|
d_Acols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * nnzb);
|
||||||
d_Arows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Nb + 1));
|
d_Arows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Nb + 1));
|
||||||
|
|
||||||
wcontainer->init(wellContribs, Nb);
|
wcontainer->init(wellContribs, N, Nb);
|
||||||
|
|
||||||
// queue.enqueueNDRangeKernel() is a blocking/synchronous call, at least for NVIDIA
|
// queue.enqueueNDRangeKernel() is a blocking/synchronous call, at least for NVIDIA
|
||||||
// cl::make_kernel<> myKernel(); myKernel(args, arg1, arg2); is also blocking
|
// cl::make_kernel<> myKernel(); myKernel(args, arg1, arg2); is also blocking
|
||||||
@ -566,8 +566,7 @@ void openclSolverBackend<block_size>::copy_system_to_gpu(WellContributions &well
|
|||||||
queue->enqueueFillBuffer(d_x, 0, 0, sizeof(double) * N, nullptr, &event);
|
queue->enqueueFillBuffer(d_x, 0, 0, sizeof(double) * N, nullptr, &event);
|
||||||
event.wait();
|
event.wait();
|
||||||
|
|
||||||
wellContribs.setReordering(toOrder, true);
|
wcontainer->copy_to_gpu(wellContribs, toOrder);
|
||||||
wcontainer->copy_to_gpu(wellContribs);
|
|
||||||
|
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
|
Loading…
Reference in New Issue
Block a user