Implemented reinit; reduced sync events

This commit is contained in:
Jose Eduardo Bueno 2020-09-28 17:27:12 -03:00
parent bedf3d16cf
commit 268bcd95e8
3 changed files with 36 additions and 50 deletions

View File

@ -56,30 +56,6 @@ WellContributions::~WellContributions()
#endif
}
/*
#if HAVE_OPENCL
void WellContributions::applyMSWell(cl::Buffer& d_x, cl::Buffer& d_y) {
// apply MultisegmentWells
if (num_ms_wells > 0) {
h_x_ocl.reserve(N);
h_y_ocl.reserve(N);
// copy vectors x and y from GPU to CPU
queue->enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(double) * N, h_x_ocl.data());
queue->enqueueReadBuffer(d_y, CL_TRUE, 0, sizeof(double) * N, h_y_ocl.data());
// actually apply MultisegmentWells
for (MultisegmentWellContribution *well : multisegments) {
well->apply(h_x_ocl.data(), h_y_ocl.data());
}
// copy vector y from CPU to GPU
queue->enqueueWriteBuffer(d_y, CL_TRUE, 0, sizeof(double) * N, h_y_ocl.data());
}
}
#endif
*/
void WellContributions::addMatrix([[maybe_unused]] MatrixType type, [[maybe_unused]] int *colIndices, [[maybe_unused]] double *values, [[maybe_unused]] unsigned int val_size)
{

View File

@ -37,6 +37,7 @@ namespace bda
dim_wells = wellContribs.dim_wells;
if(!wellContribs.h_val_pointers_ocl.empty()){
num_blocks = wellContribs.h_Ccols_ocl.size();
num_std_wells = wellContribs.h_val_pointers_ocl.size() - 1;
s.Cnnzs = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * wellContribs.h_Cnnzs_ocl.size());
@ -52,43 +53,50 @@ namespace bda
}
}
void WellContributionsOCLContainer::reinit(Opm::WellContributions &wellContribs){
num_blocks = wellContribs.h_Ccols_ocl.size();
num_std_wells = wellContribs.h_val_pointers_ocl.size() - 1;
s.Cnnzs = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * wellContribs.h_Cnnzs_ocl.size());
s.Dnnzs = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * wellContribs.h_Dnnzs_ocl.size());
s.Bnnzs = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * wellContribs.h_Bnnzs_ocl.size());
s.Ccols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * wellContribs.h_Ccols_ocl.size());
s.Bcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * wellContribs.h_Bcols_ocl.size());
s.val_pointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size());
}
void WellContributionsOCLContainer::copy_to_gpu(Opm::WellContributions &wellContribs){
if(num_std_wells > 0){
toOrder.insert(toOrder.end(), wellContribs.toOrder, wellContribs.toOrder + Nb);
cl::Event event;
queue->enqueueWriteBuffer(s.Cnnzs, CL_TRUE, 0, sizeof(double) * wellContribs.h_Cnnzs_ocl.size(), wellContribs.h_Cnnzs_ocl.data(), nullptr, &event);
event.wait();
queue->enqueueWriteBuffer(s.Dnnzs, CL_TRUE, 0, sizeof(double) * wellContribs.h_Dnnzs_ocl.size(), wellContribs.h_Dnnzs_ocl.data(), nullptr, &event);
event.wait();
queue->enqueueWriteBuffer(s.Bnnzs, CL_TRUE, 0, sizeof(double) * wellContribs.h_Bnnzs_ocl.size(), wellContribs.h_Bnnzs_ocl.data(), nullptr, &event);
event.wait();
queue->enqueueWriteBuffer(s.Ccols, CL_TRUE, 0, sizeof(int) * wellContribs.h_Ccols_ocl.size(), wellContribs.h_Ccols_ocl.data(), nullptr, &event);
event.wait();
queue->enqueueWriteBuffer(s.Bcols, CL_TRUE, 0, sizeof(int) * wellContribs.h_Bcols_ocl.size(), wellContribs.h_Bcols_ocl.data(), nullptr, &event);
event.wait();
queue->enqueueWriteBuffer(s.val_pointers, CL_TRUE, 0, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size(), wellContribs.h_val_pointers_ocl.data(), nullptr, &event);
event.wait();
queue->enqueueWriteBuffer(s.toOrder, CL_TRUE, 0, sizeof(int) * toOrder.size(), toOrder.data(), nullptr, &event);
event.wait();
std::vector<cl::Event> events(7);
queue->enqueueWriteBuffer(s.Cnnzs, CL_FALSE, 0, sizeof(double) * wellContribs.h_Cnnzs_ocl.size(), wellContribs.h_Cnnzs_ocl.data(), nullptr, &events[0]);
queue->enqueueWriteBuffer(s.Dnnzs, CL_FALSE, 0, sizeof(double) * wellContribs.h_Dnnzs_ocl.size(), wellContribs.h_Dnnzs_ocl.data(), nullptr, &events[1]);
queue->enqueueWriteBuffer(s.Bnnzs, CL_FALSE, 0, sizeof(double) * wellContribs.h_Bnnzs_ocl.size(), wellContribs.h_Bnnzs_ocl.data(), nullptr, &events[2]);
queue->enqueueWriteBuffer(s.Ccols, CL_FALSE, 0, sizeof(int) * wellContribs.h_Ccols_ocl.size(), wellContribs.h_Ccols_ocl.data(), nullptr, &events[3]);
queue->enqueueWriteBuffer(s.Bcols, CL_FALSE, 0, sizeof(int) * wellContribs.h_Bcols_ocl.size(), wellContribs.h_Bcols_ocl.data(), nullptr, &events[4]);
queue->enqueueWriteBuffer(s.val_pointers, CL_FALSE, 0, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size(), wellContribs.h_val_pointers_ocl.data(), nullptr, &events[5]);
queue->enqueueWriteBuffer(s.toOrder, CL_FALSE, 0, sizeof(int) * toOrder.size(), toOrder.data(), nullptr, &events[6]);
event.waitForEvents(events);
}
}
void WellContributionsOCLContainer::update_on_gpu(Opm::WellContributions &wellContribs){
if(num_std_wells > 0){
if(num_std_wells != wellContribs.h_val_pointers_ocl.size() || num_blocks != wellContribs.h_Ccols_ocl.size()){
reinit(wellContribs);
}
cl::Event event;
queue->enqueueWriteBuffer(s.Cnnzs, CL_TRUE, 0, sizeof(double) * wellContribs.h_Cnnzs_ocl.size(), wellContribs.h_Cnnzs_ocl.data(), nullptr, &event);
event.wait();
queue->enqueueWriteBuffer(s.Dnnzs, CL_TRUE, 0, sizeof(double) * wellContribs.h_Dnnzs_ocl.size(), wellContribs.h_Dnnzs_ocl.data(), nullptr, &event);
event.wait();
queue->enqueueWriteBuffer(s.Bnnzs, CL_TRUE, 0, sizeof(double) * wellContribs.h_Bnnzs_ocl.size(), wellContribs.h_Bnnzs_ocl.data(), nullptr, &event);
event.wait();
queue->enqueueWriteBuffer(s.Ccols, CL_TRUE, 0, sizeof(int) * wellContribs.h_Ccols_ocl.size(), wellContribs.h_Ccols_ocl.data(), nullptr, &event);
event.wait();
queue->enqueueWriteBuffer(s.Bcols, CL_TRUE, 0, sizeof(int) * wellContribs.h_Bcols_ocl.size(), wellContribs.h_Bcols_ocl.data(), nullptr, &event);
event.wait();
queue->enqueueWriteBuffer(s.val_pointers, CL_TRUE, 0, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size(), wellContribs.h_val_pointers_ocl.data(), nullptr, &event);
event.wait();
std::vector<cl::Event> events(6);
queue->enqueueWriteBuffer(s.Cnnzs, CL_FALSE, 0, sizeof(double) * wellContribs.h_Cnnzs_ocl.size(), wellContribs.h_Cnnzs_ocl.data(), nullptr, &events[0]);
queue->enqueueWriteBuffer(s.Dnnzs, CL_FALSE, 0, sizeof(double) * wellContribs.h_Dnnzs_ocl.size(), wellContribs.h_Dnnzs_ocl.data(), nullptr, &events[1]);
queue->enqueueWriteBuffer(s.Bnnzs, CL_FALSE, 0, sizeof(double) * wellContribs.h_Bnnzs_ocl.size(), wellContribs.h_Bnnzs_ocl.data(), nullptr, &events[2]);
queue->enqueueWriteBuffer(s.Ccols, CL_FALSE, 0, sizeof(int) * wellContribs.h_Ccols_ocl.size(), wellContribs.h_Ccols_ocl.data(), nullptr, &events[3]);
queue->enqueueWriteBuffer(s.Bcols, CL_FALSE, 0, sizeof(int) * wellContribs.h_Bcols_ocl.size(), wellContribs.h_Bcols_ocl.data(), nullptr, &events[4]);
queue->enqueueWriteBuffer(s.val_pointers, CL_FALSE, 0, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size(), wellContribs.h_val_pointers_ocl.data(), nullptr, &events[5]);
event.waitForEvents(events);
}
}

View File

@ -29,6 +29,7 @@ namespace bda
{
private:
unsigned int dim, dim_wells;
unsigned int num_blocks = 0;
unsigned int num_std_wells = 0;
unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
int Nb;
@ -48,6 +49,7 @@ namespace bda
const unsigned int, const unsigned int, cl::Buffer&,
cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg> *stdwell_apply;
void reinit(Opm::WellContributions &wellContribs);
void applyStdWells(cl::Buffer& x, cl::Buffer& y);
public: