Fixed errors

Jose Eduardo Bueno 2020-09-24 18:07:10 -03:00
parent 1ba81cece2
commit ab57d5ecd2
6 changed files with 30 additions and 29 deletions

View File

@@ -63,22 +63,23 @@ public:
B
};
unsigned int dim; // number of columns in blocks in B and C, equal to StandardWell::numEq
unsigned int dim_wells; // number of rows in blocks in B and C, equal to StandardWell::numStaticWellEq
#if HAVE_OPENCL
std::vector<double> h_Cnnzs_ocl, h_Dnnzs_ocl, h_Bnnzs_ocl;
std::vector<int> h_Ccols_ocl, h_Bcols_ocl;
std::vector<unsigned int> h_val_pointers_ocl;
std::vector<double> h_x_ocl, h_y_ocl;
#endif
private:
unsigned int dim; // number of columns in blocks in B and C, equal to StandardWell::numEq
unsigned int dim_wells; // number of rows in blocks in B and C, equal to StandardWell::numStaticWellEq
unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
unsigned int N; // number of rows (not blockrows) in vectors x and y
std::vector<MultisegmentWellContribution*> multisegments;
int *toOrder = nullptr;
bool reorder = false;
#endif
private:
unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
unsigned int N; // number of rows (not blockrows) in vectors x and y
std::vector<MultisegmentWellContribution*> multisegments;
bool opencl_gpu = false;
bool cuda_gpu = false;
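
A minimal sketch (not part of this commit) of the flat block layout that the dim/dim_wells comments above describe, consistent with the indexing Cnnzs[bb*dim*dim_wells + j*dim + c] that appears further down in this diff; the helper name is hypothetical:

#include <cstddef>
#include <vector>

// Each block of B and C is dim_wells rows by dim columns, stored contiguously
// and row-major, so block bb, row j, column c sits at this flat offset.
inline double blockEntry(const std::vector<double>& nnzs,
                         std::size_t bb, unsigned int j, unsigned int c,
                         unsigned int dim, unsigned int dim_wells)
{
    return nnzs[bb * dim * dim_wells + j * dim + c];
}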

View File

@@ -24,19 +24,18 @@
#include <dune/common/timer.hh>
#include <opm/simulators/linalg/bda/WellContributionsOCLContainer.hpp>
#include<iostream>
namespace bda
{
using Opm::OpmLog;
using Dune::Timer;
void WellContributionsOCLContainer::initBuffers(WellContributions &wellContribs)
{
void WellContributionsOCLContainer::init(Opm::WellContributions &wellContribs, int Nb_){
Nb = Nb_;
dim = wellContribs.dim;
dim_wells = wellContribs.dim_wells;
num_std_wells = wellContribs.h_val_pointers_ocl.size() - 1;
toOrder.insert(toOrder.end(), wellContribs.toOrder, wellContribs.toOrder + wellContribs.h_Ccols_ocl.size());
s.Cnnzs = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * wellContribs.h_Cnnzs_ocl.size());
s.Dnnzs = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * wellContribs.h_Dnnzs_ocl.size());
@@ -44,10 +43,12 @@ namespace bda
s.Ccols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * wellContribs.h_Ccols_ocl.size());
s.Bcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * wellContribs.h_Bcols_ocl.size());
s.val_pointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * wellContribs.h_val_pointers_ocl.size());
s.toOrder = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * toOrder.size());
s.toOrder = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * Nb);
}
void WellcontributionsOCLContainer::copy_to_gpu(WellContributions &wellContribs){
void WellContributionsOCLContainer::copy_to_gpu(Opm::WellContributions &wellContribs){
toOrder.insert(toOrder.end(), wellContribs.toOrder, wellContribs.toOrder + Nb);
cl::Event event;
queue->enqueueWriteBuffer(s.Cnnzs, CL_TRUE, 0, sizeof(double) * wellContribs.h_Cnnzs_ocl.size(), wellContribs.h_Cnnzs_ocl.data());
queue->enqueueWriteBuffer(s.Dnnzs, CL_TRUE, 0, sizeof(double) * wellContribs.h_Dnnzs_ocl.size(), wellContribs.h_Dnnzs_ocl.data());
@@ -59,7 +60,7 @@ namespace bda
event.wait();
}
void WellcontributionsOCLContainer::update_on_gpu(WellContributions &wellContribs){
void WellContributionsOCLContainer::update_on_gpu(Opm::WellContributions &wellContribs){
cl::Event event;
queue->enqueueWriteBuffer(s.Cnnzs, CL_TRUE, 0, sizeof(double) * wellContribs.h_Cnnzs_ocl.size(), wellContribs.h_Cnnzs_ocl.data());
queue->enqueueWriteBuffer(s.Dnnzs, CL_TRUE, 0, sizeof(double) * wellContribs.h_Dnnzs_ocl.size(), wellContribs.h_Dnnzs_ocl.data());
@@ -90,7 +91,7 @@ namespace bda
cl::Event event;
event = (*stdwell_apply)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)),
s.Cnnzs, s.Dnnzs, s.Bnnzs, s.Ccols, s.Bcols, s.toOrder,x, y, dim, dim_wells, s.val_pointers,
s.Cnnzs, s.Dnnzs, s.Bnnzs, s.Ccols, s.Bcols, x, y, s.toOrder, dim, dim_wells, s.val_pointers,
cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
}
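
To situate the renamed entry points, here is a hedged sketch (not from the diff) of how the solver backend is expected to drive this container after the change, using the methods declared in the header below; the wrapper function is illustrative only, and the setKernel()/setKernelParameters() calls plus error handling are omitted:

#include <opm/simulators/linalg/bda/WellContributionsOCLContainer.hpp>

// Illustrative only: context, queue, wellContribs, Nb, d_pw and d_v are assumed
// to be set up by the caller, as in openclSolverBackend. The backend also calls
// wellContribs.setReordering(toOrder, true) beforehand so wellContribs.toOrder is valid.
void applyWellsSketch(cl::Context& context, cl::CommandQueue& queue,
                      Opm::WellContributions& wellContribs, int Nb,
                      cl::Buffer& d_pw, cl::Buffer& d_v)
{
    bda::WellContributionsOCLContainer wcontainer;
    wcontainer.setOpenCLContext(&context);
    wcontainer.setOpenCLQueue(&queue);
    wcontainer.init(wellContribs, Nb);     // size the cl::Buffers; s.toOrder now holds Nb ints
    wcontainer.copy_to_gpu(wellContribs);  // upload the values and the toOrder map
    wcontainer.apply(d_pw, d_v);           // add the standard-well contributions to d_v
}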

View File

@@ -31,6 +31,7 @@ namespace bda
unsigned int dim, dim_wells;
unsigned int num_std_wells = 0;
unsigned int num_ms_wells = 0; // number of MultisegmentWells in this object, must equal multisegments.size()
int Nb;
std::vector<int> toOrder;
typedef struct {
@@ -50,16 +51,15 @@ namespace bda
void applyStdWells(cl::Buffer& x, cl::Buffer& y);
public:
WellContributionsOCLContainer();
~WellContributionsOCLContainer();
WellContributionsOCLContainer() {};
~WellContributionsOCLContainer() {};
void apply(cl::Buffer& x, cl::Buffer& y);
void initBuffers(WellContributions &wellContribs);
void copy_to_gpu(WellContributions &wellContribs);
void update_on_gpu(WellContributions &wellContribs);
void init(Opm::WellContributions &wellContribs, int Nb);
void copy_to_gpu(Opm::WellContributions &wellContribs);
void update_on_gpu(Opm::WellContributions &wellContribs);
void setOpenCLContext(cl::Context *context);
void setOpenCLQueue(cl::CommandQueue *queue);
void setKernelParameters(const unsigned int work_group_size, const unsigned int total_work_items, const unsigned int lmem_per_work_group);
void setKernel(cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&,
cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&,
const unsigned int, const unsigned int, cl::Buffer&,

View File

@@ -436,7 +436,7 @@ namespace bda
for (unsigned int j = 0; j < dim_wells; ++j){
temp += Cnnzs[bb*dim*dim_wells + j*dim + c]*z2[j];
}
colIdx = toOrder[Ccols[bb]];
int colIdx = toOrder[Ccols[bb]];
y[colIdx*dim + c] -= temp;
}
}
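
For context, the hunk above declares colIdx inside the loop body; the surrounding loop scatters one block of C, transposed and multiplied with z2, into the reordered result vector. A hedged sketch of that operation in isolation, reusing the names from the hunk (the surrounding declarations are assumed):

// Sketch, not part of the commit: for one block bb of C this computes
// y -= C_bb^T * z2, written to the block row that toOrder maps Ccols[bb] to.
for (unsigned int c = 0; c < dim; ++c) {
    double temp = 0.0;
    for (unsigned int j = 0; j < dim_wells; ++j) {
        temp += Cnnzs[bb * dim * dim_wells + j * dim + c] * z2[j];   // (C_bb^T z2)[c]
    }
    int colIdx = toOrder[Ccols[bb]];   // translate the natural block index
    y[colIdx * dim + c] -= temp;       // subtract into the reordered vector
}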

View File

@@ -236,12 +236,12 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(BdaResult& res) {
// v = A * pw
t_spmv.start();
wcontainer->apply(d_pw, d_v);
spmv_blocked_w(d_Avals, d_Acols, d_Arows, d_pw, d_v);
t_spmv.stop();
// apply wellContributions
t_well.start();
stdwell_w(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_pw, d_v, d_val_pointers);
wcontainer->apply(d_pw, d_v);
t_well.stop();
t_rest.start();
@@ -496,7 +496,7 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
d_Acols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * nnzb);
d_Arows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Nb + 1));
wcontainer->initBuffers(wellContribs);
wcontainer->init(wellContribs, Nb);
// queue.enqueueNDRangeKernel() is a blocking/synchronous call, at least for NVIDIA
// cl::make_kernel<> myKernel(); myKernel(args, arg1, arg2); is also blocking
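
A hedged sketch (not from the diff) of the invocation pattern these comments refer to, with a hypothetical kernel name; program, queue, total_work_items, work_group_size, d_x and N are assumed to exist as in this backend. The functor call enqueues the kernel and returns a cl::Event, so the extra wait is harmless even where the enqueue already behaves synchronously:

cl::make_kernel<cl::Buffer&, const unsigned int> myKernel(program, "example_kernel");
cl::Event event = myKernel(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)),
                           d_x, N);
event.wait();   // keeps the timing in this backend well defined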
@@ -566,6 +566,7 @@ void openclSolverBackend<block_size>::copy_system_to_gpu(WellContributions &well
queue->enqueueFillBuffer(d_x, 0, 0, sizeof(double) * N, nullptr, &event);
event.wait();
wellContribs.setReordering(toOrder, true);
wcontainer->copy_to_gpu(wellContribs);
if (verbosity > 2) {

View File

@@ -129,8 +129,6 @@ private:
/// \param[out] b output vector
void spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b);
void stdwell_w(cl::Buffer Cnnzs, cl::Buffer Dnnzs, cl::Buffer Bnnzs, cl::Buffer Ccols, cl::Buffer Bcols, cl::Buffer x, cl::Buffer y, cl::Buffer val_pointers);
/// Solve linear system using ilu0-bicgstab
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
@@ -170,7 +168,7 @@ private:
/// Solve linear system
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
/// \param[inout] res summary of solver result
void solve_system(WellContributions &wellContribs, BdaResult &res);
void solve_system(BdaResult &res);
public: