mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Merge pull request #2762 from ducbueno/opencl-stdwell-clean
Fixed out of resources problem
This commit is contained in:
@@ -102,8 +102,19 @@ WellContributions::~WellContributions()
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if HAVE_OPENCL
|
#if HAVE_OPENCL
|
||||||
|
void WellContributions::setOpenCLContext(cl::Context *context_){
|
||||||
|
this->context = context_;
|
||||||
|
}
|
||||||
|
|
||||||
void WellContributions::init(cl::Context *context){
|
void WellContributions::setOpenCLQueue(cl::CommandQueue *queue_){
|
||||||
|
this->queue = queue_;
|
||||||
|
}
|
||||||
|
|
||||||
|
void WellContributions::setKernel(kernel_type *stdwell_apply_){
|
||||||
|
this->stdwell_apply = stdwell_apply_;
|
||||||
|
}
|
||||||
|
|
||||||
|
void WellContributions::init(){
|
||||||
if(num_std_wells > 0){
|
if(num_std_wells > 0){
|
||||||
d_Cnnzs_ocl = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * num_blocks * dim * dim_wells);
|
d_Cnnzs_ocl = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * num_blocks * dim * dim_wells);
|
||||||
d_Dnnzs_ocl = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * num_std_wells * dim_wells * dim_wells);
|
d_Dnnzs_ocl = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * num_std_wells * dim_wells * dim_wells);
|
||||||
@@ -111,24 +122,17 @@ void WellContributions::init(cl::Context *context){
|
|||||||
d_Ccols_ocl = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
|
d_Ccols_ocl = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
|
||||||
d_Bcols_ocl = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
|
d_Bcols_ocl = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
|
||||||
d_val_pointers_ocl = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * (num_std_wells + 1));
|
d_val_pointers_ocl = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * (num_std_wells + 1));
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void WellContributions::copyDataToGPU(cl::CommandQueue *queue){
|
|
||||||
if(num_std_wells > 0){
|
|
||||||
cl::Event event;
|
|
||||||
|
|
||||||
queue->enqueueWriteBuffer(d_Cnnzs_ocl, CL_TRUE, 0, sizeof(double) * num_blocks * dim * dim_wells, h_Cnnzs_ocl);
|
queue->enqueueWriteBuffer(d_Cnnzs_ocl, CL_TRUE, 0, sizeof(double) * num_blocks * dim * dim_wells, h_Cnnzs_ocl);
|
||||||
queue->enqueueWriteBuffer(d_Dnnzs_ocl, CL_TRUE, 0, sizeof(double) * num_std_wells * dim_wells * dim_wells, h_Dnnzs_ocl);
|
queue->enqueueWriteBuffer(d_Dnnzs_ocl, CL_TRUE, 0, sizeof(double) * num_std_wells * dim_wells * dim_wells, h_Dnnzs_ocl);
|
||||||
queue->enqueueWriteBuffer(d_Bnnzs_ocl, CL_TRUE, 0, sizeof(double) * num_blocks * dim * dim_wells, h_Bnnzs_ocl);
|
queue->enqueueWriteBuffer(d_Bnnzs_ocl, CL_TRUE, 0, sizeof(double) * num_blocks * dim * dim_wells, h_Bnnzs_ocl);
|
||||||
queue->enqueueWriteBuffer(d_Ccols_ocl, CL_TRUE, 0, sizeof(int) * num_blocks, h_Ccols_ocl);
|
queue->enqueueWriteBuffer(d_Ccols_ocl, CL_TRUE, 0, sizeof(int) * num_blocks, h_Ccols_ocl);
|
||||||
queue->enqueueWriteBuffer(d_Bcols_ocl, CL_TRUE, 0, sizeof(int) * num_blocks, h_Bcols_ocl);
|
queue->enqueueWriteBuffer(d_Bcols_ocl, CL_TRUE, 0, sizeof(int) * num_blocks, h_Bcols_ocl);
|
||||||
queue->enqueueWriteBuffer(d_val_pointers_ocl, CL_TRUE, 0, sizeof(unsigned int) * (num_std_wells + 1), val_pointers, nullptr, &event);
|
queue->enqueueWriteBuffer(d_val_pointers_ocl, CL_TRUE, 0, sizeof(unsigned int) * (num_std_wells + 1), val_pointers);
|
||||||
event.wait();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void WellContributions::applyMSWell(cl::CommandQueue *queue, cl::Buffer& d_x, cl::Buffer& d_y) {
|
void WellContributions::applyMSWell(cl::Buffer& d_x, cl::Buffer& d_y) {
|
||||||
// apply MultisegmentWells
|
// apply MultisegmentWells
|
||||||
if (num_ms_wells > 0) {
|
if (num_ms_wells > 0) {
|
||||||
// allocate pinned memory on host if not yet done
|
// allocate pinned memory on host if not yet done
|
||||||
@@ -151,26 +155,25 @@ void WellContributions::applyMSWell(cl::CommandQueue *queue, cl::Buffer& d_x, cl
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void WellContributions::applyStdWell(cl::CommandQueue *queue, cl::Buffer& d_x, cl::Buffer& d_y, kernel_type *kernel){
|
void WellContributions::applyStdWell(cl::Buffer& d_x, cl::Buffer& d_y){
|
||||||
const unsigned int work_group_size = 32;
|
const unsigned int work_group_size = 32;
|
||||||
const unsigned int total_work_items = num_std_wells * work_group_size;
|
const unsigned int total_work_items = num_std_wells * work_group_size;
|
||||||
const unsigned int lmem1 = sizeof(double) * work_group_size;
|
const unsigned int lmem1 = sizeof(double) * work_group_size;
|
||||||
const unsigned int lmem2 = sizeof(double) * dim_wells;
|
const unsigned int lmem2 = sizeof(double) * dim_wells;
|
||||||
|
|
||||||
cl::Event event;
|
cl::Event event;
|
||||||
event = (*kernel)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)),
|
event = (*stdwell_apply)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)),
|
||||||
d_Cnnzs_ocl, d_Dnnzs_ocl, d_Bnnzs_ocl, d_Ccols_ocl, d_Bcols_ocl, d_x, d_y, dim, dim_wells,
|
d_Cnnzs_ocl, d_Dnnzs_ocl, d_Bnnzs_ocl, d_Ccols_ocl, d_Bcols_ocl, d_x, d_y, dim, dim_wells,
|
||||||
d_val_pointers_ocl, cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
|
d_val_pointers_ocl, cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
|
||||||
event.wait();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void WellContributions::apply(cl::CommandQueue *queue, cl::Buffer& d_x, cl::Buffer& d_y, kernel_type *kernel){
|
void WellContributions::apply(cl::Buffer& d_x, cl::Buffer& d_y){
|
||||||
if(num_std_wells > 0){
|
if(num_std_wells > 0){
|
||||||
applyStdWell(queue, d_x, d_y, kernel);
|
applyStdWell(d_x, d_y);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(num_ms_wells > 0){
|
if(num_ms_wells > 0){
|
||||||
applyMSWell(queue, d_x, d_y);
|
applyMSWell(d_x, d_y);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -115,6 +115,9 @@ private:
|
|||||||
cl::Buffer&, cl::Buffer&, cl::Buffer&,
|
cl::Buffer&, cl::Buffer&, cl::Buffer&,
|
||||||
cl::Buffer&, const unsigned int, const unsigned int,
|
cl::Buffer&, const unsigned int, const unsigned int,
|
||||||
cl::Buffer&, cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg> kernel_type;
|
cl::Buffer&, cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg> kernel_type;
|
||||||
|
kernel_type *stdwell_apply;
|
||||||
|
cl::Context *context;
|
||||||
|
cl::CommandQueue *queue;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if HAVE_CUDA
|
#if HAVE_CUDA
|
||||||
@@ -133,8 +136,8 @@ private:
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if HAVE_OPENCL
|
#if HAVE_OPENCL
|
||||||
void applyStdWell(cl::CommandQueue *queue, cl::Buffer& d_x, cl::Buffer& d_y, kernel_type *kernel);
|
void applyStdWell(cl::Buffer& d_x, cl::Buffer& d_y);
|
||||||
void applyMSWell(cl::CommandQueue *queue, cl::Buffer& d_x, cl::Buffer& d_y);
|
void applyMSWell(cl::Buffer& d_x, cl::Buffer& d_y);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
public:
|
public:
|
||||||
@@ -155,9 +158,11 @@ public:
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if HAVE_OPENCL
|
#if HAVE_OPENCL
|
||||||
void init(cl::Context *context);
|
void init();
|
||||||
void copyDataToGPU(cl::CommandQueue *queue);
|
void apply(cl::Buffer& x, cl::Buffer& y);
|
||||||
void apply(cl::CommandQueue *queue, cl::Buffer& x, cl::Buffer& y, kernel_type *kernel);
|
void setOpenCLContext(cl::Context *context);
|
||||||
|
void setOpenCLQueue(cl::CommandQueue *queue);
|
||||||
|
void setKernel(kernel_type *stdwell_apply);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/// Create a new WellContributions
|
/// Create a new WellContributions
|
||||||
|
|||||||
@@ -242,7 +242,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
|||||||
|
|
||||||
// apply wellContributions
|
// apply wellContributions
|
||||||
t_well.start();
|
t_well.start();
|
||||||
wellContribs.apply(queue.get(), d_pw, d_v, add_well_contributions_k.get());
|
wellContribs.apply(d_pw, d_v);
|
||||||
t_well.stop();
|
t_well.stop();
|
||||||
|
|
||||||
t_rest.start();
|
t_rest.start();
|
||||||
@@ -271,7 +271,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
|||||||
|
|
||||||
// apply wellContributions
|
// apply wellContributions
|
||||||
t_well.start();
|
t_well.start();
|
||||||
wellContribs.apply(queue.get(), d_s, d_t, add_well_contributions_k.get());
|
wellContribs.apply(d_s, d_t);
|
||||||
t_well.stop();
|
t_well.stop();
|
||||||
|
|
||||||
t_rest.start();
|
t_rest.start();
|
||||||
@@ -319,7 +319,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
|||||||
|
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, WellContributions& wellContribs) {
|
void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, double *vals, int *rows, int *cols) {
|
||||||
this->N = N_;
|
this->N = N_;
|
||||||
this->nnz = nnz_;
|
this->nnz = nnz_;
|
||||||
this->nnzb = nnz_ / block_size / block_size;
|
this->nnzb = nnz_ / block_size / block_size;
|
||||||
@@ -462,9 +462,9 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
|||||||
source.emplace_back(std::make_pair(ILU_apply1_s, strlen(ILU_apply1_s)));
|
source.emplace_back(std::make_pair(ILU_apply1_s, strlen(ILU_apply1_s)));
|
||||||
source.emplace_back(std::make_pair(ILU_apply2_s, strlen(ILU_apply2_s)));
|
source.emplace_back(std::make_pair(ILU_apply2_s, strlen(ILU_apply2_s)));
|
||||||
source.emplace_back(std::make_pair(add_well_contributions_s, strlen(add_well_contributions_s)));
|
source.emplace_back(std::make_pair(add_well_contributions_s, strlen(add_well_contributions_s)));
|
||||||
cl::Program program_ = cl::Program(*context, source);
|
program = cl::Program(*context, source);
|
||||||
|
|
||||||
program_.build(devices);
|
program.build(devices);
|
||||||
|
|
||||||
cl::Event event;
|
cl::Event event;
|
||||||
queue.reset(new cl::CommandQueue(*context, devices[deviceID], 0, &err));
|
queue.reset(new cl::CommandQueue(*context, devices[deviceID], 0, &err));
|
||||||
@@ -495,20 +495,17 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
|||||||
d_Acols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * nnzb);
|
d_Acols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * nnzb);
|
||||||
d_Arows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Nb + 1));
|
d_Arows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Nb + 1));
|
||||||
|
|
||||||
wellContribs.init(context.get());
|
|
||||||
|
|
||||||
// queue.enqueueNDRangeKernel() is a blocking/synchronous call, at least for NVIDIA
|
// queue.enqueueNDRangeKernel() is a blocking/synchronous call, at least for NVIDIA
|
||||||
// cl::make_kernel<> myKernel(); myKernel(args, arg1, arg2); is also blocking
|
// cl::make_kernel<> myKernel(); myKernel(args, arg1, arg2); is also blocking
|
||||||
|
|
||||||
// actually creating the kernels
|
// actually creating the kernels
|
||||||
dot_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program_, "dot_1")));
|
dot_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "dot_1")));
|
||||||
norm_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program_, "norm")));
|
norm_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "norm")));
|
||||||
axpy_k.reset(new cl::make_kernel<cl::Buffer&, const double, cl::Buffer&, const unsigned int>(cl::Kernel(program_, "axpy")));
|
axpy_k.reset(new cl::make_kernel<cl::Buffer&, const double, cl::Buffer&, const unsigned int>(cl::Kernel(program, "axpy")));
|
||||||
custom_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const double, const double, const unsigned int>(cl::Kernel(program_, "custom")));
|
custom_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const double, const double, const unsigned int>(cl::Kernel(program, "custom")));
|
||||||
spmv_blocked_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program_, "spmv_blocked")));
|
spmv_blocked_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "spmv_blocked")));
|
||||||
ILU_apply1_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program_, "ILU_apply1")));
|
ILU_apply1_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "ILU_apply1")));
|
||||||
ILU_apply2_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program_, "ILU_apply2")));
|
ILU_apply2_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "ILU_apply2")));
|
||||||
add_well_contributions_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::Buffer&, cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg>(cl::Kernel(program_, "add_well_contributions")));
|
|
||||||
|
|
||||||
prec->setKernels(ILU_apply1_k.get(), ILU_apply2_k.get());
|
prec->setKernels(ILU_apply1_k.get(), ILU_apply2_k.get());
|
||||||
|
|
||||||
@@ -523,10 +520,19 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
|||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
initialized = true;
|
initialized = true;
|
||||||
} // end initialize()
|
} // end initialize()
|
||||||
|
|
||||||
|
template <unsigned int block_size>
|
||||||
|
void openclSolverBackend<block_size>::initialize_wellContribs(WellContributions& wellContribs){
|
||||||
|
add_well_contributions_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::Buffer&, cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg>(cl::Kernel(program, "add_well_contributions")));
|
||||||
|
|
||||||
|
wellContribs.setOpenCLContext(context.get());
|
||||||
|
wellContribs.setOpenCLQueue(queue.get());
|
||||||
|
wellContribs.init();
|
||||||
|
wellContribs.setKernel(add_well_contributions_k.get());
|
||||||
|
}
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void openclSolverBackend<block_size>::finalize() {
|
void openclSolverBackend<block_size>::finalize() {
|
||||||
delete[] rb;
|
delete[] rb;
|
||||||
@@ -539,7 +545,7 @@ void openclSolverBackend<block_size>::finalize() {
|
|||||||
|
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void openclSolverBackend<block_size>::copy_system_to_gpu(WellContributions& wellContribs) {
|
void openclSolverBackend<block_size>::copy_system_to_gpu() {
|
||||||
Timer t;
|
Timer t;
|
||||||
cl::Event event;
|
cl::Event event;
|
||||||
|
|
||||||
@@ -561,8 +567,6 @@ void openclSolverBackend<block_size>::copy_system_to_gpu(WellContributions& well
|
|||||||
queue->enqueueFillBuffer(d_x, 0, 0, sizeof(double) * N, nullptr, &event);
|
queue->enqueueFillBuffer(d_x, 0, 0, sizeof(double) * N, nullptr, &event);
|
||||||
event.wait();
|
event.wait();
|
||||||
|
|
||||||
wellContribs.copyDataToGPU(queue.get());
|
|
||||||
|
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
out << "openclSolver::copy_system_to_gpu(): " << t.stop() << " s";
|
out << "openclSolver::copy_system_to_gpu(): " << t.stop() << " s";
|
||||||
@@ -702,11 +706,11 @@ void openclSolverBackend<block_size>::get_result(double *x) {
|
|||||||
} // end get_result()
|
} // end get_result()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
SolverStatus openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) {
|
SolverStatus openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) {
|
||||||
if (initialized == false) {
|
if (initialized == false) {
|
||||||
initialize(N_, nnz_, dim, vals, rows, cols, wellContribs);
|
initialize(N_, nnz_, dim, vals, rows, cols);
|
||||||
|
initialize_wellContribs(wellContribs);
|
||||||
if (analysis_done == false) {
|
if (analysis_done == false) {
|
||||||
if (!analyse_matrix()) {
|
if (!analyse_matrix()) {
|
||||||
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
|
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
|
||||||
@@ -716,9 +720,10 @@ SolverStatus openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int
|
|||||||
if (!create_preconditioner()) {
|
if (!create_preconditioner()) {
|
||||||
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||||
}
|
}
|
||||||
copy_system_to_gpu(wellContribs);
|
copy_system_to_gpu();
|
||||||
} else {
|
} else {
|
||||||
update_system(vals, b);
|
update_system(vals, b);
|
||||||
|
initialize_wellContribs(wellContribs);
|
||||||
if (!create_preconditioner()) {
|
if (!create_preconditioner()) {
|
||||||
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -72,6 +72,7 @@ private:
|
|||||||
//cl::Buffer d_Ccols, d_Bcols, d_val_pointers;
|
//cl::Buffer d_Ccols, d_Bcols, d_val_pointers;
|
||||||
|
|
||||||
// shared pointers are also passed to other objects
|
// shared pointers are also passed to other objects
|
||||||
|
cl::Program program;
|
||||||
std::shared_ptr<cl::Context> context;
|
std::shared_ptr<cl::Context> context;
|
||||||
std::shared_ptr<cl::CommandQueue> queue;
|
std::shared_ptr<cl::CommandQueue> queue;
|
||||||
std::unique_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg> > dot_k;
|
std::unique_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg> > dot_k;
|
||||||
@@ -149,13 +150,15 @@ private:
|
|||||||
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
|
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
|
||||||
/// \param[in] rows array of rowPointers, contains N/dim+1 values
|
/// \param[in] rows array of rowPointers, contains N/dim+1 values
|
||||||
/// \param[in] cols array of columnIndices, contains nnz values
|
/// \param[in] cols array of columnIndices, contains nnz values
|
||||||
void initialize(int N, int nnz, int dim, double *vals, int *rows, int *cols, WellContributions& wellContribs);
|
void initialize(int N, int nnz, int dim, double *vals, int *rows, int *cols);
|
||||||
|
|
||||||
|
void initialize_wellContribs(WellContributions& wellContribs);
|
||||||
|
|
||||||
/// Clean memory
|
/// Clean memory
|
||||||
void finalize();
|
void finalize();
|
||||||
|
|
||||||
/// Copy linear system to GPU
|
/// Copy linear system to GPU
|
||||||
void copy_system_to_gpu(WellContributions& wellContribs);
|
void copy_system_to_gpu();
|
||||||
|
|
||||||
/// Reorder the linear system so it corresponds with the coloring
|
/// Reorder the linear system so it corresponds with the coloring
|
||||||
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
|
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
|
||||||
|
|||||||
Reference in New Issue
Block a user