mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
New function for setting OpenCL in wellContribs
This commit is contained in:
parent
c7adc3495f
commit
f8898e3158
@ -71,7 +71,7 @@ unsigned int openclSolverBackend<block_size>::ceilDivision(const unsigned int A,
|
||||
template <unsigned int block_size>
|
||||
double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out)
|
||||
{
|
||||
const unsigned int work_group_size = 256;
|
||||
const unsigned int work_group_size = 1024;
|
||||
const unsigned int num_work_groups = ceilDivision(N, work_group_size);
|
||||
const unsigned int total_work_items = num_work_groups * work_group_size;
|
||||
const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
|
||||
@ -99,7 +99,7 @@ double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl
|
||||
template <unsigned int block_size>
|
||||
double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
|
||||
{
|
||||
const unsigned int work_group_size = 256;
|
||||
const unsigned int work_group_size = 1024;
|
||||
const unsigned int num_work_groups = ceilDivision(N, work_group_size);
|
||||
const unsigned int total_work_items = num_work_groups * work_group_size;
|
||||
const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
|
||||
@ -319,7 +319,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, WellContributions& wellContribs) {
|
||||
void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, double *vals, int *rows, int *cols) {
|
||||
this->N = N_;
|
||||
this->nnz = nnz_;
|
||||
this->nnzb = nnz_ / block_size / block_size;
|
||||
@ -462,9 +462,9 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
||||
source.emplace_back(std::make_pair(ILU_apply1_s, strlen(ILU_apply1_s)));
|
||||
source.emplace_back(std::make_pair(ILU_apply2_s, strlen(ILU_apply2_s)));
|
||||
source.emplace_back(std::make_pair(add_well_contributions_s, strlen(add_well_contributions_s)));
|
||||
cl::Program program_ = cl::Program(*context, source);
|
||||
program = cl::Program(*context, source);
|
||||
|
||||
program_.build(devices);
|
||||
program.build(devices);
|
||||
|
||||
cl::Event event;
|
||||
queue.reset(new cl::CommandQueue(*context, devices[deviceID], 0, &err));
|
||||
@ -495,25 +495,19 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
||||
d_Acols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * nnzb);
|
||||
d_Arows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Nb + 1));
|
||||
|
||||
wellContribs.setOpenCLContext(context.get());
|
||||
wellContribs.setOpenCLQueue(queue.get());
|
||||
wellContribs.init();
|
||||
|
||||
// queue.enqueueNDRangeKernel() is a blocking/synchronous call, at least for NVIDIA
|
||||
// cl::make_kernel<> myKernel(); myKernel(args, arg1, arg2); is also blocking
|
||||
|
||||
// actually creating the kernels
|
||||
dot_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program_, "dot_1")));
|
||||
norm_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program_, "norm")));
|
||||
axpy_k.reset(new cl::make_kernel<cl::Buffer&, const double, cl::Buffer&, const unsigned int>(cl::Kernel(program_, "axpy")));
|
||||
custom_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const double, const double, const unsigned int>(cl::Kernel(program_, "custom")));
|
||||
spmv_blocked_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program_, "spmv_blocked")));
|
||||
ILU_apply1_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program_, "ILU_apply1")));
|
||||
ILU_apply2_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program_, "ILU_apply2")));
|
||||
add_well_contributions_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::Buffer&, cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg>(cl::Kernel(program_, "add_well_contributions")));
|
||||
dot_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "dot_1")));
|
||||
norm_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "norm")));
|
||||
axpy_k.reset(new cl::make_kernel<cl::Buffer&, const double, cl::Buffer&, const unsigned int>(cl::Kernel(program, "axpy")));
|
||||
custom_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const double, const double, const unsigned int>(cl::Kernel(program, "custom")));
|
||||
spmv_blocked_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "spmv_blocked")));
|
||||
ILU_apply1_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "ILU_apply1")));
|
||||
ILU_apply2_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "ILU_apply2")));
|
||||
|
||||
prec->setKernels(ILU_apply1_k.get(), ILU_apply2_k.get());
|
||||
wellContribs.setKernel(add_well_contributions_k.get());
|
||||
|
||||
} catch (const cl::Error& error) {
|
||||
std::ostringstream oss;
|
||||
@ -526,10 +520,19 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
||||
throw error;
|
||||
}
|
||||
|
||||
|
||||
initialized = true;
|
||||
} // end initialize()
|
||||
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::initialize_wellContribs(WellContributions& wellContribs){
|
||||
add_well_contributions_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::Buffer&, cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg>(cl::Kernel(program, "add_well_contributions")));
|
||||
|
||||
wellContribs.setOpenCLContext(context.get());
|
||||
wellContribs.setOpenCLQueue(queue.get());
|
||||
wellContribs.init();
|
||||
wellContribs.setKernel(add_well_contributions_k.get());
|
||||
}
|
||||
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::finalize() {
|
||||
delete[] rb;
|
||||
@ -703,11 +706,11 @@ void openclSolverBackend<block_size>::get_result(double *x) {
|
||||
} // end get_result()
|
||||
|
||||
|
||||
|
||||
template <unsigned int block_size>
|
||||
SolverStatus openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) {
|
||||
if (initialized == false) {
|
||||
initialize(N_, nnz_, dim, vals, rows, cols, wellContribs);
|
||||
initialize(N_, nnz_, dim, vals, rows, cols);
|
||||
initialize_wellContribs(wellContribs);
|
||||
if (analysis_done == false) {
|
||||
if (!analyse_matrix()) {
|
||||
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
|
||||
@ -720,6 +723,7 @@ SolverStatus openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int
|
||||
copy_system_to_gpu();
|
||||
} else {
|
||||
update_system(vals, b);
|
||||
initialize_wellContribs(wellContribs);
|
||||
if (!create_preconditioner()) {
|
||||
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||
}
|
||||
|
@ -72,6 +72,7 @@ private:
|
||||
//cl::Buffer d_Ccols, d_Bcols, d_val_pointers;
|
||||
|
||||
// shared pointers are also passed to other objects
|
||||
cl::Program program;
|
||||
std::shared_ptr<cl::Context> context;
|
||||
std::shared_ptr<cl::CommandQueue> queue;
|
||||
std::unique_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg> > dot_k;
|
||||
@ -149,7 +150,9 @@ private:
|
||||
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
|
||||
/// \param[in] rows array of rowPointers, contains N/dim+1 values
|
||||
/// \param[in] cols array of columnIndices, contains nnz values
|
||||
void initialize(int N, int nnz, int dim, double *vals, int *rows, int *cols, WellContributions& wellContribs);
|
||||
void initialize(int N, int nnz, int dim, double *vals, int *rows, int *cols);
|
||||
|
||||
void initialize_wellContribs(WellContributions& wellContribs);
|
||||
|
||||
/// Clean memory
|
||||
void finalize();
|
||||
|
Loading…
Reference in New Issue
Block a user