New function for setting OpenCL in wellContribs

This commit is contained in:
Jose Eduardo Bueno 2020-09-03 12:42:02 -03:00
parent c7adc3495f
commit f8898e3158
2 changed files with 29 additions and 22 deletions

View File

@ -71,7 +71,7 @@ unsigned int openclSolverBackend<block_size>::ceilDivision(const unsigned int A,
template <unsigned int block_size>
double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out)
{
const unsigned int work_group_size = 256;
const unsigned int work_group_size = 1024;
const unsigned int num_work_groups = ceilDivision(N, work_group_size);
const unsigned int total_work_items = num_work_groups * work_group_size;
const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
@ -99,7 +99,7 @@ double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl
template <unsigned int block_size>
double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
{
const unsigned int work_group_size = 256;
const unsigned int work_group_size = 1024;
const unsigned int num_work_groups = ceilDivision(N, work_group_size);
const unsigned int total_work_items = num_work_groups * work_group_size;
const unsigned int lmem_per_work_group = sizeof(double) * work_group_size;
@ -319,7 +319,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
template <unsigned int block_size>
void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, WellContributions& wellContribs) {
void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, double *vals, int *rows, int *cols) {
this->N = N_;
this->nnz = nnz_;
this->nnzb = nnz_ / block_size / block_size;
@ -462,9 +462,9 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
source.emplace_back(std::make_pair(ILU_apply1_s, strlen(ILU_apply1_s)));
source.emplace_back(std::make_pair(ILU_apply2_s, strlen(ILU_apply2_s)));
source.emplace_back(std::make_pair(add_well_contributions_s, strlen(add_well_contributions_s)));
cl::Program program_ = cl::Program(*context, source);
program = cl::Program(*context, source);
program_.build(devices);
program.build(devices);
cl::Event event;
queue.reset(new cl::CommandQueue(*context, devices[deviceID], 0, &err));
@ -495,25 +495,19 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
d_Acols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * nnzb);
d_Arows = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * (Nb + 1));
wellContribs.setOpenCLContext(context.get());
wellContribs.setOpenCLQueue(queue.get());
wellContribs.init();
// queue.enqueueNDRangeKernel() is a blocking/synchronous call, at least for NVIDIA
// cl::make_kernel<> myKernel(); myKernel(args, arg1, arg2); is also blocking
// actually creating the kernels
dot_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program_, "dot_1")));
norm_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program_, "norm")));
axpy_k.reset(new cl::make_kernel<cl::Buffer&, const double, cl::Buffer&, const unsigned int>(cl::Kernel(program_, "axpy")));
custom_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const double, const double, const unsigned int>(cl::Kernel(program_, "custom")));
spmv_blocked_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program_, "spmv_blocked")));
ILU_apply1_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program_, "ILU_apply1")));
ILU_apply2_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program_, "ILU_apply2")));
add_well_contributions_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::Buffer&, cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg>(cl::Kernel(program_, "add_well_contributions")));
dot_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "dot_1")));
norm_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "norm")));
axpy_k.reset(new cl::make_kernel<cl::Buffer&, const double, cl::Buffer&, const unsigned int>(cl::Kernel(program, "axpy")));
custom_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const double, const double, const unsigned int>(cl::Kernel(program, "custom")));
spmv_blocked_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "spmv_blocked")));
ILU_apply1_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "ILU_apply1")));
ILU_apply2_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "ILU_apply2")));
prec->setKernels(ILU_apply1_k.get(), ILU_apply2_k.get());
wellContribs.setKernel(add_well_contributions_k.get());
} catch (const cl::Error& error) {
std::ostringstream oss;
@ -526,10 +520,19 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
throw error;
}
initialized = true;
} // end initialize()
template <unsigned int block_size>
void openclSolverBackend<block_size>::initialize_wellContribs(WellContributions& wellContribs){
add_well_contributions_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::Buffer&, cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg>(cl::Kernel(program, "add_well_contributions")));
wellContribs.setOpenCLContext(context.get());
wellContribs.setOpenCLQueue(queue.get());
wellContribs.init();
wellContribs.setKernel(add_well_contributions_k.get());
}
template <unsigned int block_size>
void openclSolverBackend<block_size>::finalize() {
delete[] rb;
@ -703,11 +706,11 @@ void openclSolverBackend<block_size>::get_result(double *x) {
} // end get_result()
template <unsigned int block_size>
SolverStatus openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) {
if (initialized == false) {
initialize(N_, nnz_, dim, vals, rows, cols, wellContribs);
initialize(N_, nnz_, dim, vals, rows, cols);
initialize_wellContribs(wellContribs);
if (analysis_done == false) {
if (!analyse_matrix()) {
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
@ -720,6 +723,7 @@ SolverStatus openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int
copy_system_to_gpu();
} else {
update_system(vals, b);
initialize_wellContribs(wellContribs);
if (!create_preconditioner()) {
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
}

View File

@ -72,6 +72,7 @@ private:
//cl::Buffer d_Ccols, d_Bcols, d_val_pointers;
// shared pointers are also passed to other objects
cl::Program program;
std::shared_ptr<cl::Context> context;
std::shared_ptr<cl::CommandQueue> queue;
std::unique_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg> > dot_k;
@ -149,7 +150,9 @@ private:
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
/// \param[in] rows array of rowPointers, contains N/dim+1 values
/// \param[in] cols array of columnIndices, contains nnz values
void initialize(int N, int nnz, int dim, double *vals, int *rows, int *cols, WellContributions& wellContribs);
void initialize(int N, int nnz, int dim, double *vals, int *rows, int *cols);
void initialize_wellContribs(WellContributions& wellContribs);
/// Clean memory
void finalize();