Move apply_stdwells kernels to OpenclKernels

This commit is contained in:
Tong Dong Qiu 2021-10-19 12:00:06 +02:00
parent 1583292bf4
commit 78b1714481
4 changed files with 65 additions and 20 deletions

View File

@ -23,11 +23,14 @@
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>
#include <opm/simulators/linalg/bda/openclKernels.hpp>
#include <opm/simulators/linalg/bda/WellContributions.hpp>
namespace Opm
{
using bda::OpenclKernels;
WellContributions::WellContributions(std::string accelerator_mode, bool useWellConn){
if(accelerator_mode.compare("cusparse") == 0){
cuda_gpu = true;
@ -95,22 +98,13 @@ void WellContributions::setReordering(int *h_toOrder_, bool reorder_)
}
void WellContributions::apply_stdwells(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder){
const unsigned int work_group_size = 32;
const unsigned int total_work_items = num_std_wells * work_group_size;
const unsigned int lmem1 = sizeof(double) * work_group_size;
const unsigned int lmem2 = sizeof(double) * dim_wells;
cl::Event event;
if (reorder) {
event = (*kernel)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)),
*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl, d_x, d_y, d_toOrder, dim, dim_wells, *d_val_pointers_ocl,
cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
OpenclKernels::apply_stdwells_reorder(*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl,
d_x, d_y, d_toOrder, dim, dim_wells, *d_val_pointers_ocl, num_std_wells);
} else {
event = (*kernel_no_reorder)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)),
*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl, d_x, d_y, dim, dim_wells, *d_val_pointers_ocl,
cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
OpenclKernels::apply_stdwells_no_reorder(*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl,
d_x, d_y, dim, dim_wells, *d_val_pointers_ocl, num_std_wells);
}
event.wait();
}
void WellContributions::apply_mswells(cl::Buffer d_x, cl::Buffer d_y){

View File

@ -309,6 +309,49 @@ void OpenclKernels::ILU_decomp(int firstRow, int lastRow, cl::Buffer& vals, cl::
}
}
void OpenclKernels::apply_stdwells_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffer &d_Dnnzs_ocl, cl::Buffer &d_Bnnzs_ocl,
cl::Buffer &d_Ccols_ocl, cl::Buffer &d_Bcols_ocl, cl::Buffer &d_x, cl::Buffer &d_y,
cl::Buffer &d_toOrder, int dim, int dim_wells, cl::Buffer &d_val_pointers_ocl, int num_std_wells)
{
const unsigned int work_group_size = 32;
const unsigned int total_work_items = num_std_wells * work_group_size;
const unsigned int lmem1 = sizeof(double) * work_group_size;
const unsigned int lmem2 = sizeof(double) * dim_wells;
Timer t_apply_stdwells;
cl::Event event = (*stdwell_apply_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)),
d_Cnnzs_ocl, d_Dnnzs_ocl, d_Bnnzs_ocl, d_Ccols_ocl, d_Bcols_ocl, d_x, d_y, d_toOrder, dim, dim_wells, d_val_pointers_ocl,
cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
if (verbosity >= 4) {
event.wait();
std::ostringstream oss;
oss << std::scientific << "OpenclKernels apply_stdwells() time: " << t_apply_stdwells.stop() << " s";
OpmLog::info(oss.str());
}
}
void OpenclKernels::apply_stdwells_no_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffer &d_Dnnzs_ocl, cl::Buffer &d_Bnnzs_ocl,
cl::Buffer &d_Ccols_ocl, cl::Buffer &d_Bcols_ocl, cl::Buffer &d_x, cl::Buffer &d_y,
int dim, int dim_wells, cl::Buffer &d_val_pointers_ocl, int num_std_wells)
{
const unsigned int work_group_size = 32;
const unsigned int total_work_items = num_std_wells * work_group_size;
const unsigned int lmem1 = sizeof(double) * work_group_size;
const unsigned int lmem2 = sizeof(double) * dim_wells;
Timer t_apply_stdwells;
cl::Event event = (*stdwell_apply_no_reorder_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)),
d_Cnnzs_ocl, d_Dnnzs_ocl, d_Bnnzs_ocl, d_Ccols_ocl, d_Bcols_ocl, d_x, d_y, dim, dim_wells, d_val_pointers_ocl,
cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
if (verbosity >= 4) {
event.wait();
std::ostringstream oss;
oss << std::scientific << "OpenclKernels apply_stdwells() time: " << t_apply_stdwells.stop() << " s";
OpmLog::info(oss.str());
}
}
std::string OpenclKernels::get_axpy_string() {

View File

@ -127,10 +127,22 @@ public:
static void scale(cl::Buffer& in, const double a, int N);
static void custom(cl::Buffer& p, cl::Buffer& v, cl::Buffer& r, const double omega, const double beta, int N);
static void spmv_blocked(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& x, cl::Buffer& b, int Nb, unsigned int block_size);
static void ILU_apply1(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& diagIndex, const cl::Buffer& y, cl::Buffer& x, cl::Buffer& rowsPerColor, int color, int Nb, unsigned int block_size);
static void ILU_apply2(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& diagIndex, cl::Buffer& invDiagVals, cl::Buffer& x, cl::Buffer& rowsPerColor, int color, int Nb, unsigned int block_size);
static void ILU_apply1(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& diagIndex,
const cl::Buffer& y, cl::Buffer& x, cl::Buffer& rowsPerColor, int color, int Nb, unsigned int block_size);
static void ILU_decomp(int firstRow, int lastRow, cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& diagIndex, cl::Buffer& invDiagVals, int Nb, unsigned int block_size);
static void ILU_apply2(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& diagIndex,
cl::Buffer& invDiagVals, cl::Buffer& x, cl::Buffer& rowsPerColor, int color, int Nb, unsigned int block_size);
static void ILU_decomp(int firstRow, int lastRow, cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows,
cl::Buffer& diagIndex, cl::Buffer& invDiagVals, int Nb, unsigned int block_size);
static void apply_stdwells_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffer &d_Dnnzs_ocl, cl::Buffer &d_Bnnzs_ocl,
cl::Buffer &d_Ccols_ocl, cl::Buffer &d_Bcols_ocl, cl::Buffer &d_x, cl::Buffer &d_y,
cl::Buffer &d_toOrder, int dim, int dim_wells, cl::Buffer &d_val_pointers_ocl, int num_std_wells);
static void apply_stdwells_no_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffer &d_Dnnzs_ocl, cl::Buffer &d_Bnnzs_ocl,
cl::Buffer &d_Ccols_ocl, cl::Buffer &d_Bcols_ocl, cl::Buffer &d_x, cl::Buffer &d_y,
int dim, int dim_wells, cl::Buffer &d_val_pointers_ocl, int num_std_wells);
};
} // end namespace bda

View File

@ -202,10 +202,6 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
double rho, rhop, beta, alpha, omega, tmp1, tmp2;
double norm, norm_0;
if(wellContribs.getNumWells() > 0){
// wellContribs.setKernel(stdwell_apply_k.get(), stdwell_apply_no_reorder_k.get());
}
Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false);
// set r to the initial residual