mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Move apply_stdwells kernels to OpenclKernels
This commit is contained in:
parent
1583292bf4
commit
78b1714481
@ -23,11 +23,14 @@
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/openclKernels.hpp>
|
||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
using bda::OpenclKernels;
|
||||
|
||||
WellContributions::WellContributions(std::string accelerator_mode, bool useWellConn){
|
||||
if(accelerator_mode.compare("cusparse") == 0){
|
||||
cuda_gpu = true;
|
||||
@ -95,22 +98,13 @@ void WellContributions::setReordering(int *h_toOrder_, bool reorder_)
|
||||
}
|
||||
|
||||
void WellContributions::apply_stdwells(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder){
|
||||
const unsigned int work_group_size = 32;
|
||||
const unsigned int total_work_items = num_std_wells * work_group_size;
|
||||
const unsigned int lmem1 = sizeof(double) * work_group_size;
|
||||
const unsigned int lmem2 = sizeof(double) * dim_wells;
|
||||
|
||||
cl::Event event;
|
||||
if (reorder) {
|
||||
event = (*kernel)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)),
|
||||
*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl, d_x, d_y, d_toOrder, dim, dim_wells, *d_val_pointers_ocl,
|
||||
cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
|
||||
OpenclKernels::apply_stdwells_reorder(*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl,
|
||||
d_x, d_y, d_toOrder, dim, dim_wells, *d_val_pointers_ocl, num_std_wells);
|
||||
} else {
|
||||
event = (*kernel_no_reorder)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)),
|
||||
*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl, d_x, d_y, dim, dim_wells, *d_val_pointers_ocl,
|
||||
cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
|
||||
OpenclKernels::apply_stdwells_no_reorder(*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl,
|
||||
d_x, d_y, dim, dim_wells, *d_val_pointers_ocl, num_std_wells);
|
||||
}
|
||||
event.wait();
|
||||
}
|
||||
|
||||
void WellContributions::apply_mswells(cl::Buffer d_x, cl::Buffer d_y){
|
||||
|
@ -309,6 +309,49 @@ void OpenclKernels::ILU_decomp(int firstRow, int lastRow, cl::Buffer& vals, cl::
|
||||
}
|
||||
}
|
||||
|
||||
void OpenclKernels::apply_stdwells_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffer &d_Dnnzs_ocl, cl::Buffer &d_Bnnzs_ocl,
|
||||
cl::Buffer &d_Ccols_ocl, cl::Buffer &d_Bcols_ocl, cl::Buffer &d_x, cl::Buffer &d_y,
|
||||
cl::Buffer &d_toOrder, int dim, int dim_wells, cl::Buffer &d_val_pointers_ocl, int num_std_wells)
|
||||
{
|
||||
const unsigned int work_group_size = 32;
|
||||
const unsigned int total_work_items = num_std_wells * work_group_size;
|
||||
const unsigned int lmem1 = sizeof(double) * work_group_size;
|
||||
const unsigned int lmem2 = sizeof(double) * dim_wells;
|
||||
Timer t_apply_stdwells;
|
||||
|
||||
cl::Event event = (*stdwell_apply_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)),
|
||||
d_Cnnzs_ocl, d_Dnnzs_ocl, d_Bnnzs_ocl, d_Ccols_ocl, d_Bcols_ocl, d_x, d_y, d_toOrder, dim, dim_wells, d_val_pointers_ocl,
|
||||
cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
|
||||
|
||||
if (verbosity >= 4) {
|
||||
event.wait();
|
||||
std::ostringstream oss;
|
||||
oss << std::scientific << "OpenclKernels apply_stdwells() time: " << t_apply_stdwells.stop() << " s";
|
||||
OpmLog::info(oss.str());
|
||||
}
|
||||
}
|
||||
|
||||
void OpenclKernels::apply_stdwells_no_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffer &d_Dnnzs_ocl, cl::Buffer &d_Bnnzs_ocl,
|
||||
cl::Buffer &d_Ccols_ocl, cl::Buffer &d_Bcols_ocl, cl::Buffer &d_x, cl::Buffer &d_y,
|
||||
int dim, int dim_wells, cl::Buffer &d_val_pointers_ocl, int num_std_wells)
|
||||
{
|
||||
const unsigned int work_group_size = 32;
|
||||
const unsigned int total_work_items = num_std_wells * work_group_size;
|
||||
const unsigned int lmem1 = sizeof(double) * work_group_size;
|
||||
const unsigned int lmem2 = sizeof(double) * dim_wells;
|
||||
Timer t_apply_stdwells;
|
||||
|
||||
cl::Event event = (*stdwell_apply_no_reorder_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)),
|
||||
d_Cnnzs_ocl, d_Dnnzs_ocl, d_Bnnzs_ocl, d_Ccols_ocl, d_Bcols_ocl, d_x, d_y, dim, dim_wells, d_val_pointers_ocl,
|
||||
cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
|
||||
|
||||
if (verbosity >= 4) {
|
||||
event.wait();
|
||||
std::ostringstream oss;
|
||||
oss << std::scientific << "OpenclKernels apply_stdwells() time: " << t_apply_stdwells.stop() << " s";
|
||||
OpmLog::info(oss.str());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
std::string OpenclKernels::get_axpy_string() {
|
||||
|
@ -127,10 +127,22 @@ public:
|
||||
static void scale(cl::Buffer& in, const double a, int N);
|
||||
static void custom(cl::Buffer& p, cl::Buffer& v, cl::Buffer& r, const double omega, const double beta, int N);
|
||||
static void spmv_blocked(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& x, cl::Buffer& b, int Nb, unsigned int block_size);
|
||||
static void ILU_apply1(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& diagIndex, const cl::Buffer& y, cl::Buffer& x, cl::Buffer& rowsPerColor, int color, int Nb, unsigned int block_size);
|
||||
static void ILU_apply2(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& diagIndex, cl::Buffer& invDiagVals, cl::Buffer& x, cl::Buffer& rowsPerColor, int color, int Nb, unsigned int block_size);
|
||||
static void ILU_apply1(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& diagIndex,
|
||||
const cl::Buffer& y, cl::Buffer& x, cl::Buffer& rowsPerColor, int color, int Nb, unsigned int block_size);
|
||||
|
||||
static void ILU_decomp(int firstRow, int lastRow, cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& diagIndex, cl::Buffer& invDiagVals, int Nb, unsigned int block_size);
|
||||
static void ILU_apply2(cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows, cl::Buffer& diagIndex,
|
||||
cl::Buffer& invDiagVals, cl::Buffer& x, cl::Buffer& rowsPerColor, int color, int Nb, unsigned int block_size);
|
||||
|
||||
static void ILU_decomp(int firstRow, int lastRow, cl::Buffer& vals, cl::Buffer& cols, cl::Buffer& rows,
|
||||
cl::Buffer& diagIndex, cl::Buffer& invDiagVals, int Nb, unsigned int block_size);
|
||||
|
||||
static void apply_stdwells_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffer &d_Dnnzs_ocl, cl::Buffer &d_Bnnzs_ocl,
|
||||
cl::Buffer &d_Ccols_ocl, cl::Buffer &d_Bcols_ocl, cl::Buffer &d_x, cl::Buffer &d_y,
|
||||
cl::Buffer &d_toOrder, int dim, int dim_wells, cl::Buffer &d_val_pointers_ocl, int num_std_wells);
|
||||
|
||||
static void apply_stdwells_no_reorder(cl::Buffer& d_Cnnzs_ocl, cl::Buffer &d_Dnnzs_ocl, cl::Buffer &d_Bnnzs_ocl,
|
||||
cl::Buffer &d_Ccols_ocl, cl::Buffer &d_Bcols_ocl, cl::Buffer &d_x, cl::Buffer &d_y,
|
||||
int dim, int dim_wells, cl::Buffer &d_val_pointers_ocl, int num_std_wells);
|
||||
};
|
||||
|
||||
} // end namespace bda
|
||||
|
@ -202,10 +202,6 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
||||
double rho, rhop, beta, alpha, omega, tmp1, tmp2;
|
||||
double norm, norm_0;
|
||||
|
||||
if(wellContribs.getNumWells() > 0){
|
||||
// wellContribs.setKernel(stdwell_apply_k.get(), stdwell_apply_no_reorder_k.get());
|
||||
}
|
||||
|
||||
Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false);
|
||||
|
||||
// set r to the initial residual
|
||||
|
Loading…
Reference in New Issue
Block a user