mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Fixed kernel; fixed time step chopping;
This commit is contained in:
parent
c41aafcc58
commit
c43648076f
@ -101,37 +101,8 @@ WellContributions::~WellContributions()
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
#if HAVE_OPENCL
|
#if HAVE_OPENCL
|
||||||
void WellContributions::setOpenCLContext(cl::Context *context_){
|
|
||||||
this->context = context_;
|
|
||||||
}
|
|
||||||
|
|
||||||
void WellContributions::setOpenCLQueue(cl::CommandQueue *queue_){
|
|
||||||
this->queue = queue_;
|
|
||||||
}
|
|
||||||
|
|
||||||
void WellContributions::setKernel(kernel_type *stdwell_apply_){
|
|
||||||
this->stdwell_apply = stdwell_apply_;
|
|
||||||
}
|
|
||||||
|
|
||||||
void WellContributions::init(){
|
|
||||||
if(num_std_wells > 0){
|
|
||||||
d_Cnnzs_ocl = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * num_blocks * dim * dim_wells);
|
|
||||||
d_Dnnzs_ocl = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * num_std_wells * dim_wells * dim_wells);
|
|
||||||
d_Bnnzs_ocl = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * num_blocks * dim * dim_wells);
|
|
||||||
d_Ccols_ocl = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
|
|
||||||
d_Bcols_ocl = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
|
|
||||||
d_val_pointers_ocl = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * (num_std_wells + 1));
|
|
||||||
|
|
||||||
queue->enqueueWriteBuffer(d_Cnnzs_ocl, CL_TRUE, 0, sizeof(double) * num_blocks * dim * dim_wells, h_Cnnzs_ocl);
|
|
||||||
queue->enqueueWriteBuffer(d_Dnnzs_ocl, CL_TRUE, 0, sizeof(double) * num_std_wells * dim_wells * dim_wells, h_Dnnzs_ocl);
|
|
||||||
queue->enqueueWriteBuffer(d_Bnnzs_ocl, CL_TRUE, 0, sizeof(double) * num_blocks * dim * dim_wells, h_Bnnzs_ocl);
|
|
||||||
queue->enqueueWriteBuffer(d_Ccols_ocl, CL_TRUE, 0, sizeof(int) * num_blocks, h_Ccols_ocl);
|
|
||||||
queue->enqueueWriteBuffer(d_Bcols_ocl, CL_TRUE, 0, sizeof(int) * num_blocks, h_Bcols_ocl);
|
|
||||||
queue->enqueueWriteBuffer(d_val_pointers_ocl, CL_TRUE, 0, sizeof(unsigned int) * (num_std_wells + 1), val_pointers);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void WellContributions::applyMSWell(cl::Buffer& d_x, cl::Buffer& d_y) {
|
void WellContributions::applyMSWell(cl::Buffer& d_x, cl::Buffer& d_y) {
|
||||||
// apply MultisegmentWells
|
// apply MultisegmentWells
|
||||||
if (num_ms_wells > 0) {
|
if (num_ms_wells > 0) {
|
||||||
@ -154,30 +125,8 @@ void WellContributions::applyMSWell(cl::Buffer& d_x, cl::Buffer& d_y) {
|
|||||||
queue->enqueueWriteBuffer(d_y, CL_TRUE, 0, sizeof(double) * N, h_y_ocl);
|
queue->enqueueWriteBuffer(d_y, CL_TRUE, 0, sizeof(double) * N, h_y_ocl);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void WellContributions::applyStdWell(cl::Buffer& d_x, cl::Buffer& d_y){
|
|
||||||
const unsigned int work_group_size = 32;
|
|
||||||
const unsigned int total_work_items = num_std_wells * work_group_size;
|
|
||||||
const unsigned int lmem1 = sizeof(double) * work_group_size;
|
|
||||||
const unsigned int lmem2 = sizeof(double) * dim_wells;
|
|
||||||
|
|
||||||
cl::Event event;
|
|
||||||
event = (*stdwell_apply)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)),
|
|
||||||
d_Cnnzs_ocl, d_Dnnzs_ocl, d_Bnnzs_ocl, d_Ccols_ocl, d_Bcols_ocl, d_x, d_y, dim, dim_wells,
|
|
||||||
d_val_pointers_ocl, cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
|
|
||||||
}
|
|
||||||
|
|
||||||
void WellContributions::apply(cl::Buffer& d_x, cl::Buffer& d_y){
|
|
||||||
if(num_std_wells > 0){
|
|
||||||
applyStdWell(d_x, d_y);
|
|
||||||
}
|
|
||||||
|
|
||||||
if(num_ms_wells > 0){
|
|
||||||
applyMSWell(d_x, d_y);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
*/
|
||||||
|
|
||||||
void WellContributions::addMatrix([[maybe_unused]] MatrixType type, [[maybe_unused]] int *colIndices, [[maybe_unused]] double *values, [[maybe_unused]] unsigned int val_size)
|
void WellContributions::addMatrix([[maybe_unused]] MatrixType type, [[maybe_unused]] int *colIndices, [[maybe_unused]] double *values, [[maybe_unused]] unsigned int val_size)
|
||||||
{
|
{
|
||||||
|
@ -64,7 +64,6 @@ public:
|
|||||||
B
|
B
|
||||||
};
|
};
|
||||||
|
|
||||||
private:
|
|
||||||
unsigned int num_blocks = 0; // total number of blocks in all wells
|
unsigned int num_blocks = 0; // total number of blocks in all wells
|
||||||
unsigned int dim; // number of columns in blocks in B and C, equal to StandardWell::numEq
|
unsigned int dim; // number of columns in blocks in B and C, equal to StandardWell::numEq
|
||||||
unsigned int dim_wells; // number of rows in blocks in B and C, equal to StandardWell::numStaticWellEq
|
unsigned int dim_wells; // number of rows in blocks in B and C, equal to StandardWell::numStaticWellEq
|
||||||
@ -73,6 +72,18 @@ private:
|
|||||||
unsigned int num_blocks_so_far = 0; // keep track of where next data is written
|
unsigned int num_blocks_so_far = 0; // keep track of where next data is written
|
||||||
unsigned int num_std_wells_so_far = 0; // keep track of where next data is written
|
unsigned int num_std_wells_so_far = 0; // keep track of where next data is written
|
||||||
unsigned int *val_pointers = nullptr; // val_pointers[wellID] == index of first block for this well in Ccols and Bcols
|
unsigned int *val_pointers = nullptr; // val_pointers[wellID] == index of first block for this well in Ccols and Bcols
|
||||||
|
|
||||||
|
#if HAVE_OPENCL
|
||||||
|
double *h_Cnnzs_ocl = nullptr;
|
||||||
|
double *h_Dnnzs_ocl = nullptr;
|
||||||
|
double *h_Bnnzs_ocl = nullptr;
|
||||||
|
int *h_Ccols_ocl = nullptr;
|
||||||
|
int *h_Bcols_ocl = nullptr;
|
||||||
|
double *h_x_ocl = nullptr;
|
||||||
|
double *h_y_ocl = nullptr;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
private:
|
||||||
unsigned int N; // number of rows (not blockrows) in vectors x and y
|
unsigned int N; // number of rows (not blockrows) in vectors x and y
|
||||||
bool allocated = false;
|
bool allocated = false;
|
||||||
std::vector<MultisegmentWellContribution*> multisegments;
|
std::vector<MultisegmentWellContribution*> multisegments;
|
||||||
@ -97,30 +108,7 @@ private:
|
|||||||
unsigned int *d_val_pointers = nullptr;
|
unsigned int *d_val_pointers = nullptr;
|
||||||
double *h_x = nullptr;
|
double *h_x = nullptr;
|
||||||
double *h_y = nullptr;
|
double *h_y = nullptr;
|
||||||
#endif
|
|
||||||
|
|
||||||
#if HAVE_OPENCL
|
|
||||||
double *h_Cnnzs_ocl = nullptr;
|
|
||||||
double *h_Dnnzs_ocl = nullptr;
|
|
||||||
double *h_Bnnzs_ocl = nullptr;
|
|
||||||
int *h_Ccols_ocl = nullptr;
|
|
||||||
int *h_Bcols_ocl = nullptr;
|
|
||||||
double *h_x_ocl = nullptr;
|
|
||||||
double *h_y_ocl = nullptr;
|
|
||||||
|
|
||||||
cl::Buffer d_Cnnzs_ocl, d_Dnnzs_ocl, d_Bnnzs_ocl;
|
|
||||||
cl::Buffer d_Ccols_ocl, d_Bcols_ocl, d_val_pointers_ocl;
|
|
||||||
|
|
||||||
typedef cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&,
|
|
||||||
cl::Buffer&, cl::Buffer&, cl::Buffer&,
|
|
||||||
cl::Buffer&, const unsigned int, const unsigned int,
|
|
||||||
cl::Buffer&, cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg> kernel_type;
|
|
||||||
kernel_type *stdwell_apply;
|
|
||||||
cl::Context *context;
|
|
||||||
cl::CommandQueue *queue;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if HAVE_CUDA
|
|
||||||
/// Store a matrix in this object, in blocked csr format, can only be called after alloc() is called
|
/// Store a matrix in this object, in blocked csr format, can only be called after alloc() is called
|
||||||
/// \param[in] type indicate if C, D or B is sent
|
/// \param[in] type indicate if C, D or B is sent
|
||||||
/// \param[in] colIndices columnindices of blocks in C or B, ignored for D
|
/// \param[in] colIndices columnindices of blocks in C or B, ignored for D
|
||||||
@ -135,12 +123,11 @@ private:
|
|||||||
void freeCudaMemory();
|
void freeCudaMemory();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if HAVE_OPENCL
|
|
||||||
void applyStdWell(cl::Buffer& d_x, cl::Buffer& d_y);
|
|
||||||
void applyMSWell(cl::Buffer& d_x, cl::Buffer& d_y);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
//#if HAVE_OPENCL
|
||||||
|
// void applyMSWell(cl::Buffer& d_x, cl::Buffer& d_y);
|
||||||
|
//#endif
|
||||||
|
|
||||||
#if HAVE_CUDA
|
#if HAVE_CUDA
|
||||||
/// Set a cudaStream to be used
|
/// Set a cudaStream to be used
|
||||||
/// \param[in] stream the cudaStream that is used to launch the kernel in
|
/// \param[in] stream the cudaStream that is used to launch the kernel in
|
||||||
@ -157,14 +144,6 @@ public:
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if HAVE_OPENCL
|
|
||||||
void init();
|
|
||||||
void apply(cl::Buffer& x, cl::Buffer& y);
|
|
||||||
void setOpenCLContext(cl::Context *context);
|
|
||||||
void setOpenCLQueue(cl::CommandQueue *queue);
|
|
||||||
void setKernel(kernel_type *stdwell_apply);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/// Create a new WellContributions
|
/// Create a new WellContributions
|
||||||
WellContributions(std::string gpu_mode);
|
WellContributions(std::string gpu_mode);
|
||||||
|
|
||||||
|
@ -455,7 +455,7 @@ namespace bda
|
|||||||
for (unsigned int j = 0; j < blnr; ++j){
|
for (unsigned int j = 0; j < blnr; ++j){
|
||||||
temp += valsC[bb*blnc*blnr + j*blnc + c]*z2[j];
|
temp += valsC[bb*blnc*blnr + j*blnc + c]*z2[j];
|
||||||
}
|
}
|
||||||
atomicAdd(&y[colIdx*blnc + c], temp);
|
atomicAdd(&y[colIdx*blnc + c], -temp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
)";
|
)";
|
||||||
|
@ -181,7 +181,21 @@ void openclSolverBackend<block_size>::spmv_blocked_w(cl::Buffer vals, cl::Buffer
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res) {
|
void openclSolverBackend<block_size>::stdwell_w(cl::Buffer Cnnzs, cl::Buffer Dnnzs, cl::Buffer Bnnzs, cl::Buffer Ccols, cl::Buffer Bcols, cl::Buffer x, cl::Buffer y, cl::Buffer val_pointers)
|
||||||
|
{
|
||||||
|
const unsigned int work_group_size = 32;
|
||||||
|
const unsigned int total_work_items = num_std_wells * work_group_size;
|
||||||
|
const unsigned int lmem1 = sizeof(double) * work_group_size;
|
||||||
|
const unsigned int lmem2 = sizeof(double) * dim_wells;
|
||||||
|
|
||||||
|
cl::Event event;
|
||||||
|
event = (*add_well_contributions_k)(cl::EnqueueArgs(*queue, cl::NDRange(total_work_items), cl::NDRange(work_group_size)),
|
||||||
|
Cnnzs, Dnnzs, Bnnzs, Ccols, Bcols, x, y, dim_weqs, dim_wells, val_pointers,
|
||||||
|
cl::Local(lmem1), cl::Local(lmem2), cl::Local(lmem2));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <unsigned int block_size>
|
||||||
|
void openclSolverBackend<block_size>::gpu_pbicgstab(BdaResult& res) {
|
||||||
float it;
|
float it;
|
||||||
double rho, rhop, beta, alpha, omega, tmp1, tmp2;
|
double rho, rhop, beta, alpha, omega, tmp1, tmp2;
|
||||||
double norm, norm_0;
|
double norm, norm_0;
|
||||||
@ -208,8 +222,6 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
|||||||
queue->enqueueCopyBuffer(d_r, d_p, 0, 0, sizeof(double) * N, nullptr, &event);
|
queue->enqueueCopyBuffer(d_r, d_p, 0, 0, sizeof(double) * N, nullptr, &event);
|
||||||
event.wait();
|
event.wait();
|
||||||
|
|
||||||
wellContribs.setReordering(toOrder, true);
|
|
||||||
|
|
||||||
norm = norm_w(d_r, d_tmp);
|
norm = norm_w(d_r, d_tmp);
|
||||||
norm_0 = norm;
|
norm_0 = norm;
|
||||||
|
|
||||||
@ -242,7 +254,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
|||||||
|
|
||||||
// apply wellContributions
|
// apply wellContributions
|
||||||
t_well.start();
|
t_well.start();
|
||||||
wellContribs.apply(d_pw, d_v);
|
stdwell_w(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_pw, d_v, d_val_pointers);
|
||||||
t_well.stop();
|
t_well.stop();
|
||||||
|
|
||||||
t_rest.start();
|
t_rest.start();
|
||||||
@ -271,7 +283,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
|||||||
|
|
||||||
// apply wellContributions
|
// apply wellContributions
|
||||||
t_well.start();
|
t_well.start();
|
||||||
wellContribs.apply(d_s, d_t);
|
stdwell_w(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_s, d_t, d_val_pointers);
|
||||||
t_well.stop();
|
t_well.stop();
|
||||||
|
|
||||||
t_rest.start();
|
t_rest.start();
|
||||||
@ -506,6 +518,7 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
|||||||
spmv_blocked_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "spmv_blocked")));
|
spmv_blocked_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "spmv_blocked")));
|
||||||
ILU_apply1_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "ILU_apply1")));
|
ILU_apply1_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "ILU_apply1")));
|
||||||
ILU_apply2_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "ILU_apply2")));
|
ILU_apply2_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg>(cl::Kernel(program, "ILU_apply2")));
|
||||||
|
add_well_contributions_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::Buffer&, cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg>(cl::Kernel(program, "add_well_contributions")));
|
||||||
|
|
||||||
prec->setKernels(ILU_apply1_k.get(), ILU_apply2_k.get());
|
prec->setKernels(ILU_apply1_k.get(), ILU_apply2_k.get());
|
||||||
|
|
||||||
@ -525,12 +538,19 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
|
|||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void openclSolverBackend<block_size>::initialize_wellContribs(WellContributions& wellContribs){
|
void openclSolverBackend<block_size>::initialize_wellContribs(WellContributions& wellContribs){
|
||||||
add_well_contributions_k.reset(new cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::Buffer&, cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg>(cl::Kernel(program, "add_well_contributions")));
|
dim_weqs = wellContribs.dim;
|
||||||
|
dim_wells = wellContribs.dim_wells;
|
||||||
|
num_blocks = wellContribs.num_blocks;
|
||||||
|
num_std_wells = wellContribs.num_std_wells;
|
||||||
|
|
||||||
wellContribs.setOpenCLContext(context.get());
|
if(num_std_wells > 0){
|
||||||
wellContribs.setOpenCLQueue(queue.get());
|
d_Cnnzs = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * num_blocks * dim_weqs * dim_wells);
|
||||||
wellContribs.init();
|
d_Dnnzs = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * num_std_wells * dim_wells * dim_wells);
|
||||||
wellContribs.setKernel(add_well_contributions_k.get());
|
d_Bnnzs = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(double) * num_blocks * dim_weqs * dim_wells);
|
||||||
|
d_Ccols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
|
||||||
|
d_Bcols = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
|
||||||
|
d_val_pointers = cl::Buffer(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * (num_std_wells + 1));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
@ -574,6 +594,29 @@ void openclSolverBackend<block_size>::copy_system_to_gpu() {
|
|||||||
}
|
}
|
||||||
} // end copy_system_to_gpu()
|
} // end copy_system_to_gpu()
|
||||||
|
|
||||||
|
template <unsigned int block_size>
|
||||||
|
void openclSolverBackend<block_size>::copy_wells_to_gpu(WellContributions& wellContribs) {
|
||||||
|
wellContribs.setReordering(toOrder, true);
|
||||||
|
|
||||||
|
if(num_std_wells > 0){
|
||||||
|
Timer t;
|
||||||
|
cl::Event event;
|
||||||
|
|
||||||
|
queue->enqueueWriteBuffer(d_Cnnzs, CL_TRUE, 0, sizeof(double) * num_blocks * dim_weqs * dim_wells, wellContribs.h_Cnnzs_ocl);
|
||||||
|
queue->enqueueWriteBuffer(d_Dnnzs, CL_TRUE, 0, sizeof(double) * num_std_wells * dim_wells * dim_wells, wellContribs.h_Dnnzs_ocl);
|
||||||
|
queue->enqueueWriteBuffer(d_Bnnzs, CL_TRUE, 0, sizeof(double) * num_blocks * dim_weqs * dim_wells, wellContribs.h_Bnnzs_ocl);
|
||||||
|
queue->enqueueWriteBuffer(d_Ccols, CL_TRUE, 0, sizeof(int) * num_blocks, wellContribs.h_Ccols_ocl);
|
||||||
|
queue->enqueueWriteBuffer(d_Bcols, CL_TRUE, 0, sizeof(int) * num_blocks, wellContribs.h_Bcols_ocl);
|
||||||
|
queue->enqueueWriteBuffer(d_val_pointers, CL_TRUE, 0, sizeof(unsigned int) * (num_std_wells + 1), wellContribs.val_pointers, nullptr, &event);
|
||||||
|
event.wait();
|
||||||
|
|
||||||
|
if (verbosity > 2) {
|
||||||
|
std::ostringstream out;
|
||||||
|
out << "openclSolver::copy_wells_to_gpu(): " << t.stop() << " s";
|
||||||
|
OpmLog::info(out.str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// don't copy rowpointers and colindices, they stay the same
|
// don't copy rowpointers and colindices, they stay the same
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
@ -663,12 +706,12 @@ bool openclSolverBackend<block_size>::create_preconditioner() {
|
|||||||
|
|
||||||
|
|
||||||
template <unsigned int block_size>
|
template <unsigned int block_size>
|
||||||
void openclSolverBackend<block_size>::solve_system(WellContributions& wellContribs, BdaResult &res) {
|
void openclSolverBackend<block_size>::solve_system(BdaResult &res) {
|
||||||
Timer t;
|
Timer t;
|
||||||
|
|
||||||
// actually solve
|
// actually solve
|
||||||
try {
|
try {
|
||||||
gpu_pbicgstab(wellContribs, res);
|
gpu_pbicgstab(res);
|
||||||
} catch (const cl::Error& error) {
|
} catch (const cl::Error& error) {
|
||||||
std::ostringstream oss;
|
std::ostringstream oss;
|
||||||
oss << "openclSolverBackend::solve_system error: " << error.what() << "(" << error.err() << ")\n";
|
oss << "openclSolverBackend::solve_system error: " << error.what() << "(" << error.err() << ")\n";
|
||||||
@ -710,6 +753,7 @@ template <unsigned int block_size>
|
|||||||
SolverStatus openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) {
|
SolverStatus openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) {
|
||||||
if (initialized == false) {
|
if (initialized == false) {
|
||||||
initialize(N_, nnz_, dim, vals, rows, cols);
|
initialize(N_, nnz_, dim, vals, rows, cols);
|
||||||
|
initialize_wellContribs(wellContribs);
|
||||||
if (analysis_done == false) {
|
if (analysis_done == false) {
|
||||||
if (!analyse_matrix()) {
|
if (!analyse_matrix()) {
|
||||||
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
|
return SolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
|
||||||
@ -720,6 +764,7 @@ SolverStatus openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int
|
|||||||
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
return SolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||||
}
|
}
|
||||||
copy_system_to_gpu();
|
copy_system_to_gpu();
|
||||||
|
copy_wells_to_gpu(wellContribs);
|
||||||
} else {
|
} else {
|
||||||
update_system(vals, b);
|
update_system(vals, b);
|
||||||
if (!create_preconditioner()) {
|
if (!create_preconditioner()) {
|
||||||
@ -727,8 +772,7 @@ SolverStatus openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int
|
|||||||
}
|
}
|
||||||
update_system_on_gpu();
|
update_system_on_gpu();
|
||||||
}
|
}
|
||||||
initialize_wellContribs(wellContribs);
|
solve_system(res);
|
||||||
solve_system(wellContribs, res);
|
|
||||||
return SolverStatus::BDA_SOLVER_SUCCESS;
|
return SolverStatus::BDA_SOLVER_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -64,12 +64,12 @@ private:
|
|||||||
cl::Buffer d_tmp; // used as tmp GPU buffer for dot() and norm()
|
cl::Buffer d_tmp; // used as tmp GPU buffer for dot() and norm()
|
||||||
double *tmp = nullptr; // used as tmp CPU buffer for dot() and norm()
|
double *tmp = nullptr; // used as tmp CPU buffer for dot() and norm()
|
||||||
|
|
||||||
//unsigned int num_blocks, dim_, dim_wells, num_std_wells;
|
unsigned int num_blocks, dim_weqs, dim_wells, num_std_wells;
|
||||||
//unsigned int *h_val_pointers;
|
unsigned int *h_val_pointers;
|
||||||
//int *h_Ccols, *h_Bcols;
|
int *h_Ccols, *h_Bcols;
|
||||||
//double *h_Cnnzs, *h_Dnnzs, *h_Bnnzs;
|
double *h_Cnnzs, *h_Dnnzs, *h_Bnnzs;
|
||||||
//cl::Buffer d_Cnnzs, d_Dnnzs, d_Bnnzs;
|
cl::Buffer d_Cnnzs, d_Dnnzs, d_Bnnzs;
|
||||||
//cl::Buffer d_Ccols, d_Bcols, d_val_pointers;
|
cl::Buffer d_Ccols, d_Bcols, d_val_pointers;
|
||||||
|
|
||||||
// shared pointers are also passed to other objects
|
// shared pointers are also passed to other objects
|
||||||
cl::Program program;
|
cl::Program program;
|
||||||
@ -84,12 +84,10 @@ private:
|
|||||||
std::shared_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> > ILU_apply2_k;
|
std::shared_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> > ILU_apply2_k;
|
||||||
std::shared_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::Buffer&, cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg> > add_well_contributions_k;
|
std::shared_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::Buffer&, cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg> > add_well_contributions_k;
|
||||||
|
|
||||||
Preconditioner *prec = nullptr; // only supported preconditioner is BILU0
|
Preconditioner *prec = nullptr; // only supported preconditioner is BILU0
|
||||||
int *toOrder = nullptr, *fromOrder = nullptr; // BILU0 reorders rows of the matrix via these mappings
|
int *toOrder = nullptr, *fromOrder = nullptr; // BILU0 reorders rows of the matrix via these mappings
|
||||||
std::unique_ptr<BlockedMatrix<block_size> > mat = nullptr; // original matrix
|
std::unique_ptr<BlockedMatrix<block_size> > mat = nullptr; // original matrix
|
||||||
BlockedMatrix<block_size> *rmat = nullptr; // reordered matrix, used for spmv
|
BlockedMatrix<block_size> *rmat = nullptr; // reordered matrix, used for spmv
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// Divide A by B, and round up: return (int)ceil(A/B)
|
/// Divide A by B, and round up: return (int)ceil(A/B)
|
||||||
/// \param[in] A dividend
|
/// \param[in] A dividend
|
||||||
@ -136,12 +134,12 @@ private:
|
|||||||
/// \param[out] b output vector
|
/// \param[out] b output vector
|
||||||
void spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b);
|
void spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b);
|
||||||
|
|
||||||
//void add_well_contributions_w(cl::Buffer valsC, cl::Buffer valsD, cl::Buffer valsB, cl::Buffer colsC, cl::Buffer colsB, cl::Buffer x, cl::Buffer y, cl::Buffer val_pointers);
|
void stdwell_w(cl::Buffer Cnnzs, cl::Buffer Dnnzs, cl::Buffer Bnnzs, cl::Buffer Ccols, cl::Buffer Bcols, cl::Buffer x, cl::Buffer y, cl::Buffer val_pointers);
|
||||||
|
|
||||||
/// Solve linear system using ilu0-bicgstab
|
/// Solve linear system using ilu0-bicgstab
|
||||||
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
||||||
/// \param[inout] res summary of solver result
|
/// \param[inout] res summary of solver result
|
||||||
void gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res);
|
void gpu_pbicgstab(BdaResult& res);
|
||||||
|
|
||||||
/// Initialize GPU and allocate memory
|
/// Initialize GPU and allocate memory
|
||||||
/// \param[in] N number of nonzeroes, divide by dim*dim to get number of blocks
|
/// \param[in] N number of nonzeroes, divide by dim*dim to get number of blocks
|
||||||
@ -160,6 +158,8 @@ private:
|
|||||||
/// Copy linear system to GPU
|
/// Copy linear system to GPU
|
||||||
void copy_system_to_gpu();
|
void copy_system_to_gpu();
|
||||||
|
|
||||||
|
void copy_wells_to_gpu(WellContributions& wellContribs);
|
||||||
|
|
||||||
/// Reorder the linear system so it corresponds with the coloring
|
/// Reorder the linear system so it corresponds with the coloring
|
||||||
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
|
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
|
||||||
/// \param[in] b input vectors, contains N values
|
/// \param[in] b input vectors, contains N values
|
||||||
@ -179,7 +179,7 @@ private:
|
|||||||
/// Solve linear system
|
/// Solve linear system
|
||||||
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
||||||
/// \param[inout] res summary of solver result
|
/// \param[inout] res summary of solver result
|
||||||
void solve_system(WellContributions& wellContribs, BdaResult &res);
|
void solve_system(BdaResult &res);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user