mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Add more waits for OpenCL functions and use shortened OpenCL kernel params in WellContributions
This commit is contained in:
parent
688d8ff627
commit
0caae966b8
@ -74,7 +74,7 @@ void WellContributions::setOpenCLEnv(cl::Context *context_, cl::CommandQueue *qu
|
||||
this->queue = queue_;
|
||||
}
|
||||
|
||||
void WellContributions::setKernel(kernel_type *kernel_, kernel_type_no_reorder *kernel_no_reorder_){
|
||||
void WellContributions::setKernel(stdwell_apply_kernel_type *kernel_, stdwell_apply_no_reorder_kernel_type *kernel_no_reorder_){
|
||||
this->kernel = kernel_;
|
||||
this->kernel_no_reorder = kernel_no_reorder_;
|
||||
}
|
||||
|
@ -26,6 +26,7 @@
|
||||
|
||||
#if HAVE_OPENCL
|
||||
#include <opm/simulators/linalg/bda/opencl.hpp>
|
||||
#include <opm/simulators/linalg/bda/openclKernels.hpp>
|
||||
#endif
|
||||
|
||||
#include <vector>
|
||||
@ -39,6 +40,9 @@
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
using bda::stdwell_apply_kernel_type;
|
||||
using bda::stdwell_apply_no_reorder_kernel_type;
|
||||
|
||||
/// This class serves to eliminate the need to include the WellContributions into the matrix (with --matrix-add-well-contributions=true) for the cusparseSolver
|
||||
/// If the --matrix-add-well-contributions commandline parameter is true, this class should not be used
|
||||
/// So far, StandardWell and MultisegmentWell are supported
|
||||
@ -92,17 +96,10 @@ private:
|
||||
std::vector<MultisegmentWellContribution*> multisegments;
|
||||
|
||||
#if HAVE_OPENCL
|
||||
typedef cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&,
|
||||
cl::Buffer&, const unsigned int, const unsigned int, cl::Buffer&,
|
||||
cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg> kernel_type;
|
||||
typedef cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&, cl::Buffer&,
|
||||
const unsigned int, const unsigned int, cl::Buffer&,
|
||||
cl::LocalSpaceArg, cl::LocalSpaceArg, cl::LocalSpaceArg> kernel_type_no_reorder;
|
||||
|
||||
cl::Context *context;
|
||||
cl::CommandQueue *queue;
|
||||
kernel_type *kernel;
|
||||
kernel_type_no_reorder *kernel_no_reorder;
|
||||
stdwell_apply_kernel_type *kernel;
|
||||
stdwell_apply_no_reorder_kernel_type *kernel_no_reorder;
|
||||
std::vector<cl::Event> events;
|
||||
|
||||
std::unique_ptr<cl::Buffer> d_Cnnzs_ocl, d_Dnnzs_ocl, d_Bnnzs_ocl;
|
||||
@ -154,7 +151,7 @@ public:
|
||||
#endif
|
||||
|
||||
#if HAVE_OPENCL
|
||||
void setKernel(kernel_type *kernel_, kernel_type_no_reorder *kernel_no_reorder_);
|
||||
void setKernel(stdwell_apply_kernel_type *kernel_, stdwell_apply_no_reorder_kernel_type *kernel_no_reorder_);
|
||||
void setOpenCLEnv(cl::Context *context_, cl::CommandQueue *queue_);
|
||||
|
||||
/// Since the rows of the matrix are reordered, the column indices of the matrix data are incorrect
|
||||
|
@ -45,7 +45,6 @@ template <unsigned int block_size>
|
||||
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_, ILUReorder opencl_ilu_reorder_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_), opencl_ilu_reorder(opencl_ilu_reorder_) {
|
||||
prec = new Preconditioner(opencl_ilu_reorder, verbosity_);
|
||||
|
||||
cl_int err = CL_SUCCESS;
|
||||
std::ostringstream out;
|
||||
try {
|
||||
std::vector<cl::Platform> platforms;
|
||||
@ -331,20 +330,23 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
||||
//applyblockedscaleadd(-1.0, mat, x, r);
|
||||
|
||||
// set initial values
|
||||
cl::Event event;
|
||||
queue->enqueueFillBuffer(d_p, 0, 0, sizeof(double) * N);
|
||||
queue->enqueueFillBuffer(d_v, 0, 0, sizeof(double) * N, nullptr, &event);
|
||||
event.wait();
|
||||
events.resize(5);
|
||||
queue->enqueueFillBuffer(d_p, 0, 0, sizeof(double) * N, nullptr, &events[0]);
|
||||
queue->enqueueFillBuffer(d_v, 0, 0, sizeof(double) * N, nullptr, &events[1]);
|
||||
rho = 1.0;
|
||||
alpha = 1.0;
|
||||
omega = 1.0;
|
||||
|
||||
queue->enqueueCopyBuffer(d_b, d_r, 0, 0, sizeof(double) * N, nullptr, &event);
|
||||
event.wait();
|
||||
queue->enqueueCopyBuffer(d_r, d_rw, 0, 0, sizeof(double) * N, nullptr, &event);
|
||||
event.wait();
|
||||
queue->enqueueCopyBuffer(d_r, d_p, 0, 0, sizeof(double) * N, nullptr, &event);
|
||||
event.wait();
|
||||
queue->enqueueCopyBuffer(d_b, d_r, 0, 0, sizeof(double) * N, nullptr, &events[2]);
|
||||
queue->enqueueCopyBuffer(d_r, d_rw, 0, 0, sizeof(double) * N, nullptr, &events[3]);
|
||||
queue->enqueueCopyBuffer(d_r, d_p, 0, 0, sizeof(double) * N, nullptr, &events[4]);
|
||||
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
if (err != CL_SUCCESS) {
|
||||
// enqueue[Fill|Copy]Buffer is C and does not throw exceptions like C++ OpenCL
|
||||
OPM_THROW(std::logic_error, "openclSolverBackend OpenCL enqueue[Fill|Copy]Buffer error");
|
||||
}
|
||||
|
||||
norm = norm_w(d_r, d_tmp);
|
||||
norm_0 = norm;
|
||||
@ -591,7 +593,7 @@ void openclSolverBackend<block_size>::finalize() {
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::copy_system_to_gpu() {
|
||||
Timer t;
|
||||
cl::Event event;
|
||||
events.resize(5);
|
||||
|
||||
#if COPY_ROW_BY_ROW
|
||||
int sum = 0;
|
||||
@ -600,19 +602,25 @@ void openclSolverBackend<block_size>::copy_system_to_gpu() {
|
||||
memcpy(vals_contiguous + sum, reinterpret_cast<double*>(rmat->nnzValues) + sum, size_row * sizeof(double) * block_size * block_size);
|
||||
sum += size_row * block_size * block_size;
|
||||
}
|
||||
queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, vals_contiguous);
|
||||
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, vals_contiguous, nullptr, &events[0]);
|
||||
#else
|
||||
queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, rmat->nnzValues);
|
||||
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, rmat->nnzValues, nullptr, &events[0]);
|
||||
#endif
|
||||
|
||||
queue->enqueueWriteBuffer(d_Acols, CL_TRUE, 0, sizeof(int) * nnzb, rmat->colIndices);
|
||||
queue->enqueueWriteBuffer(d_Arows, CL_TRUE, 0, sizeof(int) * (Nb + 1), rmat->rowPointers);
|
||||
queue->enqueueWriteBuffer(d_b, CL_TRUE, 0, sizeof(double) * N, rb);
|
||||
err |= queue->enqueueWriteBuffer(d_Acols, CL_TRUE, 0, sizeof(int) * nnzb, rmat->colIndices, nullptr, &events[1]);
|
||||
err |= queue->enqueueWriteBuffer(d_Arows, CL_TRUE, 0, sizeof(int) * (Nb + 1), rmat->rowPointers, nullptr, &events[2]);
|
||||
err |= queue->enqueueWriteBuffer(d_b, CL_TRUE, 0, sizeof(double) * N, rb, nullptr, &events[3]);
|
||||
err |= queue->enqueueFillBuffer(d_x, 0, 0, sizeof(double) * N, nullptr, &events[4]);
|
||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||
queue->enqueueWriteBuffer(d_toOrder, CL_TRUE, 0, sizeof(int) * Nb, toOrder);
|
||||
events.resize(6);
|
||||
queue->enqueueWriteBuffer(d_toOrder, CL_TRUE, 0, sizeof(int) * Nb, toOrder, nullptr, &events[5]);
|
||||
}
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
if (err != CL_SUCCESS) {
|
||||
// enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL
|
||||
OPM_THROW(std::logic_error, "openclSolverBackend OpenCL enqueueWriteBuffer error");
|
||||
}
|
||||
queue->enqueueFillBuffer(d_x, 0, 0, sizeof(double) * N, nullptr, &event);
|
||||
event.wait();
|
||||
|
||||
if (verbosity > 2) {
|
||||
std::ostringstream out;
|
||||
@ -625,7 +633,7 @@ void openclSolverBackend<block_size>::copy_system_to_gpu() {
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::update_system_on_gpu() {
|
||||
Timer t;
|
||||
cl::Event event;
|
||||
events.resize(3);
|
||||
|
||||
#if COPY_ROW_BY_ROW
|
||||
int sum = 0;
|
||||
@ -634,14 +642,19 @@ void openclSolverBackend<block_size>::update_system_on_gpu() {
|
||||
memcpy(vals_contiguous + sum, reinterpret_cast<double*>(rmat->nnzValues) + sum, size_row * sizeof(double) * block_size * block_size);
|
||||
sum += size_row * block_size * block_size;
|
||||
}
|
||||
queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, vals_contiguous);
|
||||
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, vals_contiguous, nullptr, &events[0]);
|
||||
#else
|
||||
queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, rmat->nnzValues);
|
||||
err = queue->enqueueWriteBuffer(d_Avals, CL_TRUE, 0, sizeof(double) * nnz, rmat->nnzValues, nullptr, &events[0]);
|
||||
#endif
|
||||
|
||||
queue->enqueueWriteBuffer(d_b, CL_TRUE, 0, sizeof(double) * N, rb);
|
||||
queue->enqueueFillBuffer(d_x, 0, 0, sizeof(double) * N, nullptr, &event);
|
||||
event.wait();
|
||||
err |= queue->enqueueWriteBuffer(d_b, CL_TRUE, 0, sizeof(double) * N, rb, nullptr, &events[1]);
|
||||
err |= queue->enqueueFillBuffer(d_x, 0, 0, sizeof(double) * N, nullptr, &events[2]);
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
if (err != CL_SUCCESS) {
|
||||
// enqueueWriteBuffer is C and does not throw exceptions like C++ OpenCL
|
||||
OPM_THROW(std::logic_error, "openclSolverBackend OpenCL enqueueWriteBuffer error");
|
||||
}
|
||||
|
||||
if (verbosity > 2) {
|
||||
std::ostringstream out;
|
||||
|
@ -82,6 +82,8 @@ private:
|
||||
std::unique_ptr<BlockedMatrix<block_size> > mat = nullptr; // original matrix
|
||||
BlockedMatrix<block_size> *rmat = nullptr; // reordered matrix (or original if no reordering), used for spmv
|
||||
ILUReorder opencl_ilu_reorder; // reordering strategy
|
||||
std::vector<cl::Event> events;
|
||||
cl_int err;
|
||||
|
||||
/// Divide A by B, and round up: return (int)ceil(A/B)
|
||||
/// \param[in] A dividend
|
||||
|
Loading…
Reference in New Issue
Block a user