mirror of
https://github.com/OPM/opm-simulators.git
synced 2024-11-25 10:40:21 -06:00
changed: refactor BDA well contributions
split in API specific classes for Cuda/OpenCL this to 1) it's cleaner 2) it avoids pulling in openCL code in cuda classes which leads to clashes between nvidia headers and opencl.hpp there is still too much API specific things in interface between the bda components to work through a virtual interface so we still have to cast to the relevant implementation in various places.
This commit is contained in:
parent
352d31a1e9
commit
e25caba8ed
@ -49,6 +49,7 @@ list (APPEND MAIN_SOURCE_FILES
|
||||
opm/simulators/linalg/FlexibleSolver6.cpp
|
||||
opm/simulators/linalg/PropertyTree.cpp
|
||||
opm/simulators/linalg/setupPropertyTree.cpp
|
||||
opm/simulators/linalg/bda/WellContributions.cpp
|
||||
opm/simulators/utils/PartiallySupportedFlowKeywords.cpp
|
||||
opm/simulators/utils/readDeck.cpp
|
||||
opm/simulators/utils/UnsupportedFlowKeywords.cpp
|
||||
@ -93,8 +94,7 @@ list (APPEND MAIN_SOURCE_FILES
|
||||
|
||||
if(CUDA_FOUND)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cusparseSolverBackend.cu)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cu)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cuWellContributions.cu)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/MultisegmentWellContribution.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp)
|
||||
endif()
|
||||
@ -106,6 +106,7 @@ if(OPENCL_FOUND)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/openclKernels.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/openclSolverBackend.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/openclWellContributions.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/MultisegmentWellContribution.cpp)
|
||||
@ -265,6 +266,7 @@ list (APPEND PUBLIC_HEADER_FILES
|
||||
opm/simulators/linalg/bda/opencl.hpp
|
||||
opm/simulators/linalg/bda/openclKernels.hpp
|
||||
opm/simulators/linalg/bda/openclSolverBackend.hpp
|
||||
opm/simulators/linalg/bda/openclWellContributions.hpp
|
||||
opm/simulators/linalg/bda/MultisegmentWellContribution.hpp
|
||||
opm/simulators/linalg/bda/WellContributions.hpp
|
||||
opm/simulators/linalg/amgcpr.hh
|
||||
|
@ -262,18 +262,18 @@ namespace Opm
|
||||
bool use_fpga = bdaBridge->getUseFpga();
|
||||
if (use_gpu || use_fpga) {
|
||||
const std::string accelerator_mode = EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode);
|
||||
WellContributions wellContribs(accelerator_mode, useWellConn_);
|
||||
bdaBridge->initWellContributions(wellContribs);
|
||||
auto wellContribs = WellContributions::create(accelerator_mode, useWellConn_);
|
||||
bdaBridge->initWellContributions(*wellContribs);
|
||||
|
||||
// the WellContributions can only be applied separately with CUDA or OpenCL, not with an FPGA or amgcl
|
||||
#if HAVE_CUDA || HAVE_OPENCL
|
||||
if (!useWellConn_) {
|
||||
simulator_.problem().wellModel().getWellContributions(wellContribs);
|
||||
simulator_.problem().wellModel().getWellContributions(*wellContribs);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Const_cast needed since the CUDA stuff overwrites values for better matrix condition..
|
||||
bdaBridge->solve_system(const_cast<Matrix*>(&getMatrix()), *rhs_, wellContribs, result);
|
||||
bdaBridge->solve_system(const_cast<Matrix*>(&getMatrix()), *rhs_, *wellContribs, result);
|
||||
if (result.converged) {
|
||||
// get result vector x from non-Dune backend, iff solve was successful
|
||||
bdaBridge->get_result(x);
|
||||
|
@ -35,6 +35,7 @@
|
||||
|
||||
#if HAVE_OPENCL
|
||||
#include <opm/simulators/linalg/bda/openclSolverBackend.hpp>
|
||||
#include <opm/simulators/linalg/bda/openclWellContributions.hpp>
|
||||
#endif
|
||||
|
||||
#if HAVE_FPGA
|
||||
@ -273,7 +274,7 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::initWellContributions([[
|
||||
if(accelerator_mode.compare("opencl") == 0){
|
||||
#if HAVE_OPENCL
|
||||
const auto openclBackend = static_cast<const Opm::Accelerator::openclSolverBackend<block_size>*>(backend.get());
|
||||
wellContribs.setOpenCLEnv(openclBackend->context.get(), openclBackend->queue.get());
|
||||
static_cast<WellContributionsOCL&>(wellContribs).setOpenCLEnv(openclBackend->context.get(), openclBackend->queue.get());
|
||||
#else
|
||||
OPM_THROW(std::logic_error, "Error openclSolver was chosen, but OpenCL was not found by CMake");
|
||||
#endif
|
||||
|
@ -25,178 +25,68 @@
|
||||
|
||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
#include <opm/simulators/linalg/bda/openclWellContributions.hpp>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include <opm/simulators/linalg/bda/cuWellContributions.hpp>
|
||||
#endif
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
#if HAVE_OPENCL
|
||||
using Opm::Accelerator::OpenclKernels;
|
||||
#endif
|
||||
|
||||
WellContributions::WellContributions(std::string accelerator_mode, bool useWellConn){
|
||||
std::unique_ptr<WellContributions>
|
||||
WellContributions::create(const std::string& accelerator_mode, bool useWellConn)
|
||||
{
|
||||
if(accelerator_mode.compare("cusparse") == 0){
|
||||
cuda_gpu = true;
|
||||
#if HAVE_CUDA
|
||||
return std::make_unique<WellContributionsCuda>();
|
||||
#else
|
||||
OPM_THROW(std::runtime_error, "Cannot initialize well contributions: CUDA is not enabled");
|
||||
#endif
|
||||
}
|
||||
else if(accelerator_mode.compare("opencl") == 0){
|
||||
opencl_gpu = true;
|
||||
#if HAVE_OPENCL
|
||||
return std::make_unique<WellContributionsOCL>();
|
||||
#else
|
||||
OPM_THROW(std::runtime_error, "Cannot initialize well contributions: OpenCL is not enabled");
|
||||
#endif
|
||||
}
|
||||
else if(accelerator_mode.compare("fpga") == 0){
|
||||
// unused for FPGA, but must be defined to avoid error
|
||||
return std::make_unique<WellContributions>();
|
||||
}
|
||||
else if(accelerator_mode.compare("amgcl") == 0){
|
||||
if (!useWellConn) {
|
||||
OPM_THROW(std::logic_error, "Error amgcl requires --matrix-add-well-contributions=true");
|
||||
}
|
||||
return std::make_unique<WellContributions>();
|
||||
}
|
||||
else{
|
||||
OPM_THROW(std::logic_error, "Invalid accelerator mode");
|
||||
}
|
||||
}
|
||||
|
||||
WellContributions::~WellContributions()
|
||||
void WellContributions::addMatrix([[maybe_unused]] MatrixType type,
|
||||
[[maybe_unused]] int* colIndices,
|
||||
[[maybe_unused]] double* values,
|
||||
[[maybe_unused]] unsigned int val_size)
|
||||
{
|
||||
multisegments.clear();
|
||||
|
||||
#if HAVE_CUDA
|
||||
if(cuda_gpu){
|
||||
freeCudaMemory(); // should come before 'delete[] h_x'
|
||||
}
|
||||
#if !HAVE_CUDA && !HAVE_OPENCL
|
||||
OPM_THROW(std::logic_error, "Error cannot add StandardWell matrix on GPU because neither CUDA nor OpenCL were found by cmake");
|
||||
#endif
|
||||
|
||||
#if HAVE_OPENCL
|
||||
if(opencl_gpu){
|
||||
if(num_ms_wells > 0){
|
||||
delete[] h_x;
|
||||
delete[] h_y;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if HAVE_OPENCL
|
||||
void WellContributions::setOpenCLEnv(cl::Context *context_, cl::CommandQueue *queue_){
|
||||
this->context = context_;
|
||||
this->queue = queue_;
|
||||
}
|
||||
|
||||
void WellContributions::setKernel(Opm::Accelerator::stdwell_apply_kernel_type *kernel_,
|
||||
Opm::Accelerator::stdwell_apply_no_reorder_kernel_type *kernel_no_reorder_){
|
||||
this->kernel = kernel_;
|
||||
this->kernel_no_reorder = kernel_no_reorder_;
|
||||
}
|
||||
|
||||
void WellContributions::setReordering(int *h_toOrder_, bool reorder_)
|
||||
{
|
||||
this->h_toOrder = h_toOrder_;
|
||||
this->reorder = reorder_;
|
||||
}
|
||||
|
||||
void WellContributions::apply_stdwells(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder){
|
||||
if (reorder) {
|
||||
OpenclKernels::apply_stdwells_reorder(*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl,
|
||||
d_x, d_y, d_toOrder, dim, dim_wells, *d_val_pointers_ocl, num_std_wells);
|
||||
} else {
|
||||
OpenclKernels::apply_stdwells_no_reorder(*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl,
|
||||
d_x, d_y, dim, dim_wells, *d_val_pointers_ocl, num_std_wells);
|
||||
}
|
||||
}
|
||||
|
||||
void WellContributions::apply_mswells(cl::Buffer d_x, cl::Buffer d_y){
|
||||
if(h_x == nullptr){
|
||||
h_x = new double[N];
|
||||
h_y = new double[N];
|
||||
}
|
||||
|
||||
events.resize(2);
|
||||
queue->enqueueReadBuffer(d_x, CL_FALSE, 0, sizeof(double) * N, h_x, nullptr, &events[0]);
|
||||
queue->enqueueReadBuffer(d_y, CL_FALSE, 0, sizeof(double) * N, h_y, nullptr, &events[1]);
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
|
||||
// actually apply MultisegmentWells
|
||||
for (auto& well : multisegments) {
|
||||
well->setReordering(h_toOrder, reorder);
|
||||
well->apply(h_x, h_y);
|
||||
}
|
||||
|
||||
// copy vector y from CPU to GPU
|
||||
events.resize(1);
|
||||
queue->enqueueWriteBuffer(d_y, CL_FALSE, 0, sizeof(double) * N, h_y, nullptr, &events[0]);
|
||||
events[0].wait();
|
||||
events.clear();
|
||||
}
|
||||
|
||||
void WellContributions::apply(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder){
|
||||
if(num_std_wells > 0){
|
||||
apply_stdwells(d_x, d_y, d_toOrder);
|
||||
}
|
||||
|
||||
if(num_ms_wells > 0){
|
||||
apply_mswells(d_x, d_y);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void WellContributions::addMatrix([[maybe_unused]] MatrixType type, [[maybe_unused]] int *colIndices, [[maybe_unused]] double *values, [[maybe_unused]] unsigned int val_size)
|
||||
{
|
||||
if (!allocated) {
|
||||
OPM_THROW(std::logic_error, "Error cannot add wellcontribution before allocating memory in WellContributions");
|
||||
}
|
||||
|
||||
#if HAVE_CUDA
|
||||
if(cuda_gpu){
|
||||
addMatrixGpu(type, colIndices, values, val_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_OPENCL
|
||||
if(opencl_gpu){
|
||||
switch (type) {
|
||||
case MatrixType::C:
|
||||
events.resize(2);
|
||||
queue->enqueueWriteBuffer(*d_Cnnzs_ocl, CL_FALSE, sizeof(double) * num_blocks_so_far * dim * dim_wells, sizeof(double) * val_size * dim * dim_wells, values, nullptr, &events[0]);
|
||||
queue->enqueueWriteBuffer(*d_Ccols_ocl, CL_FALSE, sizeof(int) * num_blocks_so_far, sizeof(int) * val_size, colIndices, nullptr, &events[1]);
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
break;
|
||||
|
||||
case MatrixType::D:
|
||||
events.resize(1);
|
||||
queue->enqueueWriteBuffer(*d_Dnnzs_ocl, CL_FALSE, sizeof(double) * num_std_wells_so_far * dim_wells * dim_wells, sizeof(double) * dim_wells * dim_wells, values, nullptr, &events[0]);
|
||||
events[0].wait();
|
||||
events.clear();
|
||||
break;
|
||||
|
||||
case MatrixType::B:
|
||||
events.resize(2);
|
||||
queue->enqueueWriteBuffer(*d_Bnnzs_ocl, CL_FALSE, sizeof(double) * num_blocks_so_far * dim * dim_wells, sizeof(double) * val_size * dim * dim_wells, values, nullptr, &events[0]);
|
||||
queue->enqueueWriteBuffer(*d_Bcols_ocl, CL_FALSE, sizeof(int) * num_blocks_so_far, sizeof(int) * val_size, colIndices, nullptr, &events[1]);
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
|
||||
val_pointers[num_std_wells_so_far] = num_blocks_so_far;
|
||||
if (num_std_wells_so_far == num_std_wells - 1) {
|
||||
val_pointers[num_std_wells] = num_blocks;
|
||||
events.resize(1);
|
||||
queue->enqueueWriteBuffer(*d_val_pointers_ocl, CL_FALSE, 0, sizeof(unsigned int) * (num_std_wells + 1), val_pointers.data(), nullptr, &events[0]);
|
||||
events[0].wait();
|
||||
events.clear();
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
OPM_THROW(std::logic_error, "Error unsupported matrix ID for WellContributions::addMatrix()");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
this->APIaddMatrix(type, colIndices, values, val_size);
|
||||
|
||||
if(MatrixType::B == type) {
|
||||
num_blocks_so_far += val_size;
|
||||
num_std_wells_so_far++;
|
||||
}
|
||||
|
||||
#if !HAVE_CUDA && !HAVE_OPENCL
|
||||
OPM_THROW(std::logic_error, "Error cannot add StandardWell matrix on GPU because neither CUDA nor OpenCL were found by cmake");
|
||||
#endif
|
||||
}
|
||||
|
||||
void WellContributions::setBlockSize(unsigned int dim_, unsigned int dim_wells_)
|
||||
@ -225,22 +115,7 @@ void WellContributions::alloc()
|
||||
if (num_std_wells > 0) {
|
||||
val_pointers.resize(num_std_wells+1);
|
||||
|
||||
#if HAVE_CUDA
|
||||
if(cuda_gpu){
|
||||
allocStandardWells();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_OPENCL
|
||||
if(opencl_gpu){
|
||||
d_Cnnzs_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
d_Dnnzs_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * num_std_wells * dim_wells * dim_wells);
|
||||
d_Bnnzs_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
d_Ccols_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
|
||||
d_Bcols_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
|
||||
d_val_pointers_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * (num_std_wells + 1));
|
||||
}
|
||||
#endif
|
||||
this->APIalloc();
|
||||
allocated = true;
|
||||
}
|
||||
}
|
||||
|
@ -20,15 +20,6 @@
|
||||
#ifndef WELLCONTRIBUTIONS_HEADER_INCLUDED
|
||||
#define WELLCONTRIBUTIONS_HEADER_INCLUDED
|
||||
|
||||
#if HAVE_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#endif
|
||||
|
||||
#if HAVE_OPENCL
|
||||
#include <opm/simulators/linalg/bda/opencl.hpp>
|
||||
#include <opm/simulators/linalg/bda/openclKernels.hpp>
|
||||
#endif
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
@ -61,6 +52,8 @@ namespace Opm
|
||||
class WellContributions
|
||||
{
|
||||
public:
|
||||
static std::unique_ptr<WellContributions> create(const std::string& accelerator_mode, bool useWellConn);
|
||||
|
||||
#if DUNE_VERSION_NEWER(DUNE_ISTL, 2, 7)
|
||||
using UMFPackIndex = SuiteSparse_long;
|
||||
#else
|
||||
@ -73,9 +66,7 @@ public:
|
||||
B
|
||||
};
|
||||
|
||||
private:
|
||||
bool opencl_gpu = false;
|
||||
bool cuda_gpu = false;
|
||||
protected:
|
||||
bool allocated = false;
|
||||
|
||||
unsigned int N; // number of rows (not blockrows) in vectors x and y
|
||||
@ -88,80 +79,9 @@ private:
|
||||
unsigned int num_std_wells_so_far = 0; // keep track of where next data is written
|
||||
std::vector<unsigned int> val_pointers; // val_pointers[wellID] == index of first block for this well in Ccols and Bcols
|
||||
|
||||
double *h_x = nullptr;
|
||||
double *h_y = nullptr;
|
||||
std::vector<std::unique_ptr<MultisegmentWellContribution>> multisegments;
|
||||
|
||||
#if HAVE_OPENCL
|
||||
cl::Context *context;
|
||||
cl::CommandQueue *queue;
|
||||
Opm::Accelerator::stdwell_apply_kernel_type *kernel;
|
||||
Opm::Accelerator::stdwell_apply_no_reorder_kernel_type *kernel_no_reorder;
|
||||
std::vector<cl::Event> events;
|
||||
|
||||
std::unique_ptr<cl::Buffer> d_Cnnzs_ocl, d_Dnnzs_ocl, d_Bnnzs_ocl;
|
||||
std::unique_ptr<cl::Buffer> d_Ccols_ocl, d_Bcols_ocl;
|
||||
std::unique_ptr<cl::Buffer> d_val_pointers_ocl;
|
||||
|
||||
bool reorder = false;
|
||||
int *h_toOrder = nullptr;
|
||||
#endif
|
||||
|
||||
#if HAVE_CUDA
|
||||
cudaStream_t stream;
|
||||
|
||||
// data for StandardWells, could remain nullptrs if not used
|
||||
double *d_Cnnzs = nullptr;
|
||||
double *d_Dnnzs = nullptr;
|
||||
double *d_Bnnzs = nullptr;
|
||||
int *d_Ccols = nullptr;
|
||||
int *d_Bcols = nullptr;
|
||||
double *d_z1 = nullptr;
|
||||
double *d_z2 = nullptr;
|
||||
unsigned int *d_val_pointers = nullptr;
|
||||
|
||||
/// Allocate GPU memory for StandardWells
|
||||
void allocStandardWells();
|
||||
|
||||
/// Free GPU memory allocated with cuda.
|
||||
void freeCudaMemory();
|
||||
|
||||
/// Store a matrix in this object, in blocked csr format, can only be called after alloc() is called
|
||||
/// \param[in] type indicate if C, D or B is sent
|
||||
/// \param[in] colIndices columnindices of blocks in C or B, ignored for D
|
||||
/// \param[in] values array of nonzeroes
|
||||
/// \param[in] val_size number of blocks in C or B, ignored for D
|
||||
void addMatrixGpu(MatrixType type, int *colIndices, double *values, unsigned int val_size);
|
||||
#endif
|
||||
|
||||
public:
|
||||
#if HAVE_CUDA
|
||||
/// Set a cudaStream to be used
|
||||
/// \param[in] stream the cudaStream that is used to launch the kernel in
|
||||
void setCudaStream(cudaStream_t stream);
|
||||
|
||||
/// Apply all Wells in this object
|
||||
/// performs y -= (C^T * (D^-1 * (B*x))) for all Wells
|
||||
/// \param[in] d_x vector x, must be on GPU
|
||||
/// \param[inout] d_y vector y, must be on GPU
|
||||
void apply(double *d_x, double *d_y);
|
||||
#endif
|
||||
|
||||
#if HAVE_OPENCL
|
||||
void setKernel(Opm::Accelerator::stdwell_apply_kernel_type *kernel_,
|
||||
Opm::Accelerator::stdwell_apply_no_reorder_kernel_type *kernel_no_reorder_);
|
||||
void setOpenCLEnv(cl::Context *context_, cl::CommandQueue *queue_);
|
||||
|
||||
/// Since the rows of the matrix are reordered, the columnindices of the matrixdata is incorrect
|
||||
/// Those indices need to be mapped via toOrder
|
||||
/// \param[in] toOrder array with mappings
|
||||
/// \param[in] reorder whether reordering is actually used or not
|
||||
void setReordering(int *toOrder, bool reorder);
|
||||
void apply_stdwells(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder);
|
||||
void apply_mswells(cl::Buffer d_x, cl::Buffer d_y);
|
||||
void apply(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder);
|
||||
#endif
|
||||
|
||||
unsigned int getNumWells(){
|
||||
return num_std_wells + num_ms_wells;
|
||||
}
|
||||
@ -173,13 +93,8 @@ public:
|
||||
/// Allocate memory for the StandardWells
|
||||
void alloc();
|
||||
|
||||
/// Create a new WellContributions
|
||||
/// \param[in] accelerator_mode string indicating which solverBackend is used
|
||||
/// \param[in] useWellConn true iff wellcontributions are added to the matrix
|
||||
WellContributions(std::string accelerator_mode, bool useWellConn);
|
||||
|
||||
/// Destroy a WellContributions, and free memory
|
||||
~WellContributions();
|
||||
/// Empty destructor.
|
||||
virtual ~WellContributions() = default;
|
||||
|
||||
/// Indicate how large the blocks of the StandardWell (C and B) are
|
||||
/// \param[in] dim number of columns
|
||||
@ -212,6 +127,12 @@ public:
|
||||
unsigned int DnumBlocks, double *Dvalues,
|
||||
UMFPackIndex *DcolPointers, UMFPackIndex *DrowIndices,
|
||||
std::vector<double> &Cvalues);
|
||||
protected:
|
||||
//! \brief API specific allocation.
|
||||
virtual void APIalloc() {}
|
||||
|
||||
/// Api specific upload of matrix.
|
||||
virtual void APIaddMatrix(MatrixType, int*, double*, unsigned int) {}
|
||||
};
|
||||
} //namespace Opm
|
||||
|
||||
|
@ -22,12 +22,13 @@
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include "opm/simulators/linalg/bda/cuWellContributions.hpp"
|
||||
|
||||
#include "opm/simulators/linalg/bda/cuda_header.hpp"
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include "opm/simulators/linalg/bda/WellContributions.hpp"
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
@ -125,18 +126,8 @@ __global__ void apply_well_contributions(
|
||||
|
||||
}
|
||||
|
||||
void WellContributions::allocStandardWells()
|
||||
WellContributionsCuda::~WellContributionsCuda()
|
||||
{
|
||||
cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_std_wells * dim_wells * dim_wells);
|
||||
cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
cudaMalloc((void**)&d_Ccols, sizeof(int) * num_blocks);
|
||||
cudaMalloc((void**)&d_Bcols, sizeof(int) * num_blocks);
|
||||
cudaMalloc((void**)&d_val_pointers, sizeof(unsigned int) * (num_std_wells + 1));
|
||||
cudaCheckLastError("apply_gpu malloc failed");
|
||||
}
|
||||
|
||||
void WellContributions::freeCudaMemory() {
|
||||
// delete data for StandardWell
|
||||
if (num_std_wells > 0) {
|
||||
cudaFree(d_Cnnzs);
|
||||
@ -154,10 +145,20 @@ void WellContributions::freeCudaMemory() {
|
||||
}
|
||||
}
|
||||
|
||||
void WellContributionsCuda::APIalloc()
|
||||
{
|
||||
cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_std_wells * dim_wells * dim_wells);
|
||||
cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
cudaMalloc((void**)&d_Ccols, sizeof(int) * num_blocks);
|
||||
cudaMalloc((void**)&d_Bcols, sizeof(int) * num_blocks);
|
||||
cudaMalloc((void**)&d_val_pointers, sizeof(unsigned int) * (num_std_wells + 1));
|
||||
cudaCheckLastError("apply_gpu malloc failed");
|
||||
}
|
||||
|
||||
// Apply the WellContributions, similar to StandardWell::apply()
|
||||
// y -= (C^T *(D^-1*( B*x)))
|
||||
void WellContributions::apply(double *d_x, double *d_y)
|
||||
void WellContributionsCuda::apply(double *d_x, double *d_y)
|
||||
{
|
||||
// apply MultisegmentWells
|
||||
|
||||
@ -194,7 +195,7 @@ void WellContributions::apply(double *d_x, double *d_y)
|
||||
}
|
||||
|
||||
|
||||
void WellContributions::addMatrixGpu(MatrixType type, int *colIndices, double *values, unsigned int val_size)
|
||||
void WellContributionsCuda::APIaddMatrix(MatrixType type, int *colIndices, double *values, unsigned int val_size)
|
||||
{
|
||||
switch (type) {
|
||||
case MatrixType::C:
|
||||
@ -219,7 +220,7 @@ void WellContributions::addMatrixGpu(MatrixType type, int *colIndices, double *v
|
||||
cudaCheckLastError("WellContributions::addMatrix() failed");
|
||||
}
|
||||
|
||||
void WellContributions::setCudaStream(cudaStream_t stream_)
|
||||
void WellContributionsCuda::setCudaStream(cudaStream_t stream_)
|
||||
{
|
||||
this->stream = stream_;
|
||||
for (auto& well : multisegments) {
|
78
opm/simulators/linalg/bda/cuWellContributions.hpp
Normal file
78
opm/simulators/linalg/bda/cuWellContributions.hpp
Normal file
@ -0,0 +1,78 @@
|
||||
/*
|
||||
Copyright 2020 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef WELLCONTRIBUTIONS_CUDA_HEADER_INCLUDED
|
||||
#define WELLCONTRIBUTIONS_CUDA_HEADER_INCLUDED
|
||||
|
||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
class WellContributionsCuda : public WellContributions
|
||||
{
|
||||
public:
|
||||
~WellContributionsCuda() override;
|
||||
|
||||
/// Set a cudaStream to be used
|
||||
/// \param[in] stream the cudaStream that is used to launch the kernel in
|
||||
void setCudaStream(cudaStream_t stream);
|
||||
|
||||
/// Apply all Wells in this object
|
||||
/// performs y -= (C^T * (D^-1 * (B*x))) for all Wells
|
||||
/// \param[in] d_x vector x, must be on GPU
|
||||
/// \param[inout] d_y vector y, must be on GPU
|
||||
void apply(double *d_x, double *d_y);
|
||||
|
||||
protected:
|
||||
/// Allocate memory for the StandardWells
|
||||
void APIalloc() override;
|
||||
|
||||
/// Store a matrix in this object, in blocked csr format, can only be called after alloc() is called
|
||||
/// \param[in] type indicate if C, D or B is sent
|
||||
/// \param[in] colIndices columnindices of blocks in C or B, ignored for D
|
||||
/// \param[in] values array of nonzeroes
|
||||
/// \param[in] val_size number of blocks in C or B, ignored for D
|
||||
void APIaddMatrix(MatrixType type, int *colIndices, double *values, unsigned int val_size) override;
|
||||
|
||||
cudaStream_t stream;
|
||||
|
||||
// data for StandardWells, could remain nullptrs if not used
|
||||
double *d_Cnnzs = nullptr;
|
||||
double *d_Dnnzs = nullptr;
|
||||
double *d_Bnnzs = nullptr;
|
||||
int *d_Ccols = nullptr;
|
||||
int *d_Bcols = nullptr;
|
||||
double *d_z1 = nullptr;
|
||||
double *d_z2 = nullptr;
|
||||
unsigned int *d_val_pointers = nullptr;
|
||||
double* h_x = nullptr;
|
||||
double* h_y = nullptr;
|
||||
|
||||
};
|
||||
|
||||
} //namespace Opm
|
||||
|
||||
#endif
|
@ -26,6 +26,7 @@
|
||||
#include <dune/common/timer.hh>
|
||||
|
||||
#include <opm/simulators/linalg/bda/cusparseSolverBackend.hpp>
|
||||
#include <opm/simulators/linalg/bda/cuWellContributions.hpp>
|
||||
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
||||
#include <opm/simulators/linalg/bda/cuda_header.hpp>
|
||||
|
||||
@ -72,7 +73,7 @@ void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellCon
|
||||
float it;
|
||||
|
||||
if (wellContribs.getNumWells() > 0) {
|
||||
wellContribs.setCudaStream(stream);
|
||||
static_cast<WellContributionsCuda&>(wellContribs).setCudaStream(stream);
|
||||
}
|
||||
|
||||
cusparseDbsrmv(cusparseHandle, order, operation, Nb, Nb, nnzb, &one, descr_M, d_bVals, d_bRows, d_bCols, block_size, d_x, &zero, d_r);
|
||||
@ -116,7 +117,7 @@ void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellCon
|
||||
|
||||
// apply wellContributions
|
||||
if (wellContribs.getNumWells() > 0) {
|
||||
wellContribs.apply(d_pw, d_v);
|
||||
static_cast<WellContributionsCuda&>(wellContribs).apply(d_pw, d_v);
|
||||
}
|
||||
|
||||
cublasDdot(cublasHandle, n, d_rw, 1, d_v, 1, &tmp1);
|
||||
@ -147,7 +148,7 @@ void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellCon
|
||||
|
||||
// apply wellContributions
|
||||
if (wellContribs.getNumWells() > 0) {
|
||||
wellContribs.apply(d_s, d_t);
|
||||
static_cast<WellContributionsCuda&>(wellContribs).apply(d_s, d_t);
|
||||
}
|
||||
|
||||
cublasDdot(cublasHandle, n, d_t, 1, d_r, 1, &tmp1);
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include <dune/common/timer.hh>
|
||||
|
||||
#include <opm/simulators/linalg/bda/openclSolverBackend.hpp>
|
||||
#include <opm/simulators/linalg/bda/openclWellContributions.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
||||
#include <opm/simulators/linalg/bda/Reorder.hpp>
|
||||
@ -262,7 +263,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
||||
// apply wellContributions
|
||||
t_well.start();
|
||||
if(wellContribs.getNumWells() > 0){
|
||||
wellContribs.apply(d_pw, d_v, d_toOrder);
|
||||
static_cast<WellContributionsOCL&>(wellContribs).apply(d_pw, d_v, d_toOrder);
|
||||
}
|
||||
t_well.stop();
|
||||
|
||||
@ -293,7 +294,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
||||
// apply wellContributions
|
||||
t_well.start();
|
||||
if(wellContribs.getNumWells() > 0){
|
||||
wellContribs.apply(d_s, d_t, d_toOrder);
|
||||
static_cast<WellContributionsOCL&>(wellContribs).apply(d_s, d_t, d_toOrder);
|
||||
}
|
||||
t_well.stop();
|
||||
|
||||
@ -520,10 +521,10 @@ void openclSolverBackend<block_size>::update_system(double *vals, double *b, Wel
|
||||
mat->nnzValues = vals;
|
||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||
reorderBlockedVectorByPattern<block_size>(mat->Nb, b, fromOrder, rb);
|
||||
wellContribs.setReordering(toOrder, true);
|
||||
static_cast<WellContributionsOCL&>(wellContribs).setReordering(toOrder, true);
|
||||
} else {
|
||||
rb = b;
|
||||
wellContribs.setReordering(nullptr, false);
|
||||
static_cast<WellContributionsOCL&>(wellContribs).setReordering(nullptr, false);
|
||||
}
|
||||
|
||||
if (verbosity > 2) {
|
||||
|
154
opm/simulators/linalg/bda/openclWellContributions.cpp
Normal file
154
opm/simulators/linalg/bda/openclWellContributions.cpp
Normal file
@ -0,0 +1,154 @@
|
||||
/*
|
||||
Copyright 2020 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <config.h> // CMake
|
||||
|
||||
#include <opm/simulators/linalg/bda/openclWellContributions.hpp>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
using Accelerator::OpenclKernels;
|
||||
|
||||
void WellContributionsOCL::setOpenCLEnv(cl::Context* context_, cl::CommandQueue* queue_) {
|
||||
this->context = context_;
|
||||
this->queue = queue_;
|
||||
}
|
||||
|
||||
void WellContributionsOCL::setKernel(Accelerator::stdwell_apply_kernel_type* kernel_,
|
||||
Accelerator::stdwell_apply_no_reorder_kernel_type* kernel_no_reorder_) {
|
||||
this->kernel = kernel_;
|
||||
this->kernel_no_reorder = kernel_no_reorder_;
|
||||
}
|
||||
|
||||
void WellContributionsOCL::setReordering(int* h_toOrder_, bool reorder_)
|
||||
{
|
||||
this->h_toOrder = h_toOrder_;
|
||||
this->reorder = reorder_;
|
||||
}
|
||||
|
||||
void WellContributionsOCL::apply_stdwells(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder){
|
||||
if (reorder) {
|
||||
OpenclKernels::apply_stdwells_reorder(*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl,
|
||||
d_x, d_y, d_toOrder, dim, dim_wells, *d_val_pointers_ocl, num_std_wells);
|
||||
} else {
|
||||
OpenclKernels::apply_stdwells_no_reorder(*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl,
|
||||
d_x, d_y, dim, dim_wells, *d_val_pointers_ocl, num_std_wells);
|
||||
}
|
||||
}
|
||||
|
||||
void WellContributionsOCL::apply_mswells(cl::Buffer d_x, cl::Buffer d_y){
|
||||
if (h_x.empty()) {
|
||||
h_x.resize(N);
|
||||
h_y.resize(N);
|
||||
}
|
||||
|
||||
events.resize(2);
|
||||
queue->enqueueReadBuffer(d_x, CL_FALSE, 0, sizeof(double) * N, h_x.data(), nullptr, &events[0]);
|
||||
queue->enqueueReadBuffer(d_y, CL_FALSE, 0, sizeof(double) * N, h_y.data(), nullptr, &events[1]);
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
|
||||
// actually apply MultisegmentWells
|
||||
for (auto& well : multisegments) {
|
||||
well->setReordering(h_toOrder, reorder);
|
||||
well->apply(h_x.data(), h_y.data());
|
||||
}
|
||||
|
||||
// copy vector y from CPU to GPU
|
||||
events.resize(1);
|
||||
queue->enqueueWriteBuffer(d_y, CL_FALSE, 0, sizeof(double) * N, h_y.data(), nullptr, &events[0]);
|
||||
events[0].wait();
|
||||
events.clear();
|
||||
}
|
||||
|
||||
void WellContributionsOCL::apply(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder){
|
||||
if(num_std_wells > 0){
|
||||
apply_stdwells(d_x, d_y, d_toOrder);
|
||||
}
|
||||
|
||||
if(num_ms_wells > 0){
|
||||
apply_mswells(d_x, d_y);
|
||||
}
|
||||
}
|
||||
|
||||
void WellContributionsOCL::APIaddMatrix(MatrixType type,
|
||||
int* colIndices,
|
||||
double* values,
|
||||
unsigned int val_size)
|
||||
{
|
||||
if (!allocated) {
|
||||
OPM_THROW(std::logic_error, "Error cannot add wellcontribution before allocating memory in WellContributions");
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case MatrixType::C:
|
||||
events.resize(2);
|
||||
queue->enqueueWriteBuffer(*d_Cnnzs_ocl, CL_FALSE, sizeof(double) * num_blocks_so_far * dim * dim_wells, sizeof(double) * val_size * dim * dim_wells, values, nullptr, &events[0]);
|
||||
queue->enqueueWriteBuffer(*d_Ccols_ocl, CL_FALSE, sizeof(int) * num_blocks_so_far, sizeof(int) * val_size, colIndices, nullptr, &events[1]);
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
break;
|
||||
|
||||
case MatrixType::D:
|
||||
events.resize(1);
|
||||
queue->enqueueWriteBuffer(*d_Dnnzs_ocl, CL_FALSE, sizeof(double) * num_std_wells_so_far * dim_wells * dim_wells, sizeof(double) * dim_wells * dim_wells, values, nullptr, &events[0]);
|
||||
events[0].wait();
|
||||
events.clear();
|
||||
break;
|
||||
|
||||
case MatrixType::B:
|
||||
events.resize(2);
|
||||
queue->enqueueWriteBuffer(*d_Bnnzs_ocl, CL_FALSE, sizeof(double) * num_blocks_so_far * dim * dim_wells, sizeof(double) * val_size * dim * dim_wells, values, nullptr, &events[0]);
|
||||
queue->enqueueWriteBuffer(*d_Bcols_ocl, CL_FALSE, sizeof(int) * num_blocks_so_far, sizeof(int) * val_size, colIndices, nullptr, &events[1]);
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
|
||||
val_pointers[num_std_wells_so_far] = num_blocks_so_far;
|
||||
if (num_std_wells_so_far == num_std_wells - 1) {
|
||||
val_pointers[num_std_wells] = num_blocks;
|
||||
events.resize(1);
|
||||
queue->enqueueWriteBuffer(*d_val_pointers_ocl, CL_FALSE, 0, sizeof(unsigned int) * (num_std_wells + 1), val_pointers.data(), nullptr, &events[0]);
|
||||
events[0].wait();
|
||||
events.clear();
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
OPM_THROW(std::logic_error, "Error unsupported matrix ID for WellContributionsOCL::addMatrix()");
|
||||
}
|
||||
}
|
||||
|
||||
void WellContributionsOCL::APIalloc()
|
||||
{
|
||||
d_Cnnzs_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
d_Dnnzs_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * num_std_wells * dim_wells * dim_wells);
|
||||
d_Bnnzs_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
d_Ccols_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
|
||||
d_Bcols_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
|
||||
d_val_pointers_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * (num_std_wells + 1));
|
||||
}
|
||||
|
||||
} //namespace Opm
|
75
opm/simulators/linalg/bda/openclWellContributions.hpp
Normal file
75
opm/simulators/linalg/bda/openclWellContributions.hpp
Normal file
@ -0,0 +1,75 @@
|
||||
/*
|
||||
Copyright 2020 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef WELLCONTRIBUTIONS_OPENCL_HEADER_INCLUDED
|
||||
#define WELLCONTRIBUTIONS_OPENCL_HEADER_INCLUDED
|
||||
|
||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/opencl.hpp>
|
||||
#include <opm/simulators/linalg/bda/openclKernels.hpp>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
class WellContributionsOCL : public WellContributions
|
||||
{
|
||||
public:
|
||||
void setKernel(Opm::Accelerator::stdwell_apply_kernel_type *kernel_,
|
||||
Opm::Accelerator::stdwell_apply_no_reorder_kernel_type *kernel_no_reorder_);
|
||||
void setOpenCLEnv(cl::Context *context_, cl::CommandQueue *queue_);
|
||||
|
||||
/// Since the rows of the matrix are reordered, the columnindices of the matrixdata is incorrect
|
||||
/// Those indices need to be mapped via toOrder
|
||||
/// \param[in] toOrder array with mappings
|
||||
/// \param[in] reorder whether reordering is actually used or not
|
||||
void setReordering(int* toOrder, bool reorder);
|
||||
void apply_stdwells(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder);
|
||||
void apply_mswells(cl::Buffer d_x, cl::Buffer d_y);
|
||||
void apply(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder);
|
||||
|
||||
protected:
|
||||
/// Allocate memory for the StandardWells
|
||||
void APIalloc() override;
|
||||
|
||||
void APIaddMatrix(MatrixType type, int *colIndices, double *values, unsigned int val_size) override;
|
||||
|
||||
cl::Context* context;
|
||||
cl::CommandQueue* queue;
|
||||
Opm::Accelerator::stdwell_apply_kernel_type* kernel;
|
||||
Opm::Accelerator::stdwell_apply_no_reorder_kernel_type* kernel_no_reorder;
|
||||
std::vector<cl::Event> events;
|
||||
|
||||
std::unique_ptr<cl::Buffer> d_Cnnzs_ocl, d_Dnnzs_ocl, d_Bnnzs_ocl;
|
||||
std::unique_ptr<cl::Buffer> d_Ccols_ocl, d_Bcols_ocl;
|
||||
std::unique_ptr<cl::Buffer> d_val_pointers_ocl;
|
||||
|
||||
bool reorder = false;
|
||||
int *h_toOrder = nullptr;
|
||||
std::vector<double> h_x;
|
||||
std::vector<double> h_y;
|
||||
};
|
||||
|
||||
} //namespace Opm
|
||||
|
||||
#endif
|
@ -81,7 +81,7 @@ testCusparseSolver(const boost::property_tree::ptree& prm, const std::string& ma
|
||||
Dune::InverseOperatorResult result;
|
||||
|
||||
Vector x(rhs.size());
|
||||
Opm::WellContributions wellContribs("cusparse", false);
|
||||
auto wellContribs = Opm::WellContributions::create("cusparse", false);
|
||||
std::unique_ptr<Opm::BdaBridge<Matrix, Vector, bz> > bridge;
|
||||
try {
|
||||
bridge = std::make_unique<Opm::BdaBridge<Matrix, Vector, bz> >(gpu_mode, fpga_bitstream, linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_reorder);
|
||||
@ -89,7 +89,7 @@ testCusparseSolver(const boost::property_tree::ptree& prm, const std::string& ma
|
||||
BOOST_WARN_MESSAGE(true, error.what());
|
||||
throw DeviceInitException(error.what());
|
||||
}
|
||||
bridge->solve_system(&matrix, rhs, wellContribs, result);
|
||||
bridge->solve_system(&matrix, rhs, *wellContribs, result);
|
||||
bridge->get_result(x);
|
||||
|
||||
return x;
|
||||
|
@ -80,7 +80,7 @@ testOpenclSolver(const boost::property_tree::ptree& prm, const std::string& matr
|
||||
Dune::InverseOperatorResult result;
|
||||
|
||||
Vector x(rhs.size());
|
||||
Opm::WellContributions wellContribs("opencl", false);
|
||||
auto wellContribs = Opm::WellContributions::create("opencl", false);
|
||||
std::unique_ptr<Opm::BdaBridge<Matrix, Vector, bz> > bridge;
|
||||
try {
|
||||
bridge = std::make_unique<Opm::BdaBridge<Matrix, Vector, bz> >(gpu_mode, fpga_bitstream, linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_reorder);
|
||||
@ -88,7 +88,7 @@ testOpenclSolver(const boost::property_tree::ptree& prm, const std::string& matr
|
||||
BOOST_WARN_MESSAGE(true, error.what());
|
||||
throw PlatformInitException(error.what());
|
||||
}
|
||||
bridge->solve_system(&matrix, rhs, wellContribs, result);
|
||||
bridge->solve_system(&matrix, rhs, *wellContribs, result);
|
||||
bridge->get_result(x);
|
||||
|
||||
return x;
|
||||
|
Loading…
Reference in New Issue
Block a user