mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Merge pull request #3691 from akva2/bda_wellcontrib_refactor
Refactor BDA well contributions
This commit is contained in:
commit
afc5ed751f
@ -49,6 +49,7 @@ list (APPEND MAIN_SOURCE_FILES
|
||||
opm/simulators/linalg/FlexibleSolver6.cpp
|
||||
opm/simulators/linalg/PropertyTree.cpp
|
||||
opm/simulators/linalg/setupPropertyTree.cpp
|
||||
opm/simulators/linalg/bda/WellContributions.cpp
|
||||
opm/simulators/utils/PartiallySupportedFlowKeywords.cpp
|
||||
opm/simulators/utils/readDeck.cpp
|
||||
opm/simulators/utils/UnsupportedFlowKeywords.cpp
|
||||
@ -93,8 +94,7 @@ list (APPEND MAIN_SOURCE_FILES
|
||||
|
||||
if(CUDA_FOUND)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cusparseSolverBackend.cu)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cu)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cuWellContributions.cu)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/MultisegmentWellContribution.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp)
|
||||
endif()
|
||||
@ -106,6 +106,7 @@ if(OPENCL_FOUND)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/openclKernels.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/openclSolverBackend.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/openclWellContributions.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/MultisegmentWellContribution.cpp)
|
||||
@ -265,6 +266,7 @@ list (APPEND PUBLIC_HEADER_FILES
|
||||
opm/simulators/linalg/bda/opencl.hpp
|
||||
opm/simulators/linalg/bda/openclKernels.hpp
|
||||
opm/simulators/linalg/bda/openclSolverBackend.hpp
|
||||
opm/simulators/linalg/bda/openclWellContributions.hpp
|
||||
opm/simulators/linalg/bda/MultisegmentWellContribution.hpp
|
||||
opm/simulators/linalg/bda/WellContributions.hpp
|
||||
opm/simulators/linalg/amgcpr.hh
|
||||
|
@ -262,18 +262,18 @@ namespace Opm
|
||||
bool use_fpga = bdaBridge->getUseFpga();
|
||||
if (use_gpu || use_fpga) {
|
||||
const std::string accelerator_mode = EWOMS_GET_PARAM(TypeTag, std::string, AcceleratorMode);
|
||||
WellContributions wellContribs(accelerator_mode, useWellConn_);
|
||||
bdaBridge->initWellContributions(wellContribs);
|
||||
auto wellContribs = WellContributions::create(accelerator_mode, useWellConn_);
|
||||
bdaBridge->initWellContributions(*wellContribs);
|
||||
|
||||
// the WellContributions can only be applied separately with CUDA or OpenCL, not with an FPGA or amgcl
|
||||
#if HAVE_CUDA || HAVE_OPENCL
|
||||
if (!useWellConn_) {
|
||||
simulator_.problem().wellModel().getWellContributions(wellContribs);
|
||||
simulator_.problem().wellModel().getWellContributions(*wellContribs);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Const_cast needed since the CUDA stuff overwrites values for better matrix condition..
|
||||
bdaBridge->solve_system(const_cast<Matrix*>(&getMatrix()), *rhs_, wellContribs, result);
|
||||
bdaBridge->solve_system(const_cast<Matrix*>(&getMatrix()), *rhs_, *wellContribs, result);
|
||||
if (result.converged) {
|
||||
// get result vector x from non-Dune backend, iff solve was successful
|
||||
bdaBridge->get_result(x);
|
||||
|
@ -19,6 +19,9 @@
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#include "dune/istl/bcrsmatrix.hh"
|
||||
#include <opm/simulators/linalg/matrixblock.hh>
|
||||
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <opm/material/common/Unused.hpp>
|
||||
@ -32,6 +35,7 @@
|
||||
|
||||
#if HAVE_OPENCL
|
||||
#include <opm/simulators/linalg/bda/openclSolverBackend.hpp>
|
||||
#include <opm/simulators/linalg/bda/openclWellContributions.hpp>
|
||||
#endif
|
||||
|
||||
#if HAVE_FPGA
|
||||
@ -270,7 +274,7 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::initWellContributions([[
|
||||
if(accelerator_mode.compare("opencl") == 0){
|
||||
#if HAVE_OPENCL
|
||||
const auto openclBackend = static_cast<const Opm::Accelerator::openclSolverBackend<block_size>*>(backend.get());
|
||||
wellContribs.setOpenCLEnv(openclBackend->context.get(), openclBackend->queue.get());
|
||||
static_cast<WellContributionsOCL&>(wellContribs).setOpenCLEnv(openclBackend->context.get(), openclBackend->queue.get());
|
||||
#else
|
||||
OPM_THROW(std::logic_error, "Error openclSolver was chosen, but OpenCL was not found by CMake");
|
||||
#endif
|
||||
|
@ -22,17 +22,9 @@
|
||||
|
||||
#include "dune/istl/solver.hh" // for struct InverseOperatorResult
|
||||
|
||||
#include "dune/istl/bcrsmatrix.hh"
|
||||
#include <opm/simulators/linalg/matrixblock.hh>
|
||||
|
||||
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
|
||||
#include <opm/simulators/linalg/bda/ILUReorder.hpp>
|
||||
|
||||
|
||||
#if HAVE_FPGA
|
||||
#include <opm/simulators/linalg/bda/FPGASolverBackend.hpp>
|
||||
#endif
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
|
@ -25,178 +25,68 @@
|
||||
|
||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
#include <opm/simulators/linalg/bda/openclWellContributions.hpp>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
#include <opm/simulators/linalg/bda/cuWellContributions.hpp>
|
||||
#endif
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
#if HAVE_OPENCL
|
||||
using Opm::Accelerator::OpenclKernels;
|
||||
#endif
|
||||
|
||||
WellContributions::WellContributions(std::string accelerator_mode, bool useWellConn){
|
||||
std::unique_ptr<WellContributions>
|
||||
WellContributions::create(const std::string& accelerator_mode, bool useWellConn)
|
||||
{
|
||||
if(accelerator_mode.compare("cusparse") == 0){
|
||||
cuda_gpu = true;
|
||||
#if HAVE_CUDA
|
||||
return std::make_unique<WellContributionsCuda>();
|
||||
#else
|
||||
OPM_THROW(std::runtime_error, "Cannot initialize well contributions: CUDA is not enabled");
|
||||
#endif
|
||||
}
|
||||
else if(accelerator_mode.compare("opencl") == 0){
|
||||
opencl_gpu = true;
|
||||
#if HAVE_OPENCL
|
||||
return std::make_unique<WellContributionsOCL>();
|
||||
#else
|
||||
OPM_THROW(std::runtime_error, "Cannot initialize well contributions: OpenCL is not enabled");
|
||||
#endif
|
||||
}
|
||||
else if(accelerator_mode.compare("fpga") == 0){
|
||||
// unused for FPGA, but must be defined to avoid error
|
||||
return std::make_unique<WellContributions>();
|
||||
}
|
||||
else if(accelerator_mode.compare("amgcl") == 0){
|
||||
if (!useWellConn) {
|
||||
OPM_THROW(std::logic_error, "Error amgcl requires --matrix-add-well-contributions=true");
|
||||
}
|
||||
return std::make_unique<WellContributions>();
|
||||
}
|
||||
else{
|
||||
OPM_THROW(std::logic_error, "Invalid accelerator mode");
|
||||
}
|
||||
}
|
||||
|
||||
WellContributions::~WellContributions()
|
||||
void WellContributions::addMatrix([[maybe_unused]] MatrixType type,
|
||||
[[maybe_unused]] int* colIndices,
|
||||
[[maybe_unused]] double* values,
|
||||
[[maybe_unused]] unsigned int val_size)
|
||||
{
|
||||
multisegments.clear();
|
||||
|
||||
#if HAVE_CUDA
|
||||
if(cuda_gpu){
|
||||
freeCudaMemory(); // should come before 'delete[] h_x'
|
||||
}
|
||||
#if !HAVE_CUDA && !HAVE_OPENCL
|
||||
OPM_THROW(std::logic_error, "Error cannot add StandardWell matrix on GPU because neither CUDA nor OpenCL were found by cmake");
|
||||
#endif
|
||||
|
||||
#if HAVE_OPENCL
|
||||
if(opencl_gpu){
|
||||
if(num_ms_wells > 0){
|
||||
delete[] h_x;
|
||||
delete[] h_y;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#if HAVE_OPENCL
|
||||
void WellContributions::setOpenCLEnv(cl::Context *context_, cl::CommandQueue *queue_){
|
||||
this->context = context_;
|
||||
this->queue = queue_;
|
||||
}
|
||||
|
||||
void WellContributions::setKernel(Opm::Accelerator::stdwell_apply_kernel_type *kernel_,
|
||||
Opm::Accelerator::stdwell_apply_no_reorder_kernel_type *kernel_no_reorder_){
|
||||
this->kernel = kernel_;
|
||||
this->kernel_no_reorder = kernel_no_reorder_;
|
||||
}
|
||||
|
||||
void WellContributions::setReordering(int *h_toOrder_, bool reorder_)
|
||||
{
|
||||
this->h_toOrder = h_toOrder_;
|
||||
this->reorder = reorder_;
|
||||
}
|
||||
|
||||
void WellContributions::apply_stdwells(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder){
|
||||
if (reorder) {
|
||||
OpenclKernels::apply_stdwells_reorder(*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl,
|
||||
d_x, d_y, d_toOrder, dim, dim_wells, *d_val_pointers_ocl, num_std_wells);
|
||||
} else {
|
||||
OpenclKernels::apply_stdwells_no_reorder(*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl,
|
||||
d_x, d_y, dim, dim_wells, *d_val_pointers_ocl, num_std_wells);
|
||||
}
|
||||
}
|
||||
|
||||
void WellContributions::apply_mswells(cl::Buffer d_x, cl::Buffer d_y){
|
||||
if(h_x == nullptr){
|
||||
h_x = new double[N];
|
||||
h_y = new double[N];
|
||||
}
|
||||
|
||||
events.resize(2);
|
||||
queue->enqueueReadBuffer(d_x, CL_FALSE, 0, sizeof(double) * N, h_x, nullptr, &events[0]);
|
||||
queue->enqueueReadBuffer(d_y, CL_FALSE, 0, sizeof(double) * N, h_y, nullptr, &events[1]);
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
|
||||
// actually apply MultisegmentWells
|
||||
for (auto& well : multisegments) {
|
||||
well->setReordering(h_toOrder, reorder);
|
||||
well->apply(h_x, h_y);
|
||||
}
|
||||
|
||||
// copy vector y from CPU to GPU
|
||||
events.resize(1);
|
||||
queue->enqueueWriteBuffer(d_y, CL_FALSE, 0, sizeof(double) * N, h_y, nullptr, &events[0]);
|
||||
events[0].wait();
|
||||
events.clear();
|
||||
}
|
||||
|
||||
void WellContributions::apply(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder){
|
||||
if(num_std_wells > 0){
|
||||
apply_stdwells(d_x, d_y, d_toOrder);
|
||||
}
|
||||
|
||||
if(num_ms_wells > 0){
|
||||
apply_mswells(d_x, d_y);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void WellContributions::addMatrix([[maybe_unused]] MatrixType type, [[maybe_unused]] int *colIndices, [[maybe_unused]] double *values, [[maybe_unused]] unsigned int val_size)
|
||||
{
|
||||
if (!allocated) {
|
||||
OPM_THROW(std::logic_error, "Error cannot add wellcontribution before allocating memory in WellContributions");
|
||||
}
|
||||
|
||||
#if HAVE_CUDA
|
||||
if(cuda_gpu){
|
||||
addMatrixGpu(type, colIndices, values, val_size);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_OPENCL
|
||||
if(opencl_gpu){
|
||||
switch (type) {
|
||||
case MatrixType::C:
|
||||
events.resize(2);
|
||||
queue->enqueueWriteBuffer(*d_Cnnzs_ocl, CL_FALSE, sizeof(double) * num_blocks_so_far * dim * dim_wells, sizeof(double) * val_size * dim * dim_wells, values, nullptr, &events[0]);
|
||||
queue->enqueueWriteBuffer(*d_Ccols_ocl, CL_FALSE, sizeof(int) * num_blocks_so_far, sizeof(int) * val_size, colIndices, nullptr, &events[1]);
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
break;
|
||||
|
||||
case MatrixType::D:
|
||||
events.resize(1);
|
||||
queue->enqueueWriteBuffer(*d_Dnnzs_ocl, CL_FALSE, sizeof(double) * num_std_wells_so_far * dim_wells * dim_wells, sizeof(double) * dim_wells * dim_wells, values, nullptr, &events[0]);
|
||||
events[0].wait();
|
||||
events.clear();
|
||||
break;
|
||||
|
||||
case MatrixType::B:
|
||||
events.resize(2);
|
||||
queue->enqueueWriteBuffer(*d_Bnnzs_ocl, CL_FALSE, sizeof(double) * num_blocks_so_far * dim * dim_wells, sizeof(double) * val_size * dim * dim_wells, values, nullptr, &events[0]);
|
||||
queue->enqueueWriteBuffer(*d_Bcols_ocl, CL_FALSE, sizeof(int) * num_blocks_so_far, sizeof(int) * val_size, colIndices, nullptr, &events[1]);
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
|
||||
val_pointers[num_std_wells_so_far] = num_blocks_so_far;
|
||||
if (num_std_wells_so_far == num_std_wells - 1) {
|
||||
val_pointers[num_std_wells] = num_blocks;
|
||||
events.resize(1);
|
||||
queue->enqueueWriteBuffer(*d_val_pointers_ocl, CL_FALSE, 0, sizeof(unsigned int) * (num_std_wells + 1), val_pointers.data(), nullptr, &events[0]);
|
||||
events[0].wait();
|
||||
events.clear();
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
OPM_THROW(std::logic_error, "Error unsupported matrix ID for WellContributions::addMatrix()");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
this->APIaddMatrix(type, colIndices, values, val_size);
|
||||
|
||||
if(MatrixType::B == type) {
|
||||
num_blocks_so_far += val_size;
|
||||
num_std_wells_so_far++;
|
||||
}
|
||||
|
||||
#if !HAVE_CUDA && !HAVE_OPENCL
|
||||
OPM_THROW(std::logic_error, "Error cannot add StandardWell matrix on GPU because neither CUDA nor OpenCL were found by cmake");
|
||||
#endif
|
||||
}
|
||||
|
||||
void WellContributions::setBlockSize(unsigned int dim_, unsigned int dim_wells_)
|
||||
@ -225,22 +115,7 @@ void WellContributions::alloc()
|
||||
if (num_std_wells > 0) {
|
||||
val_pointers.resize(num_std_wells+1);
|
||||
|
||||
#if HAVE_CUDA
|
||||
if(cuda_gpu){
|
||||
allocStandardWells();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_OPENCL
|
||||
if(opencl_gpu){
|
||||
d_Cnnzs_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
d_Dnnzs_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * num_std_wells * dim_wells * dim_wells);
|
||||
d_Bnnzs_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
d_Ccols_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
|
||||
d_Bcols_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
|
||||
d_val_pointers_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * (num_std_wells + 1));
|
||||
}
|
||||
#endif
|
||||
this->APIalloc();
|
||||
allocated = true;
|
||||
}
|
||||
}
|
||||
|
@ -20,15 +20,6 @@
|
||||
#ifndef WELLCONTRIBUTIONS_HEADER_INCLUDED
|
||||
#define WELLCONTRIBUTIONS_HEADER_INCLUDED
|
||||
|
||||
#if HAVE_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#endif
|
||||
|
||||
#if HAVE_OPENCL
|
||||
#include <opm/simulators/linalg/bda/opencl.hpp>
|
||||
#include <opm/simulators/linalg/bda/openclKernels.hpp>
|
||||
#endif
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
@ -61,6 +52,8 @@ namespace Opm
|
||||
class WellContributions
|
||||
{
|
||||
public:
|
||||
static std::unique_ptr<WellContributions> create(const std::string& accelerator_mode, bool useWellConn);
|
||||
|
||||
#if DUNE_VERSION_NEWER(DUNE_ISTL, 2, 7)
|
||||
using UMFPackIndex = SuiteSparse_long;
|
||||
#else
|
||||
@ -73,9 +66,7 @@ public:
|
||||
B
|
||||
};
|
||||
|
||||
private:
|
||||
bool opencl_gpu = false;
|
||||
bool cuda_gpu = false;
|
||||
protected:
|
||||
bool allocated = false;
|
||||
|
||||
unsigned int N; // number of rows (not blockrows) in vectors x and y
|
||||
@ -88,80 +79,9 @@ private:
|
||||
unsigned int num_std_wells_so_far = 0; // keep track of where next data is written
|
||||
std::vector<unsigned int> val_pointers; // val_pointers[wellID] == index of first block for this well in Ccols and Bcols
|
||||
|
||||
double *h_x = nullptr;
|
||||
double *h_y = nullptr;
|
||||
std::vector<std::unique_ptr<MultisegmentWellContribution>> multisegments;
|
||||
|
||||
#if HAVE_OPENCL
|
||||
cl::Context *context;
|
||||
cl::CommandQueue *queue;
|
||||
Opm::Accelerator::stdwell_apply_kernel_type *kernel;
|
||||
Opm::Accelerator::stdwell_apply_no_reorder_kernel_type *kernel_no_reorder;
|
||||
std::vector<cl::Event> events;
|
||||
|
||||
std::unique_ptr<cl::Buffer> d_Cnnzs_ocl, d_Dnnzs_ocl, d_Bnnzs_ocl;
|
||||
std::unique_ptr<cl::Buffer> d_Ccols_ocl, d_Bcols_ocl;
|
||||
std::unique_ptr<cl::Buffer> d_val_pointers_ocl;
|
||||
|
||||
bool reorder = false;
|
||||
int *h_toOrder = nullptr;
|
||||
#endif
|
||||
|
||||
#if HAVE_CUDA
|
||||
cudaStream_t stream;
|
||||
|
||||
// data for StandardWells, could remain nullptrs if not used
|
||||
double *d_Cnnzs = nullptr;
|
||||
double *d_Dnnzs = nullptr;
|
||||
double *d_Bnnzs = nullptr;
|
||||
int *d_Ccols = nullptr;
|
||||
int *d_Bcols = nullptr;
|
||||
double *d_z1 = nullptr;
|
||||
double *d_z2 = nullptr;
|
||||
unsigned int *d_val_pointers = nullptr;
|
||||
|
||||
/// Allocate GPU memory for StandardWells
|
||||
void allocStandardWells();
|
||||
|
||||
/// Free GPU memory allocated with cuda.
|
||||
void freeCudaMemory();
|
||||
|
||||
/// Store a matrix in this object, in blocked csr format, can only be called after alloc() is called
|
||||
/// \param[in] type indicate if C, D or B is sent
|
||||
/// \param[in] colIndices columnindices of blocks in C or B, ignored for D
|
||||
/// \param[in] values array of nonzeroes
|
||||
/// \param[in] val_size number of blocks in C or B, ignored for D
|
||||
void addMatrixGpu(MatrixType type, int *colIndices, double *values, unsigned int val_size);
|
||||
#endif
|
||||
|
||||
public:
|
||||
#if HAVE_CUDA
|
||||
/// Set a cudaStream to be used
|
||||
/// \param[in] stream the cudaStream that is used to launch the kernel in
|
||||
void setCudaStream(cudaStream_t stream);
|
||||
|
||||
/// Apply all Wells in this object
|
||||
/// performs y -= (C^T * (D^-1 * (B*x))) for all Wells
|
||||
/// \param[in] d_x vector x, must be on GPU
|
||||
/// \param[inout] d_y vector y, must be on GPU
|
||||
void apply(double *d_x, double *d_y);
|
||||
#endif
|
||||
|
||||
#if HAVE_OPENCL
|
||||
void setKernel(Opm::Accelerator::stdwell_apply_kernel_type *kernel_,
|
||||
Opm::Accelerator::stdwell_apply_no_reorder_kernel_type *kernel_no_reorder_);
|
||||
void setOpenCLEnv(cl::Context *context_, cl::CommandQueue *queue_);
|
||||
|
||||
/// Since the rows of the matrix are reordered, the columnindices of the matrixdata is incorrect
|
||||
/// Those indices need to be mapped via toOrder
|
||||
/// \param[in] toOrder array with mappings
|
||||
/// \param[in] reorder whether reordering is actually used or not
|
||||
void setReordering(int *toOrder, bool reorder);
|
||||
void apply_stdwells(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder);
|
||||
void apply_mswells(cl::Buffer d_x, cl::Buffer d_y);
|
||||
void apply(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder);
|
||||
#endif
|
||||
|
||||
unsigned int getNumWells(){
|
||||
return num_std_wells + num_ms_wells;
|
||||
}
|
||||
@ -173,13 +93,8 @@ public:
|
||||
/// Allocate memory for the StandardWells
|
||||
void alloc();
|
||||
|
||||
/// Create a new WellContributions
|
||||
/// \param[in] accelerator_mode string indicating which solverBackend is used
|
||||
/// \param[in] useWellConn true iff wellcontributions are added to the matrix
|
||||
WellContributions(std::string accelerator_mode, bool useWellConn);
|
||||
|
||||
/// Destroy a WellContributions, and free memory
|
||||
~WellContributions();
|
||||
/// Empty destructor.
|
||||
virtual ~WellContributions() = default;
|
||||
|
||||
/// Indicate how large the blocks of the StandardWell (C and B) are
|
||||
/// \param[in] dim number of columns
|
||||
@ -212,6 +127,12 @@ public:
|
||||
unsigned int DnumBlocks, double *Dvalues,
|
||||
UMFPackIndex *DcolPointers, UMFPackIndex *DrowIndices,
|
||||
std::vector<double> &Cvalues);
|
||||
protected:
|
||||
//! \brief API specific allocation.
|
||||
virtual void APIalloc() {}
|
||||
|
||||
/// Api specific upload of matrix.
|
||||
virtual void APIaddMatrix(MatrixType, int*, double*, unsigned int) {}
|
||||
};
|
||||
} //namespace Opm
|
||||
|
||||
|
@ -22,12 +22,13 @@
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include "opm/simulators/linalg/bda/cuWellContributions.hpp"
|
||||
|
||||
#include "opm/simulators/linalg/bda/cuda_header.hpp"
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include "opm/simulators/linalg/bda/WellContributions.hpp"
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
@ -125,18 +126,8 @@ __global__ void apply_well_contributions(
|
||||
|
||||
}
|
||||
|
||||
void WellContributions::allocStandardWells()
|
||||
WellContributionsCuda::~WellContributionsCuda()
|
||||
{
|
||||
cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_std_wells * dim_wells * dim_wells);
|
||||
cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
cudaMalloc((void**)&d_Ccols, sizeof(int) * num_blocks);
|
||||
cudaMalloc((void**)&d_Bcols, sizeof(int) * num_blocks);
|
||||
cudaMalloc((void**)&d_val_pointers, sizeof(unsigned int) * (num_std_wells + 1));
|
||||
cudaCheckLastError("apply_gpu malloc failed");
|
||||
}
|
||||
|
||||
void WellContributions::freeCudaMemory() {
|
||||
// delete data for StandardWell
|
||||
if (num_std_wells > 0) {
|
||||
cudaFree(d_Cnnzs);
|
||||
@ -154,10 +145,20 @@ void WellContributions::freeCudaMemory() {
|
||||
}
|
||||
}
|
||||
|
||||
void WellContributionsCuda::APIalloc()
|
||||
{
|
||||
cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_std_wells * dim_wells * dim_wells);
|
||||
cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
cudaMalloc((void**)&d_Ccols, sizeof(int) * num_blocks);
|
||||
cudaMalloc((void**)&d_Bcols, sizeof(int) * num_blocks);
|
||||
cudaMalloc((void**)&d_val_pointers, sizeof(unsigned int) * (num_std_wells + 1));
|
||||
cudaCheckLastError("apply_gpu malloc failed");
|
||||
}
|
||||
|
||||
// Apply the WellContributions, similar to StandardWell::apply()
|
||||
// y -= (C^T *(D^-1*( B*x)))
|
||||
void WellContributions::apply(double *d_x, double *d_y)
|
||||
void WellContributionsCuda::apply(double *d_x, double *d_y)
|
||||
{
|
||||
// apply MultisegmentWells
|
||||
|
||||
@ -194,7 +195,7 @@ void WellContributions::apply(double *d_x, double *d_y)
|
||||
}
|
||||
|
||||
|
||||
void WellContributions::addMatrixGpu(MatrixType type, int *colIndices, double *values, unsigned int val_size)
|
||||
void WellContributionsCuda::APIaddMatrix(MatrixType type, int *colIndices, double *values, unsigned int val_size)
|
||||
{
|
||||
switch (type) {
|
||||
case MatrixType::C:
|
||||
@ -219,7 +220,7 @@ void WellContributions::addMatrixGpu(MatrixType type, int *colIndices, double *v
|
||||
cudaCheckLastError("WellContributions::addMatrix() failed");
|
||||
}
|
||||
|
||||
void WellContributions::setCudaStream(cudaStream_t stream_)
|
||||
void WellContributionsCuda::setCudaStream(cudaStream_t stream_)
|
||||
{
|
||||
this->stream = stream_;
|
||||
for (auto& well : multisegments) {
|
78
opm/simulators/linalg/bda/cuWellContributions.hpp
Normal file
78
opm/simulators/linalg/bda/cuWellContributions.hpp
Normal file
@ -0,0 +1,78 @@
|
||||
/*
|
||||
Copyright 2020 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef WELLCONTRIBUTIONS_CUDA_HEADER_INCLUDED
|
||||
#define WELLCONTRIBUTIONS_CUDA_HEADER_INCLUDED
|
||||
|
||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
class WellContributionsCuda : public WellContributions
|
||||
{
|
||||
public:
|
||||
~WellContributionsCuda() override;
|
||||
|
||||
/// Set a cudaStream to be used
|
||||
/// \param[in] stream the cudaStream that is used to launch the kernel in
|
||||
void setCudaStream(cudaStream_t stream);
|
||||
|
||||
/// Apply all Wells in this object
|
||||
/// performs y -= (C^T * (D^-1 * (B*x))) for all Wells
|
||||
/// \param[in] d_x vector x, must be on GPU
|
||||
/// \param[inout] d_y vector y, must be on GPU
|
||||
void apply(double *d_x, double *d_y);
|
||||
|
||||
protected:
|
||||
/// Allocate memory for the StandardWells
|
||||
void APIalloc() override;
|
||||
|
||||
/// Store a matrix in this object, in blocked csr format, can only be called after alloc() is called
|
||||
/// \param[in] type indicate if C, D or B is sent
|
||||
/// \param[in] colIndices columnindices of blocks in C or B, ignored for D
|
||||
/// \param[in] values array of nonzeroes
|
||||
/// \param[in] val_size number of blocks in C or B, ignored for D
|
||||
void APIaddMatrix(MatrixType type, int *colIndices, double *values, unsigned int val_size) override;
|
||||
|
||||
cudaStream_t stream;
|
||||
|
||||
// data for StandardWells, could remain nullptrs if not used
|
||||
double *d_Cnnzs = nullptr;
|
||||
double *d_Dnnzs = nullptr;
|
||||
double *d_Bnnzs = nullptr;
|
||||
int *d_Ccols = nullptr;
|
||||
int *d_Bcols = nullptr;
|
||||
double *d_z1 = nullptr;
|
||||
double *d_z2 = nullptr;
|
||||
unsigned int *d_val_pointers = nullptr;
|
||||
double* h_x = nullptr;
|
||||
double* h_y = nullptr;
|
||||
|
||||
};
|
||||
|
||||
} //namespace Opm
|
||||
|
||||
#endif
|
@ -26,6 +26,7 @@
|
||||
#include <dune/common/timer.hh>
|
||||
|
||||
#include <opm/simulators/linalg/bda/cusparseSolverBackend.hpp>
|
||||
#include <opm/simulators/linalg/bda/cuWellContributions.hpp>
|
||||
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
||||
#include <opm/simulators/linalg/bda/cuda_header.hpp>
|
||||
|
||||
@ -72,7 +73,7 @@ void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellCon
|
||||
float it;
|
||||
|
||||
if (wellContribs.getNumWells() > 0) {
|
||||
wellContribs.setCudaStream(stream);
|
||||
static_cast<WellContributionsCuda&>(wellContribs).setCudaStream(stream);
|
||||
}
|
||||
|
||||
cusparseDbsrmv(cusparseHandle, order, operation, Nb, Nb, nnzb, &one, descr_M, d_bVals, d_bRows, d_bCols, block_size, d_x, &zero, d_r);
|
||||
@ -116,7 +117,7 @@ void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellCon
|
||||
|
||||
// apply wellContributions
|
||||
if (wellContribs.getNumWells() > 0) {
|
||||
wellContribs.apply(d_pw, d_v);
|
||||
static_cast<WellContributionsCuda&>(wellContribs).apply(d_pw, d_v);
|
||||
}
|
||||
|
||||
cublasDdot(cublasHandle, n, d_rw, 1, d_v, 1, &tmp1);
|
||||
@ -147,7 +148,7 @@ void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellCon
|
||||
|
||||
// apply wellContributions
|
||||
if (wellContribs.getNumWells() > 0) {
|
||||
wellContribs.apply(d_s, d_t);
|
||||
static_cast<WellContributionsCuda&>(wellContribs).apply(d_s, d_t);
|
||||
}
|
||||
|
||||
cublasDdot(cublasHandle, n, d_t, 1, d_r, 1, &tmp1);
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include <dune/common/timer.hh>
|
||||
|
||||
#include <opm/simulators/linalg/bda/openclSolverBackend.hpp>
|
||||
#include <opm/simulators/linalg/bda/openclWellContributions.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
||||
#include <opm/simulators/linalg/bda/Reorder.hpp>
|
||||
@ -262,7 +263,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
||||
// apply wellContributions
|
||||
t_well.start();
|
||||
if(wellContribs.getNumWells() > 0){
|
||||
wellContribs.apply(d_pw, d_v, d_toOrder);
|
||||
static_cast<WellContributionsOCL&>(wellContribs).apply(d_pw, d_v, d_toOrder);
|
||||
}
|
||||
t_well.stop();
|
||||
|
||||
@ -293,7 +294,7 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
|
||||
// apply wellContributions
|
||||
t_well.start();
|
||||
if(wellContribs.getNumWells() > 0){
|
||||
wellContribs.apply(d_s, d_t, d_toOrder);
|
||||
static_cast<WellContributionsOCL&>(wellContribs).apply(d_s, d_t, d_toOrder);
|
||||
}
|
||||
t_well.stop();
|
||||
|
||||
@ -520,10 +521,10 @@ void openclSolverBackend<block_size>::update_system(double *vals, double *b, Wel
|
||||
mat->nnzValues = vals;
|
||||
if (opencl_ilu_reorder != ILUReorder::NONE) {
|
||||
reorderBlockedVectorByPattern<block_size>(mat->Nb, b, fromOrder, rb);
|
||||
wellContribs.setReordering(toOrder, true);
|
||||
static_cast<WellContributionsOCL&>(wellContribs).setReordering(toOrder, true);
|
||||
} else {
|
||||
rb = b;
|
||||
wellContribs.setReordering(nullptr, false);
|
||||
static_cast<WellContributionsOCL&>(wellContribs).setReordering(nullptr, false);
|
||||
}
|
||||
|
||||
if (verbosity > 2) {
|
||||
|
154
opm/simulators/linalg/bda/openclWellContributions.cpp
Normal file
154
opm/simulators/linalg/bda/openclWellContributions.cpp
Normal file
@ -0,0 +1,154 @@
|
||||
/*
|
||||
Copyright 2020 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <config.h> // CMake
|
||||
|
||||
#include <opm/simulators/linalg/bda/openclWellContributions.hpp>
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
using Accelerator::OpenclKernels;
|
||||
|
||||
void WellContributionsOCL::setOpenCLEnv(cl::Context* context_, cl::CommandQueue* queue_) {
|
||||
this->context = context_;
|
||||
this->queue = queue_;
|
||||
}
|
||||
|
||||
void WellContributionsOCL::setKernel(Accelerator::stdwell_apply_kernel_type* kernel_,
|
||||
Accelerator::stdwell_apply_no_reorder_kernel_type* kernel_no_reorder_) {
|
||||
this->kernel = kernel_;
|
||||
this->kernel_no_reorder = kernel_no_reorder_;
|
||||
}
|
||||
|
||||
void WellContributionsOCL::setReordering(int* h_toOrder_, bool reorder_)
|
||||
{
|
||||
this->h_toOrder = h_toOrder_;
|
||||
this->reorder = reorder_;
|
||||
}
|
||||
|
||||
void WellContributionsOCL::apply_stdwells(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder){
|
||||
if (reorder) {
|
||||
OpenclKernels::apply_stdwells_reorder(*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl,
|
||||
d_x, d_y, d_toOrder, dim, dim_wells, *d_val_pointers_ocl, num_std_wells);
|
||||
} else {
|
||||
OpenclKernels::apply_stdwells_no_reorder(*d_Cnnzs_ocl, *d_Dnnzs_ocl, *d_Bnnzs_ocl, *d_Ccols_ocl, *d_Bcols_ocl,
|
||||
d_x, d_y, dim, dim_wells, *d_val_pointers_ocl, num_std_wells);
|
||||
}
|
||||
}
|
||||
|
||||
void WellContributionsOCL::apply_mswells(cl::Buffer d_x, cl::Buffer d_y){
|
||||
if (h_x.empty()) {
|
||||
h_x.resize(N);
|
||||
h_y.resize(N);
|
||||
}
|
||||
|
||||
events.resize(2);
|
||||
queue->enqueueReadBuffer(d_x, CL_FALSE, 0, sizeof(double) * N, h_x.data(), nullptr, &events[0]);
|
||||
queue->enqueueReadBuffer(d_y, CL_FALSE, 0, sizeof(double) * N, h_y.data(), nullptr, &events[1]);
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
|
||||
// actually apply MultisegmentWells
|
||||
for (auto& well : multisegments) {
|
||||
well->setReordering(h_toOrder, reorder);
|
||||
well->apply(h_x.data(), h_y.data());
|
||||
}
|
||||
|
||||
// copy vector y from CPU to GPU
|
||||
events.resize(1);
|
||||
queue->enqueueWriteBuffer(d_y, CL_FALSE, 0, sizeof(double) * N, h_y.data(), nullptr, &events[0]);
|
||||
events[0].wait();
|
||||
events.clear();
|
||||
}
|
||||
|
||||
void WellContributionsOCL::apply(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder){
|
||||
if(num_std_wells > 0){
|
||||
apply_stdwells(d_x, d_y, d_toOrder);
|
||||
}
|
||||
|
||||
if(num_ms_wells > 0){
|
||||
apply_mswells(d_x, d_y);
|
||||
}
|
||||
}
|
||||
|
||||
void WellContributionsOCL::APIaddMatrix(MatrixType type,
|
||||
int* colIndices,
|
||||
double* values,
|
||||
unsigned int val_size)
|
||||
{
|
||||
if (!allocated) {
|
||||
OPM_THROW(std::logic_error, "Error cannot add wellcontribution before allocating memory in WellContributions");
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case MatrixType::C:
|
||||
events.resize(2);
|
||||
queue->enqueueWriteBuffer(*d_Cnnzs_ocl, CL_FALSE, sizeof(double) * num_blocks_so_far * dim * dim_wells, sizeof(double) * val_size * dim * dim_wells, values, nullptr, &events[0]);
|
||||
queue->enqueueWriteBuffer(*d_Ccols_ocl, CL_FALSE, sizeof(int) * num_blocks_so_far, sizeof(int) * val_size, colIndices, nullptr, &events[1]);
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
break;
|
||||
|
||||
case MatrixType::D:
|
||||
events.resize(1);
|
||||
queue->enqueueWriteBuffer(*d_Dnnzs_ocl, CL_FALSE, sizeof(double) * num_std_wells_so_far * dim_wells * dim_wells, sizeof(double) * dim_wells * dim_wells, values, nullptr, &events[0]);
|
||||
events[0].wait();
|
||||
events.clear();
|
||||
break;
|
||||
|
||||
case MatrixType::B:
|
||||
events.resize(2);
|
||||
queue->enqueueWriteBuffer(*d_Bnnzs_ocl, CL_FALSE, sizeof(double) * num_blocks_so_far * dim * dim_wells, sizeof(double) * val_size * dim * dim_wells, values, nullptr, &events[0]);
|
||||
queue->enqueueWriteBuffer(*d_Bcols_ocl, CL_FALSE, sizeof(int) * num_blocks_so_far, sizeof(int) * val_size, colIndices, nullptr, &events[1]);
|
||||
cl::WaitForEvents(events);
|
||||
events.clear();
|
||||
|
||||
val_pointers[num_std_wells_so_far] = num_blocks_so_far;
|
||||
if (num_std_wells_so_far == num_std_wells - 1) {
|
||||
val_pointers[num_std_wells] = num_blocks;
|
||||
events.resize(1);
|
||||
queue->enqueueWriteBuffer(*d_val_pointers_ocl, CL_FALSE, 0, sizeof(unsigned int) * (num_std_wells + 1), val_pointers.data(), nullptr, &events[0]);
|
||||
events[0].wait();
|
||||
events.clear();
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
OPM_THROW(std::logic_error, "Error unsupported matrix ID for WellContributionsOCL::addMatrix()");
|
||||
}
|
||||
}
|
||||
|
||||
void WellContributionsOCL::APIalloc()
|
||||
{
|
||||
d_Cnnzs_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
d_Dnnzs_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * num_std_wells * dim_wells * dim_wells);
|
||||
d_Bnnzs_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
d_Ccols_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
|
||||
d_Bcols_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(int) * num_blocks);
|
||||
d_val_pointers_ocl = std::make_unique<cl::Buffer>(*context, CL_MEM_READ_WRITE, sizeof(unsigned int) * (num_std_wells + 1));
|
||||
}
|
||||
|
||||
} //namespace Opm
|
75
opm/simulators/linalg/bda/openclWellContributions.hpp
Normal file
75
opm/simulators/linalg/bda/openclWellContributions.hpp
Normal file
@ -0,0 +1,75 @@
|
||||
/*
|
||||
Copyright 2020 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef WELLCONTRIBUTIONS_OPENCL_HEADER_INCLUDED
|
||||
#define WELLCONTRIBUTIONS_OPENCL_HEADER_INCLUDED
|
||||
|
||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/opencl.hpp>
|
||||
#include <opm/simulators/linalg/bda/openclKernels.hpp>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
class WellContributionsOCL : public WellContributions
|
||||
{
|
||||
public:
|
||||
void setKernel(Opm::Accelerator::stdwell_apply_kernel_type *kernel_,
|
||||
Opm::Accelerator::stdwell_apply_no_reorder_kernel_type *kernel_no_reorder_);
|
||||
void setOpenCLEnv(cl::Context *context_, cl::CommandQueue *queue_);
|
||||
|
||||
/// Since the rows of the matrix are reordered, the columnindices of the matrixdata is incorrect
|
||||
/// Those indices need to be mapped via toOrder
|
||||
/// \param[in] toOrder array with mappings
|
||||
/// \param[in] reorder whether reordering is actually used or not
|
||||
void setReordering(int* toOrder, bool reorder);
|
||||
void apply_stdwells(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder);
|
||||
void apply_mswells(cl::Buffer d_x, cl::Buffer d_y);
|
||||
void apply(cl::Buffer d_x, cl::Buffer d_y, cl::Buffer d_toOrder);
|
||||
|
||||
protected:
|
||||
/// Allocate memory for the StandardWells
|
||||
void APIalloc() override;
|
||||
|
||||
void APIaddMatrix(MatrixType type, int *colIndices, double *values, unsigned int val_size) override;
|
||||
|
||||
cl::Context* context;
|
||||
cl::CommandQueue* queue;
|
||||
Opm::Accelerator::stdwell_apply_kernel_type* kernel;
|
||||
Opm::Accelerator::stdwell_apply_no_reorder_kernel_type* kernel_no_reorder;
|
||||
std::vector<cl::Event> events;
|
||||
|
||||
std::unique_ptr<cl::Buffer> d_Cnnzs_ocl, d_Dnnzs_ocl, d_Bnnzs_ocl;
|
||||
std::unique_ptr<cl::Buffer> d_Ccols_ocl, d_Bcols_ocl;
|
||||
std::unique_ptr<cl::Buffer> d_val_pointers_ocl;
|
||||
|
||||
bool reorder = false;
|
||||
int *h_toOrder = nullptr;
|
||||
std::vector<double> h_x;
|
||||
std::vector<double> h_y;
|
||||
};
|
||||
|
||||
} //namespace Opm
|
||||
|
||||
#endif
|
@ -81,18 +81,22 @@ testCusparseSolver(const boost::property_tree::ptree& prm, const std::string& ma
|
||||
Dune::InverseOperatorResult result;
|
||||
|
||||
Vector x(rhs.size());
|
||||
Opm::WellContributions wellContribs("cusparse", false);
|
||||
auto wellContribs = Opm::WellContributions::create("cusparse", false);
|
||||
std::unique_ptr<Opm::BdaBridge<Matrix, Vector, bz> > bridge;
|
||||
try {
|
||||
bridge = std::make_unique<Opm::BdaBridge<Matrix, Vector, bz> >(gpu_mode, fpga_bitstream, linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_reorder);
|
||||
|
||||
bridge->solve_system(&matrix, rhs, *wellContribs, result);
|
||||
bridge->get_result(x);
|
||||
|
||||
return x;
|
||||
} catch (const std::logic_error& error) {
|
||||
BOOST_WARN_MESSAGE(true, error.what());
|
||||
throw DeviceInitException(error.what());
|
||||
if (strstr(error.what(), "Could not get device") != nullptr)
|
||||
throw DeviceInitException(error.what());
|
||||
else
|
||||
throw error;
|
||||
}
|
||||
bridge->solve_system(&matrix, rhs, wellContribs, result);
|
||||
bridge->get_result(x);
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
namespace pt = boost::property_tree;
|
||||
|
@ -80,7 +80,7 @@ testOpenclSolver(const boost::property_tree::ptree& prm, const std::string& matr
|
||||
Dune::InverseOperatorResult result;
|
||||
|
||||
Vector x(rhs.size());
|
||||
Opm::WellContributions wellContribs("opencl", false);
|
||||
auto wellContribs = Opm::WellContributions::create("opencl", false);
|
||||
std::unique_ptr<Opm::BdaBridge<Matrix, Vector, bz> > bridge;
|
||||
try {
|
||||
bridge = std::make_unique<Opm::BdaBridge<Matrix, Vector, bz> >(gpu_mode, fpga_bitstream, linear_solver_verbosity, maxit, tolerance, platformID, deviceID, opencl_ilu_reorder);
|
||||
@ -88,7 +88,7 @@ testOpenclSolver(const boost::property_tree::ptree& prm, const std::string& matr
|
||||
BOOST_WARN_MESSAGE(true, error.what());
|
||||
throw PlatformInitException(error.what());
|
||||
}
|
||||
bridge->solve_system(&matrix, rhs, wellContribs, result);
|
||||
bridge->solve_system(&matrix, rhs, *wellContribs, result);
|
||||
bridge->get_result(x);
|
||||
|
||||
return x;
|
||||
|
Loading…
Reference in New Issue
Block a user