Changed platformID and deviceID to command-line parameters. Make sure Flow exits cleanly upon an invalid platform ID or device ID.

This commit is contained in:
T.D. (Tongdong) Qiu 2020-07-01 19:43:22 +02:00
parent c8eb14aaac
commit bb622449b6
12 changed files with 108 additions and 63 deletions

View File

@ -68,6 +68,8 @@ NEW_PROP_TAG(CprReuseSetup);
NEW_PROP_TAG(LinearSolverConfiguration);
NEW_PROP_TAG(LinearSolverConfigurationJsonFile);
NEW_PROP_TAG(GpuMode);
NEW_PROP_TAG(BdaDeviceId);
NEW_PROP_TAG(OpenclPlatformId);
SET_SCALAR_PROP(FlowIstlSolverParams, LinearSolverReduction, 1e-2);
SET_SCALAR_PROP(FlowIstlSolverParams, IluRelaxation, 0.9);
@ -95,6 +97,8 @@ SET_INT_PROP(FlowIstlSolverParams, CprReuseSetup, 3);
SET_STRING_PROP(FlowIstlSolverParams, LinearSolverConfiguration, "ilu0");
SET_STRING_PROP(FlowIstlSolverParams, LinearSolverConfigurationJsonFile, "none");
SET_STRING_PROP(FlowIstlSolverParams, GpuMode, "none");
SET_INT_PROP(FlowIstlSolverParams, BdaDeviceId, 0);
SET_INT_PROP(FlowIstlSolverParams, OpenclPlatformId, 0);
@ -168,6 +172,8 @@ namespace Opm
std::string linear_solver_configuration_;
std::string linear_solver_configuration_json_file_;
std::string gpu_mode_;
int bda_device_id_;
int opencl_platform_id_;
template <class TypeTag>
void init()
@ -197,6 +203,8 @@ namespace Opm
linear_solver_configuration_ = EWOMS_GET_PARAM(TypeTag, std::string, LinearSolverConfiguration);
linear_solver_configuration_json_file_ = EWOMS_GET_PARAM(TypeTag, std::string, LinearSolverConfigurationJsonFile);
gpu_mode_ = EWOMS_GET_PARAM(TypeTag, std::string, GpuMode);
bda_device_id_ = EWOMS_GET_PARAM(TypeTag, int, BdaDeviceId);
opencl_platform_id_ = EWOMS_GET_PARAM(TypeTag, int, OpenclPlatformId);
}
template <class TypeTag>
@ -226,6 +234,8 @@ namespace Opm
EWOMS_REGISTER_PARAM(TypeTag, std::string, LinearSolverConfiguration, "Configuration of solver valid is: ilu0 (default), cpr_quasiimpes, cpr_trueimpes or file (specified in LinearSolverConfigurationJsonFile) ");
EWOMS_REGISTER_PARAM(TypeTag, std::string, LinearSolverConfigurationJsonFile, "Filename of JSON configuration for flexible linear solver system.");
EWOMS_REGISTER_PARAM(TypeTag, std::string, GpuMode, "Use GPU cusparseSolver or openclSolver as the linear solver");
EWOMS_REGISTER_PARAM(TypeTag, int, BdaDeviceId, "Choose device ID for cusparseSolver or openclSolver, too high value could lead to errors");
EWOMS_REGISTER_PARAM(TypeTag, int, OpenclPlatformId, "Choose platform ID for openclSolver, too high value could lead to errors");
}
FlowLinearSolverParameters() { reset(); }
@ -248,6 +258,8 @@ namespace Opm
ilu_redblack_ = false;
ilu_reorder_sphere_ = true;
gpu_mode_ = "none";
bda_device_id_ = 0;
opencl_platform_id_ = 0;
}
};

View File

@ -319,6 +319,8 @@ protected:
const auto& gridForConn = simulator_.vanguard().grid();
#if HAVE_CUDA || HAVE_OPENCL
std::string gpu_mode = EWOMS_GET_PARAM(TypeTag, std::string, GpuMode);
int platformID = EWOMS_GET_PARAM(TypeTag, int, OpenclPlatformId);
int deviceID = EWOMS_GET_PARAM(TypeTag, int, BdaDeviceId);
if (gridForConn.comm().size() > 1 && gpu_mode.compare("none") != 0) {
OpmLog::warning("Warning cannot use GPU with MPI, GPU is disabled");
gpu_mode = "none";
@ -326,7 +328,7 @@ protected:
const int maxit = EWOMS_GET_PARAM(TypeTag, int, LinearSolverMaxIter);
const double tolerance = EWOMS_GET_PARAM(TypeTag, double, LinearSolverReduction);
const int linear_solver_verbosity = parameters_.linear_solver_verbosity_;
bdaBridge.reset(new BdaBridge<Matrix, Vector, block_size>(gpu_mode, linear_solver_verbosity, maxit, tolerance));
bdaBridge.reset(new BdaBridge<Matrix, Vector, block_size>(gpu_mode, linear_solver_verbosity, maxit, tolerance, platformID, deviceID));
#else
const std::string gpu_mode = EWOMS_GET_PARAM(TypeTag, std::string, GpuMode);
if (gpu_mode.compare("none") != 0) {

View File

@ -329,6 +329,7 @@ namespace bda
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template BILU0<n>::BILU0(bool, bool, int); \
template BILU0<n>::~BILU0(); \
template bool BILU0<n>::init(BlockedMatrix*); \
template bool BILU0<n>::create_preconditioner(BlockedMatrix*); \
template void BILU0<n>::apply(cl::Buffer& x, cl::Buffer& y); \

View File

@ -39,11 +39,11 @@ namespace bda
int Nb; // number of blockrows of the matrix
int nnz; // number of nonzeroes of the matrix (scalar)
int nnzbs; // number of blocks of the matrix
BlockedMatrix *Lmat, *Umat, *LUmat;
BlockedMatrix *Lmat = nullptr, *Umat = nullptr, *LUmat = nullptr;
BlockedMatrix *rmat = nullptr; // only used with PAR_SIM
double *invDiagVals;
int *diagIndex, *rowsPerColor;
int *toOrder, *fromOrder;
double *invDiagVals = nullptr;
int *diagIndex = nullptr, *rowsPerColor = nullptr;
int *toOrder = nullptr, *fromOrder = nullptr;
int numColors;
int verbosity;

View File

@ -41,19 +41,19 @@ namespace Opm
using bda::BdaSolverStatus;
template <class BridgeMatrix, class BridgeVector, int block_size>
BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string gpu_mode, int linear_solver_verbosity, int maxit, double tolerance)
BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string gpu_mode, int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID)
{
if (gpu_mode.compare("cusparse") == 0) {
#if HAVE_CUDA
use_gpu = true;
backend.reset(new bda::cusparseSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance));
backend.reset(new bda::cusparseSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance, deviceID));
#else
OPM_THROW(std::logic_error, "Error cusparseSolver was chosen, but CUDA was not found by CMake");
#endif
} else if (gpu_mode.compare("opencl") == 0) {
#if HAVE_OPENCL
use_gpu = true;
backend.reset(new bda::openclSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance));
backend.reset(new bda::openclSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance, platformID, deviceID));
#else
OPM_THROW(std::logic_error, "Error openclSolver was chosen, but OpenCL was not found by CMake");
#endif
@ -215,23 +215,23 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::get_result(BridgeVector
}
}
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template BdaBridge<Dune::BCRSMatrix<Opm::MatrixBlock<double, n, n>, std::allocator<Opm::MatrixBlock<double, n, n> > >, \
Dune::BlockVector<Dune::FieldVector<double, n>, std::allocator<Dune::FieldVector<double, n> > >, \
n>::BdaBridge \
(std::string gpu_mode_, int linear_solver_verbosity, int maxit, double tolerance); \
\
template void BdaBridge<Dune::BCRSMatrix<Opm::MatrixBlock<double, n, n>, std::allocator<Opm::MatrixBlock<double, n, n> > >, \
Dune::BlockVector<Dune::FieldVector<double, n>, std::allocator<Dune::FieldVector<double, n> > >, \
n>::solve_system \
(Dune::BCRSMatrix<Opm::MatrixBlock<double, n, n>, std::allocator<Opm::MatrixBlock<double, n, n> > >*, \
Dune::BlockVector<Dune::FieldVector<double, n>, std::allocator<Dune::FieldVector<double, n> > >&, \
WellContributions&, InverseOperatorResult&); \
\
template void BdaBridge<Dune::BCRSMatrix<Opm::MatrixBlock<double, n, n>, std::allocator<Opm::MatrixBlock<double, n, n> > >, \
Dune::BlockVector<Dune::FieldVector<double, n>, std::allocator<Dune::FieldVector<double, n> > >, \
n>::get_result \
(Dune::BlockVector<Dune::FieldVector<double, n>, std::allocator<Dune::FieldVector<double, n> > >&); \
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template BdaBridge<Dune::BCRSMatrix<Opm::MatrixBlock<double, n, n>, std::allocator<Opm::MatrixBlock<double, n, n> > >, \
Dune::BlockVector<Dune::FieldVector<double, n>, std::allocator<Dune::FieldVector<double, n> > >, \
n>::BdaBridge \
(std::string gpu_mode_, int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID); \
\
template void BdaBridge<Dune::BCRSMatrix<Opm::MatrixBlock<double, n, n>, std::allocator<Opm::MatrixBlock<double, n, n> > >, \
Dune::BlockVector<Dune::FieldVector<double, n>, std::allocator<Dune::FieldVector<double, n> > >, \
n>::solve_system \
(Dune::BCRSMatrix<Opm::MatrixBlock<double, n, n>, std::allocator<Opm::MatrixBlock<double, n, n> > >*, \
Dune::BlockVector<Dune::FieldVector<double, n>, std::allocator<Dune::FieldVector<double, n> > >&, \
WellContributions&, InverseOperatorResult&); \
\
template void BdaBridge<Dune::BCRSMatrix<Opm::MatrixBlock<double, n, n>, std::allocator<Opm::MatrixBlock<double, n, n> > >, \
Dune::BlockVector<Dune::FieldVector<double, n>, std::allocator<Dune::FieldVector<double, n> > >, \
n>::get_result \
(Dune::BlockVector<Dune::FieldVector<double, n>, std::allocator<Dune::FieldVector<double, n> > >&); \
INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);

View File

@ -54,7 +54,9 @@ public:
/// \param[in] linear_solver_verbosity verbosity of BdaSolver
/// \param[in] maxit maximum number of iterations for BdaSolver
/// \param[in] tolerance required relative tolerance for BdaSolver
BdaBridge(std::string gpu_mode, int linear_solver_verbosity, int maxit, double tolerance);
/// \param[in] platformID the OpenCL platform ID to be used
/// \param[in] deviceID the device ID to be used by the cusparse- and openclSolvers; values that are too high could cause runtime errors
BdaBridge(std::string gpu_mode, int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID);
/// Solve linear system, A*x = b

View File

@ -55,11 +55,21 @@ namespace bda
int nnz; // number of nonzeroes (scalars)
int nnzb; // number of nonzero blocks (nnzb*block_size*block_size == nnz)
unsigned int platformID = 0; // ID of OpenCL platform to be used, only used by openclSolver now
unsigned int deviceID = 0; // ID of the device to be used
bool initialized = false;
public:
BdaSolver(int linear_solver_verbosity, int max_it, double tolerance_) : verbosity(linear_solver_verbosity), maxit(max_it), tolerance(tolerance_) {};
/// Construct a BdaSolver, can be cusparseSolver or openclSolver
/// \param[in] linear_solver_verbosity verbosity of solver
/// \param[in] max_it maximum number of iterations for solver
/// \param[in] tolerance_ required relative tolerance for solver
/// \param[in] platformID_ the OpenCL platform to be used, only used in openclSolver
/// \param[in] deviceID_ the device to be used
BdaSolver(int linear_solver_verbosity, int max_it, double tolerance_, unsigned int deviceID_) : verbosity(linear_solver_verbosity), maxit(max_it), tolerance(tolerance_), deviceID(deviceID_) {};
BdaSolver(int linear_solver_verbosity, int max_it, double tolerance_, unsigned int platformID_, unsigned int deviceID_) : verbosity(linear_solver_verbosity), maxit(max_it), tolerance(tolerance_), platformID(platformID_), deviceID(deviceID_) {};
/// Define virtual destructor, so that the derived class destructor will be called
virtual ~BdaSolver() {};

View File

@ -21,7 +21,10 @@
#define CUDA_HEADER_HEADER_INCLUDED
#include <cuda_runtime.h>
#include <iostream>
#include <sstream>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>
/// Runtime error checking of CUDA functions
/// Usage:
@ -33,9 +36,10 @@
inline void __cudaCheckError(const char *file, const int line, const char *msg){
cudaError err = cudaGetLastError();
if (cudaSuccess != err){
std::cerr << "cudaCheckError() failed at " << file << ":" << line << ": " << cudaGetErrorString(err) << std::endl;
std::cerr << "BDA error message: " << msg << std::endl;
exit(1);
std::ostringstream out;
out << cudaGetErrorString(err) << "\n";
out << "BDA error message: " << msg << "\n";
OPM_THROW(std::logic_error, out.str());
}
}

View File

@ -49,7 +49,7 @@ const cusparseDirection_t order = CUSPARSE_DIRECTION_ROW;
template <unsigned int block_size>
cusparseSolverBackend<block_size>::cusparseSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_) {}
cusparseSolverBackend<block_size>::cusparseSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int deviceID_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, deviceID_) {}
template <unsigned int block_size>
cusparseSolverBackend<block_size>::~cusparseSolverBackend() {
@ -198,7 +198,6 @@ void cusparseSolverBackend<block_size>::initialize(int N, int nnz, int dim) {
out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance;
OpmLog::info(out.str());
int deviceID = 0;
cudaSetDevice(deviceID);
cudaCheckLastError("Could not get device");
struct cudaDeviceProp props;
@ -501,8 +500,8 @@ Status cusparseSolverBackend<block_size>::solve_system(int N, int nnz, int dim,
}
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template cusparseSolverBackend<n>::cusparseSolverBackend(int, int, double); \
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template cusparseSolverBackend<n>::cusparseSolverBackend(int, int, double, unsigned int); \
INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);

View File

@ -42,6 +42,7 @@ class cusparseSolverBackend : public BdaSolver<block_size> {
using Base::nnz;
using Base::nnzb;
using Base::verbosity;
using Base::deviceID;
using Base::maxit;
using Base::tolerance;
using Base::initialized;
@ -117,7 +118,8 @@ public:
/// \param[in] linear_solver_verbosity verbosity of cusparseSolver
/// \param[in] maxit maximum number of iterations for cusparseSolver
/// \param[in] tolerance required relative tolerance for cusparseSolver
cusparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance);
/// \param[in] deviceID the device to be used
cusparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int deviceID);
/// Destroy a cusparseSolver, and free memory
~cusparseSolverBackend();

View File

@ -52,7 +52,7 @@ using Opm::OpmLog;
using Dune::Timer;
template <unsigned int block_size>
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_) {
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_) {
prec = new Preconditioner(LEVEL_SCHEDULING, GRAPH_COLORING, verbosity_);
}
@ -318,10 +318,10 @@ void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContr
}
if (verbosity >= 4) {
std::ostringstream out;
out << "openclSolver::ily_apply: " << t_prec.elapsed() << "s\n";
out << "openclSolver::spmv: " << t_spmv.elapsed() << "s\n";
out << "openclSolver::rest: " << t_rest.elapsed() << "s\n";
out << "openclSolver::total_solve: " << res.elapsed << "s\n";
out << "openclSolver::ily_apply: " << t_prec.elapsed() << " s\n";
out << "openclSolver::spmv: " << t_spmv.elapsed() << " s\n";
out << "openclSolver::rest: " << t_rest.elapsed() << " s\n";
out << "openclSolver::total_solve: " << res.elapsed << " s\n";
OpmLog::info(out.str());
}
}
@ -337,34 +337,32 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
std::ostringstream out;
out << "Initializing GPU, matrix size: " << N << " blocks, nnzb: " << nnzb << "\n";
out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance << "\n";
out << "PlatformID: " << platformID << ", deviceID: " << deviceID << "\n";
OpmLog::info(out.str());
out.str("");
out.clear();
int deviceID = 0;
cl_int err = CL_SUCCESS;
try {
std::vector<cl::Platform> platforms;
cl::Platform::get(&platforms);
if (platforms.size() == 0)
{
if (platforms.size() == 0) {
OPM_THROW(std::logic_error, "Error openclSolver is selected but no OpenCL platforms are found");
}
out << "Found " << platforms.size() << " OpenCL platforms" << "\n\n";
if (verbosity >= 1) {
std::string platform_info;
for (unsigned int platformID = 0; platformID < platforms.size(); ++platformID) {
platforms[platformID].getInfo(CL_PLATFORM_NAME, &platform_info);
for (unsigned int i = 0; i < platforms.size(); ++i) {
platforms[i].getInfo(CL_PLATFORM_NAME, &platform_info);
out << "Platform name : " << platform_info << "\n";
platforms[platformID].getInfo(CL_PLATFORM_VENDOR, &platform_info);
platforms[i].getInfo(CL_PLATFORM_VENDOR, &platform_info);
out << "Platform vendor : " << platform_info << "\n";
platforms[platformID].getInfo(CL_PLATFORM_VERSION, &platform_info);
platforms[i].getInfo(CL_PLATFORM_VERSION, &platform_info);
out << "Platform version : " << platform_info << "\n";
platforms[platformID].getInfo(CL_PLATFORM_PROFILE, &platform_info);
platforms[i].getInfo(CL_PLATFORM_PROFILE, &platform_info);
out << "Platform profile : " << platform_info << "\n";
platforms[platformID].getInfo(CL_PLATFORM_EXTENSIONS, &platform_info);
platforms[i].getInfo(CL_PLATFORM_EXTENSIONS, &platform_info);
out << "Platform extensions: " << platform_info << "\n\n";
}
}
@ -372,7 +370,11 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
out.str("");
out.clear();
cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[deviceID])(), 0};
if (platforms.size() <= platformID) {
OPM_THROW(std::logic_error, "Error chosen too high OpenCL platform ID");
}
cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[platformID])(), 0};
context.reset(new cl::Context(CL_DEVICE_TYPE_GPU, properties));
std::vector<cl::Device> devices = context->getInfo<CL_CONTEXT_DEVICES>();
@ -437,6 +439,10 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
}
OpmLog::info(out.str());
if (devices.size() <= deviceID){
OPM_THROW(std::logic_error, "Error chosen too high OpenCL device ID");
}
cl::Program::Sources source(1, std::make_pair(kernel_1, strlen(kernel_1))); // what does this '1' mean? cl::Program::Sources is of type 'std::vector<std::pair<const char*, long unsigned int> >'
source.emplace_back(std::make_pair(kernel_2, strlen(kernel_2)));
source.emplace_back(std::make_pair(axpy_s, strlen(axpy_s)));
@ -451,7 +457,7 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
program_.build(devices);
cl::Event event;
queue.reset(new cl::CommandQueue(*context, devices[0], 0, &err));
queue.reset(new cl::CommandQueue(*context, devices[deviceID], 0, &err));
prec->setOpenCLContext(context.get());
prec->setOpenCLQueue(queue.get());
@ -503,13 +509,15 @@ void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, doub
std::ostringstream oss;
oss << "OpenCL Error: " << error.what() << "(" << error.err() << ")";
OpmLog::error(oss.str());
} catch (const std::logic_error&) {
    // rethrow exception, without this, a segfault occurs
    // Caught by const reference and rethrown with a bare `throw;` so the
    // in-flight exception propagates unchanged: `throw error;` on a by-value
    // copy would slice any exception type derived from std::logic_error.
    throw;
}
initialized = true;
} // end initialize()
template <unsigned int block_size>
void openclSolverBackend<block_size>::finalize() {
delete mat;
@ -518,6 +526,7 @@ void openclSolverBackend<block_size>::finalize() {
#if COPY_ROW_BY_ROW
delete[] vals_contiguous;
#endif
delete prec;
} // end finalize()
@ -700,8 +709,8 @@ Status openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim,
}
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template openclSolverBackend<n>::openclSolverBackend(int, int, double); \
#define INSTANTIATE_BDA_FUNCTIONS(n) \
template openclSolverBackend<n>::openclSolverBackend(int, int, double, unsigned int, unsigned int); \
INSTANTIATE_BDA_FUNCTIONS(1);
INSTANTIATE_BDA_FUNCTIONS(2);

View File

@ -48,6 +48,8 @@ class openclSolverBackend : public BdaSolver<block_size>
using Base::nnz;
using Base::nnzb;
using Base::verbosity;
using Base::platformID;
using Base::deviceID;
using Base::maxit;
using Base::tolerance;
using Base::initialized;
@ -55,8 +57,8 @@ class openclSolverBackend : public BdaSolver<block_size>
private:
double *rb; // reordered b vector, the matrix is reordered, so b must also be
double *vals_contiguous; // only used if COPY_ROW_BY_ROW is true in openclSolverBackend.cpp
double *rb = nullptr; // reordered b vector, the matrix is reordered, so b must also be
double *vals_contiguous = nullptr; // only used if COPY_ROW_BY_ROW is true in openclSolverBackend.cpp
bool analysis_done = false;
@ -65,7 +67,7 @@ private:
cl::Buffer d_x, d_b, d_rb, d_r, d_rw, d_p; // vectors, used during linear solve
cl::Buffer d_pw, d_s, d_t, d_v; // vectors, used during linear solve
cl::Buffer d_tmp; // used as tmp GPU buffer for dot() and norm()
double *tmp; // used as tmp CPU buffer for dot() and norm()
double *tmp = nullptr; // used as tmp CPU buffer for dot() and norm()
// shared pointers are also passed to BILU0
std::shared_ptr<cl::Context> context;
@ -78,9 +80,9 @@ private:
std::shared_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> > ILU_apply1_k;
std::shared_ptr<cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> > ILU_apply2_k;
Preconditioner *prec; // only supported preconditioner is BILU0
int *toOrder, *fromOrder; // BILU0 reorders rows of the matrix via these mappings
BlockedMatrix *mat, *rmat; // normal and reordered matrix
Preconditioner *prec = nullptr; // only supported preconditioner is BILU0
int *toOrder = nullptr, *fromOrder = nullptr; // BILU0 reorders rows of the matrix via these mappings
BlockedMatrix *mat = nullptr, *rmat = nullptr; // normal and reordered matrix
/// Divide A by B, and round up: return (int)ceil(A/B)
@ -175,7 +177,9 @@ public:
/// \param[in] linear_solver_verbosity verbosity of openclSolver
/// \param[in] maxit maximum number of iterations for openclSolver
/// \param[in] tolerance required relative tolerance for openclSolver
openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance);
/// \param[in] platformID the OpenCL platform to be used
/// \param[in] deviceID the device to be used
openclSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID);
/// Destroy an openclSolver, and free memory
~openclSolverBackend();