mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Added block_size template to BdaSolvers and BILU0
This commit is contained in:
parent
2a48f5f63f
commit
98ddf47b44
@ -150,6 +150,7 @@ list (APPEND PUBLIC_HEADER_FILES
|
||||
opm/simulators/linalg/bda/BdaBridge.hpp
|
||||
opm/simulators/linalg/bda/BdaResult.hpp
|
||||
opm/simulators/linalg/bda/BdaSolver.hpp
|
||||
opm/simulators/linalg/bda/BdaSolverStatus.hpp
|
||||
opm/simulators/linalg/bda/BILU0.hpp
|
||||
opm/simulators/linalg/bda/BlockedMatrix.hpp
|
||||
opm/simulators/linalg/bda/cuda_header.hpp
|
||||
|
@ -35,12 +35,12 @@ namespace bda
|
||||
|
||||
using Opm::OpmLog;
|
||||
|
||||
BILU0::BILU0(bool level_scheduling_, bool graph_coloring_, int verbosity_) :
|
||||
|
||||
// define 'second' as 'BdaSolver<>::second', this allows usage of the second() function for timing
|
||||
// typedefs cannot handle templates
|
||||
const auto second = BdaSolver<>::second;
|
||||
|
||||
template <unsigned int block_size>
|
||||
BILU0<block_size>::BILU0(bool level_scheduling_, bool graph_coloring_, int verbosity_) :
|
||||
level_scheduling(level_scheduling_), graph_coloring(graph_coloring_), verbosity(verbosity_)
|
||||
{
|
||||
if (level_scheduling == graph_coloring) {
|
||||
@ -49,7 +49,8 @@ namespace bda
|
||||
double t1 = second();
|
||||
}
|
||||
|
||||
BILU0::~BILU0()
|
||||
template <unsigned int block_size>
|
||||
BILU0<block_size>::~BILU0()
|
||||
{
|
||||
delete[] invDiagVals;
|
||||
delete[] diagIndex;
|
||||
@ -62,7 +63,8 @@ namespace bda
|
||||
freeBlockedMatrix(&rMat);
|
||||
}
|
||||
|
||||
bool BILU0::init(BlockedMatrix *mat, unsigned int block_size)
|
||||
template <unsigned int block_size>
|
||||
bool BILU0<block_size>::init(BlockedMatrix *mat)
|
||||
{
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
BlockedMatrix *CSCmat = nullptr;
|
||||
@ -71,7 +73,6 @@ namespace bda
|
||||
this->Nb = mat->Nb;
|
||||
this->nnz = mat->nnzbs * block_size * block_size;
|
||||
this->nnzbs = mat->nnzbs;
|
||||
this->block_size = block_size;
|
||||
|
||||
toOrder = new int[N];
|
||||
fromOrder = new int[N];
|
||||
@ -160,7 +161,8 @@ namespace bda
|
||||
} // end init()
|
||||
|
||||
|
||||
bool BILU0::create_preconditioner(BlockedMatrix *mat)
|
||||
template <unsigned int block_size>
|
||||
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
|
||||
{
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
if (verbosity >= 3){
|
||||
@ -306,13 +308,13 @@ namespace bda
|
||||
// kernels are blocking on an NVIDIA GPU, so waiting for events is not needed
|
||||
// however, if individual kernel calls are timed, waiting for events is needed
|
||||
// behavior on other GPUs is untested
|
||||
void BILU0::apply(cl::Buffer& x, cl::Buffer& y)
|
||||
template <unsigned int block_size>
|
||||
void BILU0<block_size>::apply(cl::Buffer& x, cl::Buffer& y)
|
||||
{
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
if (verbosity >= 3) {
|
||||
t1 = second();
|
||||
}
|
||||
const unsigned int block_size = 3;
|
||||
cl::Event event;
|
||||
|
||||
for(unsigned int color = 0; color < numColors; ++color){
|
||||
@ -334,18 +336,22 @@ namespace bda
|
||||
}
|
||||
|
||||
|
||||
void BILU0::setOpenCLContext(cl::Context *context){
|
||||
template <unsigned int block_size>
|
||||
void BILU0<block_size>::setOpenCLContext(cl::Context *context){
|
||||
this->context = context;
|
||||
}
|
||||
void BILU0::setOpenCLQueue(cl::CommandQueue *queue){
|
||||
template <unsigned int block_size>
|
||||
void BILU0<block_size>::setOpenCLQueue(cl::CommandQueue *queue){
|
||||
this->queue = queue;
|
||||
}
|
||||
void BILU0::setKernelParameters(const unsigned int work_group_size, const unsigned int total_work_items, const unsigned int lmem_per_work_group){
|
||||
template <unsigned int block_size>
|
||||
void BILU0<block_size>::setKernelParameters(const unsigned int work_group_size, const unsigned int total_work_items, const unsigned int lmem_per_work_group){
|
||||
this->work_group_size = work_group_size;
|
||||
this->total_work_items = total_work_items;
|
||||
this->lmem_per_work_group = lmem_per_work_group;
|
||||
}
|
||||
void BILU0::setKernels(
|
||||
template <unsigned int block_size>
|
||||
void BILU0<block_size>::setKernels(
|
||||
cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> *ILU_apply1_,
|
||||
cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> *ILU_apply2_
|
||||
){
|
||||
@ -353,6 +359,27 @@ namespace bda
|
||||
this->ILU_apply2 = ILU_apply2_;
|
||||
}
|
||||
|
||||
|
||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||
template BILU0<n>::BILU0(bool, bool, int); \
|
||||
template bool BILU0<n>::init(BlockedMatrix*); \
|
||||
template bool BILU0<n>::create_preconditioner(BlockedMatrix*); \
|
||||
template void BILU0<n>::apply(cl::Buffer& x, cl::Buffer& y); \
|
||||
template void BILU0<n>::setOpenCLContext(cl::Context*); \
|
||||
template void BILU0<n>::setOpenCLQueue(cl::CommandQueue*); \
|
||||
template void BILU0<n>::setKernelParameters(unsigned int, unsigned int, unsigned int); \
|
||||
template void BILU0<n>::setKernels( \
|
||||
cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> *, \
|
||||
cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> * \
|
||||
);
|
||||
|
||||
INSTANTIATE_BDA_FUNCTIONS(1);
|
||||
INSTANTIATE_BDA_FUNCTIONS(2);
|
||||
INSTANTIATE_BDA_FUNCTIONS(3);
|
||||
INSTANTIATE_BDA_FUNCTIONS(4);
|
||||
|
||||
#undef INSTANTIATE_BDA_FUNCTIONS
|
||||
|
||||
} // end namespace bda
|
||||
|
||||
|
||||
|
@ -36,6 +36,7 @@ namespace bda
|
||||
|
||||
/// This class implements a Blocked ILU0 preconditioner
|
||||
/// The decomposition is done on CPU, and reorders the rows of the matrix
|
||||
template <unsigned int block_size>
|
||||
class BILU0
|
||||
{
|
||||
|
||||
@ -44,7 +45,6 @@ namespace bda
|
||||
int Nb; // number of blockrows of the matrix
|
||||
int nnz; // number of nonzeroes of the matrix (scalar)
|
||||
int nnzbs; // number of blocks of the matrix
|
||||
unsigned int block_size;
|
||||
BlockedMatrix *LMat, *UMat, *LUMat;
|
||||
BlockedMatrix *rMat = nullptr; // only used with PAR_SIM
|
||||
Block *invDiagVals;
|
||||
@ -79,7 +79,7 @@ namespace bda
|
||||
~BILU0();
|
||||
|
||||
// analysis
|
||||
bool init(BlockedMatrix *mat, unsigned int block_size);
|
||||
bool init(BlockedMatrix *mat);
|
||||
|
||||
// ilu_decomposition
|
||||
bool create_preconditioner(BlockedMatrix *mat);
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include <opm/material/common/Unused.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
|
||||
#include <opm/simulators/linalg/bda/BdaSolverStatus.hpp>
|
||||
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
||||
|
||||
#define PRINT_TIMERS_BRIDGE 0
|
||||
@ -37,6 +38,7 @@ namespace Opm
|
||||
|
||||
using bda::BdaResult;
|
||||
using bda::BdaSolver;
|
||||
using bda::BdaSolverStatus;
|
||||
|
||||
template <class BridgeMatrix, class BridgeVector, int block_size>
|
||||
BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string gpu_mode, int linear_solver_verbosity, int maxit, double tolerance)
|
||||
@ -44,14 +46,14 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string gpu_mod
|
||||
if (gpu_mode.compare("cusparse") == 0) {
|
||||
#if HAVE_CUDA
|
||||
use_gpu = true;
|
||||
backend.reset(new bda::cusparseSolverBackend(linear_solver_verbosity, maxit, tolerance));
|
||||
backend.reset(new bda::cusparseSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance));
|
||||
#else
|
||||
OPM_THROW(std::logic_error, "Error cusparseSolver was chosen, but CUDA was not found by CMake");
|
||||
#endif
|
||||
} else if (gpu_mode.compare("opencl") == 0) {
|
||||
#if HAVE_OPENCL
|
||||
use_gpu = true;
|
||||
backend.reset(new bda::openclSolverBackend(linear_solver_verbosity, maxit, tolerance));
|
||||
backend.reset(new bda::openclSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance));
|
||||
#else
|
||||
OPM_THROW(std::logic_error, "Error openclSolver was chosen, but OpenCL was not found by CMake");
|
||||
#endif
|
||||
@ -178,17 +180,17 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system(BridgeMatri
|
||||
/////////////////////////
|
||||
// actually solve
|
||||
|
||||
typedef BdaSolver::BdaSolverStatus BdaSolverStatus;
|
||||
typedef BdaSolverStatus::Status Status;
|
||||
// assume that the underlying data (nonzeroes) of mat (Dune::BCRSMatrix) is contiguous; if this is not the case, the behaviour of cusparseSolver is undefined
|
||||
BdaSolverStatus status = backend->solve_system(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])), wellContribs, result);
|
||||
Status status = backend->solve_system(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])), wellContribs, result);
|
||||
switch(status) {
|
||||
case BdaSolverStatus::BDA_SOLVER_SUCCESS:
|
||||
case Status::BDA_SOLVER_SUCCESS:
|
||||
//OpmLog::info("BdaSolver converged");
|
||||
break;
|
||||
case BdaSolverStatus::BDA_SOLVER_ANALYSIS_FAILED:
|
||||
case Status::BDA_SOLVER_ANALYSIS_FAILED:
|
||||
OpmLog::warning("BdaSolver could not analyse level information of matrix, perhaps there is still a 0.0 on the diagonal of a block on the diagonal");
|
||||
break;
|
||||
case BdaSolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED:
|
||||
case Status::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED:
|
||||
OpmLog::warning("BdaSolver could not create preconditioner, perhaps there is still a 0.0 on the diagonal of a block on the diagonal");
|
||||
break;
|
||||
default:
|
||||
@ -238,6 +240,6 @@ INSTANTIATE_BDA_FUNCTIONS(4);
|
||||
|
||||
#undef INSTANTIATE_BDA_FUNCTIONS
|
||||
|
||||
}
|
||||
} // namespace Opm
|
||||
|
||||
|
||||
|
@ -47,7 +47,7 @@ template <class BridgeMatrix, class BridgeVector, int block_size>
|
||||
class BdaBridge
|
||||
{
|
||||
private:
|
||||
std::unique_ptr<bda::BdaSolver> backend;
|
||||
std::unique_ptr<bda::BdaSolver<block_size> > backend;
|
||||
bool use_gpu = false;
|
||||
|
||||
public:
|
||||
|
@ -26,15 +26,19 @@
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
||||
#include <opm/simulators/linalg/bda/BdaSolverStatus.hpp>
|
||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||
|
||||
namespace bda
|
||||
{
|
||||
|
||||
using Opm::WellContributions;
|
||||
typedef BdaSolverStatus::Status Status;
|
||||
|
||||
/// This class serves to simplify choosing between different backend solvers, such as cusparseSolver and openclSolver
|
||||
/// This class is abstract: no instances of it can be made, only of its children
|
||||
/// Without a default block_size value, the BILU0 class cannot use BdaSolver::second()
|
||||
template <unsigned int block_size = 3>
|
||||
class BdaSolver
|
||||
{
|
||||
|
||||
@ -55,26 +59,18 @@ namespace bda
|
||||
int Nb; // number of blocked rows (Nb*block_size == N)
|
||||
int nnz; // number of nonzeroes (scalars)
|
||||
int nnzb; // number of nonzero blocks (nnzb*block_size*block_size == nnz)
|
||||
int block_size; // size of block
|
||||
|
||||
bool initialized = false;
|
||||
|
||||
public:
|
||||
|
||||
enum class BdaSolverStatus {
|
||||
BDA_SOLVER_SUCCESS,
|
||||
BDA_SOLVER_ANALYSIS_FAILED,
|
||||
BDA_SOLVER_CREATE_PRECONDITIONER_FAILED,
|
||||
BDA_SOLVER_UNKNOWN_ERROR
|
||||
};
|
||||
|
||||
BdaSolver(int linear_solver_verbosity, int max_it, double tolerance_) : verbosity(linear_solver_verbosity), maxit(max_it), tolerance(tolerance_) {};
|
||||
|
||||
/// Define virtual destructor, so that the derived class destructor will be called
|
||||
virtual ~BdaSolver() {};
|
||||
|
||||
/// Define as pure virtual functions, so the derived class must implement them
|
||||
virtual BdaSolverStatus solve_system(int N, int nnz, int dim,
|
||||
virtual Status solve_system(int N, int nnz, int dim,
|
||||
double *vals, int *rows, int *cols,
|
||||
double *b, WellContributions& wellContribs, BdaResult &res) = 0;
|
||||
|
||||
|
42
opm/simulators/linalg/bda/BdaSolverStatus.hpp
Normal file
42
opm/simulators/linalg/bda/BdaSolverStatus.hpp
Normal file
@ -0,0 +1,42 @@
|
||||
/*
|
||||
Copyright 2019 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef OPM_BDASOLVERSTATUS_HEADER_INCLUDED
|
||||
#define OPM_BDASOLVERSTATUS_HEADER_INCLUDED
|
||||
|
||||
namespace bda
|
||||
{
|
||||
|
||||
class BdaSolverStatus
|
||||
{
|
||||
|
||||
public:
|
||||
|
||||
enum class Status {
|
||||
BDA_SOLVER_SUCCESS,
|
||||
BDA_SOLVER_ANALYSIS_FAILED,
|
||||
BDA_SOLVER_CREATE_PRECONDITIONER_FAILED,
|
||||
BDA_SOLVER_UNKNOWN_ERROR
|
||||
};
|
||||
|
||||
}; // end class BdaSolverStatus
|
||||
|
||||
} // end namespace bda
|
||||
|
||||
#endif
|
@ -51,13 +51,17 @@ const cusparseSolvePolicy_t policy = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
|
||||
const cusparseOperation_t operation = CUSPARSE_OPERATION_NON_TRANSPOSE;
|
||||
const cusparseDirection_t order = CUSPARSE_DIRECTION_ROW;
|
||||
|
||||
cusparseSolverBackend::cusparseSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver(verbosity_, maxit_, tolerance_) {}
|
||||
|
||||
cusparseSolverBackend::~cusparseSolverBackend() {
|
||||
template <unsigned int block_size>
|
||||
cusparseSolverBackend<block_size>::cusparseSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_) {}
|
||||
|
||||
template <unsigned int block_size>
|
||||
cusparseSolverBackend<block_size>::~cusparseSolverBackend() {
|
||||
finalize();
|
||||
}
|
||||
|
||||
void cusparseSolverBackend::gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res) {
|
||||
template <unsigned int block_size>
|
||||
void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res) {
|
||||
double t_total1, t_total2;
|
||||
int n = N;
|
||||
double rho = 1.0, rhop;
|
||||
@ -188,10 +192,10 @@ void cusparseSolverBackend::gpu_pbicgstab(WellContributions& wellContribs, BdaRe
|
||||
}
|
||||
|
||||
|
||||
void cusparseSolverBackend::initialize(int N, int nnz, int dim) {
|
||||
template <unsigned int block_size>
|
||||
void cusparseSolverBackend<block_size>::initialize(int N, int nnz, int dim) {
|
||||
this->N = N;
|
||||
this->nnz = nnz;
|
||||
this->block_size = dim;
|
||||
this->nnzb = nnz / block_size / block_size;
|
||||
Nb = (N + dim - 1) / dim;
|
||||
std::ostringstream out;
|
||||
@ -250,7 +254,8 @@ void cusparseSolverBackend::initialize(int N, int nnz, int dim) {
|
||||
initialized = true;
|
||||
} // end initialize()
|
||||
|
||||
void cusparseSolverBackend::finalize() {
|
||||
template <unsigned int block_size>
|
||||
void cusparseSolverBackend<block_size>::finalize() {
|
||||
if (initialized) {
|
||||
cudaFree(d_x);
|
||||
cudaFree(d_b);
|
||||
@ -283,7 +288,8 @@ void cusparseSolverBackend::finalize() {
|
||||
} // end finalize()
|
||||
|
||||
|
||||
void cusparseSolverBackend::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b) {
|
||||
template <unsigned int block_size>
|
||||
void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b) {
|
||||
|
||||
double t1, t2;
|
||||
if (verbosity > 2) {
|
||||
@ -318,7 +324,8 @@ void cusparseSolverBackend::copy_system_to_gpu(double *vals, int *rows, int *col
|
||||
|
||||
|
||||
// don't copy rowpointers and colindices, they stay the same
|
||||
void cusparseSolverBackend::update_system_on_gpu(double *vals, int *rows, double *b) {
|
||||
template <unsigned int block_size>
|
||||
void cusparseSolverBackend<block_size>::update_system_on_gpu(double *vals, int *rows, double *b) {
|
||||
|
||||
double t1, t2;
|
||||
if (verbosity > 2) {
|
||||
@ -350,12 +357,14 @@ void cusparseSolverBackend::update_system_on_gpu(double *vals, int *rows, double
|
||||
} // end update_system_on_gpu()
|
||||
|
||||
|
||||
void cusparseSolverBackend::reset_prec_on_gpu() {
|
||||
template <unsigned int block_size>
|
||||
void cusparseSolverBackend<block_size>::reset_prec_on_gpu() {
|
||||
cudaMemcpyAsync(d_mVals, d_bVals, nnz * sizeof(double), cudaMemcpyDeviceToDevice, stream);
|
||||
}
|
||||
|
||||
|
||||
bool cusparseSolverBackend::analyse_matrix() {
|
||||
template <unsigned int block_size>
|
||||
bool cusparseSolverBackend<block_size>::analyse_matrix() {
|
||||
|
||||
int d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
|
||||
double t1, t2;
|
||||
@ -436,7 +445,8 @@ bool cusparseSolverBackend::analyse_matrix() {
|
||||
return true;
|
||||
} // end analyse_matrix()
|
||||
|
||||
bool cusparseSolverBackend::create_preconditioner() {
|
||||
template <unsigned int block_size>
|
||||
bool cusparseSolverBackend<block_size>::create_preconditioner() {
|
||||
|
||||
double t1, t2;
|
||||
if (verbosity > 2) {
|
||||
@ -468,7 +478,8 @@ bool cusparseSolverBackend::create_preconditioner() {
|
||||
} // end create_preconditioner()
|
||||
|
||||
|
||||
void cusparseSolverBackend::solve_system(WellContributions& wellContribs, BdaResult &res) {
|
||||
template <unsigned int block_size>
|
||||
void cusparseSolverBackend<block_size>::solve_system(WellContributions& wellContribs, BdaResult &res) {
|
||||
// actually solve
|
||||
gpu_pbicgstab(wellContribs, res);
|
||||
cudaStreamSynchronize(stream);
|
||||
@ -478,7 +489,8 @@ void cusparseSolverBackend::solve_system(WellContributions& wellContribs, BdaRes
|
||||
|
||||
// copy result to host memory
|
||||
// caller must be sure that x is a valid array
|
||||
void cusparseSolverBackend::get_result(double *x) {
|
||||
template <unsigned int block_size>
|
||||
void cusparseSolverBackend<block_size>::get_result(double *x) {
|
||||
|
||||
double t1, t2;
|
||||
if (verbosity > 2) {
|
||||
@ -497,9 +509,10 @@ void cusparseSolverBackend::get_result(double *x) {
|
||||
} // end get_result()
|
||||
|
||||
|
||||
typedef BdaSolver::BdaSolverStatus BdaSolverStatus;
|
||||
typedef BdaSolverStatus::Status Status;
|
||||
|
||||
BdaSolverStatus cusparseSolverBackend::solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) {
|
||||
template <unsigned int block_size>
|
||||
Status cusparseSolverBackend<block_size>::solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) {
|
||||
if (initialized == false) {
|
||||
initialize(N, nnz, dim);
|
||||
copy_system_to_gpu(vals, rows, cols, b);
|
||||
@ -508,19 +521,29 @@ BdaSolverStatus cusparseSolverBackend::solve_system(int N, int nnz, int dim, dou
|
||||
}
|
||||
if (analysis_done == false) {
|
||||
if (!analyse_matrix()) {
|
||||
return BdaSolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
|
||||
return Status::BDA_SOLVER_ANALYSIS_FAILED;
|
||||
}
|
||||
}
|
||||
reset_prec_on_gpu();
|
||||
if (create_preconditioner()) {
|
||||
solve_system(wellContribs, res);
|
||||
} else {
|
||||
return BdaSolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||
return Status::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||
}
|
||||
return BdaSolverStatus::BDA_SOLVER_SUCCESS;
|
||||
return Status::BDA_SOLVER_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||
template cusparseSolverBackend<n>::cusparseSolverBackend(int, int, double); \
|
||||
|
||||
INSTANTIATE_BDA_FUNCTIONS(1);
|
||||
INSTANTIATE_BDA_FUNCTIONS(2);
|
||||
INSTANTIATE_BDA_FUNCTIONS(3);
|
||||
INSTANTIATE_BDA_FUNCTIONS(4);
|
||||
|
||||
#undef INSTANTIATE_BDA_FUNCTIONS
|
||||
|
||||
} // namespace bda
|
||||
|
||||
|
||||
|
@ -32,7 +32,21 @@ namespace bda
|
||||
{
|
||||
|
||||
/// This class implements a cusparse-based ilu0-bicgstab solver on GPU
|
||||
class cusparseSolverBackend : public BdaSolver {
|
||||
template <unsigned int block_size>
|
||||
class cusparseSolverBackend : public BdaSolver<block_size> {
|
||||
|
||||
typedef BdaSolver<block_size> Base;
|
||||
|
||||
using Base::N;
|
||||
using Base::Nb;
|
||||
using Base::nnz;
|
||||
using Base::nnzb;
|
||||
using Base::verbosity;
|
||||
using Base::maxit;
|
||||
using Base::tolerance;
|
||||
using Base::second;
|
||||
using Base::initialized;
|
||||
typedef BdaSolverStatus::Status Status;
|
||||
|
||||
private:
|
||||
|
||||
@ -120,7 +134,7 @@ public:
|
||||
/// \param[in] wellContribs contains all WellContributions, to apply them separately, instead of adding them to matrix A
|
||||
/// \param[inout] res summary of solver result
|
||||
/// \return status code
|
||||
BdaSolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
|
||||
Status solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
|
||||
|
||||
/// Get resulting vector x after linear solve, also includes post processing if necessary
|
||||
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
||||
|
@ -51,31 +51,28 @@ namespace bda
|
||||
|
||||
using Opm::OpmLog;
|
||||
|
||||
openclSolverBackend::openclSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver(verbosity_, maxit_, tolerance_) {
|
||||
template <unsigned int block_size>
|
||||
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_) {
|
||||
prec = new Preconditioner(LEVEL_SCHEDULING, GRAPH_COLORING, verbosity_);
|
||||
}
|
||||
|
||||
|
||||
openclSolverBackend::~openclSolverBackend() {
|
||||
template <unsigned int block_size>
|
||||
openclSolverBackend<block_size>::~openclSolverBackend() {
|
||||
finalize();
|
||||
}
|
||||
|
||||
|
||||
// divide A by B, and round up: return (int)ceil(A/B)
|
||||
unsigned int openclSolverBackend::ceilDivision(const unsigned int A, const unsigned int B)
|
||||
template <unsigned int block_size>
|
||||
unsigned int openclSolverBackend<block_size>::ceilDivision(const unsigned int A, const unsigned int B)
|
||||
{
|
||||
return A / B + (A % B > 0);
|
||||
}
|
||||
|
||||
// just for verifying and debugging
|
||||
bool equal(float a, float b)
|
||||
{
|
||||
const float tol_abs = 1e-2;
|
||||
const float tol_rel = 1e-2;
|
||||
return std::abs(a - b) <= std::max(tol_rel * std::max(std::abs(a), std::abs(b)), tol_abs);
|
||||
}
|
||||
|
||||
double openclSolverBackend::dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out)
|
||||
template <unsigned int block_size>
|
||||
double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out)
|
||||
{
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
const unsigned int work_group_size = 1024;
|
||||
@ -106,7 +103,8 @@ double openclSolverBackend::dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out
|
||||
return gpu_sum;
|
||||
}
|
||||
|
||||
double openclSolverBackend::norm_w(cl::Buffer in, cl::Buffer out)
|
||||
template <unsigned int block_size>
|
||||
double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
|
||||
{
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
const unsigned int work_group_size = 1024;
|
||||
@ -138,7 +136,8 @@ double openclSolverBackend::norm_w(cl::Buffer in, cl::Buffer out)
|
||||
return gpu_norm;
|
||||
}
|
||||
|
||||
void openclSolverBackend::axpy_w(cl::Buffer in, const double a, cl::Buffer out)
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::axpy_w(cl::Buffer in, const double a, cl::Buffer out)
|
||||
{
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
const unsigned int work_group_size = 32;
|
||||
@ -159,7 +158,8 @@ void openclSolverBackend::axpy_w(cl::Buffer in, const double a, cl::Buffer out)
|
||||
}
|
||||
}
|
||||
|
||||
void openclSolverBackend::custom_w(cl::Buffer p, cl::Buffer v, cl::Buffer r, const double omega, const double beta)
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::custom_w(cl::Buffer p, cl::Buffer v, cl::Buffer r, const double omega, const double beta)
|
||||
{
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
const unsigned int work_group_size = 32;
|
||||
@ -180,7 +180,8 @@ void openclSolverBackend::custom_w(cl::Buffer p, cl::Buffer v, cl::Buffer r, con
|
||||
}
|
||||
}
|
||||
|
||||
void openclSolverBackend::spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b)
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b)
|
||||
{
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
const unsigned int work_group_size = 32;
|
||||
@ -203,7 +204,8 @@ void openclSolverBackend::spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::B
|
||||
}
|
||||
|
||||
|
||||
void openclSolverBackend::gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res) {
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res) {
|
||||
|
||||
float it;
|
||||
double rho, rhop, beta, alpha, omega, tmp1, tmp2;
|
||||
@ -360,10 +362,10 @@ void openclSolverBackend::gpu_pbicgstab(WellContributions& wellContribs, BdaResu
|
||||
}
|
||||
|
||||
|
||||
void openclSolverBackend::initialize(int N_, int nnz_, int dim, double *vals, int *rows, int *cols) {
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, double *vals, int *rows, int *cols) {
|
||||
this->N = N_;
|
||||
this->nnz = nnz_;
|
||||
this->block_size = dim;
|
||||
this->nnzb = nnz_ / block_size / block_size;
|
||||
|
||||
Nb = (N + dim - 1) / dim;
|
||||
@ -542,7 +544,9 @@ void openclSolverBackend::initialize(int N_, int nnz_, int dim, double *vals, in
|
||||
initialized = true;
|
||||
} // end initialize()
|
||||
|
||||
void openclSolverBackend::finalize() {
|
||||
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::finalize() {
|
||||
delete[] rb;
|
||||
delete[] tmp;
|
||||
#if COPY_ROW_BY_ROW
|
||||
@ -551,7 +555,8 @@ void openclSolverBackend::finalize() {
|
||||
} // end finalize()
|
||||
|
||||
|
||||
void openclSolverBackend::copy_system_to_gpu() {
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::copy_system_to_gpu() {
|
||||
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
if (verbosity > 2) {
|
||||
@ -588,7 +593,8 @@ void openclSolverBackend::copy_system_to_gpu() {
|
||||
|
||||
|
||||
// don't copy rowpointers and colindices, they stay the same
|
||||
void openclSolverBackend::update_system_on_gpu() {
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::update_system_on_gpu() {
|
||||
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
if (verbosity > 2) {
|
||||
@ -622,7 +628,8 @@ void openclSolverBackend::update_system_on_gpu() {
|
||||
} // end update_system_on_gpu()
|
||||
|
||||
|
||||
bool openclSolverBackend::analyse_matrix() {
|
||||
template <unsigned int block_size>
|
||||
bool openclSolverBackend<block_size>::analyse_matrix() {
|
||||
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
|
||||
@ -630,7 +637,7 @@ bool openclSolverBackend::analyse_matrix() {
|
||||
t1 = second();
|
||||
}
|
||||
|
||||
bool success = prec->init(mat, block_size);
|
||||
bool success = prec->init(mat);
|
||||
int work_group_size = 32;
|
||||
int num_work_groups = ceilDivision(N, work_group_size);
|
||||
int total_work_items = num_work_groups * work_group_size;
|
||||
@ -654,7 +661,8 @@ bool openclSolverBackend::analyse_matrix() {
|
||||
} // end analyse_matrix()
|
||||
|
||||
|
||||
void openclSolverBackend::update_system(double *vals, double *b) {
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::update_system(double *vals, double *b) {
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
if (verbosity > 2) {
|
||||
t1 = second();
|
||||
@ -673,7 +681,8 @@ void openclSolverBackend::update_system(double *vals, double *b) {
|
||||
} // end update_system()
|
||||
|
||||
|
||||
bool openclSolverBackend::create_preconditioner() {
|
||||
template <unsigned int block_size>
|
||||
bool openclSolverBackend<block_size>::create_preconditioner() {
|
||||
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
if (verbosity > 2) {
|
||||
@ -692,7 +701,8 @@ bool openclSolverBackend::create_preconditioner() {
|
||||
} // end create_preconditioner()
|
||||
|
||||
|
||||
void openclSolverBackend::solve_system(WellContributions& wellContribs, BdaResult &res) {
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::solve_system(WellContributions& wellContribs, BdaResult &res) {
|
||||
// actually solve
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
if (verbosity > 2) {
|
||||
@ -713,7 +723,8 @@ void openclSolverBackend::solve_system(WellContributions& wellContribs, BdaResul
|
||||
|
||||
// copy result to host memory
|
||||
// caller must be sure that x is a valid array
|
||||
void openclSolverBackend::get_result(double *x) {
|
||||
template <unsigned int block_size>
|
||||
void openclSolverBackend<block_size>::get_result(double *x) {
|
||||
|
||||
double t1 = 0.0, t2 = 0.0;
|
||||
if (verbosity > 2) {
|
||||
@ -732,32 +743,43 @@ void openclSolverBackend::get_result(double *x) {
|
||||
} // end get_result()
|
||||
|
||||
|
||||
typedef BdaSolverStatus::Status Status;
|
||||
|
||||
typedef BdaSolver::BdaSolverStatus BdaSolverStatus;
|
||||
|
||||
BdaSolverStatus openclSolverBackend::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) {
|
||||
template <unsigned int block_size>
|
||||
Status openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) {
|
||||
if (initialized == false) {
|
||||
initialize(N_, nnz_, dim, vals, rows, cols);
|
||||
if (analysis_done == false) {
|
||||
if (!analyse_matrix()) {
|
||||
return BdaSolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
|
||||
return Status::BDA_SOLVER_ANALYSIS_FAILED;
|
||||
}
|
||||
}
|
||||
update_system(vals, b);
|
||||
if (!create_preconditioner()) {
|
||||
return BdaSolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||
return Status::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||
}
|
||||
copy_system_to_gpu();
|
||||
} else {
|
||||
update_system(vals, b);
|
||||
if (!create_preconditioner()) {
|
||||
return BdaSolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||
return Status::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||
}
|
||||
update_system_on_gpu();
|
||||
}
|
||||
solve_system(wellContribs, res);
|
||||
return BdaSolverStatus::BDA_SOLVER_SUCCESS;
|
||||
return Status::BDA_SOLVER_SUCCESS;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||
template openclSolverBackend<n>::openclSolverBackend(int, int, double); \
|
||||
|
||||
INSTANTIATE_BDA_FUNCTIONS(1);
|
||||
INSTANTIATE_BDA_FUNCTIONS(2);
|
||||
INSTANTIATE_BDA_FUNCTIONS(3);
|
||||
INSTANTIATE_BDA_FUNCTIONS(4);
|
||||
|
||||
#undef INSTANTIATE_BDA_FUNCTIONS
|
||||
|
||||
} // namespace bda
|
||||
|
||||
|
@ -31,15 +31,29 @@
|
||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/BILU0.hpp>
|
||||
typedef bda::BILU0 Preconditioner;
|
||||
|
||||
namespace bda
|
||||
{
|
||||
|
||||
/// This class implements an OpenCL-based ilu0-bicgstab solver on GPU
|
||||
class openclSolverBackend : public BdaSolver
|
||||
template <unsigned int block_size>
|
||||
class openclSolverBackend : public BdaSolver<block_size>
|
||||
{
|
||||
|
||||
typedef BdaSolver<block_size> Base;
|
||||
typedef BILU0<block_size> Preconditioner;
|
||||
|
||||
using Base::N;
|
||||
using Base::Nb;
|
||||
using Base::nnz;
|
||||
using Base::nnzb;
|
||||
using Base::verbosity;
|
||||
using Base::maxit;
|
||||
using Base::tolerance;
|
||||
using Base::second;
|
||||
using Base::initialized;
|
||||
typedef BdaSolverStatus::Status Status;
|
||||
|
||||
private:
|
||||
|
||||
double *rb; // reordered b vector, the matrix is reordered, so b must also be
|
||||
@ -182,7 +196,7 @@ public:
|
||||
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
||||
/// \param[inout] res summary of solver result
|
||||
/// \return status code
|
||||
BdaSolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
|
||||
Status solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
|
||||
|
||||
/// Get result after linear solve, and perform postprocessing if necessary
|
||||
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
||||
|
Loading…
Reference in New Issue
Block a user