mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Added block_size template to BdaSolvers and BILU0
This commit is contained in:
parent
2a48f5f63f
commit
98ddf47b44
@ -150,6 +150,7 @@ list (APPEND PUBLIC_HEADER_FILES
|
|||||||
opm/simulators/linalg/bda/BdaBridge.hpp
|
opm/simulators/linalg/bda/BdaBridge.hpp
|
||||||
opm/simulators/linalg/bda/BdaResult.hpp
|
opm/simulators/linalg/bda/BdaResult.hpp
|
||||||
opm/simulators/linalg/bda/BdaSolver.hpp
|
opm/simulators/linalg/bda/BdaSolver.hpp
|
||||||
|
opm/simulators/linalg/bda/BdaSolverStatus.hpp
|
||||||
opm/simulators/linalg/bda/BILU0.hpp
|
opm/simulators/linalg/bda/BILU0.hpp
|
||||||
opm/simulators/linalg/bda/BlockedMatrix.hpp
|
opm/simulators/linalg/bda/BlockedMatrix.hpp
|
||||||
opm/simulators/linalg/bda/cuda_header.hpp
|
opm/simulators/linalg/bda/cuda_header.hpp
|
||||||
|
@ -35,12 +35,12 @@ namespace bda
|
|||||||
|
|
||||||
using Opm::OpmLog;
|
using Opm::OpmLog;
|
||||||
|
|
||||||
BILU0::BILU0(bool level_scheduling_, bool graph_coloring_, int verbosity_) :
|
|
||||||
|
|
||||||
// define 'second' as 'BdaSolver<>::second', this allows usage of the second() function for timing
|
// define 'second' as 'BdaSolver<>::second', this allows usage of the second() function for timing
|
||||||
// typedefs cannot handle templates
|
// typedefs cannot handle templates
|
||||||
const auto second = BdaSolver<>::second;
|
const auto second = BdaSolver<>::second;
|
||||||
|
|
||||||
|
template <unsigned int block_size>
|
||||||
|
BILU0<block_size>::BILU0(bool level_scheduling_, bool graph_coloring_, int verbosity_) :
|
||||||
level_scheduling(level_scheduling_), graph_coloring(graph_coloring_), verbosity(verbosity_)
|
level_scheduling(level_scheduling_), graph_coloring(graph_coloring_), verbosity(verbosity_)
|
||||||
{
|
{
|
||||||
if (level_scheduling == graph_coloring) {
|
if (level_scheduling == graph_coloring) {
|
||||||
@ -49,7 +49,8 @@ namespace bda
|
|||||||
double t1 = second();
|
double t1 = second();
|
||||||
}
|
}
|
||||||
|
|
||||||
BILU0::~BILU0()
|
template <unsigned int block_size>
|
||||||
|
BILU0<block_size>::~BILU0()
|
||||||
{
|
{
|
||||||
delete[] invDiagVals;
|
delete[] invDiagVals;
|
||||||
delete[] diagIndex;
|
delete[] diagIndex;
|
||||||
@ -62,7 +63,8 @@ namespace bda
|
|||||||
freeBlockedMatrix(&rMat);
|
freeBlockedMatrix(&rMat);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool BILU0::init(BlockedMatrix *mat, unsigned int block_size)
|
template <unsigned int block_size>
|
||||||
|
bool BILU0<block_size>::init(BlockedMatrix *mat)
|
||||||
{
|
{
|
||||||
double t1 = 0.0, t2 = 0.0;
|
double t1 = 0.0, t2 = 0.0;
|
||||||
BlockedMatrix *CSCmat = nullptr;
|
BlockedMatrix *CSCmat = nullptr;
|
||||||
@ -71,7 +73,6 @@ namespace bda
|
|||||||
this->Nb = mat->Nb;
|
this->Nb = mat->Nb;
|
||||||
this->nnz = mat->nnzbs * block_size * block_size;
|
this->nnz = mat->nnzbs * block_size * block_size;
|
||||||
this->nnzbs = mat->nnzbs;
|
this->nnzbs = mat->nnzbs;
|
||||||
this->block_size = block_size;
|
|
||||||
|
|
||||||
toOrder = new int[N];
|
toOrder = new int[N];
|
||||||
fromOrder = new int[N];
|
fromOrder = new int[N];
|
||||||
@ -160,7 +161,8 @@ namespace bda
|
|||||||
} // end init()
|
} // end init()
|
||||||
|
|
||||||
|
|
||||||
bool BILU0::create_preconditioner(BlockedMatrix *mat)
|
template <unsigned int block_size>
|
||||||
|
bool BILU0<block_size>::create_preconditioner(BlockedMatrix *mat)
|
||||||
{
|
{
|
||||||
double t1 = 0.0, t2 = 0.0;
|
double t1 = 0.0, t2 = 0.0;
|
||||||
if (verbosity >= 3){
|
if (verbosity >= 3){
|
||||||
@ -306,13 +308,13 @@ namespace bda
|
|||||||
// kernels are blocking on an NVIDIA GPU, so waiting for events is not needed
|
// kernels are blocking on an NVIDIA GPU, so waiting for events is not needed
|
||||||
// however, if individual kernel calls are timed, waiting for events is needed
|
// however, if individual kernel calls are timed, waiting for events is needed
|
||||||
// behavior on other GPUs is untested
|
// behavior on other GPUs is untested
|
||||||
void BILU0::apply(cl::Buffer& x, cl::Buffer& y)
|
template <unsigned int block_size>
|
||||||
|
void BILU0<block_size>::apply(cl::Buffer& x, cl::Buffer& y)
|
||||||
{
|
{
|
||||||
double t1 = 0.0, t2 = 0.0;
|
double t1 = 0.0, t2 = 0.0;
|
||||||
if (verbosity >= 3) {
|
if (verbosity >= 3) {
|
||||||
t1 = second();
|
t1 = second();
|
||||||
}
|
}
|
||||||
const unsigned int block_size = 3;
|
|
||||||
cl::Event event;
|
cl::Event event;
|
||||||
|
|
||||||
for(unsigned int color = 0; color < numColors; ++color){
|
for(unsigned int color = 0; color < numColors; ++color){
|
||||||
@ -334,18 +336,22 @@ namespace bda
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void BILU0::setOpenCLContext(cl::Context *context){
|
template <unsigned int block_size>
|
||||||
|
void BILU0<block_size>::setOpenCLContext(cl::Context *context){
|
||||||
this->context = context;
|
this->context = context;
|
||||||
}
|
}
|
||||||
void BILU0::setOpenCLQueue(cl::CommandQueue *queue){
|
template <unsigned int block_size>
|
||||||
|
void BILU0<block_size>::setOpenCLQueue(cl::CommandQueue *queue){
|
||||||
this->queue = queue;
|
this->queue = queue;
|
||||||
}
|
}
|
||||||
void BILU0::setKernelParameters(const unsigned int work_group_size, const unsigned int total_work_items, const unsigned int lmem_per_work_group){
|
template <unsigned int block_size>
|
||||||
|
void BILU0<block_size>::setKernelParameters(const unsigned int work_group_size, const unsigned int total_work_items, const unsigned int lmem_per_work_group){
|
||||||
this->work_group_size = work_group_size;
|
this->work_group_size = work_group_size;
|
||||||
this->total_work_items = total_work_items;
|
this->total_work_items = total_work_items;
|
||||||
this->lmem_per_work_group = lmem_per_work_group;
|
this->lmem_per_work_group = lmem_per_work_group;
|
||||||
}
|
}
|
||||||
void BILU0::setKernels(
|
template <unsigned int block_size>
|
||||||
|
void BILU0<block_size>::setKernels(
|
||||||
cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> *ILU_apply1_,
|
cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> *ILU_apply1_,
|
||||||
cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> *ILU_apply2_
|
cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> *ILU_apply2_
|
||||||
){
|
){
|
||||||
@ -353,6 +359,27 @@ namespace bda
|
|||||||
this->ILU_apply2 = ILU_apply2_;
|
this->ILU_apply2 = ILU_apply2_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
|
template BILU0<n>::BILU0(bool, bool, int); \
|
||||||
|
template bool BILU0<n>::init(BlockedMatrix*); \
|
||||||
|
template bool BILU0<n>::create_preconditioner(BlockedMatrix*); \
|
||||||
|
template void BILU0<n>::apply(cl::Buffer& x, cl::Buffer& y); \
|
||||||
|
template void BILU0<n>::setOpenCLContext(cl::Context*); \
|
||||||
|
template void BILU0<n>::setOpenCLQueue(cl::CommandQueue*); \
|
||||||
|
template void BILU0<n>::setKernelParameters(unsigned int, unsigned int, unsigned int); \
|
||||||
|
template void BILU0<n>::setKernels( \
|
||||||
|
cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> *, \
|
||||||
|
cl::make_kernel<cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, cl::Buffer&, cl::Buffer&, cl::Buffer&, const unsigned int, const unsigned int, cl::LocalSpaceArg> * \
|
||||||
|
);
|
||||||
|
|
||||||
|
INSTANTIATE_BDA_FUNCTIONS(1);
|
||||||
|
INSTANTIATE_BDA_FUNCTIONS(2);
|
||||||
|
INSTANTIATE_BDA_FUNCTIONS(3);
|
||||||
|
INSTANTIATE_BDA_FUNCTIONS(4);
|
||||||
|
|
||||||
|
#undef INSTANTIATE_BDA_FUNCTIONS
|
||||||
|
|
||||||
} // end namespace bda
|
} // end namespace bda
|
||||||
|
|
||||||
|
|
||||||
|
@ -36,6 +36,7 @@ namespace bda
|
|||||||
|
|
||||||
/// This class implementa a Blocked ILU0 preconditioner
|
/// This class implementa a Blocked ILU0 preconditioner
|
||||||
/// The decomposition is done on CPU, and reorders the rows of the matrix
|
/// The decomposition is done on CPU, and reorders the rows of the matrix
|
||||||
|
template <unsigned int block_size>
|
||||||
class BILU0
|
class BILU0
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -44,7 +45,6 @@ namespace bda
|
|||||||
int Nb; // number of blockrows of the matrix
|
int Nb; // number of blockrows of the matrix
|
||||||
int nnz; // number of nonzeroes of the matrix (scalar)
|
int nnz; // number of nonzeroes of the matrix (scalar)
|
||||||
int nnzbs; // number of blocks of the matrix
|
int nnzbs; // number of blocks of the matrix
|
||||||
unsigned int block_size;
|
|
||||||
BlockedMatrix *LMat, *UMat, *LUMat;
|
BlockedMatrix *LMat, *UMat, *LUMat;
|
||||||
BlockedMatrix *rMat = nullptr; // only used with PAR_SIM
|
BlockedMatrix *rMat = nullptr; // only used with PAR_SIM
|
||||||
Block *invDiagVals;
|
Block *invDiagVals;
|
||||||
@ -79,7 +79,7 @@ namespace bda
|
|||||||
~BILU0();
|
~BILU0();
|
||||||
|
|
||||||
// analysis
|
// analysis
|
||||||
bool init(BlockedMatrix *mat, unsigned int block_size);
|
bool init(BlockedMatrix *mat);
|
||||||
|
|
||||||
// ilu_decomposition
|
// ilu_decomposition
|
||||||
bool create_preconditioner(BlockedMatrix *mat);
|
bool create_preconditioner(BlockedMatrix *mat);
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
#include <opm/material/common/Unused.hpp>
|
#include <opm/material/common/Unused.hpp>
|
||||||
|
|
||||||
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
|
#include <opm/simulators/linalg/bda/BdaBridge.hpp>
|
||||||
|
#include <opm/simulators/linalg/bda/BdaSolverStatus.hpp>
|
||||||
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
||||||
|
|
||||||
#define PRINT_TIMERS_BRIDGE 0
|
#define PRINT_TIMERS_BRIDGE 0
|
||||||
@ -37,6 +38,7 @@ namespace Opm
|
|||||||
|
|
||||||
using bda::BdaResult;
|
using bda::BdaResult;
|
||||||
using bda::BdaSolver;
|
using bda::BdaSolver;
|
||||||
|
using bda::BdaSolverStatus;
|
||||||
|
|
||||||
template <class BridgeMatrix, class BridgeVector, int block_size>
|
template <class BridgeMatrix, class BridgeVector, int block_size>
|
||||||
BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string gpu_mode, int linear_solver_verbosity, int maxit, double tolerance)
|
BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string gpu_mode, int linear_solver_verbosity, int maxit, double tolerance)
|
||||||
@ -44,14 +46,14 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string gpu_mod
|
|||||||
if (gpu_mode.compare("cusparse") == 0) {
|
if (gpu_mode.compare("cusparse") == 0) {
|
||||||
#if HAVE_CUDA
|
#if HAVE_CUDA
|
||||||
use_gpu = true;
|
use_gpu = true;
|
||||||
backend.reset(new bda::cusparseSolverBackend(linear_solver_verbosity, maxit, tolerance));
|
backend.reset(new bda::cusparseSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance));
|
||||||
#else
|
#else
|
||||||
OPM_THROW(std::logic_error, "Error cusparseSolver was chosen, but CUDA was not found by CMake");
|
OPM_THROW(std::logic_error, "Error cusparseSolver was chosen, but CUDA was not found by CMake");
|
||||||
#endif
|
#endif
|
||||||
} else if (gpu_mode.compare("opencl") == 0) {
|
} else if (gpu_mode.compare("opencl") == 0) {
|
||||||
#if HAVE_OPENCL
|
#if HAVE_OPENCL
|
||||||
use_gpu = true;
|
use_gpu = true;
|
||||||
backend.reset(new bda::openclSolverBackend(linear_solver_verbosity, maxit, tolerance));
|
backend.reset(new bda::openclSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance));
|
||||||
#else
|
#else
|
||||||
OPM_THROW(std::logic_error, "Error openclSolver was chosen, but OpenCL was not found by CMake");
|
OPM_THROW(std::logic_error, "Error openclSolver was chosen, but OpenCL was not found by CMake");
|
||||||
#endif
|
#endif
|
||||||
@ -178,17 +180,17 @@ void BdaBridge<BridgeMatrix, BridgeVector, block_size>::solve_system(BridgeMatri
|
|||||||
/////////////////////////
|
/////////////////////////
|
||||||
// actually solve
|
// actually solve
|
||||||
|
|
||||||
typedef BdaSolver::BdaSolverStatus BdaSolverStatus;
|
typedef BdaSolverStatus::Status Status;
|
||||||
// assume that underlying data (nonzeroes) from mat (Dune::BCRSMatrix) are contiguous, if this is not the case, cusparseSolver is expected to perform undefined behaviour
|
// assume that underlying data (nonzeroes) from mat (Dune::BCRSMatrix) are contiguous, if this is not the case, cusparseSolver is expected to perform undefined behaviour
|
||||||
BdaSolverStatus status = backend->solve_system(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])), wellContribs, result);
|
Status status = backend->solve_system(N, nnz, dim, static_cast<double*>(&(((*mat)[0][0][0][0]))), h_rows.data(), h_cols.data(), static_cast<double*>(&(b[0][0])), wellContribs, result);
|
||||||
switch(status) {
|
switch(status) {
|
||||||
case BdaSolverStatus::BDA_SOLVER_SUCCESS:
|
case Status::BDA_SOLVER_SUCCESS:
|
||||||
//OpmLog::info("BdaSolver converged");
|
//OpmLog::info("BdaSolver converged");
|
||||||
break;
|
break;
|
||||||
case BdaSolverStatus::BDA_SOLVER_ANALYSIS_FAILED:
|
case Status::BDA_SOLVER_ANALYSIS_FAILED:
|
||||||
OpmLog::warning("BdaSolver could not analyse level information of matrix, perhaps there is still a 0.0 on the diagonal of a block on the diagonal");
|
OpmLog::warning("BdaSolver could not analyse level information of matrix, perhaps there is still a 0.0 on the diagonal of a block on the diagonal");
|
||||||
break;
|
break;
|
||||||
case BdaSolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED:
|
case Status::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED:
|
||||||
OpmLog::warning("BdaSolver could not create preconditioner, perhaps there is still a 0.0 on the diagonal of a block on the diagonal");
|
OpmLog::warning("BdaSolver could not create preconditioner, perhaps there is still a 0.0 on the diagonal of a block on the diagonal");
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@ -238,6 +240,6 @@ INSTANTIATE_BDA_FUNCTIONS(4);
|
|||||||
|
|
||||||
#undef INSTANTIATE_BDA_FUNCTIONS
|
#undef INSTANTIATE_BDA_FUNCTIONS
|
||||||
|
|
||||||
}
|
} // namespace Opm
|
||||||
|
|
||||||
|
|
||||||
|
@ -47,7 +47,7 @@ template <class BridgeMatrix, class BridgeVector, int block_size>
|
|||||||
class BdaBridge
|
class BdaBridge
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
std::unique_ptr<bda::BdaSolver> backend;
|
std::unique_ptr<bda::BdaSolver<block_size> > backend;
|
||||||
bool use_gpu = false;
|
bool use_gpu = false;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -26,15 +26,19 @@
|
|||||||
#include <sys/time.h>
|
#include <sys/time.h>
|
||||||
|
|
||||||
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
||||||
|
#include <opm/simulators/linalg/bda/BdaSolverStatus.hpp>
|
||||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||||
|
|
||||||
namespace bda
|
namespace bda
|
||||||
{
|
{
|
||||||
|
|
||||||
using Opm::WellContributions;
|
using Opm::WellContributions;
|
||||||
|
typedef BdaSolverStatus::Status Status;
|
||||||
|
|
||||||
/// This class serves to simplify choosing between different backend solvers, such as cusparseSolver and openclSolver
|
/// This class serves to simplify choosing between different backend solvers, such as cusparseSolver and openclSolver
|
||||||
/// This class is abstract, no instantiations can of it can be made, only of its children
|
/// This class is abstract, no instantiations can of it can be made, only of its children
|
||||||
|
/// Without a default block_size value, the BILU0 class cannot use BdaSolver::second()
|
||||||
|
template <unsigned int block_size = 3>
|
||||||
class BdaSolver
|
class BdaSolver
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -55,26 +59,18 @@ namespace bda
|
|||||||
int Nb; // number of blocked rows (Nb*block_size == N)
|
int Nb; // number of blocked rows (Nb*block_size == N)
|
||||||
int nnz; // number of nonzeroes (scalars)
|
int nnz; // number of nonzeroes (scalars)
|
||||||
int nnzb; // number of nonzero blocks (nnzb*block_size*block_size == nnz)
|
int nnzb; // number of nonzero blocks (nnzb*block_size*block_size == nnz)
|
||||||
int block_size; // size of block
|
|
||||||
|
|
||||||
bool initialized = false;
|
bool initialized = false;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
enum class BdaSolverStatus {
|
|
||||||
BDA_SOLVER_SUCCESS,
|
|
||||||
BDA_SOLVER_ANALYSIS_FAILED,
|
|
||||||
BDA_SOLVER_CREATE_PRECONDITIONER_FAILED,
|
|
||||||
BDA_SOLVER_UNKNOWN_ERROR
|
|
||||||
};
|
|
||||||
|
|
||||||
BdaSolver(int linear_solver_verbosity, int max_it, double tolerance_) : verbosity(linear_solver_verbosity), maxit(max_it), tolerance(tolerance_) {};
|
BdaSolver(int linear_solver_verbosity, int max_it, double tolerance_) : verbosity(linear_solver_verbosity), maxit(max_it), tolerance(tolerance_) {};
|
||||||
|
|
||||||
/// Define virtual destructor, so that the derivedclass destructor will be called
|
/// Define virtual destructor, so that the derivedclass destructor will be called
|
||||||
virtual ~BdaSolver() {};
|
virtual ~BdaSolver() {};
|
||||||
|
|
||||||
/// Define as pure virtual functions, so derivedclass must implement them
|
/// Define as pure virtual functions, so derivedclass must implement them
|
||||||
virtual BdaSolverStatus solve_system(int N, int nnz, int dim,
|
virtual Status solve_system(int N, int nnz, int dim,
|
||||||
double *vals, int *rows, int *cols,
|
double *vals, int *rows, int *cols,
|
||||||
double *b, WellContributions& wellContribs, BdaResult &res) = 0;
|
double *b, WellContributions& wellContribs, BdaResult &res) = 0;
|
||||||
|
|
||||||
|
42
opm/simulators/linalg/bda/BdaSolverStatus.hpp
Normal file
42
opm/simulators/linalg/bda/BdaSolverStatus.hpp
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
/*
|
||||||
|
Copyright 2019 Equinor ASA
|
||||||
|
|
||||||
|
This file is part of the Open Porous Media project (OPM).
|
||||||
|
|
||||||
|
OPM is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
OPM is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef OPM_BDASOLVERSTATUS_HEADER_INCLUDED
|
||||||
|
#define OPM_BDASOLVERSTATUS_HEADER_INCLUDED
|
||||||
|
|
||||||
|
namespace bda
|
||||||
|
{
|
||||||
|
|
||||||
|
class BdaSolverStatus
|
||||||
|
{
|
||||||
|
|
||||||
|
public:
|
||||||
|
|
||||||
|
enum class Status {
|
||||||
|
BDA_SOLVER_SUCCESS,
|
||||||
|
BDA_SOLVER_ANALYSIS_FAILED,
|
||||||
|
BDA_SOLVER_CREATE_PRECONDITIONER_FAILED,
|
||||||
|
BDA_SOLVER_UNKNOWN_ERROR
|
||||||
|
};
|
||||||
|
|
||||||
|
}; // end class BdaSolverStatus
|
||||||
|
|
||||||
|
} // end namespace bda
|
||||||
|
|
||||||
|
#endif
|
@ -51,13 +51,17 @@ const cusparseSolvePolicy_t policy = CUSPARSE_SOLVE_POLICY_USE_LEVEL;
|
|||||||
const cusparseOperation_t operation = CUSPARSE_OPERATION_NON_TRANSPOSE;
|
const cusparseOperation_t operation = CUSPARSE_OPERATION_NON_TRANSPOSE;
|
||||||
const cusparseDirection_t order = CUSPARSE_DIRECTION_ROW;
|
const cusparseDirection_t order = CUSPARSE_DIRECTION_ROW;
|
||||||
|
|
||||||
cusparseSolverBackend::cusparseSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver(verbosity_, maxit_, tolerance_) {}
|
|
||||||
|
|
||||||
cusparseSolverBackend::~cusparseSolverBackend() {
|
template <unsigned int block_size>
|
||||||
|
cusparseSolverBackend<block_size>::cusparseSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_) {}
|
||||||
|
|
||||||
|
template <unsigned int block_size>
|
||||||
|
cusparseSolverBackend<block_size>::~cusparseSolverBackend() {
|
||||||
finalize();
|
finalize();
|
||||||
}
|
}
|
||||||
|
|
||||||
void cusparseSolverBackend::gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res) {
|
template <unsigned int block_size>
|
||||||
|
void cusparseSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res) {
|
||||||
double t_total1, t_total2;
|
double t_total1, t_total2;
|
||||||
int n = N;
|
int n = N;
|
||||||
double rho = 1.0, rhop;
|
double rho = 1.0, rhop;
|
||||||
@ -188,10 +192,10 @@ void cusparseSolverBackend::gpu_pbicgstab(WellContributions& wellContribs, BdaRe
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void cusparseSolverBackend::initialize(int N, int nnz, int dim) {
|
template <unsigned int block_size>
|
||||||
|
void cusparseSolverBackend<block_size>::initialize(int N, int nnz, int dim) {
|
||||||
this->N = N;
|
this->N = N;
|
||||||
this->nnz = nnz;
|
this->nnz = nnz;
|
||||||
this->block_size = dim;
|
|
||||||
this->nnzb = nnz / block_size / block_size;
|
this->nnzb = nnz / block_size / block_size;
|
||||||
Nb = (N + dim - 1) / dim;
|
Nb = (N + dim - 1) / dim;
|
||||||
std::ostringstream out;
|
std::ostringstream out;
|
||||||
@ -250,7 +254,8 @@ void cusparseSolverBackend::initialize(int N, int nnz, int dim) {
|
|||||||
initialized = true;
|
initialized = true;
|
||||||
} // end initialize()
|
} // end initialize()
|
||||||
|
|
||||||
void cusparseSolverBackend::finalize() {
|
template <unsigned int block_size>
|
||||||
|
void cusparseSolverBackend<block_size>::finalize() {
|
||||||
if (initialized) {
|
if (initialized) {
|
||||||
cudaFree(d_x);
|
cudaFree(d_x);
|
||||||
cudaFree(d_b);
|
cudaFree(d_b);
|
||||||
@ -283,7 +288,8 @@ void cusparseSolverBackend::finalize() {
|
|||||||
} // end finalize()
|
} // end finalize()
|
||||||
|
|
||||||
|
|
||||||
void cusparseSolverBackend::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b) {
|
template <unsigned int block_size>
|
||||||
|
void cusparseSolverBackend<block_size>::copy_system_to_gpu(double *vals, int *rows, int *cols, double *b) {
|
||||||
|
|
||||||
double t1, t2;
|
double t1, t2;
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
@ -318,7 +324,8 @@ void cusparseSolverBackend::copy_system_to_gpu(double *vals, int *rows, int *col
|
|||||||
|
|
||||||
|
|
||||||
// don't copy rowpointers and colindices, they stay the same
|
// don't copy rowpointers and colindices, they stay the same
|
||||||
void cusparseSolverBackend::update_system_on_gpu(double *vals, int *rows, double *b) {
|
template <unsigned int block_size>
|
||||||
|
void cusparseSolverBackend<block_size>::update_system_on_gpu(double *vals, int *rows, double *b) {
|
||||||
|
|
||||||
double t1, t2;
|
double t1, t2;
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
@ -350,12 +357,14 @@ void cusparseSolverBackend::update_system_on_gpu(double *vals, int *rows, double
|
|||||||
} // end update_system_on_gpu()
|
} // end update_system_on_gpu()
|
||||||
|
|
||||||
|
|
||||||
void cusparseSolverBackend::reset_prec_on_gpu() {
|
template <unsigned int block_size>
|
||||||
|
void cusparseSolverBackend<block_size>::reset_prec_on_gpu() {
|
||||||
cudaMemcpyAsync(d_mVals, d_bVals, nnz * sizeof(double), cudaMemcpyDeviceToDevice, stream);
|
cudaMemcpyAsync(d_mVals, d_bVals, nnz * sizeof(double), cudaMemcpyDeviceToDevice, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool cusparseSolverBackend::analyse_matrix() {
|
template <unsigned int block_size>
|
||||||
|
bool cusparseSolverBackend<block_size>::analyse_matrix() {
|
||||||
|
|
||||||
int d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
|
int d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
|
||||||
double t1, t2;
|
double t1, t2;
|
||||||
@ -436,7 +445,8 @@ bool cusparseSolverBackend::analyse_matrix() {
|
|||||||
return true;
|
return true;
|
||||||
} // end analyse_matrix()
|
} // end analyse_matrix()
|
||||||
|
|
||||||
bool cusparseSolverBackend::create_preconditioner() {
|
template <unsigned int block_size>
|
||||||
|
bool cusparseSolverBackend<block_size>::create_preconditioner() {
|
||||||
|
|
||||||
double t1, t2;
|
double t1, t2;
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
@ -468,7 +478,8 @@ bool cusparseSolverBackend::create_preconditioner() {
|
|||||||
} // end create_preconditioner()
|
} // end create_preconditioner()
|
||||||
|
|
||||||
|
|
||||||
void cusparseSolverBackend::solve_system(WellContributions& wellContribs, BdaResult &res) {
|
template <unsigned int block_size>
|
||||||
|
void cusparseSolverBackend<block_size>::solve_system(WellContributions& wellContribs, BdaResult &res) {
|
||||||
// actually solve
|
// actually solve
|
||||||
gpu_pbicgstab(wellContribs, res);
|
gpu_pbicgstab(wellContribs, res);
|
||||||
cudaStreamSynchronize(stream);
|
cudaStreamSynchronize(stream);
|
||||||
@ -478,7 +489,8 @@ void cusparseSolverBackend::solve_system(WellContributions& wellContribs, BdaRes
|
|||||||
|
|
||||||
// copy result to host memory
|
// copy result to host memory
|
||||||
// caller must be sure that x is a valid array
|
// caller must be sure that x is a valid array
|
||||||
void cusparseSolverBackend::get_result(double *x) {
|
template <unsigned int block_size>
|
||||||
|
void cusparseSolverBackend<block_size>::get_result(double *x) {
|
||||||
|
|
||||||
double t1, t2;
|
double t1, t2;
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
@ -497,9 +509,10 @@ void cusparseSolverBackend::get_result(double *x) {
|
|||||||
} // end get_result()
|
} // end get_result()
|
||||||
|
|
||||||
|
|
||||||
typedef BdaSolver::BdaSolverStatus BdaSolverStatus;
|
typedef BdaSolverStatus::Status Status;
|
||||||
|
|
||||||
BdaSolverStatus cusparseSolverBackend::solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) {
|
template <unsigned int block_size>
|
||||||
|
Status cusparseSolverBackend<block_size>::solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) {
|
||||||
if (initialized == false) {
|
if (initialized == false) {
|
||||||
initialize(N, nnz, dim);
|
initialize(N, nnz, dim);
|
||||||
copy_system_to_gpu(vals, rows, cols, b);
|
copy_system_to_gpu(vals, rows, cols, b);
|
||||||
@ -508,19 +521,29 @@ BdaSolverStatus cusparseSolverBackend::solve_system(int N, int nnz, int dim, dou
|
|||||||
}
|
}
|
||||||
if (analysis_done == false) {
|
if (analysis_done == false) {
|
||||||
if (!analyse_matrix()) {
|
if (!analyse_matrix()) {
|
||||||
return BdaSolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
|
return Status::BDA_SOLVER_ANALYSIS_FAILED;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
reset_prec_on_gpu();
|
reset_prec_on_gpu();
|
||||||
if (create_preconditioner()) {
|
if (create_preconditioner()) {
|
||||||
solve_system(wellContribs, res);
|
solve_system(wellContribs, res);
|
||||||
} else {
|
} else {
|
||||||
return BdaSolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
return Status::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||||
}
|
}
|
||||||
return BdaSolverStatus::BDA_SOLVER_SUCCESS;
|
return Status::BDA_SOLVER_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
|
template cusparseSolverBackend<n>::cusparseSolverBackend(int, int, double); \
|
||||||
|
|
||||||
|
INSTANTIATE_BDA_FUNCTIONS(1);
|
||||||
|
INSTANTIATE_BDA_FUNCTIONS(2);
|
||||||
|
INSTANTIATE_BDA_FUNCTIONS(3);
|
||||||
|
INSTANTIATE_BDA_FUNCTIONS(4);
|
||||||
|
|
||||||
|
#undef INSTANTIATE_BDA_FUNCTIONS
|
||||||
|
|
||||||
|
} // namespace bda
|
||||||
|
|
||||||
|
|
||||||
|
@ -32,7 +32,21 @@ namespace bda
|
|||||||
{
|
{
|
||||||
|
|
||||||
/// This class implements a cusparse-based ilu0-bicgstab solver on GPU
|
/// This class implements a cusparse-based ilu0-bicgstab solver on GPU
|
||||||
class cusparseSolverBackend : public BdaSolver {
|
template <unsigned int block_size>
|
||||||
|
class cusparseSolverBackend : public BdaSolver<block_size> {
|
||||||
|
|
||||||
|
typedef BdaSolver<block_size> Base;
|
||||||
|
|
||||||
|
using Base::N;
|
||||||
|
using Base::Nb;
|
||||||
|
using Base::nnz;
|
||||||
|
using Base::nnzb;
|
||||||
|
using Base::verbosity;
|
||||||
|
using Base::maxit;
|
||||||
|
using Base::tolerance;
|
||||||
|
using Base::second;
|
||||||
|
using Base::initialized;
|
||||||
|
typedef BdaSolverStatus::Status Status;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
@ -120,7 +134,7 @@ public:
|
|||||||
/// \param[in] wellContribs contains all WellContributions, to apply them separately, instead of adding them to matrix A
|
/// \param[in] wellContribs contains all WellContributions, to apply them separately, instead of adding them to matrix A
|
||||||
/// \param[inout] res summary of solver result
|
/// \param[inout] res summary of solver result
|
||||||
/// \return status code
|
/// \return status code
|
||||||
BdaSolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
|
Status solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
|
||||||
|
|
||||||
/// Get resulting vector x after linear solve, also includes post processing if necessary
|
/// Get resulting vector x after linear solve, also includes post processing if necessary
|
||||||
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
||||||
|
@ -51,31 +51,28 @@ namespace bda
|
|||||||
|
|
||||||
using Opm::OpmLog;
|
using Opm::OpmLog;
|
||||||
|
|
||||||
openclSolverBackend::openclSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver(verbosity_, maxit_, tolerance_) {
|
template <unsigned int block_size>
|
||||||
|
openclSolverBackend<block_size>::openclSolverBackend(int verbosity_, int maxit_, double tolerance_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_) {
|
||||||
prec = new Preconditioner(LEVEL_SCHEDULING, GRAPH_COLORING, verbosity_);
|
prec = new Preconditioner(LEVEL_SCHEDULING, GRAPH_COLORING, verbosity_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
openclSolverBackend::~openclSolverBackend() {
|
template <unsigned int block_size>
|
||||||
|
openclSolverBackend<block_size>::~openclSolverBackend() {
|
||||||
finalize();
|
finalize();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// divide A by B, and round up: return (int)ceil(A/B)
|
// divide A by B, and round up: return (int)ceil(A/B)
|
||||||
unsigned int openclSolverBackend::ceilDivision(const unsigned int A, const unsigned int B)
|
template <unsigned int block_size>
|
||||||
|
unsigned int openclSolverBackend<block_size>::ceilDivision(const unsigned int A, const unsigned int B)
|
||||||
{
|
{
|
||||||
return A / B + (A % B > 0);
|
return A / B + (A % B > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// just for verifying and debugging
|
|
||||||
bool equal(float a, float b)
|
|
||||||
{
|
|
||||||
const float tol_abs = 1e-2;
|
|
||||||
const float tol_rel = 1e-2;
|
|
||||||
return std::abs(a - b) <= std::max(tol_rel * std::max(std::abs(a), std::abs(b)), tol_abs);
|
|
||||||
}
|
|
||||||
|
|
||||||
double openclSolverBackend::dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out)
|
template <unsigned int block_size>
|
||||||
|
double openclSolverBackend<block_size>::dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out)
|
||||||
{
|
{
|
||||||
double t1 = 0.0, t2 = 0.0;
|
double t1 = 0.0, t2 = 0.0;
|
||||||
const unsigned int work_group_size = 1024;
|
const unsigned int work_group_size = 1024;
|
||||||
@ -106,7 +103,8 @@ double openclSolverBackend::dot_w(cl::Buffer in1, cl::Buffer in2, cl::Buffer out
|
|||||||
return gpu_sum;
|
return gpu_sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
double openclSolverBackend::norm_w(cl::Buffer in, cl::Buffer out)
|
template <unsigned int block_size>
|
||||||
|
double openclSolverBackend<block_size>::norm_w(cl::Buffer in, cl::Buffer out)
|
||||||
{
|
{
|
||||||
double t1 = 0.0, t2 = 0.0;
|
double t1 = 0.0, t2 = 0.0;
|
||||||
const unsigned int work_group_size = 1024;
|
const unsigned int work_group_size = 1024;
|
||||||
@ -138,7 +136,8 @@ double openclSolverBackend::norm_w(cl::Buffer in, cl::Buffer out)
|
|||||||
return gpu_norm;
|
return gpu_norm;
|
||||||
}
|
}
|
||||||
|
|
||||||
void openclSolverBackend::axpy_w(cl::Buffer in, const double a, cl::Buffer out)
|
template <unsigned int block_size>
|
||||||
|
void openclSolverBackend<block_size>::axpy_w(cl::Buffer in, const double a, cl::Buffer out)
|
||||||
{
|
{
|
||||||
double t1 = 0.0, t2 = 0.0;
|
double t1 = 0.0, t2 = 0.0;
|
||||||
const unsigned int work_group_size = 32;
|
const unsigned int work_group_size = 32;
|
||||||
@ -159,7 +158,8 @@ void openclSolverBackend::axpy_w(cl::Buffer in, const double a, cl::Buffer out)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void openclSolverBackend::custom_w(cl::Buffer p, cl::Buffer v, cl::Buffer r, const double omega, const double beta)
|
template <unsigned int block_size>
|
||||||
|
void openclSolverBackend<block_size>::custom_w(cl::Buffer p, cl::Buffer v, cl::Buffer r, const double omega, const double beta)
|
||||||
{
|
{
|
||||||
double t1 = 0.0, t2 = 0.0;
|
double t1 = 0.0, t2 = 0.0;
|
||||||
const unsigned int work_group_size = 32;
|
const unsigned int work_group_size = 32;
|
||||||
@ -180,7 +180,8 @@ void openclSolverBackend::custom_w(cl::Buffer p, cl::Buffer v, cl::Buffer r, con
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void openclSolverBackend::spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b)
|
template <unsigned int block_size>
|
||||||
|
void openclSolverBackend<block_size>::spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::Buffer rows, cl::Buffer x, cl::Buffer b)
|
||||||
{
|
{
|
||||||
double t1 = 0.0, t2 = 0.0;
|
double t1 = 0.0, t2 = 0.0;
|
||||||
const unsigned int work_group_size = 32;
|
const unsigned int work_group_size = 32;
|
||||||
@ -203,7 +204,8 @@ void openclSolverBackend::spmv_blocked_w(cl::Buffer vals, cl::Buffer cols, cl::B
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void openclSolverBackend::gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res) {
|
template <unsigned int block_size>
|
||||||
|
void openclSolverBackend<block_size>::gpu_pbicgstab(WellContributions& wellContribs, BdaResult& res) {
|
||||||
|
|
||||||
float it;
|
float it;
|
||||||
double rho, rhop, beta, alpha, omega, tmp1, tmp2;
|
double rho, rhop, beta, alpha, omega, tmp1, tmp2;
|
||||||
@ -360,10 +362,10 @@ void openclSolverBackend::gpu_pbicgstab(WellContributions& wellContribs, BdaResu
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void openclSolverBackend::initialize(int N_, int nnz_, int dim, double *vals, int *rows, int *cols) {
|
template <unsigned int block_size>
|
||||||
|
void openclSolverBackend<block_size>::initialize(int N_, int nnz_, int dim, double *vals, int *rows, int *cols) {
|
||||||
this->N = N_;
|
this->N = N_;
|
||||||
this->nnz = nnz_;
|
this->nnz = nnz_;
|
||||||
this->block_size = dim;
|
|
||||||
this->nnzb = nnz_ / block_size / block_size;
|
this->nnzb = nnz_ / block_size / block_size;
|
||||||
|
|
||||||
Nb = (N + dim - 1) / dim;
|
Nb = (N + dim - 1) / dim;
|
||||||
@ -542,7 +544,9 @@ void openclSolverBackend::initialize(int N_, int nnz_, int dim, double *vals, in
|
|||||||
initialized = true;
|
initialized = true;
|
||||||
} // end initialize()
|
} // end initialize()
|
||||||
|
|
||||||
void openclSolverBackend::finalize() {
|
|
||||||
|
template <unsigned int block_size>
|
||||||
|
void openclSolverBackend<block_size>::finalize() {
|
||||||
delete[] rb;
|
delete[] rb;
|
||||||
delete[] tmp;
|
delete[] tmp;
|
||||||
#if COPY_ROW_BY_ROW
|
#if COPY_ROW_BY_ROW
|
||||||
@ -551,7 +555,8 @@ void openclSolverBackend::finalize() {
|
|||||||
} // end finalize()
|
} // end finalize()
|
||||||
|
|
||||||
|
|
||||||
void openclSolverBackend::copy_system_to_gpu() {
|
template <unsigned int block_size>
|
||||||
|
void openclSolverBackend<block_size>::copy_system_to_gpu() {
|
||||||
|
|
||||||
double t1 = 0.0, t2 = 0.0;
|
double t1 = 0.0, t2 = 0.0;
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
@ -588,7 +593,8 @@ void openclSolverBackend::copy_system_to_gpu() {
|
|||||||
|
|
||||||
|
|
||||||
// don't copy rowpointers and colindices, they stay the same
|
// don't copy rowpointers and colindices, they stay the same
|
||||||
void openclSolverBackend::update_system_on_gpu() {
|
template <unsigned int block_size>
|
||||||
|
void openclSolverBackend<block_size>::update_system_on_gpu() {
|
||||||
|
|
||||||
double t1 = 0.0, t2 = 0.0;
|
double t1 = 0.0, t2 = 0.0;
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
@ -622,7 +628,8 @@ void openclSolverBackend::update_system_on_gpu() {
|
|||||||
} // end update_system_on_gpu()
|
} // end update_system_on_gpu()
|
||||||
|
|
||||||
|
|
||||||
bool openclSolverBackend::analyse_matrix() {
|
template <unsigned int block_size>
|
||||||
|
bool openclSolverBackend<block_size>::analyse_matrix() {
|
||||||
|
|
||||||
double t1 = 0.0, t2 = 0.0;
|
double t1 = 0.0, t2 = 0.0;
|
||||||
|
|
||||||
@ -630,7 +637,7 @@ bool openclSolverBackend::analyse_matrix() {
|
|||||||
t1 = second();
|
t1 = second();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool success = prec->init(mat, block_size);
|
bool success = prec->init(mat);
|
||||||
int work_group_size = 32;
|
int work_group_size = 32;
|
||||||
int num_work_groups = ceilDivision(N, work_group_size);
|
int num_work_groups = ceilDivision(N, work_group_size);
|
||||||
int total_work_items = num_work_groups * work_group_size;
|
int total_work_items = num_work_groups * work_group_size;
|
||||||
@ -654,7 +661,8 @@ bool openclSolverBackend::analyse_matrix() {
|
|||||||
} // end analyse_matrix()
|
} // end analyse_matrix()
|
||||||
|
|
||||||
|
|
||||||
void openclSolverBackend::update_system(double *vals, double *b) {
|
template <unsigned int block_size>
|
||||||
|
void openclSolverBackend<block_size>::update_system(double *vals, double *b) {
|
||||||
double t1 = 0.0, t2 = 0.0;
|
double t1 = 0.0, t2 = 0.0;
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
t1 = second();
|
t1 = second();
|
||||||
@ -673,7 +681,8 @@ void openclSolverBackend::update_system(double *vals, double *b) {
|
|||||||
} // end update_system()
|
} // end update_system()
|
||||||
|
|
||||||
|
|
||||||
bool openclSolverBackend::create_preconditioner() {
|
template <unsigned int block_size>
|
||||||
|
bool openclSolverBackend<block_size>::create_preconditioner() {
|
||||||
|
|
||||||
double t1 = 0.0, t2 = 0.0;
|
double t1 = 0.0, t2 = 0.0;
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
@ -692,7 +701,8 @@ bool openclSolverBackend::create_preconditioner() {
|
|||||||
} // end create_preconditioner()
|
} // end create_preconditioner()
|
||||||
|
|
||||||
|
|
||||||
void openclSolverBackend::solve_system(WellContributions& wellContribs, BdaResult &res) {
|
template <unsigned int block_size>
|
||||||
|
void openclSolverBackend<block_size>::solve_system(WellContributions& wellContribs, BdaResult &res) {
|
||||||
// actually solve
|
// actually solve
|
||||||
double t1 = 0.0, t2 = 0.0;
|
double t1 = 0.0, t2 = 0.0;
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
@ -713,7 +723,8 @@ void openclSolverBackend::solve_system(WellContributions& wellContribs, BdaResul
|
|||||||
|
|
||||||
// copy result to host memory
|
// copy result to host memory
|
||||||
// caller must be sure that x is a valid array
|
// caller must be sure that x is a valid array
|
||||||
void openclSolverBackend::get_result(double *x) {
|
template <unsigned int block_size>
|
||||||
|
void openclSolverBackend<block_size>::get_result(double *x) {
|
||||||
|
|
||||||
double t1 = 0.0, t2 = 0.0;
|
double t1 = 0.0, t2 = 0.0;
|
||||||
if (verbosity > 2) {
|
if (verbosity > 2) {
|
||||||
@ -732,32 +743,43 @@ void openclSolverBackend::get_result(double *x) {
|
|||||||
} // end get_result()
|
} // end get_result()
|
||||||
|
|
||||||
|
|
||||||
|
typedef BdaSolverStatus::Status Status;
|
||||||
|
|
||||||
typedef BdaSolver::BdaSolverStatus BdaSolverStatus;
|
template <unsigned int block_size>
|
||||||
|
Status openclSolverBackend<block_size>::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) {
|
||||||
BdaSolverStatus openclSolverBackend::solve_system(int N_, int nnz_, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) {
|
|
||||||
if (initialized == false) {
|
if (initialized == false) {
|
||||||
initialize(N_, nnz_, dim, vals, rows, cols);
|
initialize(N_, nnz_, dim, vals, rows, cols);
|
||||||
if (analysis_done == false) {
|
if (analysis_done == false) {
|
||||||
if (!analyse_matrix()) {
|
if (!analyse_matrix()) {
|
||||||
return BdaSolverStatus::BDA_SOLVER_ANALYSIS_FAILED;
|
return Status::BDA_SOLVER_ANALYSIS_FAILED;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
update_system(vals, b);
|
update_system(vals, b);
|
||||||
if (!create_preconditioner()) {
|
if (!create_preconditioner()) {
|
||||||
return BdaSolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
return Status::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||||
}
|
}
|
||||||
copy_system_to_gpu();
|
copy_system_to_gpu();
|
||||||
} else {
|
} else {
|
||||||
update_system(vals, b);
|
update_system(vals, b);
|
||||||
if (!create_preconditioner()) {
|
if (!create_preconditioner()) {
|
||||||
return BdaSolverStatus::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
return Status::BDA_SOLVER_CREATE_PRECONDITIONER_FAILED;
|
||||||
}
|
}
|
||||||
update_system_on_gpu();
|
update_system_on_gpu();
|
||||||
}
|
}
|
||||||
solve_system(wellContribs, res);
|
solve_system(wellContribs, res);
|
||||||
return BdaSolverStatus::BDA_SOLVER_SUCCESS;
|
return Status::BDA_SOLVER_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
|
||||||
|
#define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||||
|
template openclSolverBackend<n>::openclSolverBackend(int, int, double); \
|
||||||
|
|
||||||
|
INSTANTIATE_BDA_FUNCTIONS(1);
|
||||||
|
INSTANTIATE_BDA_FUNCTIONS(2);
|
||||||
|
INSTANTIATE_BDA_FUNCTIONS(3);
|
||||||
|
INSTANTIATE_BDA_FUNCTIONS(4);
|
||||||
|
|
||||||
|
#undef INSTANTIATE_BDA_FUNCTIONS
|
||||||
|
|
||||||
|
} // namespace bda
|
||||||
|
|
||||||
|
@ -31,15 +31,29 @@
|
|||||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||||
|
|
||||||
#include <opm/simulators/linalg/bda/BILU0.hpp>
|
#include <opm/simulators/linalg/bda/BILU0.hpp>
|
||||||
typedef bda::BILU0 Preconditioner;
|
|
||||||
|
|
||||||
namespace bda
|
namespace bda
|
||||||
{
|
{
|
||||||
|
|
||||||
/// This class implements a opencl-based ilu0-bicgstab solver on GPU
|
/// This class implements a opencl-based ilu0-bicgstab solver on GPU
|
||||||
class openclSolverBackend : public BdaSolver
|
template <unsigned int block_size>
|
||||||
|
class openclSolverBackend : public BdaSolver<block_size>
|
||||||
{
|
{
|
||||||
|
|
||||||
|
typedef BdaSolver<block_size> Base;
|
||||||
|
typedef BILU0<block_size> Preconditioner;
|
||||||
|
|
||||||
|
using Base::N;
|
||||||
|
using Base::Nb;
|
||||||
|
using Base::nnz;
|
||||||
|
using Base::nnzb;
|
||||||
|
using Base::verbosity;
|
||||||
|
using Base::maxit;
|
||||||
|
using Base::tolerance;
|
||||||
|
using Base::second;
|
||||||
|
using Base::initialized;
|
||||||
|
typedef BdaSolverStatus::Status Status;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
double *rb; // reordered b vector, the matrix is reordered, so b must also be
|
double *rb; // reordered b vector, the matrix is reordered, so b must also be
|
||||||
@ -182,7 +196,7 @@ public:
|
|||||||
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
||||||
/// \param[inout] res summary of solver result
|
/// \param[inout] res summary of solver result
|
||||||
/// \return status code
|
/// \return status code
|
||||||
BdaSolverStatus solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
|
Status solve_system(int N, int nnz, int dim, double *vals, int *rows, int *cols, double *b, WellContributions& wellContribs, BdaResult &res) override;
|
||||||
|
|
||||||
/// Get result after linear solve, and peform postprocessing if necessary
|
/// Get result after linear solve, and peform postprocessing if necessary
|
||||||
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
||||||
|
Loading…
Reference in New Issue
Block a user