Merge pull request #5522 from akva2/janitoring_rocm

BDA rocm: some janitoring
This commit is contained in:
Bård Skaflestad 2024-08-13 15:27:33 +02:00 committed by GitHub
commit d9dd1bcf12
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 91 additions and 100 deletions

View File

@ -304,12 +304,12 @@ init(int verbosity_)
template<class Scalar>
void HipKernels<Scalar>::
vmul(const Scalar alpha,
Scalar* in1,
Scalar* in2,
Scalar* out,
int N,
hipStream_t stream)
vmul([[maybe_unused]] const Scalar alpha,
[[maybe_unused]] Scalar* in1,
[[maybe_unused]] Scalar* in2,
[[maybe_unused]] Scalar* out,
[[maybe_unused]] int N,
[[maybe_unused]] hipStream_t stream)
{
Timer t_vmul;
#ifdef __HIP__
@ -333,11 +333,11 @@ vmul(const Scalar alpha,
template<class Scalar>
void HipKernels<Scalar>::
full_to_pressure_restriction(const Scalar* fine_y,
Scalar* weights,
Scalar* coarse_y,
int Nb,
hipStream_t stream)
full_to_pressure_restriction([[maybe_unused]] const Scalar* fine_y,
[[maybe_unused]] Scalar* weights,
[[maybe_unused]] Scalar* coarse_y,
[[maybe_unused]] int Nb,
[[maybe_unused]] hipStream_t stream)
{
Timer t;
#ifdef __HIP__
@ -361,11 +361,11 @@ full_to_pressure_restriction(const Scalar* fine_y,
template<class Scalar>
void HipKernels<Scalar>::
add_coarse_pressure_correction(Scalar* coarse_x,
Scalar* fine_x,
int pressure_idx,
int Nb,
hipStream_t stream)
add_coarse_pressure_correction([[maybe_unused]] Scalar* coarse_x,
[[maybe_unused]] Scalar* fine_x,
[[maybe_unused]] int pressure_idx,
[[maybe_unused]] int Nb,
[[maybe_unused]] hipStream_t stream)
{
Timer t;
#ifdef __HIP__
@ -389,11 +389,11 @@ add_coarse_pressure_correction(Scalar* coarse_x,
template<class Scalar>
void HipKernels<Scalar>::
prolongate_vector(const Scalar* in,
Scalar* out,
const int* cols,
int N,
hipStream_t stream)
prolongate_vector([[maybe_unused]] const Scalar* in,
[[maybe_unused]] Scalar* out,
[[maybe_unused]] const int* cols,
[[maybe_unused]] int N,
[[maybe_unused]] hipStream_t stream)
{
Timer t;
@ -419,15 +419,15 @@ prolongate_vector(const Scalar* in,
template<class Scalar>
void HipKernels<Scalar>::
residual(Scalar* vals,
int* cols,
int* rows,
Scalar* x,
const Scalar* rhs,
Scalar* out,
int Nb,
unsigned int block_size,
hipStream_t stream)
residual([[maybe_unused]] Scalar* vals,
[[maybe_unused]] int* cols,
[[maybe_unused]] int* rows,
[[maybe_unused]] Scalar* x,
[[maybe_unused]] const Scalar* rhs,
[[maybe_unused]] Scalar* out,
[[maybe_unused]] int Nb,
[[maybe_unused]] unsigned int block_size,
[[maybe_unused]] hipStream_t stream)
{
Timer t_residual;
@ -458,14 +458,14 @@ residual(Scalar* vals,
template<class Scalar>
void HipKernels<Scalar>::
spmv(Scalar* vals,
int* cols,
int* rows,
Scalar* x,
Scalar* y,
int Nb,
unsigned int block_size,
hipStream_t stream)
spmv([[maybe_unused]] Scalar* vals,
[[maybe_unused]] int* cols,
[[maybe_unused]] int* rows,
[[maybe_unused]] Scalar* x,
[[maybe_unused]] Scalar* y,
[[maybe_unused]] int Nb,
[[maybe_unused]] unsigned int block_size,
[[maybe_unused]] hipStream_t stream)
{//NOTE: block_size not used since I use this kernel only for block sizes 1, other uses use rocsparse!
Timer t_spmv;
#ifdef __HIP__

View File

@ -92,8 +92,8 @@ analyze_matrix(BlockedMatrix<Scalar> *mat) {
template <class Scalar, unsigned int block_size>
bool rocsparseBILU0<Scalar, block_size>::
analyze_matrix(BlockedMatrix<Scalar> *mat,
BlockedMatrix<Scalar> *jacMat)
analyze_matrix(BlockedMatrix<Scalar>*,
BlockedMatrix<Scalar>*)
{
std::size_t d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
Timer t;
@ -163,8 +163,8 @@ create_preconditioner(BlockedMatrix<Scalar> *mat) {
template <class Scalar, unsigned int block_size>
bool rocsparseBILU0<Scalar, block_size>::
create_preconditioner(BlockedMatrix<Scalar> *mat,
BlockedMatrix<Scalar> *jacMat)
create_preconditioner(BlockedMatrix<Scalar>*,
BlockedMatrix<Scalar>*)
{
Timer t;
bool result = true;
@ -253,7 +253,6 @@ update_system_on_gpu(Scalar *d_Avals) {
template <class Scalar, unsigned int block_size>
void rocsparseBILU0<Scalar, block_size>::
apply(Scalar& y, Scalar& x) {
Scalar zero = 0.0;
Scalar one = 1.0;
Timer t_apply;
@ -274,7 +273,7 @@ apply(Scalar& y, Scalar& x) {
}
}
#define INSTANCE_TYPE(T) \
#define INSTANTIATE_TYPE(T) \
template class rocsparseBILU0<T,1>; \
template class rocsparseBILU0<T,2>; \
template class rocsparseBILU0<T,3>; \
@ -282,5 +281,6 @@ apply(Scalar& y, Scalar& x) {
template class rocsparseBILU0<T,5>; \
template class rocsparseBILU0<T,6>;
INSTANCE_TYPE(double)
INSTANTIATE_TYPE(double)
} // namespace Opm

View File

@ -20,9 +20,6 @@
#ifndef OPM_ROCSPARSEBILU0_HPP
#define OPM_ROCSPARSEBILU0_HPP
#include <mutex>
#include <vector>
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
#include <opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.hpp>
@ -70,17 +67,17 @@ public:
bool initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
rocsparse_int *d_Arows,
rocsparse_int *d_Acols);
rocsparse_int *d_Acols) override;
/// Analysis, extract parallelism if specified
/// \param[in] mat matrix A
bool analyze_matrix(BlockedMatrix<Scalar> *mat);
bool analyze_matrix(BlockedMatrix<Scalar> *mat) override;
/// Analysis, extract parallelism if specified
/// \param[in] mat matrix A
/// \param[in] jacMat matrix for preconditioner, analyze this as well
bool analyze_matrix(BlockedMatrix<Scalar> *mat,
BlockedMatrix<Scalar> *jacMat);
BlockedMatrix<Scalar> *jacMat) override;
/// ILU decomposition
/// \param[in] mat matrix A to decompose
@ -102,13 +99,12 @@ public:
#if HAVE_OPENCL
// apply preconditioner, x = prec(y)
void apply(const cl::Buffer& y,
cl::Buffer& x) {}
void apply(const cl::Buffer&, cl::Buffer&) override {}
#endif
/// Copy matrix A values to GPU
/// \param[in] mVals Input values
void copy_system_to_gpu(Scalar *mVals);
void copy_system_to_gpu(Scalar *mVals) override;
/// Reassign pointers, in case the addresses of the Dune variables have changed --> TODO: check when/if we need this method
// /// \param[in] vals New values
@ -117,7 +113,7 @@ public:
/// Update GPU values after a new assembly is done
/// \param[in] b New b vector
void update_system_on_gpu(Scalar *b);
void update_system_on_gpu(Scalar *b) override;
};
} // namespace Opm

View File

@ -95,14 +95,14 @@ analyze_matrix(BlockedMatrix<Scalar> *mat_) {
template <class Scalar, unsigned int block_size>
bool rocsparseCPR<Scalar, block_size>::
analyze_matrix(BlockedMatrix<Scalar> *mat_,
BlockedMatrix<Scalar> *jacMat)
BlockedMatrix<Scalar> *jacMat_)
{
this->Nb = mat_->Nb;
this->nnzb = mat_->nnzbs;
this->N = Nb * block_size;
this->nnz = nnzb * block_size * block_size;
bool success = bilu0->analyze_matrix(mat_, jacMat);
bool success = bilu0->analyze_matrix(mat_, jacMat_);
this->mat = mat_;
return success;
@ -111,10 +111,10 @@ analyze_matrix(BlockedMatrix<Scalar> *mat_,
template <class Scalar, unsigned int block_size>
bool rocsparseCPR<Scalar, block_size>::
create_preconditioner(BlockedMatrix<Scalar> *mat_,
BlockedMatrix<Scalar> *jacMat)
BlockedMatrix<Scalar> *jacMat_)
{
Dune::Timer t_bilu0;
bool result = bilu0->create_preconditioner(mat_, jacMat);
bool result = bilu0->create_preconditioner(mat_, jacMat_);
if (verbosity >= 3) {
std::ostringstream out;
out << "rocsparseCPR create_preconditioner bilu0(): " << t_bilu0.stop() << " s";
@ -223,8 +223,6 @@ amg_cycle_gpu(const int level,
RocmMatrix<Scalar> *A = &d_Amatrices[level];
RocmMatrix<Scalar> *R = &d_Rmatrices[level];
int Ncur = A->Nb;
Scalar zero = 0.0;
Scalar one = 1.0;
rocsparse_mat_info spmv_info;
rocsparse_mat_descr descr_R;
@ -324,7 +322,7 @@ apply(Scalar& y,
}
}
#define INSTANCE_TYPE(T) \
#define INSTANTIATE_TYPE(T) \
template class rocsparseCPR<T,1>; \
template class rocsparseCPR<T,2>; \
template class rocsparseCPR<T,3>; \
@ -332,7 +330,6 @@ apply(Scalar& y,
template class rocsparseCPR<T,5>; \
template class rocsparseCPR<T,6>;
INSTANCE_TYPE(double)
INSTANTIATE_TYPE(double)
} // namespace Opm

View File

@ -85,28 +85,28 @@ public:
bool initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
rocsparse_int *d_Arows,
rocsparse_int *d_Acols);
rocsparse_int *d_Acols) override;
/// Analysis, extract parallelism if specified
/// \param[in] mat matrix A
bool analyze_matrix(BlockedMatrix<Scalar> *mat);
bool analyze_matrix(BlockedMatrix<Scalar> *mat) override;
/// Analysis, extract parallelism if specified
/// \param[in] mat matrix A
/// \param[in] jacMat matrix for preconditioner, analyze this as well
bool analyze_matrix(BlockedMatrix<Scalar> *mat,
BlockedMatrix<Scalar> *jacMat);
BlockedMatrix<Scalar> *jacMat) override;
/// Create AMG preconditioner and perform ILU decomposition
/// \param[in] mat matrix A
bool create_preconditioner(BlockedMatrix<Scalar> *mat);
bool create_preconditioner(BlockedMatrix<Scalar> *mat) override;
/// Create AMG preconditioner and perform ILU decomposition
/// \param[in] mat matrix A
/// \param[in] jacMat matrix for preconditioner, decompose this one if used
bool create_preconditioner(BlockedMatrix<Scalar> *mat,
BlockedMatrix<Scalar> *jacMat);
BlockedMatrix<Scalar> *jacMat) override;
/// Apply preconditioner, x = prec(y)
/// applies blocked ilu0
@ -118,13 +118,12 @@ public:
#if HAVE_OPENCL
// apply preconditioner, x = prec(y)
void apply(const cl::Buffer& y,
cl::Buffer& x) {}
void apply(const cl::Buffer&, cl::Buffer&) override {}
#endif
/// Copy matrix A values to GPU
/// \param[in] mVals Input values
void copy_system_to_gpu(Scalar *b);
void copy_system_to_gpu(Scalar *b) override;
/// Reassign pointers, in case the addresses of the Dune variables have changed --> TODO: check when/if we need this method
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
@ -133,7 +132,7 @@ public:
/// Update linear system to GPU
/// \param[in] b input vector, contains N values
void update_system_on_gpu(Scalar *b);
void update_system_on_gpu(Scalar *b) override;
};
} // namespace Opm

View File

@ -38,38 +38,39 @@ create(PreconditionerType type,
return std::make_unique<Opm::Accelerator::rocsparseBILU0<Scalar, block_size> >(verbosity);
case PreconditionerType::CPR:
return std::make_unique<Opm::Accelerator::rocsparseCPR<Scalar, block_size> >(verbosity);
default:
OPM_THROW(std::logic_error,
"Invalid preconditioner type " + std::to_string(static_cast<int>(type)));
}
OPM_THROW(std::logic_error,
"Invalid preconditioner type " + std::to_string(static_cast<int>(type)));
}
// NOTE: this span is a rendered diff hunk with the +/- markers stripped, so it
// shows two variants of the same function interleaved (the first-listed lines
// are presumably the removed ones, the second-listed the added ones).
// The function takes two rocsparse matrix descriptors and assigns them to
// descr_L / descr_U (presumably class members; their declaration is not shown here).
template <class Scalar, unsigned int block_size>
void rocsparsePreconditioner<Scalar, block_size>::
// First variant: the parameters are themselves named descr_L / descr_U.
set_matrix_analysis(rocsparse_mat_descr descr_L,
rocsparse_mat_descr descr_U)
// Second variant: the parameters are renamed to desc_L / desc_U.
set_matrix_analysis(rocsparse_mat_descr desc_L,
rocsparse_mat_descr desc_U)
{
// First variant body: with parameters named descr_L / descr_U, both sides of
// each assignment name the parameter, so these are self-assignments.
descr_L = descr_L;
descr_U = descr_U;
// Second variant body: copies the renamed parameters into descr_L / descr_U.
descr_L = desc_L;
descr_U = desc_U;
}
// NOTE: this span is a rendered diff hunk with the +/- markers stripped, so it
// shows two variants of the same function interleaved (the first-listed lines
// are presumably the removed ones, the second-listed the added ones).
// The function copies the rocsparse handle, direction, operation and HIP stream
// supplied by the caller into this->handle, this->dir, this->operation and
// this->stream.
template <class Scalar, unsigned int block_size>
void rocsparsePreconditioner<Scalar, block_size>::
// First variant: parameter names are the same as the names accessed through this->.
set_context(rocsparse_handle handle,
rocsparse_direction dir,
rocsparse_operation operation,
hipStream_t stream)
// Second variant: parameters carry a trailing underscore, so the names differ.
set_context(rocsparse_handle handle_,
rocsparse_direction dir_,
rocsparse_operation operation_,
hipStream_t stream_)
{
// First variant body: the explicit this-> on the left-hand side is what
// separates the assignment target from the identically named parameter.
this->handle = handle;
this->dir = dir;
this->operation = operation;
this->stream = stream;
// Second variant body: same four assignments, reading the renamed parameters.
this->handle = handle_;
this->dir = dir_;
this->operation = operation_;
this->stream = stream_;
}
// NOTE: this span is a rendered diff hunk with the +/- markers stripped, so it
// shows two variants of the same function interleaved (the first-listed lines
// are presumably the removed ones, the second-listed the added ones).
// The function builds a new shared_ptr-owned BlockedMatrix<Scalar> from its
// argument and stores it in this->jacMat.
template <class Scalar, unsigned int block_size>
void rocsparsePreconditioner<Scalar, block_size>::
// First variant: the matrix is taken by value, then passed on to make_shared.
setJacMat(BlockedMatrix<Scalar> jacMat) {
this->jacMat = std::make_shared<BlockedMatrix<Scalar>>(jacMat);
// Second variant: the matrix is taken by const reference (parameter renamed to
// jMat), so make_shared is the only place a new BlockedMatrix is constructed.
setJacMat(const BlockedMatrix<Scalar>& jMat)
{
this->jacMat = std::make_shared<BlockedMatrix<Scalar>>(jMat);
}
#define INSTANTIATE_TYPE(T) \

View File

@ -84,7 +84,7 @@ public:
rocsparse_operation operation,
hipStream_t stream);
void setJacMat(BlockedMatrix<Scalar> jacMat);
void setJacMat(const BlockedMatrix<Scalar>& jacMat);
};
} //namespace Opm

View File

@ -63,18 +63,16 @@ rocsparseSolverBackend(int verbosity_, int maxit_, Scalar tolerance_,
: Base(verbosity_, maxit_, tolerance_, platformID_, deviceID_)
{
int numDevices = 0;
bool use_cpr, use_isai;
bool use_cpr;
if (linsolver.compare("ilu0") == 0) {
use_cpr = false;
use_isai = false;
} else if (linsolver.compare("cpr_quasiimpes") == 0) {
use_cpr = true;
use_isai = false;
} else if (linsolver.compare("isai") == 0) {
OPM_THROW(std::logic_error, "Error rocsparseSolver does not support --linerar-solver=isai");
OPM_THROW(std::logic_error, "Error rocsparseSolver does not support --linear-solver=isai");
} else if (linsolver.compare("cpr_trueimpes") == 0) {
OPM_THROW(std::logic_error, "Error rocsparseSolver does not support --linerar-solver=cpr_trueimpes");
OPM_THROW(std::logic_error, "Error rocsparseSolver does not support --linear-solver=cpr_trueimpes");
} else {
OPM_THROW(std::logic_error, "Error unknown value for argument --linear-solver, " + linsolver);
}
@ -100,11 +98,11 @@ rocsparseSolverBackend(int verbosity_, int maxit_, Scalar tolerance_,
ROCSPARSE_CHECK(rocsparse_set_stream(handle, stream));
ROCBLAS_CHECK(rocblas_set_stream(blas_handle, stream));
using PreconditionerType = typename Opm::Accelerator::PreconditionerType;
using PCType = typename Opm::Accelerator::PreconditionerType;
if (use_cpr) {
prec = rocsparsePreconditioner<Scalar, block_size>::create(PreconditionerType::CPR, verbosity);
prec = rocsparsePreconditioner<Scalar, block_size>::create(PCType::CPR, verbosity);
} else {
prec = rocsparsePreconditioner<Scalar, block_size>::create(PreconditionerType::BILU0, verbosity);
prec = rocsparsePreconditioner<Scalar, block_size>::create(PCType::BILU0, verbosity);
}
prec->set_context(handle, dir, operation, stream);