mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
cleanup and run fix
This commit is contained in:
parent
7a307fafa0
commit
dcbd9be46a
@ -47,7 +47,8 @@ CprCreation<Scalar, block_size>::CprCreation()
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void CprCreation<Scalar, block_size>::
|
||||
create_preconditioner_amg(BlockedMatrix<Scalar> *mat_) {
|
||||
create_preconditioner_amg(BlockedMatrix<Scalar> *mat_)
|
||||
{
|
||||
mat = mat_;
|
||||
cprNb = mat_->Nb;
|
||||
cprnnzb = mat_->nnzbs;
|
||||
@ -183,7 +184,9 @@ create_preconditioner_amg(BlockedMatrix<Scalar> *mat_) {
|
||||
}
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void CprCreation<Scalar, block_size>::analyzeHierarchy() {
|
||||
void CprCreation<Scalar, block_size>::
|
||||
analyzeHierarchy()
|
||||
{
|
||||
const typename DuneAmg::ParallelMatrixHierarchy& matrixHierarchy = dune_amg->matrices();
|
||||
|
||||
// store coarsest AMG level in umfpack format, also performs LU decomposition
|
||||
@ -237,8 +240,9 @@ void CprCreation<Scalar, block_size>::analyzeHierarchy() {
|
||||
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void CprCreation<Scalar, block_size>::analyzeAggregateMaps() {
|
||||
|
||||
void CprCreation<Scalar, block_size>::
|
||||
analyzeAggregateMaps()
|
||||
{
|
||||
PcolIndices.resize(num_levels - 1);
|
||||
Rmatrices.clear();
|
||||
|
||||
|
@ -6,14 +6,16 @@
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
// divide A by B, and round up: return (int)ceil(A/B)
|
||||
unsigned int ceilDivision(const unsigned int A, const unsigned int B)
|
||||
unsigned int ceilDivision(const unsigned int A,
|
||||
const unsigned int B)
|
||||
{
|
||||
return A / B + (A % B > 0);
|
||||
}
|
||||
|
||||
// return the absolute value of the N elements for which the absolute value is highest
|
||||
template<class Scalar>
|
||||
Scalar get_absmax(const Scalar* data, const int N)
|
||||
Scalar get_absmax(const Scalar* data,
|
||||
const int N)
|
||||
{
|
||||
return std::abs(*std::max_element(data, data + N,
|
||||
[](Scalar a, Scalar b)
|
||||
@ -22,7 +24,9 @@ Scalar get_absmax(const Scalar* data, const int N)
|
||||
|
||||
// solve A^T * x = b
|
||||
template<class Scalar>
|
||||
void solve_transposed_3x3(const Scalar* A, const Scalar* b, Scalar* x)
|
||||
void solve_transposed_3x3(const Scalar* A,
|
||||
const Scalar* b,
|
||||
Scalar* x)
|
||||
{
|
||||
const int B = 3;
|
||||
// from dune-common/densematrix.hh, but transposed, so replace [r*B+c] with [r+c*B]
|
||||
|
@ -1,15 +1,58 @@
|
||||
#ifndef OPM_MISC_HPP
|
||||
#define OPM_MISC_HPP
|
||||
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <hip/hip_version.h>
|
||||
|
||||
#define HIP_CHECK(STAT) \
|
||||
do { \
|
||||
const hipError_t stat = (STAT); \
|
||||
if(stat != hipSuccess) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseSolverBackend::hip "; \
|
||||
oss << "error: " << hipGetErrorString(stat); \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define ROCSPARSE_CHECK(STAT) \
|
||||
do { \
|
||||
const rocsparse_status stat = (STAT); \
|
||||
if(stat != rocsparse_status_success) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseSolverBackend::rocsparse "; \
|
||||
oss << "error: " << stat; \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define ROCBLAS_CHECK(STAT) \
|
||||
do { \
|
||||
const rocblas_status stat = (STAT); \
|
||||
if(stat != rocblas_status_success) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseSolverBackend::rocblas "; \
|
||||
oss << "error: " << stat; \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
unsigned int ceilDivision(const unsigned int A, const unsigned int B);
|
||||
unsigned int ceilDivision(const unsigned int A,
|
||||
const unsigned int B);
|
||||
|
||||
template<class Scalar>
|
||||
Scalar get_absmax(const Scalar *data, const int N);
|
||||
Scalar get_absmax(const Scalar *data,
|
||||
const int N);
|
||||
|
||||
template<class Scalar>
|
||||
void solve_transposed_3x3(const Scalar *A, const Scalar *b, Scalar *x);
|
||||
void solve_transposed_3x3(const Scalar *A,
|
||||
const Scalar *b,
|
||||
Scalar *x);
|
||||
|
||||
}
|
||||
|
||||
|
@ -28,43 +28,26 @@
|
||||
#include <opm/simulators/linalg/bda/rocm/hipKernels.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/Misc.hpp>
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/hip_version.h>
|
||||
|
||||
#define HIP_CHECK(STAT) \
|
||||
do { \
|
||||
const hipError_t stat = (STAT); \
|
||||
if(stat != hipSuccess) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseSolverBackend::hip "; \
|
||||
oss << "error: " << hipGetErrorString(stat); \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
namespace Opm {
|
||||
|
||||
using Opm::OpmLog;
|
||||
using Dune::Timer;
|
||||
|
||||
// define static variables and kernels
|
||||
int HipKernels::verbosity;
|
||||
double* HipKernels::tmp;
|
||||
bool HipKernels::initialized = false;
|
||||
std::size_t HipKernels::preferred_workgroup_size_multiple = 0;
|
||||
template<class Scalar> int HipKernels<Scalar>::verbosity = 0;
|
||||
template<class Scalar> bool HipKernels<Scalar>::initialized = false;
|
||||
|
||||
#ifdef __HIP__
|
||||
/// HIP kernel to multiply vector with another vector and a scalar, element-wise
|
||||
// add result to a third vector
|
||||
__global__ void vmul_k(
|
||||
const double alpha,
|
||||
double const *in1,
|
||||
double const *in2,
|
||||
double *out,
|
||||
const int N)
|
||||
template<class Scalar>
|
||||
__global__ void vmul_k(const Scalar alpha,
|
||||
Scalar const *in1,
|
||||
Scalar const *in2,
|
||||
Scalar *out,
|
||||
const int N)
|
||||
{
|
||||
unsigned int NUM_THREADS = gridDim.x;
|
||||
int idx = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
@ -77,18 +60,18 @@ __global__ void vmul_k(
|
||||
|
||||
/// HIP kernel to transform blocked vector to scalar vector using pressure-weights
|
||||
// every workitem handles one blockrow
|
||||
__global__ void full_to_pressure_restriction_k(
|
||||
const double *fine_y,
|
||||
const double *weights,
|
||||
double *coarse_y,
|
||||
const unsigned int Nb)
|
||||
template<class Scalar>
|
||||
__global__ void full_to_pressure_restriction_k(const Scalar *fine_y,
|
||||
const Scalar *weights,
|
||||
Scalar *coarse_y,
|
||||
const unsigned int Nb)
|
||||
{
|
||||
const unsigned int NUM_THREADS = gridDim.x;
|
||||
const unsigned int block_size = 3;
|
||||
unsigned int target_block_row = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
|
||||
while(target_block_row < Nb){
|
||||
double sum = 0.0;
|
||||
Scalar sum = 0.0;
|
||||
unsigned int idx = block_size * target_block_row;
|
||||
for (unsigned int i = 0; i < block_size; ++i) {
|
||||
sum += fine_y[idx + i] * weights[idx + i];
|
||||
@ -100,11 +83,11 @@ __global__ void full_to_pressure_restriction_k(
|
||||
|
||||
/// HIP kernel to add the coarse pressure solution back to the finer, complete solution
|
||||
// every workitem handles one blockrow
|
||||
__global__ void add_coarse_pressure_correction_k(
|
||||
const double *coarse_x,
|
||||
double *fine_x,
|
||||
const unsigned int pressure_idx,
|
||||
const unsigned int Nb)
|
||||
template<class Scalar>
|
||||
__global__ void add_coarse_pressure_correction_k(const Scalar *coarse_x,
|
||||
Scalar *fine_x,
|
||||
const unsigned int pressure_idx,
|
||||
const unsigned int Nb)
|
||||
{
|
||||
const unsigned int NUM_THREADS = gridDim.x;
|
||||
const unsigned int block_size = 3;
|
||||
@ -118,11 +101,11 @@ __global__ void add_coarse_pressure_correction_k(
|
||||
|
||||
/// HIP kernel to prolongate vector during amg cycle
|
||||
// every workitem handles one row
|
||||
__global__ void prolongate_vector_k(
|
||||
const double *in,
|
||||
double *out,
|
||||
const int *cols,
|
||||
const unsigned int N)
|
||||
template<class Scalar>
|
||||
__global__ void prolongate_vector_k(const Scalar *in,
|
||||
Scalar *out,
|
||||
const int *cols,
|
||||
const unsigned int N)
|
||||
{
|
||||
const unsigned int NUM_THREADS = gridDim.x;
|
||||
unsigned int row = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
@ -137,18 +120,17 @@ __global__ void prolongate_vector_k(
|
||||
// algorithm based on:
|
||||
// Optimization of Block Sparse Matrix-Vector Multiplication on Shared-MemoryParallel Architectures,
|
||||
// Ryan Eberhardt, Mark Hoemmen, 2016, https://doi.org/10.1109/IPDPSW.2016.42
|
||||
__global__ void residual_blocked_k(
|
||||
const double *vals,
|
||||
const int *cols,
|
||||
const int *rows,
|
||||
const int Nb,
|
||||
const double *x,
|
||||
const double *rhs,
|
||||
double *out,
|
||||
const unsigned int block_size
|
||||
)
|
||||
template<class Scalar>
|
||||
__global__ void residual_blocked_k(const Scalar *vals,
|
||||
const int *cols,
|
||||
const int *rows,
|
||||
const int Nb,
|
||||
const Scalar *x,
|
||||
const Scalar *rhs,
|
||||
Scalar *out,
|
||||
const unsigned int block_size)
|
||||
{
|
||||
extern __shared__ double tmp[];
|
||||
extern __shared__ Scalar tmp[];
|
||||
const unsigned int warpsize = warpSize;
|
||||
const unsigned int bsize = blockDim.x;
|
||||
const unsigned int gid = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
@ -174,12 +156,12 @@ __global__ void residual_blocked_k(
|
||||
unsigned int first_block = rows[target_block_row];
|
||||
unsigned int last_block = rows[target_block_row+1];
|
||||
unsigned int block = first_block + lane / (bs*bs);
|
||||
double local_out = 0.0;
|
||||
Scalar local_out = 0.0;
|
||||
|
||||
if(lane < num_active_threads){
|
||||
for(; block < last_block; block += num_blocks_per_warp){
|
||||
double x_elem = x[cols[block]*bs + c];
|
||||
double A_elem = vals[block*bs*bs + c + r*bs];
|
||||
Scalar x_elem = x[cols[block]*bs + c];
|
||||
Scalar A_elem = vals[block*bs*bs + c + r*bs];
|
||||
local_out += x_elem * A_elem;
|
||||
}
|
||||
}
|
||||
@ -210,17 +192,16 @@ __global__ void residual_blocked_k(
|
||||
// Optimization of Block Sparse Matrix-Vector Multiplication on Shared-MemoryParallel Architectures,
|
||||
// Ryan Eberhardt, Mark Hoemmen, 2016, https://doi.org/10.1109/IPDPSW.2016.42
|
||||
// template <unsigned shared_mem_size>
|
||||
__global__ void residual_k(
|
||||
const double *vals,
|
||||
const int *cols,
|
||||
const int *rows,
|
||||
const int N,
|
||||
const double *x,
|
||||
const double *rhs,
|
||||
double *out
|
||||
)
|
||||
template<class Scalar>
|
||||
__global__ void residual_k(const Scalar *vals,
|
||||
const int *cols,
|
||||
const int *rows,
|
||||
const int N,
|
||||
const Scalar *x,
|
||||
const Scalar *rhs,
|
||||
Scalar *out)
|
||||
{
|
||||
extern __shared__ double tmp[];
|
||||
extern __shared__ Scalar tmp[];
|
||||
const unsigned int bsize = blockDim.x;
|
||||
const unsigned int gid = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const unsigned int idx_b = gid / bsize;
|
||||
@ -233,7 +214,7 @@ __global__ void residual_k(
|
||||
int rowStart = rows[row];
|
||||
int rowEnd = rows[row+1];
|
||||
int rowLength = rowEnd - rowStart;
|
||||
double local_sum = 0.0;
|
||||
Scalar local_sum = 0.0;
|
||||
for (int j = rowStart + idx_t; j < rowEnd; j += bsize) {
|
||||
int col = cols[j];
|
||||
local_sum += vals[j] * x[col];
|
||||
@ -259,16 +240,15 @@ __global__ void residual_k(
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void spmv_k(
|
||||
const double *vals,
|
||||
const int *cols,
|
||||
const int *rows,
|
||||
const int N,
|
||||
const double *x,
|
||||
double *out
|
||||
)
|
||||
template<class Scalar>
|
||||
__global__ void spmv_k(const Scalar *vals,
|
||||
const int *cols,
|
||||
const int *rows,
|
||||
const int N,
|
||||
const Scalar *x,
|
||||
Scalar *out)
|
||||
{
|
||||
extern __shared__ double tmp[];
|
||||
extern __shared__ Scalar tmp[];
|
||||
const unsigned int bsize = blockDim.x;
|
||||
const unsigned int gid = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const unsigned int idx_b = gid / bsize;
|
||||
@ -281,7 +261,7 @@ __global__ void spmv_k(
|
||||
int rowStart = rows[row];
|
||||
int rowEnd = rows[row+1];
|
||||
int rowLength = rowEnd - rowStart;
|
||||
double local_sum = 0.0;
|
||||
Scalar local_sum = 0.0;
|
||||
for (int j = rowStart + idx_t; j < rowEnd; j += bsize) {
|
||||
int col = cols[j];
|
||||
local_sum += vals[j] * x[col];
|
||||
@ -308,7 +288,9 @@ __global__ void spmv_k(
|
||||
}
|
||||
#endif
|
||||
|
||||
void HipKernels::init(int verbosity_)
|
||||
template<class Scalar>
|
||||
void HipKernels<Scalar>::
|
||||
init(int verbosity_)
|
||||
{
|
||||
if (initialized) {
|
||||
OpmLog::debug("Warning HipKernels is already initialized");
|
||||
@ -320,7 +302,14 @@ void HipKernels::init(int verbosity_)
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
void HipKernels::vmul(const double alpha, double* in1, double* in2, double* out, int N, hipStream_t stream)
|
||||
template<class Scalar>
|
||||
void HipKernels<Scalar>::
|
||||
vmul(const Scalar alpha,
|
||||
Scalar* in1,
|
||||
Scalar* in2,
|
||||
Scalar* out,
|
||||
int N,
|
||||
hipStream_t stream)
|
||||
{
|
||||
Timer t_vmul;
|
||||
#ifdef __HIP__
|
||||
@ -342,7 +331,13 @@ void HipKernels::vmul(const double alpha, double* in1, double* in2, double* out,
|
||||
}
|
||||
}
|
||||
|
||||
void HipKernels::full_to_pressure_restriction(const double* fine_y, double* weights, double* coarse_y, int Nb, hipStream_t stream)
|
||||
template<class Scalar>
|
||||
void HipKernels<Scalar>::
|
||||
full_to_pressure_restriction(const Scalar* fine_y,
|
||||
Scalar* weights,
|
||||
Scalar* coarse_y,
|
||||
int Nb,
|
||||
hipStream_t stream)
|
||||
{
|
||||
Timer t;
|
||||
#ifdef __HIP__
|
||||
@ -364,7 +359,13 @@ void HipKernels::full_to_pressure_restriction(const double* fine_y, double* weig
|
||||
}
|
||||
}
|
||||
|
||||
void HipKernels::add_coarse_pressure_correction(double* coarse_x, double* fine_x, int pressure_idx, int Nb, hipStream_t stream)
|
||||
template<class Scalar>
|
||||
void HipKernels<Scalar>::
|
||||
add_coarse_pressure_correction(Scalar* coarse_x,
|
||||
Scalar* fine_x,
|
||||
int pressure_idx,
|
||||
int Nb,
|
||||
hipStream_t stream)
|
||||
{
|
||||
Timer t;
|
||||
#ifdef __HIP__
|
||||
@ -386,7 +387,13 @@ void HipKernels::add_coarse_pressure_correction(double* coarse_x, double* fine_x
|
||||
}
|
||||
}
|
||||
|
||||
void HipKernels::prolongate_vector(const double* in, double* out, const int* cols, int N, hipStream_t stream)
|
||||
template<class Scalar>
|
||||
void HipKernels<Scalar>::
|
||||
prolongate_vector(const Scalar* in,
|
||||
Scalar* out,
|
||||
const int* cols,
|
||||
int N,
|
||||
hipStream_t stream)
|
||||
{
|
||||
Timer t;
|
||||
|
||||
@ -394,7 +401,7 @@ void HipKernels::prolongate_vector(const double* in, double* out, const int* col
|
||||
unsigned blockDim = 64;
|
||||
unsigned blocks = Accelerator::ceilDivision(N, blockDim);
|
||||
unsigned gridDim = blocks * blockDim;
|
||||
unsigned shared_mem_size = blockDim * sizeof(double);
|
||||
unsigned shared_mem_size = blockDim * sizeof(Scalar);
|
||||
|
||||
// dim3(N) will create a vector {N, 1, 1}
|
||||
prolongate_vector_k<<<dim3(gridDim), dim3(blockDim), shared_mem_size, stream>>>(in, out, cols, N);
|
||||
@ -410,7 +417,17 @@ void HipKernels::prolongate_vector(const double* in, double* out, const int* col
|
||||
}
|
||||
}
|
||||
|
||||
void HipKernels::residual(double* vals, int* cols, int* rows, double* x, const double* rhs, double* out, int Nb, unsigned int block_size, hipStream_t stream)
|
||||
template<class Scalar>
|
||||
void HipKernels<Scalar>::
|
||||
residual(Scalar* vals,
|
||||
int* cols,
|
||||
int* rows,
|
||||
Scalar* x,
|
||||
const Scalar* rhs,
|
||||
Scalar* out,
|
||||
int Nb,
|
||||
unsigned int block_size,
|
||||
hipStream_t stream)
|
||||
{
|
||||
Timer t_residual;
|
||||
|
||||
@ -418,7 +435,7 @@ void HipKernels::residual(double* vals, int* cols, int* rows, double* x, const d
|
||||
unsigned blockDim = 64;
|
||||
const unsigned int num_work_groups = Accelerator::ceilDivision(Nb, blockDim);
|
||||
unsigned gridDim = num_work_groups * blockDim;
|
||||
unsigned shared_mem_size = blockDim * sizeof(double);
|
||||
unsigned shared_mem_size = blockDim * sizeof(Scalar);
|
||||
|
||||
if (block_size > 1) {
|
||||
// dim3(N) will create a vector {N, 1, 1}
|
||||
@ -439,14 +456,23 @@ void HipKernels::residual(double* vals, int* cols, int* rows, double* x, const d
|
||||
}
|
||||
}
|
||||
|
||||
void HipKernels::spmv(double* vals, int* cols, int* rows, double* x, double* y, int Nb, unsigned int block_size, hipStream_t stream)
|
||||
template<class Scalar>
|
||||
void HipKernels<Scalar>::
|
||||
spmv(Scalar* vals,
|
||||
int* cols,
|
||||
int* rows,
|
||||
Scalar* x,
|
||||
Scalar* y,
|
||||
int Nb,
|
||||
unsigned int block_size,
|
||||
hipStream_t stream)
|
||||
{//NOTE: block_size not used since I use this kernel only for block sizes 1, other uses use rocsparse!
|
||||
Timer t_spmv;
|
||||
#ifdef __HIP__
|
||||
unsigned blockDim = 64;
|
||||
const unsigned int num_work_groups = Accelerator::ceilDivision(Nb, blockDim);
|
||||
unsigned gridDim = num_work_groups * blockDim;
|
||||
unsigned shared_mem_size = blockDim * sizeof(double);
|
||||
unsigned shared_mem_size = blockDim * sizeof(Scalar);
|
||||
|
||||
spmv_k<<<dim3(gridDim), dim3(blockDim), shared_mem_size, stream>>>(vals, cols, rows, Nb, x, y);
|
||||
|
||||
@ -462,5 +488,6 @@ void HipKernels::spmv(double* vals, int* cols, int* rows, double* x, double* y,
|
||||
}
|
||||
}
|
||||
|
||||
template class HipKernels<double>;
|
||||
|
||||
} // namespace Opm
|
||||
|
@ -17,8 +17,8 @@
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef HIPKERNELS_HPP
|
||||
#define HIPKERNELS_HPP
|
||||
#ifndef OPM_HIPKERNELS_HPP
|
||||
#define OPM_HIPKERNELS_HPP
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
@ -27,29 +27,110 @@
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <hip/hip_version.h>
|
||||
|
||||
// #include <opm/simulators/linalg/bda/opencl/opencl.hpp>
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
namespace Opm {
|
||||
|
||||
template<class Scalar>
|
||||
class HipKernels
|
||||
{
|
||||
private:
|
||||
static int verbosity;
|
||||
static double* tmp; // used as tmp CPU buffer for dot() and norm()
|
||||
static int verbosity;
|
||||
static bool initialized;
|
||||
static std::size_t preferred_workgroup_size_multiple; // stores CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
|
||||
|
||||
HipKernels(){}; // disable instantiation
|
||||
HipKernels();
|
||||
|
||||
public:
|
||||
/// Initialize verbosity level for the HIP kernels
|
||||
/// \param[in] verbosity verbosity level
|
||||
static void init(int verbosity);
|
||||
static void full_to_pressure_restriction(const double* fine_y, double* weights, double* coarse_y, int Nb, hipStream_t stream);
|
||||
static void add_coarse_pressure_correction(double* coarse_x, double* fine_x, int pressure_idx, int Nb, hipStream_t stream);
|
||||
static void vmul(const double alpha, double* in1, double* in2, double* out, int N, hipStream_t stream);
|
||||
static void prolongate_vector(const double* in, double* out, const int* cols, int N, hipStream_t stream);
|
||||
static void residual(double* vals, int* cols, int* rows, double* x, const double* rhs, double* out, int Nb, unsigned int block_size, hipStream_t stream);
|
||||
static void spmv(double* vals, int* cols, int* rows, double* x, double* y, int Nb, unsigned int block_size, hipStream_t stream);
|
||||
|
||||
/// Transform blocked vector to scalar vector using pressure-weights, where every workitem handles one blockrow
|
||||
/// \param[in] fine_y Input y vector
|
||||
/// \param[in] weights Weights used to combine cells
|
||||
/// \param[out] course_y Output y vector
|
||||
/// \param[in] Nb Number of blocks in the original matrix
|
||||
/// \param[in] stream Hip stream to use for the computations
|
||||
static void full_to_pressure_restriction(const Scalar* fine_y,
|
||||
Scalar* weights,
|
||||
Scalar* coarse_y,
|
||||
int Nb,
|
||||
hipStream_t stream);
|
||||
|
||||
/// Add the coarse pressure solution back to the finer, complete solution; every workitem handles one blockrow
|
||||
/// \param[in] coarse_x Input scalar x vector
|
||||
/// \param[out] fine_x Output blocked x vector
|
||||
/// \param[in] pressure_idx Pressure index
|
||||
/// \param[in] Nb Number of blocks in the original matrix
|
||||
/// \param[in] stream Hip stream to use for the computations
|
||||
static void add_coarse_pressure_correction(Scalar* coarse_x,
|
||||
Scalar* fine_x,
|
||||
int pressure_idx,
|
||||
int Nb,
|
||||
hipStream_t stream);
|
||||
|
||||
|
||||
/// Function to multiply vector with another vector and a scalar, element-wise and add the result to a third vector (out = alpha * in1 + in2)
|
||||
/// \param[in] alpha Input scalar
|
||||
/// \param[in] in1 First input vector
|
||||
/// \param[in] in2 Second input vector
|
||||
/// \param[out] out Output vector
|
||||
/// \param[in] N Size of the vector
|
||||
/// \param[in] stream Hip stream to use for the computations
|
||||
static void vmul(const Scalar alpha,
|
||||
Scalar* in1,
|
||||
Scalar* in2,
|
||||
Scalar* out,
|
||||
int N,
|
||||
hipStream_t stream);
|
||||
|
||||
/// Function to prolongate vector during amg cycle, every workitem handles one row
|
||||
/// \param[in] in Input fine-grained vector
|
||||
/// \param[out] out Output course-graned vector
|
||||
/// \param[in] cols Column indexes
|
||||
/// \param[in] N Size of the vector
|
||||
/// \param[in] stream Hip stream to use for the computations
|
||||
static void prolongate_vector(const Scalar* in,
|
||||
Scalar* out,
|
||||
const int* cols,
|
||||
int N,
|
||||
hipStream_t stream);
|
||||
|
||||
/// Function to perform res = rhs - mat * x
|
||||
/// \param[in] vals Matrix values
|
||||
/// \param[in] cols Column indexes
|
||||
/// \param[in] rows Row pointers
|
||||
/// \param[in] x X vector
|
||||
/// \param[in] rhs Rhs vector
|
||||
/// \param[out] out Output res vector
|
||||
/// \param[in] Nb Number of non-zero blocks in the original matrix
|
||||
/// \param[in] block_size Block size
|
||||
/// \param[in] stream Hip stream to use for the computations
|
||||
static void residual(Scalar* vals,
|
||||
int* cols,
|
||||
int* rows,
|
||||
Scalar* x,
|
||||
const Scalar* rhs,
|
||||
Scalar* out,
|
||||
int Nb,
|
||||
unsigned int block_size,
|
||||
hipStream_t stream);
|
||||
|
||||
/// Function to perform sparse matrix vector multipliation
|
||||
/// \param[in] vals Matrix values
|
||||
/// \param[in] cols Column indexes
|
||||
/// \param[in] rows Row pointers
|
||||
/// \param[in] x Input x vector
|
||||
/// \param[out] y Output y vector
|
||||
/// \param[in] Nb Number of non-zero blocks in the original matrix
|
||||
/// \param[in] block_size Block size
|
||||
/// \param[in] stream Hip stream to use for the computations
|
||||
static void spmv(Scalar* vals,
|
||||
int* cols,
|
||||
int* rows,
|
||||
Scalar* x,
|
||||
Scalar* y,
|
||||
int Nb,
|
||||
unsigned int block_size,
|
||||
hipStream_t stream);
|
||||
};
|
||||
|
||||
} // namespace Opm
|
||||
|
@ -26,48 +26,11 @@
|
||||
|
||||
#include <opm/simulators/linalg/bda/rocm/rocsparseBILU0.hpp>
|
||||
#include <opm/simulators/linalg/bda/Reorder.hpp>
|
||||
#include <opm/simulators/linalg/bda/Misc.hpp>
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#include <iostream> //Razvan
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
#define HIP_CHECK(STAT) \
|
||||
do { \
|
||||
const hipError_t stat = (STAT); \
|
||||
if(stat != hipSuccess) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseBILU0::hip "; \
|
||||
oss << "error: " << hipGetErrorString(stat); \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define ROCSPARSE_CHECK(STAT) \
|
||||
do { \
|
||||
const rocsparse_status stat = (STAT); \
|
||||
if(stat != rocsparse_status_success) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseBILU0::rocsparse "; \
|
||||
oss << "error: " << stat; \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define ROCBLAS_CHECK(STAT) \
|
||||
do { \
|
||||
const rocblas_status stat = (STAT); \
|
||||
if(stat != rocblas_status_success) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseBILU0::rocblas "; \
|
||||
oss << "error: " << stat; \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
|
||||
#if HAVE_OPENMP
|
||||
#include <thread>
|
||||
#include <omp.h>
|
||||
@ -88,7 +51,11 @@ rocsparseBILU0(int verbosity_) :
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
bool rocsparseBILU0<Scalar, block_size>::
|
||||
initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix, std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix, rocsparse_int *d_Arows, rocsparse_int *d_Acols) {
|
||||
initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
|
||||
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
|
||||
rocsparse_int *d_Arows,
|
||||
rocsparse_int *d_Acols)
|
||||
{
|
||||
this->Nb = matrix->Nb;
|
||||
this->N = Nb * block_size;
|
||||
this->nnzb = matrix->nnzbs;
|
||||
@ -101,14 +68,14 @@ initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix, std::shared_ptr<Blocke
|
||||
this->jacMat = jacMatrix;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMalloc((void**)&d_t, sizeof(double) * this->N));
|
||||
HIP_CHECK(hipMalloc((void**)&d_t, sizeof(Scalar) * this->N));
|
||||
|
||||
if (this->useJacMatrix) {
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mrows, sizeof(rocsparse_int) * (Nb + 1)));
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mcols, sizeof(rocsparse_int) * this->nnzbs_prec));
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(double) * this->nnzbs_prec * block_size * block_size));
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(Scalar) * this->nnzbs_prec * block_size * block_size));
|
||||
} else { // preconditioner matrix is same
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(double) * this->nnzbs_prec * block_size * block_size));
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(Scalar) * this->nnzbs_prec * block_size * block_size));
|
||||
d_Mcols = d_Acols;
|
||||
d_Mrows = d_Arows;
|
||||
}
|
||||
@ -125,10 +92,13 @@ analyze_matrix(BlockedMatrix<Scalar> *mat) {
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
bool rocsparseBILU0<Scalar, block_size>::
|
||||
analyze_matrix(BlockedMatrix<Scalar> *mat, BlockedMatrix<Scalar> *jacMat) {
|
||||
analyze_matrix(BlockedMatrix<Scalar> *mat,
|
||||
BlockedMatrix<Scalar> *jacMat)
|
||||
{
|
||||
std::size_t d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
|
||||
Timer t;
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_info(&ilu_info));
|
||||
#if HIP_VERSION >= 50400000
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_info(&spmv_info));
|
||||
#endif
|
||||
@ -193,7 +163,9 @@ create_preconditioner(BlockedMatrix<Scalar> *mat) {
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
bool rocsparseBILU0<Scalar, block_size>::
|
||||
create_preconditioner(BlockedMatrix<Scalar> *mat, BlockedMatrix<Scalar> *jacMat) {
|
||||
create_preconditioner(BlockedMatrix<Scalar> *mat,
|
||||
BlockedMatrix<Scalar> *jacMat)
|
||||
{
|
||||
Timer t;
|
||||
bool result = true;
|
||||
|
||||
@ -219,7 +191,7 @@ create_preconditioner(BlockedMatrix<Scalar> *mat, BlockedMatrix<Scalar> *jacMat)
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void rocsparseBILU0<Scalar, block_size>::
|
||||
copy_system_to_gpu(double *d_Avals) {
|
||||
copy_system_to_gpu(Scalar *d_Avals) {
|
||||
Timer t;
|
||||
|
||||
if (this->useJacMatrix) {
|
||||
@ -230,9 +202,9 @@ copy_system_to_gpu(double *d_Avals) {
|
||||
#endif
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mrows, this->jacMat->rowPointers, sizeof(rocsparse_int) * (Nb + 1), hipMemcpyHostToDevice, this->stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mcols, this->jacMat->colIndices, sizeof(rocsparse_int) * this->nnzbs_prec, hipMemcpyHostToDevice, this->stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, this->jacMat->nnzValues, sizeof(double) * this->nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, this->stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, this->jacMat->nnzValues, sizeof(Scalar) * this->nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, this->stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(double) * nnz, hipMemcpyDeviceToDevice, this->stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(Scalar) * nnz, hipMemcpyDeviceToDevice, this->stream));
|
||||
}
|
||||
|
||||
if (verbosity >= 3) {
|
||||
@ -246,7 +218,7 @@ copy_system_to_gpu(double *d_Avals) {
|
||||
// don't copy rowpointers and colindices, they stay the same
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void rocsparseBILU0<Scalar, block_size>::
|
||||
update_system_on_gpu(double *d_Avals) {
|
||||
update_system_on_gpu(Scalar *d_Avals) {
|
||||
Timer t;
|
||||
|
||||
if (this->useJacMatrix) {
|
||||
@ -255,9 +227,9 @@ update_system_on_gpu(double *d_Avals) {
|
||||
copyThread->join();
|
||||
}
|
||||
#endif
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, this->jacMat->nnzValues, sizeof(double) * this->nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, this->stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, this->jacMat->nnzValues, sizeof(Scalar) * this->nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, this->stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(double) * nnz, hipMemcpyDeviceToDevice, this->stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(Scalar) * nnz, hipMemcpyDeviceToDevice, this->stream));
|
||||
}
|
||||
|
||||
if (verbosity >= 3) {
|
||||
@ -270,9 +242,9 @@ update_system_on_gpu(double *d_Avals) {
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void rocsparseBILU0<Scalar, block_size>::
|
||||
apply(double& y, double& x) {
|
||||
double zero = 0.0;
|
||||
double one = 1.0;
|
||||
apply(Scalar& y, Scalar& x) {
|
||||
Scalar zero = 0.0;
|
||||
Scalar one = 1.0;
|
||||
|
||||
Timer t_apply;
|
||||
|
||||
|
@ -57,7 +57,7 @@ private:
|
||||
#endif
|
||||
|
||||
rocsparse_int *d_Mrows, *d_Mcols;
|
||||
double *d_Mvals, *d_t;
|
||||
Scalar *d_Mvals, *d_t;
|
||||
void *d_buffer; // buffer space, used by rocsparse ilu0 analysis
|
||||
|
||||
public:
|
||||
@ -67,29 +67,57 @@ public:
|
||||
/// Initialize GPU and allocate memory
|
||||
/// \param[in] matrix matrix A
|
||||
/// \param[in] jacMatrix matrix for preconditioner
|
||||
bool initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix, std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix, rocsparse_int *d_Arows, rocsparse_int *d_Acols);
|
||||
bool initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
|
||||
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
|
||||
rocsparse_int *d_Arows,
|
||||
rocsparse_int *d_Acols);
|
||||
|
||||
// analysis, extract parallelism if specified
|
||||
/// Analysis, extract parallelism if specified
|
||||
/// \param[in] mat matrix A
|
||||
bool analyze_matrix(BlockedMatrix<Scalar> *mat);
|
||||
bool analyze_matrix(BlockedMatrix<Scalar> *mat, BlockedMatrix<Scalar> *jacMat);
|
||||
|
||||
/// Analysis, extract parallelism if specified
|
||||
/// \param[in] mat matrix A
|
||||
/// \param[in] jacMat matrix for preconditioner, analyze this as well
|
||||
bool analyze_matrix(BlockedMatrix<Scalar> *mat,
|
||||
BlockedMatrix<Scalar> *jacMat);
|
||||
|
||||
// ilu_decomposition
|
||||
/// ILU decomposition
|
||||
/// \param[in] mat matrix A to decompose
|
||||
bool create_preconditioner(BlockedMatrix<Scalar> *mat) override;
|
||||
bool create_preconditioner(BlockedMatrix<Scalar> *mat, BlockedMatrix<Scalar> *jacMat) override;
|
||||
|
||||
/// ILU decomposition
|
||||
/// \param[in] mat matrix A
|
||||
/// \param[in] jacMat matrix for preconditioner, decompose this one if used
|
||||
bool create_preconditioner(BlockedMatrix<Scalar> *mat,
|
||||
BlockedMatrix<Scalar> *jacMat) override;
|
||||
|
||||
// apply preconditioner, x = prec(y)
|
||||
// via Lz = y
|
||||
// and Ux = z
|
||||
void apply(double& y, double& x) override;
|
||||
/// Apply preconditioner, x = prec(y)
|
||||
/// via Lz = y
|
||||
/// and Ux = z
|
||||
/// \param[in] y Input y vector
|
||||
/// \param[out] x Output x vector
|
||||
void apply(Scalar& y,
|
||||
Scalar& x) override;
|
||||
|
||||
#if HAVE_OPENCL
|
||||
// apply preconditioner, x = prec(y)
|
||||
void apply(const cl::Buffer& y, cl::Buffer& x) {}
|
||||
void apply(const cl::Buffer& y,
|
||||
cl::Buffer& x) {}
|
||||
#endif
|
||||
|
||||
void copy_system_to_gpu(double *mVals);
|
||||
void update_system(double *vals, double *b);
|
||||
void update_system_on_gpu(double *b);
|
||||
/// Copy matrix A values to GPU
|
||||
/// \param[in] mVals Input values
|
||||
void copy_system_to_gpu(Scalar *mVals);
|
||||
|
||||
/// Reassign pointers, in case the addresses of the Dune variables have changed --> TODO: check when/if we need this method
|
||||
// /// \param[in] vals New values
|
||||
// /// \param[in] b New b vector
|
||||
// void update_system(Scalar *vals, Scalar *b);
|
||||
|
||||
/// Update GPU values after a new assembly is done
|
||||
/// \param[in] b New b vector
|
||||
void update_system_on_gpu(Scalar *b);
|
||||
};
|
||||
} // namespace Opm
|
||||
|
||||
|
@ -35,45 +35,6 @@
|
||||
|
||||
#include <opm/simulators/linalg/bda/Misc.hpp>
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/hip_version.h>
|
||||
|
||||
#define HIP_CHECK(STAT) \
|
||||
do { \
|
||||
const hipError_t stat = (STAT); \
|
||||
if(stat != hipSuccess) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseSolverBackend::hip "; \
|
||||
oss << "error: " << hipGetErrorString(stat); \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define ROCSPARSE_CHECK(STAT) \
|
||||
do { \
|
||||
const rocsparse_status stat = (STAT); \
|
||||
if(stat != rocsparse_status_success) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseSolverBackend::rocsparse "; \
|
||||
oss << "error: " << stat; \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define ROCBLAS_CHECK(STAT) \
|
||||
do { \
|
||||
const rocblas_status stat = (STAT); \
|
||||
if(stat != rocblas_status_success) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseSolverBackend::rocblas "; \
|
||||
oss << "error: " << stat; \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
using Opm::OpmLog;
|
||||
@ -88,7 +49,11 @@ rocsparseCPR<Scalar, block_size>::rocsparseCPR(int verbosity_) :
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
bool rocsparseCPR<Scalar, block_size>::
|
||||
initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix, std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix, rocsparse_int *d_Arows, rocsparse_int *d_Acols) {
|
||||
initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
|
||||
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
|
||||
rocsparse_int *d_Arows,
|
||||
rocsparse_int *d_Acols)
|
||||
{
|
||||
this->Nb = matrix->Nb;
|
||||
this->nnzb = matrix->nnzbs;
|
||||
this->N = Nb * block_size;
|
||||
@ -129,7 +94,9 @@ analyze_matrix(BlockedMatrix<Scalar> *mat_) {
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
bool rocsparseCPR<Scalar, block_size>::
|
||||
analyze_matrix(BlockedMatrix<Scalar> *mat_, BlockedMatrix<Scalar> *jacMat) {
|
||||
analyze_matrix(BlockedMatrix<Scalar> *mat_,
|
||||
BlockedMatrix<Scalar> *jacMat)
|
||||
{
|
||||
this->Nb = mat_->Nb;
|
||||
this->nnzb = mat_->nnzbs;
|
||||
this->N = Nb * block_size;
|
||||
@ -142,7 +109,10 @@ analyze_matrix(BlockedMatrix<Scalar> *mat_, BlockedMatrix<Scalar> *jacMat) {
|
||||
}
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
bool rocsparseCPR<Scalar, block_size>::create_preconditioner(BlockedMatrix<Scalar> *mat_, BlockedMatrix<Scalar> *jacMat) {
|
||||
bool rocsparseCPR<Scalar, block_size>::
|
||||
create_preconditioner(BlockedMatrix<Scalar> *mat_,
|
||||
BlockedMatrix<Scalar> *jacMat)
|
||||
{
|
||||
Dune::Timer t_bilu0;
|
||||
bool result = bilu0->create_preconditioner(mat_, jacMat);
|
||||
if (verbosity >= 3) {
|
||||
@ -205,7 +175,7 @@ init_rocm_buffers() {
|
||||
d_Rmatrices.reserve(this->num_levels - 1);
|
||||
d_invDiags.reserve(this->num_levels - 1);
|
||||
for (Matrix<Scalar>& m : this->Amatrices) {
|
||||
d_Amatrices.emplace_back(m.N, m.M, m.nnzs, 1);//TOCHECK: this memory might not be contigous!!
|
||||
d_Amatrices.emplace_back(m.N, m.M, m.nnzs, 1);
|
||||
}
|
||||
|
||||
for (Matrix<Scalar>& m : this->Rmatrices) {
|
||||
@ -246,7 +216,10 @@ rocm_upload() {
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void rocsparseCPR<Scalar, block_size>::
|
||||
amg_cycle_gpu(const int level, Scalar &y, Scalar &x) {
|
||||
amg_cycle_gpu(const int level,
|
||||
Scalar &y,
|
||||
Scalar &x)
|
||||
{
|
||||
RocmMatrix<Scalar> *A = &d_Amatrices[level];
|
||||
RocmMatrix<Scalar> *R = &d_Rmatrices[level];
|
||||
int Ncur = A->Nb;
|
||||
@ -281,52 +254,56 @@ amg_cycle_gpu(const int level, Scalar &y, Scalar &x) {
|
||||
// presmooth
|
||||
Scalar jacobi_damping = 0.65; // default value in amgcl: 0.72
|
||||
for (unsigned i = 0; i < this->num_pre_smooth_steps; ++i){
|
||||
HipKernels::residual(A->nnzValues, A->colIndices, A->rowPointers, &x, &y, t.nnzValues, Ncur, 1, this->stream);
|
||||
HipKernels::vmul(jacobi_damping, d_invDiags[level].nnzValues, t.nnzValues, &x, Ncur, this->stream);
|
||||
HipKernels<Scalar>::residual(A->nnzValues, A->colIndices, A->rowPointers, &x, &y, t.nnzValues, Ncur, 1, this->stream);
|
||||
HipKernels<Scalar>::vmul(jacobi_damping, d_invDiags[level].nnzValues, t.nnzValues, &x, Ncur, this->stream);
|
||||
}
|
||||
|
||||
// move to coarser level
|
||||
HipKernels::residual(A->nnzValues, A->colIndices, A->rowPointers, &x, &y, t.nnzValues, Ncur, 1, this->stream);
|
||||
HipKernels<Scalar>::residual(A->nnzValues, A->colIndices, A->rowPointers, &x, &y, t.nnzValues, Ncur, 1, this->stream);
|
||||
|
||||
// TODO: understand why rocsparse spmv library function does not here.
|
||||
// ROCSPARSE_CHECK(rocsparse_dbsrmv(this->handle, this->dir, this->operation,
|
||||
// R->Nb, R->Mb, R->nnzbs, &one, descr_R,
|
||||
// R->nnzValues, R->rowPointers, R->colIndices, 1,
|
||||
// t.nnzValues, &zero, f.nnzValues));
|
||||
HipKernels::spmv(R->nnzValues, R->colIndices, R->rowPointers, t.nnzValues, f.nnzValues, Nnext, 1, this->stream);
|
||||
HipKernels<Scalar>::spmv(R->nnzValues, R->colIndices, R->rowPointers, t.nnzValues, f.nnzValues, Nnext, 1, this->stream);
|
||||
|
||||
amg_cycle_gpu(level + 1, *f.nnzValues, *u.nnzValues);
|
||||
HipKernels::prolongate_vector(u.nnzValues, &x, d_PcolIndices[level].nnzValues, Ncur, this->stream);
|
||||
HipKernels<Scalar>::prolongate_vector(u.nnzValues, &x, d_PcolIndices[level].nnzValues, Ncur, this->stream);
|
||||
|
||||
// postsmooth
|
||||
for (unsigned i = 0; i < this->num_post_smooth_steps; ++i){
|
||||
HipKernels::residual(A->nnzValues, A->colIndices, A->rowPointers, &x, &y, t.nnzValues, Ncur, 1, this->stream);
|
||||
HipKernels::vmul(jacobi_damping, d_invDiags[level].nnzValues, t.nnzValues, &x, Ncur, this->stream);
|
||||
HipKernels<Scalar>::residual(A->nnzValues, A->colIndices, A->rowPointers, &x, &y, t.nnzValues, Ncur, 1, this->stream);
|
||||
HipKernels<Scalar>::vmul(jacobi_damping, d_invDiags[level].nnzValues, t.nnzValues, &x, Ncur, this->stream);
|
||||
}
|
||||
}
|
||||
|
||||
// x = prec(y)
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void rocsparseCPR<Scalar, block_size>::
|
||||
apply_amg(const Scalar& y, Scalar& x) {
|
||||
apply_amg(const Scalar& y,
|
||||
Scalar& x)
|
||||
{
|
||||
HIP_CHECK(hipMemsetAsync(d_coarse_x.data()->nnzValues, 0, sizeof(Scalar) * this->Nb, this->stream));
|
||||
|
||||
for (unsigned int i = 0; i < d_u.size(); ++i) {
|
||||
d_u[i].upload(this->Rmatrices[i].nnzValues.data(), this->stream);
|
||||
}
|
||||
|
||||
HipKernels::residual(d_mat->nnzValues, d_mat->colIndices, d_mat->rowPointers, &x, &y, d_rs.data()->nnzValues, this->Nb, block_size, this->stream);
|
||||
HipKernels<Scalar>::residual(d_mat->nnzValues, d_mat->colIndices, d_mat->rowPointers, &x, &y, d_rs.data()->nnzValues, this->Nb, block_size, this->stream);
|
||||
|
||||
HipKernels::full_to_pressure_restriction(d_rs.data()->nnzValues, d_weights.data()->nnzValues, d_coarse_y.data()->nnzValues, Nb, this->stream);
|
||||
HipKernels<Scalar>::full_to_pressure_restriction(d_rs.data()->nnzValues, d_weights.data()->nnzValues, d_coarse_y.data()->nnzValues, Nb, this->stream);
|
||||
|
||||
amg_cycle_gpu(0, *(d_coarse_y.data()->nnzValues), *(d_coarse_x.data()->nnzValues));
|
||||
|
||||
HipKernels::add_coarse_pressure_correction(d_coarse_x.data()->nnzValues, &x, this->pressure_idx, Nb, this->stream);
|
||||
HipKernels<Scalar>::add_coarse_pressure_correction(d_coarse_x.data()->nnzValues, &x, this->pressure_idx, Nb, this->stream);
|
||||
}
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void rocsparseCPR<Scalar, block_size>::
|
||||
apply(Scalar& y, Scalar& x) {
|
||||
apply(Scalar& y,
|
||||
Scalar& x)
|
||||
{
|
||||
Dune::Timer t_bilu0;
|
||||
|
||||
bilu0->apply(y, x);
|
||||
|
@ -64,25 +64,69 @@ private:
|
||||
std::unique_ptr<rocsparseSolverBackend<Scalar, 1> > coarse_solver; // coarse solver is scalar
|
||||
|
||||
// Initialize and allocate matrices and vectors
|
||||
void init_rocm_buffers();//TODO: rename to rocm/c and update in code
|
||||
void init_rocm_buffers();
|
||||
|
||||
// Copy matrices and vectors to GPU
|
||||
void rocm_upload();//TODO: rename to rocm/c and update in code
|
||||
void rocm_upload();
|
||||
|
||||
// apply pressure correction to vector
|
||||
void apply_amg(const Scalar& y, Scalar& x);
|
||||
|
||||
// Apply the AMG preconditioner
|
||||
void amg_cycle_gpu(const int level, Scalar &y, Scalar &x);
|
||||
|
||||
public:
|
||||
|
||||
rocsparseCPR(int verbosity);
|
||||
|
||||
bool initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix, std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix, rocsparse_int *d_Arows, rocsparse_int *d_Acols);
|
||||
/// Initialize GPU and allocate memory
|
||||
/// \param[in] matrix matrix A
|
||||
/// \param[in] jacMatrix matrix for preconditioner
|
||||
bool initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
|
||||
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
|
||||
rocsparse_int *d_Arows,
|
||||
rocsparse_int *d_Acols);
|
||||
|
||||
|
||||
/// Analysis, extract parallelism if specified
|
||||
/// \param[in] mat matrix A
|
||||
bool analyze_matrix(BlockedMatrix<Scalar> *mat);
|
||||
|
||||
/// Analysis, extract parallelism if specified
|
||||
/// \param[in] mat matrix A
|
||||
/// \param[in] jacMat matrix for preconditioner, analyze this as well
|
||||
bool analyze_matrix(BlockedMatrix<Scalar> *mat,
|
||||
BlockedMatrix<Scalar> *jacMat);
|
||||
|
||||
/// Create AMG preconditioner and perform ILU decomposition
|
||||
/// \param[in] mat matrix A
|
||||
bool create_preconditioner(BlockedMatrix<Scalar> *mat);
|
||||
|
||||
/// Create AMG preconditioner and perform ILU decomposition
|
||||
/// \param[in] mat matrix A
|
||||
/// \param[in] jacMat matrix for preconditioner, decompose this one if used
|
||||
bool create_preconditioner(BlockedMatrix<Scalar> *mat,
|
||||
BlockedMatrix<Scalar> *jacMat);
|
||||
|
||||
/// Apply preconditioner, x = prec(y)
|
||||
/// applies blocked ilu0
|
||||
/// also applies amg for pressure component
|
||||
/// \param[in] y Input y vector
|
||||
/// \param[out] x Output x vector
|
||||
void apply(Scalar& y,
|
||||
Scalar& x) override;
|
||||
|
||||
#if HAVE_OPENCL
|
||||
// apply preconditioner, x = prec(y)
|
||||
void apply(const cl::Buffer& y,
|
||||
cl::Buffer& x) {}
|
||||
#endif
|
||||
|
||||
/// Copy matrix A values to GPU
|
||||
/// \param[in] mVals Input values
|
||||
void copy_system_to_gpu(Scalar *b);
|
||||
|
||||
/// Reassign pointers, in case the addresses of the Dune variables have changed
|
||||
/// Reassign pointers, in case the addresses of the Dune variables have changed --> TODO: check when/if we need this method
|
||||
/// \param[in] vals array of nonzeroes, each block is stored row-wise and contiguous, contains nnz values
|
||||
/// \param[in] b input vector b, contains N values
|
||||
// void update_system(Scalar *vals, Scalar *b);
|
||||
@ -90,20 +134,6 @@ public:
|
||||
/// Update linear system to GPU
|
||||
/// \param[in] b input vector, contains N values
|
||||
void update_system_on_gpu(Scalar *b);
|
||||
|
||||
bool analyze_matrix(BlockedMatrix<Scalar> *mat);
|
||||
bool analyze_matrix(BlockedMatrix<Scalar> *mat, BlockedMatrix<Scalar> *jacMat);
|
||||
|
||||
bool create_preconditioner(BlockedMatrix<Scalar> *mat);
|
||||
bool create_preconditioner(BlockedMatrix<Scalar> *mat, BlockedMatrix<Scalar> *jacMat);
|
||||
|
||||
#if HAVE_OPENCL
|
||||
// apply preconditioner, x = prec(y)
|
||||
void apply(const cl::Buffer& y, cl::Buffer& x) {}
|
||||
#endif
|
||||
// applies blocked ilu0
|
||||
// also applies amg for pressure component
|
||||
void apply(Scalar& y, Scalar& x);
|
||||
};
|
||||
|
||||
} // namespace Opm
|
||||
|
@ -25,29 +25,19 @@
|
||||
#include <opm/simulators/linalg/bda/rocm/rocsparseMatrix.hpp>
|
||||
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
|
||||
#include <opm/simulators/linalg/bda/Matrix.hpp>
|
||||
#include <opm/simulators/linalg/bda/Misc.hpp>
|
||||
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
|
||||
// #include <opm/simulators/linalg/bda/opencl/opencl.hpp>
|
||||
|
||||
#define HIP_CHECK(STAT) \
|
||||
do { \
|
||||
const hipError_t stat = (STAT); \
|
||||
if(stat != hipSuccess) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseSolverBackend::hip "; \
|
||||
oss << "error: " << hipGetErrorString(stat); \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
template<class Scalar>
|
||||
RocmMatrix<Scalar>::
|
||||
RocmMatrix(int Nb_, int Mb_, int nnzbs_, unsigned int block_size_)
|
||||
RocmMatrix(int Nb_,
|
||||
int Mb_,
|
||||
int nnzbs_,
|
||||
unsigned int block_size_)
|
||||
: Nb(Nb_),
|
||||
Mb(Mb_),
|
||||
nnzbs(nnzbs_),
|
||||
@ -62,7 +52,11 @@ RocmMatrix(int Nb_, int Mb_, int nnzbs_, unsigned int block_size_)
|
||||
|
||||
template <class Scalar>
|
||||
void RocmMatrix<Scalar>::
|
||||
upload(Scalar *vals, int *cols, int *rows, hipStream_t stream) {
|
||||
upload(Scalar *vals,
|
||||
int *cols,
|
||||
int *rows,
|
||||
hipStream_t stream)
|
||||
{
|
||||
HIP_CHECK(hipMemcpyAsync(nnzValues, vals, sizeof(Scalar) * block_size * block_size * nnzbs, hipMemcpyHostToDevice, stream));
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(colIndices, cols, sizeof(int) * nnzbs, hipMemcpyHostToDevice, stream));
|
||||
@ -72,7 +66,9 @@ upload(Scalar *vals, int *cols, int *rows, hipStream_t stream) {
|
||||
|
||||
template <class Scalar>
|
||||
void RocmMatrix<Scalar>::
|
||||
upload(Matrix<Scalar> *matrix, hipStream_t stream) {
|
||||
upload(Matrix<Scalar> *matrix,
|
||||
hipStream_t stream)
|
||||
{
|
||||
if (block_size != 1) {
|
||||
OPM_THROW(std::logic_error, "Error trying to upload a BlockedMatrix to RocmMatrix with different block_size");
|
||||
}
|
||||
@ -82,7 +78,9 @@ upload(Matrix<Scalar> *matrix, hipStream_t stream) {
|
||||
|
||||
template <class Scalar>
|
||||
void RocmMatrix<Scalar>::
|
||||
upload(BlockedMatrix<Scalar> *matrix, hipStream_t stream) {
|
||||
upload(BlockedMatrix<Scalar> *matrix,
|
||||
hipStream_t stream)
|
||||
{
|
||||
if (matrix->block_size != block_size) {
|
||||
OPM_THROW(std::logic_error, "Error trying to upload a BlockedMatrix to RocmMatrix with different block_size");
|
||||
}
|
||||
@ -99,7 +97,9 @@ RocmVector<Scalar>::RocmVector(int N)
|
||||
|
||||
template <class Scalar>
|
||||
void RocmVector<Scalar>::
|
||||
upload(Scalar *vals, hipStream_t stream) {
|
||||
upload(Scalar *vals,
|
||||
hipStream_t stream)
|
||||
{
|
||||
HIP_CHECK(hipMemcpyAsync(nnzValues, vals, sizeof(Scalar) * size, hipMemcpyHostToDevice, stream));
|
||||
}
|
||||
|
||||
|
@ -21,24 +21,29 @@
|
||||
#define OPM_ROCMMATRIX_HEADER_INCLUDED
|
||||
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <hip/hip_version.h>
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
template<class Scalar> class Matrix;
|
||||
template<class Scalar> class BlockedMatrix;
|
||||
|
||||
/// This struct resembles a csr matrix, only doubles are supported
|
||||
/// The matrix data is stored in OpenCL Buffers
|
||||
/// This struct resembles a csr matrix
|
||||
template<class Scalar>
|
||||
class RocmMatrix {
|
||||
public:
|
||||
|
||||
RocmMatrix(int Nb_, int Mb_, int nnzbs_, unsigned int block_size_);
|
||||
|
||||
void upload(Scalar *vals, int *cols, int *rows, hipStream_t stream);
|
||||
void upload(Matrix<Scalar> *matrix, hipStream_t stream);
|
||||
void upload(BlockedMatrix<Scalar> *matrix, hipStream_t stream);
|
||||
void upload(Scalar *vals,
|
||||
int *cols,
|
||||
int *rows,
|
||||
hipStream_t stream);
|
||||
|
||||
void upload(Matrix<Scalar> *matrix,
|
||||
hipStream_t stream);
|
||||
|
||||
void upload(BlockedMatrix<Scalar> *matrix,
|
||||
hipStream_t stream);
|
||||
|
||||
Scalar* nnzValues;
|
||||
int* colIndices;
|
||||
@ -54,8 +59,11 @@ public:
|
||||
|
||||
RocmVector(int N);
|
||||
|
||||
void upload(Scalar *vals, hipStream_t stream);
|
||||
void upload(Matrix<Scalar> *matrix, hipStream_t stream);
|
||||
void upload(Scalar *vals,
|
||||
hipStream_t stream);
|
||||
|
||||
void upload(Matrix<Scalar> *matrix,
|
||||
hipStream_t stream);
|
||||
|
||||
Scalar* nnzValues;
|
||||
int size;
|
||||
|
@ -23,18 +23,20 @@
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/rocm/rocsparseBILU0.hpp>
|
||||
// #include <opm/simulators/linalg/bda/rocm/rocsparseCPR.hpp>
|
||||
#include <opm/simulators/linalg/bda/rocm/rocsparseCPR.hpp>
|
||||
#include <opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.hpp>
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
std::unique_ptr<rocsparsePreconditioner<Scalar,block_size> > rocsparsePreconditioner<Scalar,block_size>::
|
||||
create(PreconditionerType type, int verbosity) {
|
||||
create(PreconditionerType type,
|
||||
int verbosity)
|
||||
{
|
||||
if (type == PreconditionerType::BILU0) {
|
||||
return std::make_unique<Opm::Accelerator::rocsparseBILU0<Scalar, block_size> >(verbosity);
|
||||
// } else if (type == PreconditionerType::CPR) {
|
||||
// return std::make_unique<Opm::Accelerator::rocsparseCPR<block_size> >(verbosity);
|
||||
} else if (type == PreconditionerType::CPR) {
|
||||
return std::make_unique<Opm::Accelerator::rocsparseCPR<Scalar, block_size> >(verbosity);
|
||||
} else {
|
||||
OPM_THROW(std::logic_error, "Invalid PreconditionerType");
|
||||
}
|
||||
@ -42,14 +44,20 @@ create(PreconditionerType type, int verbosity) {
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void rocsparsePreconditioner<Scalar, block_size>::
|
||||
set_matrix_analysis(rocsparse_mat_descr descr_L, rocsparse_mat_descr descr_U) {
|
||||
set_matrix_analysis(rocsparse_mat_descr descr_L,
|
||||
rocsparse_mat_descr descr_U)
|
||||
{
|
||||
descr_L = descr_L;
|
||||
descr_U = descr_U;
|
||||
}
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void rocsparsePreconditioner<Scalar, block_size>::
|
||||
set_context(rocsparse_handle handle, rocsparse_direction dir, rocsparse_operation operation, hipStream_t stream) {
|
||||
set_context(rocsparse_handle handle,
|
||||
rocsparse_direction dir,
|
||||
rocsparse_operation operation,
|
||||
hipStream_t stream)
|
||||
{
|
||||
this->handle = handle;
|
||||
this->dir = dir;
|
||||
this->operation = operation;
|
||||
|
@ -52,7 +52,8 @@ public:
|
||||
|
||||
virtual ~rocsparsePreconditioner() = default;
|
||||
|
||||
static std::unique_ptr<rocsparsePreconditioner<Scalar, block_size>> create(PreconditionerType type, int verbosity);
|
||||
static std::unique_ptr<rocsparsePreconditioner<Scalar, block_size>> create(PreconditionerType type,
|
||||
int verbosity);
|
||||
|
||||
// apply preconditioner, x = prec(y)
|
||||
virtual void apply(double& y, double& x) = 0;
|
||||
@ -60,9 +61,14 @@ public:
|
||||
// create/update preconditioner, probably used every linear solve
|
||||
// the version with two params can be overloaded, if not, it will default to using the one param version
|
||||
virtual bool create_preconditioner(BlockedMatrix<Scalar> *mat) = 0;
|
||||
virtual bool create_preconditioner(BlockedMatrix<Scalar> *mat, BlockedMatrix<Scalar> *jacMat) = 0;
|
||||
|
||||
virtual bool initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix, std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix, rocsparse_int *d_Arows, rocsparse_int *d_Acols) = 0;
|
||||
virtual bool create_preconditioner(BlockedMatrix<Scalar> *mat,
|
||||
BlockedMatrix<Scalar> *jacMat) = 0;
|
||||
|
||||
virtual bool initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
|
||||
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
|
||||
rocsparse_int *d_Arows,
|
||||
rocsparse_int *d_Acols) = 0;
|
||||
|
||||
virtual void copy_system_to_gpu(double *b)=0;
|
||||
|
||||
@ -70,8 +76,14 @@ public:
|
||||
/// \param[in] b input vector, contains N values
|
||||
virtual void update_system_on_gpu(double *b)=0;
|
||||
|
||||
void set_matrix_analysis(rocsparse_mat_descr descr_L, rocsparse_mat_descr descr_U);
|
||||
void set_context(rocsparse_handle handle, rocsparse_direction dir, rocsparse_operation operation, hipStream_t stream);
|
||||
void set_matrix_analysis(rocsparse_mat_descr descr_L,
|
||||
rocsparse_mat_descr descr_U);
|
||||
|
||||
void set_context(rocsparse_handle handle,
|
||||
rocsparse_direction dir,
|
||||
rocsparse_operation operation,
|
||||
hipStream_t stream);
|
||||
|
||||
void setJacMat(BlockedMatrix<Scalar> jacMat);
|
||||
};
|
||||
} //namespace Opm
|
||||
|
@ -43,50 +43,13 @@
|
||||
|
||||
#include <opm/simulators/linalg/bda/Preconditioner.hpp>
|
||||
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <hip/hip_version.h>
|
||||
#include <opm/simulators/linalg/bda/Misc.hpp>
|
||||
|
||||
#ifdef HIP_HAVE_CUDA_DEFINED
|
||||
#define HAVE_CUDA HIP_HAVE_CUDA_DEFINED
|
||||
#undef HIP_HAVE_CUDA_DEFINED
|
||||
#endif
|
||||
|
||||
#define HIP_CHECK(STAT) \
|
||||
do { \
|
||||
const hipError_t stat = (STAT); \
|
||||
if(stat != hipSuccess) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseSolverBackend::hip "; \
|
||||
oss << "error: " << hipGetErrorString(stat); \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define ROCSPARSE_CHECK(STAT) \
|
||||
do { \
|
||||
const rocsparse_status stat = (STAT); \
|
||||
if(stat != rocsparse_status_success) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseSolverBackend::rocsparse "; \
|
||||
oss << "error: " << stat; \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define ROCBLAS_CHECK(STAT) \
|
||||
do { \
|
||||
const rocblas_status stat = (STAT); \
|
||||
if(stat != rocblas_status_success) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseSolverBackend::rocblas "; \
|
||||
oss << "error: " << stat; \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
@ -462,7 +425,7 @@ copy_system_to_gpu(Scalar *b)
|
||||
sizeof(Scalar) * nnz,
|
||||
hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipMemsetAsync(d_x, 0, N * sizeof(Scalar), stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_b, b, N * sizeof(Scalar) * N,
|
||||
HIP_CHECK(hipMemcpyAsync(d_b, b, N * sizeof(Scalar),
|
||||
hipMemcpyHostToDevice, stream));
|
||||
|
||||
prec->copy_system_to_gpu(d_Avals);
|
||||
|
@ -40,17 +40,9 @@
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/MultisegmentWellContribution.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/Misc.hpp>
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
#define HIP_CHECK(stat) \
|
||||
{ \
|
||||
if(stat != hipSuccess) \
|
||||
{ \
|
||||
OPM_THROW(std::logic_error, "HIP error"); \
|
||||
} \
|
||||
}
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user