rocsparseSolverBackend: template Scalar type
This commit is contained in:
parent e620d9d044
commit 3eed028978
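
This commit templates the rocsparse GPU solver backend on its scalar type in addition to the block size: rocsparseSolverBackend<block_size> becomes rocsparseSolverBackend<Scalar,block_size>, every double in members, locals, and signatures follows the template parameter, and an explicit instantiation for Scalar = double preserves the existing behavior. Touched code is also modernized along the way: C++17 nested namespaces, std::make_unique in BdaBridge, braces on single-statement ifs, and reflowed long lines. A condensed sketch of the resulting shape (abbreviated for orientation only; the real class has many more members, see the hunks below):

    template<class Scalar, unsigned int block_size>
    class rocsparseSolverBackend : public BdaSolver<Scalar,block_size>
    {
        Scalar *d_x, *d_b;  // device data now uses the template type
    public:
        rocsparseSolverBackend(int linear_solver_verbosity, int maxit,
                               Scalar tolerance,
                               unsigned int platformID, unsigned int deviceID);
    };

    INSTANTIATE_TYPE(double)  // instantiates the class for block sizes 1..6
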
@@ -112,7 +112,9 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
     } else if (accelerator_mode.compare("rocsparse") == 0) {
 #if HAVE_ROCSPARSE
         use_gpu = true; // should be replaced by a 'use_bridge' boolean
-        backend.reset(new Opm::Accelerator::rocsparseSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance, platformID, deviceID));
+        using ROCS = Accelerator::rocsparseSolverBackend<double,block_size>;
+        backend = std::make_unique<ROCS>(linear_solver_verbosity, maxit,
+                                         tolerance, platformID, deviceID);
 #else
         OPM_THROW(std::logic_error, "Error rocsparseSolver was chosen, but rocsparse/rocblas was not found by CMake");
 #endif

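A note on the BdaBridge hunk above: the bridge still pins the backend to double precision, so the new Scalar parameter is only exercised with double for now. The ROCS alias plus std::make_unique replaces the raw backend.reset(new ...) call and keeps the line length in check. If a float build were wired up later, the selection would presumably look like this (hypothetical sketch, not part of this commit):

    using ROCS = Accelerator::rocsparseSolverBackend<float,block_size>;
    backend = std::make_unique<ROCS>(linear_solver_verbosity, maxit,
                                     tolerance, platformID, deviceID);
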
@@ -93,24 +93,20 @@
 extern std::shared_ptr<std::thread> copyThread;
 #endif //HAVE_OPENMP

-namespace Opm
-{
-namespace Accelerator
-{
+namespace Opm::Accelerator {

 using Opm::OpmLog;
 using Dune::Timer;

-template <unsigned int block_size>
-rocsparseSolverBackend<block_size>::
-rocsparseSolverBackend(int verbosity_, int maxit_, double tolerance_,
+template<class Scalar, unsigned int block_size>
+rocsparseSolverBackend<Scalar,block_size>::
+rocsparseSolverBackend(int verbosity_, int maxit_, Scalar tolerance_,
                        unsigned int platformID_, unsigned int deviceID_)
     : Base(verbosity_, maxit_, tolerance_, platformID_, deviceID_)
 {
     int numDevices = 0;
     HIP_CHECK(hipGetDeviceCount(&numDevices));
     if (static_cast<int>(deviceID) >= numDevices) {
-        OPM_THROW(std::runtime_error, "Error chosen too high HIP device ID");
+        OPM_THROW(std::runtime_error, "Invalid HIP device ID");
     }
     HIP_CHECK(hipSetDevice(deviceID));

@@ -130,46 +126,45 @@ rocsparseSolverBackend(int verbosity_, int maxit_, double tolerance_,
     ROCBLAS_CHECK(rocblas_set_stream(blas_handle, stream));
 }

-template <unsigned int block_size>
-rocsparseSolverBackend<block_size>::~rocsparseSolverBackend() {
+template<class Scalar, unsigned int block_size>
+rocsparseSolverBackend<Scalar,block_size>::~rocsparseSolverBackend()
+{
     hipError_t hipstatus = hipStreamSynchronize(stream);
-    if(hipstatus != hipSuccess){
+    if (hipstatus != hipSuccess) {
         OpmLog::error("Could not synchronize with hipStream");
     }
     hipstatus = hipStreamDestroy(stream);
-    if(hipstatus != hipSuccess){
+    if (hipstatus != hipSuccess) {
         OpmLog::error("Could not destroy hipStream");
     }
     rocsparse_status status1 = rocsparse_destroy_handle(handle);
-    if(status1 != rocsparse_status_success){
+    if (status1 != rocsparse_status_success) {
         OpmLog::error("Could not destroy rocsparse handle");
     }
     rocblas_status status2 = rocblas_destroy_handle(blas_handle);
-    if(status2 != rocblas_status_success){
+    if (status2 != rocblas_status_success) {
         OpmLog::error("Could not destroy rocblas handle");
     }
 }

-template <unsigned int block_size>
-void rocsparseSolverBackend<block_size>::
-gpu_pbicgstab([[maybe_unused]] WellContributions<double>& wellContribs,
+template<class Scalar, unsigned int block_size>
+void rocsparseSolverBackend<Scalar,block_size>::
+gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
               BdaResult& res)
 {
     float it = 0.5;
-    double rho, rhop, beta, alpha, nalpha, omega, nomega, tmp1, tmp2;
-    double norm, norm_0;
-    double zero = 0.0;
-    double one = 1.0;
-    double mone = -1.0;
+    Scalar rho, rhop, beta, alpha, nalpha, omega, nomega, tmp1, tmp2;
+    Scalar norm, norm_0;
+    Scalar zero = 0.0;
+    Scalar one = 1.0;
+    Scalar mone = -1.0;

     Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false);

     // set stream here, the WellContributions object is destroyed every linear solve
     // the number of wells can change every linear solve
     if (wellContribs.getNumWells() > 0) {
-        static_cast<WellContributionsRocsparse<double>&>(wellContribs).setStream(stream);
+        static_cast<WellContributionsRocsparse<Scalar>&>(wellContribs).setStream(stream);
     }

     // HIP_VERSION is defined as (HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH)

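With rho, alpha, omega, the norms, and the constants declared as Scalar, the whole BiCGSTAB update in gpu_pbicgstab now runs in whichever precision the class is instantiated with; only the half-iteration counter it stays a float. Note that context lines further down (for example rocsparse_dbsrsv_buffer_size in the analyze_matrix hunk) still call the d-prefixed double-precision rocSPARSE entry points, so a float instantiation would additionally need a type dispatch, presumably something like this (hypothetical sketch, not part of this commit):

    if constexpr (std::is_same_v<Scalar, float>) {
        ROCSPARSE_CHECK(rocsparse_sbsrsv_buffer_size(/* ... */));  // s-variant for float
    } else {
        ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(/* ... */));  // d-variant for double
    }
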
@@ -259,7 +254,7 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<double>& wellContribs,

         // apply wellContributions
         if (wellContribs.getNumWells() > 0) {
-            static_cast<WellContributionsRocsparse<double>&>(wellContribs).apply(d_pw, d_v);
+            static_cast<WellContributionsRocsparse<Scalar>&>(wellContribs).apply(d_pw, d_v);
         }
         if (verbosity >= 3) {
             HIP_CHECK(hipStreamSynchronize(stream));

@@ -325,7 +320,7 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<double>& wellContribs,

         // apply wellContributions
         if (wellContribs.getNumWells() > 0) {
-            static_cast<WellContributionsRocsparse<double>&>(wellContribs).apply(d_s, d_t);
+            static_cast<WellContributionsRocsparse<Scalar>&>(wellContribs).apply(d_s, d_t);
         }
         if (verbosity >= 3) {
             HIP_CHECK(hipStreamSynchronize(stream));

@@ -365,8 +360,11 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<double>& wellContribs,

     if (verbosity >= 1) {
         std::ostringstream out;
-        out << "=== converged: " << res.converged << ", conv_rate: " << res.conv_rate << ", time: " << res.elapsed << \
-            ", time per iteration: " << res.elapsed / it << ", iterations: " << it;
+        out << "=== converged: " << res.converged
+            << ", conv_rate: " << res.conv_rate
+            << ", time: " << res.elapsed << \
+               ", time per iteration: " << res.elapsed / it
+            << ", iterations: " << it;
         OpmLog::info(out.str());
     }
     if (verbosity >= 3) {

@@ -380,10 +378,10 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<double>& wellContribs,
     }
 }

-template <unsigned int block_size>
-void rocsparseSolverBackend<block_size>::
-initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
-           std::shared_ptr<BlockedMatrix<double>> jacMatrix)
+template<class Scalar, unsigned int block_size>
+void rocsparseSolverBackend<Scalar,block_size>::
+initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
+           std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix)
 {
     this->Nb = matrix->Nb;
     this->N = Nb * block_size;

@@ -397,12 +395,14 @@ initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
     }

     std::ostringstream out;
-    out << "Initializing GPU, matrix size: " << Nb << " blockrows, nnzb: " << nnzb << "\n";
+    out << "Initializing GPU, matrix size: "
+        << Nb << " blockrows, nnzb: " << nnzb << "\n";
     if (useJacMatrix) {
         out << "Blocks in ILU matrix: " << jacMatrix->nnzbs << "\n";
     }
-    out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance << "\n";
-    out << "PlatformID: " << platformID << ", deviceID: " << deviceID << "\n";
+    out << "Maxit: " << maxit
+        << std::scientific << ", tolerance: " << tolerance << "\n"
+        << "PlatformID: " << platformID << ", deviceID: " << deviceID << "\n";
     OpmLog::info(out.str());
     out.str("");
     out.clear();

@@ -410,26 +410,26 @@ initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
     mat = matrix;
     jacMat = jacMatrix;

-    HIP_CHECK(hipMalloc((void**)&d_r, sizeof(double) * N));
-    HIP_CHECK(hipMalloc((void**)&d_rw, sizeof(double) * N));
-    HIP_CHECK(hipMalloc((void**)&d_p, sizeof(double) * N));
-    HIP_CHECK(hipMalloc((void**)&d_pw, sizeof(double) * N));
-    HIP_CHECK(hipMalloc((void**)&d_s, sizeof(double) * N));
-    HIP_CHECK(hipMalloc((void**)&d_t, sizeof(double) * N));
-    HIP_CHECK(hipMalloc((void**)&d_v, sizeof(double) * N));
+    HIP_CHECK(hipMalloc((void**)&d_r, sizeof(Scalar) * N));
+    HIP_CHECK(hipMalloc((void**)&d_rw, sizeof(Scalar) * N));
+    HIP_CHECK(hipMalloc((void**)&d_p, sizeof(Scalar) * N));
+    HIP_CHECK(hipMalloc((void**)&d_pw, sizeof(Scalar) * N));
+    HIP_CHECK(hipMalloc((void**)&d_s, sizeof(Scalar) * N));
+    HIP_CHECK(hipMalloc((void**)&d_t, sizeof(Scalar) * N));
+    HIP_CHECK(hipMalloc((void**)&d_v, sizeof(Scalar) * N));

     HIP_CHECK(hipMalloc((void**)&d_Arows, sizeof(rocsparse_int) * (Nb + 1)));
     HIP_CHECK(hipMalloc((void**)&d_Acols, sizeof(rocsparse_int) * nnzb));
-    HIP_CHECK(hipMalloc((void**)&d_Avals, sizeof(double) * nnz));
-    HIP_CHECK(hipMalloc((void**)&d_x, sizeof(double) * N));
-    HIP_CHECK(hipMalloc((void**)&d_b, sizeof(double) * N));
+    HIP_CHECK(hipMalloc((void**)&d_Avals, sizeof(Scalar) * nnz));
+    HIP_CHECK(hipMalloc((void**)&d_x, sizeof(Scalar) * N));
+    HIP_CHECK(hipMalloc((void**)&d_b, sizeof(Scalar) * N));

     if (useJacMatrix) {
         HIP_CHECK(hipMalloc((void**)&d_Mrows, sizeof(rocsparse_int) * (Nb + 1)));
         HIP_CHECK(hipMalloc((void**)&d_Mcols, sizeof(rocsparse_int) * nnzbs_prec));
-        HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(double) * nnzbs_prec * block_size * block_size));
+        HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(Scalar) * nnzbs_prec * block_size * block_size));
     } else { // preconditioner matrix is same
-        HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(double) * nnzbs_prec * block_size * block_size));
+        HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(Scalar) * nnzbs_prec * block_size * block_size));
         d_Mcols = d_Acols;
         d_Mrows = d_Arows;
     }

@@ -437,26 +437,43 @@ initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
     initialized = true;
 } // end initialize()

-template <unsigned int block_size>
-void rocsparseSolverBackend<block_size>::copy_system_to_gpu(double *b) {
+template<class Scalar, unsigned int block_size>
+void rocsparseSolverBackend<Scalar,block_size>::
+copy_system_to_gpu(Scalar *b)
+{
     Timer t;

-    HIP_CHECK(hipMemcpyAsync(d_Arows, mat->rowPointers, sizeof(rocsparse_int) * (Nb + 1), hipMemcpyHostToDevice, stream));
-    HIP_CHECK(hipMemcpyAsync(d_Acols, mat->colIndices, sizeof(rocsparse_int) * nnzb, hipMemcpyHostToDevice, stream));
-    HIP_CHECK(hipMemcpyAsync(d_Avals, mat->nnzValues, sizeof(double) * nnz, hipMemcpyHostToDevice, stream));
-    HIP_CHECK(hipMemsetAsync(d_x, 0, sizeof(double) * N, stream));
-    HIP_CHECK(hipMemcpyAsync(d_b, b, sizeof(double) * N, hipMemcpyHostToDevice, stream));
+    HIP_CHECK(hipMemcpyAsync(d_Arows, mat->rowPointers,
+                             sizeof(rocsparse_int) * (Nb + 1),
+                             hipMemcpyHostToDevice, stream));
+    HIP_CHECK(hipMemcpyAsync(d_Acols, mat->colIndices,
+                             sizeof(rocsparse_int) * nnzb,
+                             hipMemcpyHostToDevice, stream));
+    HIP_CHECK(hipMemcpyAsync(d_Avals, mat->nnzValues,
+                             sizeof(Scalar) * nnz,
+                             hipMemcpyHostToDevice, stream));
+    HIP_CHECK(hipMemsetAsync(d_x, 0, N * sizeof(Scalar), stream));
+    HIP_CHECK(hipMemcpyAsync(d_b, b, N * sizeof(Scalar),
+                             hipMemcpyHostToDevice, stream));

     if (useJacMatrix) {
 #if HAVE_OPENMP
-        if(omp_get_max_threads() > 1)
+        if (omp_get_max_threads() > 1) {
             copyThread->join();
+        }
 #endif
-        HIP_CHECK(hipMemcpyAsync(d_Mrows, jacMat->rowPointers, sizeof(rocsparse_int) * (Nb + 1), hipMemcpyHostToDevice, stream));
-        HIP_CHECK(hipMemcpyAsync(d_Mcols, jacMat->colIndices, sizeof(rocsparse_int) * nnzbs_prec, hipMemcpyHostToDevice, stream));
-        HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues, sizeof(double) * nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, stream));
+        HIP_CHECK(hipMemcpyAsync(d_Mrows, jacMat->rowPointers,
+                                 sizeof(rocsparse_int) * (Nb + 1),
+                                 hipMemcpyHostToDevice, stream));
+        HIP_CHECK(hipMemcpyAsync(d_Mcols, jacMat->colIndices,
+                                 sizeof(rocsparse_int) * nnzbs_prec,
+                                 hipMemcpyHostToDevice, stream));
+        HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues,
+                                 sizeof(Scalar) * nnzbs_prec * block_size * block_size,
+                                 hipMemcpyHostToDevice, stream));
     } else {
-        HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(double) * nnz, hipMemcpyDeviceToDevice, stream));
+        HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals,
+                                 sizeof(Scalar) * nnz, hipMemcpyDeviceToDevice, stream));
     }

     if (verbosity >= 3) {

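In copy_system_to_gpu above (and in update_system_on_gpu in the next hunk), the OpenMP branch joins copyThread before enqueueing the d_Mvals upload: presumably jacMat->nnzValues is filled by that helper thread, so the join orders the host-side write before the asynchronous host-to-device copy. Reduced to its core the pattern is (sketch; nbytes stands in for the actual size expression):

    #if HAVE_OPENMP
        if (omp_get_max_threads() > 1) {
            copyThread->join();  // host-side fill of jacMat must finish first
        }
    #endif
        HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues, nbytes,
                                 hipMemcpyHostToDevice, stream));
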
@@ -471,24 +488,31 @@ void rocsparseSolverBackend<block_size>::copy_system_to_gpu(double *b) {
 } // end copy_system_to_gpu()

 // don't copy rowpointers and colindices, they stay the same
-template <unsigned int block_size>
-void rocsparseSolverBackend<block_size>::update_system_on_gpu(double *b) {
+template<class Scalar, unsigned int block_size>
+void rocsparseSolverBackend<Scalar,block_size>::
+update_system_on_gpu(Scalar* b)
+{
     Timer t;

-    HIP_CHECK(hipMemcpyAsync(d_Avals, mat->nnzValues, sizeof(double) * nnz, hipMemcpyHostToDevice, stream));
-    HIP_CHECK(hipMemsetAsync(d_x, 0, sizeof(double) * N, stream));
-    HIP_CHECK(hipMemcpyAsync(d_b, b, sizeof(double) * N, hipMemcpyHostToDevice, stream));
+    HIP_CHECK(hipMemcpyAsync(d_Avals, mat->nnzValues, sizeof(Scalar) * nnz,
+                             hipMemcpyHostToDevice, stream));
+    HIP_CHECK(hipMemsetAsync(d_x, 0, N * sizeof(Scalar), stream));
+    HIP_CHECK(hipMemcpyAsync(d_b, b, N * sizeof(Scalar),
+                             hipMemcpyHostToDevice, stream));

     if (useJacMatrix) {
 #if HAVE_OPENMP
-        if (omp_get_max_threads() > 1)
+        if (omp_get_max_threads() > 1) {
             copyThread->join();
+        }
 #endif
-        HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues, sizeof(double) * nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, stream));
+        HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues,
+                                 sizeof(Scalar) * nnzbs_prec * block_size * block_size,
+                                 hipMemcpyHostToDevice, stream));
     } else {
-        HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(double) * nnz, hipMemcpyDeviceToDevice, stream));
+        HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals,
+                                 sizeof(Scalar) * nnz, hipMemcpyDeviceToDevice, stream));
     }
-
     if (verbosity >= 3) {
         HIP_CHECK(hipStreamSynchronize(stream));

@@ -500,8 +524,10 @@ void rocsparseSolverBackend<block_size>::update_system_on_gpu(double *b) {
     }
 } // end update_system_on_gpu()

-template <unsigned int block_size>
-bool rocsparseSolverBackend<block_size>::analyze_matrix() {
+template<class Scalar, unsigned int block_size>
+bool rocsparseSolverBackend<Scalar,block_size>::
+analyze_matrix()
+{
     std::size_t d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
     Timer t;

@@ -530,7 +556,8 @@ bool rocsparseSolverBackend<block_size>::analyze_matrix() {
     ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(handle, dir, operation, Nb, nnzbs_prec,
                                                  descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_U));

-    d_bufferSize = std::max(d_bufferSize_M, std::max(d_bufferSize_L, d_bufferSize_U));
+    d_bufferSize = std::max(d_bufferSize_M,
+                            std::max(d_bufferSize_L, d_bufferSize_U));

     HIP_CHECK(hipMalloc((void**)&d_buffer, d_bufferSize));

@@ -578,9 +605,10 @@ bool rocsparseSolverBackend<block_size>::analyze_matrix() {
     return true;
 } // end analyze_matrix()


-template <unsigned int block_size>
-bool rocsparseSolverBackend<block_size>::create_preconditioner() {
+template<class Scalar, unsigned int block_size>
+bool rocsparseSolverBackend<Scalar,block_size>::
+create_preconditioner()
+{
     Timer t;

     bool result = true;

@@ -605,10 +633,9 @@ bool rocsparseSolverBackend<block_size>::create_preconditioner() {
     return result;
 } // end create_preconditioner()

-
-template <unsigned int block_size>
-void rocsparseSolverBackend<block_size>::
-solve_system(WellContributions<double>& wellContribs, BdaResult& res)
+template<class Scalar, unsigned int block_size>
+void rocsparseSolverBackend<Scalar,block_size>::
+solve_system(WellContributions<Scalar>& wellContribs, BdaResult& res)
 {
     Timer t;

@@ -621,17 +648,18 @@ solve_system(WellContributions<double>& wellContribs, BdaResult& res)
         out << "rocsparseSolver::solve_system(): " << t.stop() << " s";
         OpmLog::info(out.str());
     }

 } // end solve_system()


 // copy result to host memory
 // caller must be sure that x is a valid array
-template <unsigned int block_size>
-void rocsparseSolverBackend<block_size>::get_result(double *x) {
+template<class Scalar, unsigned int block_size>
+void rocsparseSolverBackend<Scalar,block_size>::
+get_result(Scalar* x)
+{
     Timer t;

-    HIP_CHECK(hipMemcpyAsync(x, d_x, sizeof(double) * N, hipMemcpyDeviceToHost, stream));
+    HIP_CHECK(hipMemcpyAsync(x, d_x, sizeof(Scalar) * N,
+                             hipMemcpyDeviceToHost, stream));
     HIP_CHECK(hipStreamSynchronize(stream)); // always wait, caller might want to use x immediately

     if (verbosity >= 3) {

@@ -641,13 +669,12 @@ void rocsparseSolverBackend<block_size>::get_result(double *x) {
     }
 } // end get_result()

-
-template <unsigned int block_size>
-SolverStatus rocsparseSolverBackend<block_size>::
-solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
-             double *b,
-             std::shared_ptr<BlockedMatrix<double>> jacMatrix,
-             WellContributions<double>& wellContribs,
+template<class Scalar, unsigned int block_size>
+SolverStatus rocsparseSolverBackend<Scalar,block_size>::
+solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
+             Scalar* b,
+             std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
+             WellContributions<Scalar>& wellContribs,
              BdaResult& res)
 {
     if (initialized == false) {

@@ -672,19 +699,14 @@ SolverStatus rocsparseSolverBackend<block_size>::
     return SolverStatus::BDA_SOLVER_SUCCESS;
 }

-#define INSTANTIATE_BDA_FUNCTIONS(n)                                 \
-    template rocsparseSolverBackend<n>::rocsparseSolverBackend(     \
-        int, int, double, unsigned int, unsigned int);
-
-INSTANTIATE_BDA_FUNCTIONS(1);
-INSTANTIATE_BDA_FUNCTIONS(2);
-INSTANTIATE_BDA_FUNCTIONS(3);
-INSTANTIATE_BDA_FUNCTIONS(4);
-INSTANTIATE_BDA_FUNCTIONS(5);
-INSTANTIATE_BDA_FUNCTIONS(6);
-
-#undef INSTANTIATE_BDA_FUNCTIONS
-
-} // namespace Accelerator
-} // namespace Opm
+#define INSTANTIATE_TYPE(T)                     \
+    template class rocsparseSolverBackend<T,1>; \
+    template class rocsparseSolverBackend<T,2>; \
+    template class rocsparseSolverBackend<T,3>; \
+    template class rocsparseSolverBackend<T,4>; \
+    template class rocsparseSolverBackend<T,5>; \
+    template class rocsparseSolverBackend<T,6>;
+
+INSTANTIATE_TYPE(double)
+
+} // namespace Opm::Accelerator

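The old INSTANTIATE_BDA_FUNCTIONS(n) macro only instantiated the constructor per block size; INSTANTIATE_TYPE(T) explicitly instantiates the entire class template, which is what lets the member definitions stay in the .cpp file. INSTANTIATE_TYPE(double) expands to:

    template class rocsparseSolverBackend<double,1>;
    template class rocsparseSolverBackend<double,2>;
    template class rocsparseSolverBackend<double,3>;
    template class rocsparseSolverBackend<double,4>;
    template class rocsparseSolverBackend<double,5>;
    template class rocsparseSolverBackend<double,6>;

Adding another precision would then presumably be a one-line INSTANTIATE_TYPE(float), once the rocSPARSE/rocBLAS calls dispatch on Scalar. The remaining hunks apply the same templating to the class declaration in the header.
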
@@ -31,16 +31,13 @@

 #include <hip/hip_version.h>

-namespace Opm
-{
-namespace Accelerator
-{
+namespace Opm::Accelerator {

 /// This class implements a rocsparse-based ilu0-bicgstab solver on GPU
-template <unsigned int block_size>
-class rocsparseSolverBackend : public BdaSolver<double,block_size>
+template<class Scalar, unsigned int block_size>
+class rocsparseSolverBackend : public BdaSolver<Scalar,block_size>
 {
-    using Base = BdaSolver<double,block_size>;
+    using Base = BdaSolver<Scalar,block_size>;

     using Base::N;
     using Base::Nb;

@@ -59,8 +56,8 @@ private:
     bool useJacMatrix = false;

     bool analysis_done = false;
-    std::shared_ptr<BlockedMatrix<double>> mat{};    // original matrix
-    std::shared_ptr<BlockedMatrix<double>> jacMat{}; // matrix for preconditioner
+    std::shared_ptr<BlockedMatrix<Scalar>> mat{};    // original matrix
+    std::shared_ptr<BlockedMatrix<Scalar>> jacMat{}; // matrix for preconditioner
     int nnzbs_prec = 0; // number of nnz blocks in preconditioner matrix M

     rocsparse_direction dir = rocsparse_direction_row;

@@ -76,32 +73,31 @@ private:
     rocsparse_int *d_Arows, *d_Mrows;
     rocsparse_int *d_Acols, *d_Mcols;
-    double *d_Avals, *d_Mvals;
-    double *d_x, *d_b, *d_r, *d_rw, *d_p; // vectors, used during linear solve
-    double *d_pw, *d_s, *d_t, *d_v;
+    Scalar *d_Avals, *d_Mvals;
+    Scalar *d_x, *d_b, *d_r, *d_rw, *d_p; // vectors, used during linear solve
+    Scalar *d_pw, *d_s, *d_t, *d_v;
     void *d_buffer; // buffer space, used by rocsparse ilu0 analysis
     int ver;
     char rev[64];


     /// Solve linear system using ilu0-bicgstab
     /// \param[in] wellContribs   WellContributions, to apply them separately, instead of adding them to matrix A
     /// \param[inout] res         summary of solver result
-    void gpu_pbicgstab(WellContributions<double>& wellContribs, BdaResult& res);
+    void gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res);

     /// Initialize GPU and allocate memory
     /// \param[in] matrix     matrix A
     /// \param[in] jacMatrix  matrix for preconditioner
-    void initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
-                    std::shared_ptr<BlockedMatrix<double>> jacMatrix);
+    void initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
+                    std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix);

     /// Copy linear system to GPU
     /// \param[in] b              input vector, contains N values
-    void copy_system_to_gpu(double *b);
+    void copy_system_to_gpu(Scalar* b);

     /// Update linear system on GPU
     /// \param[in] b              input vector, contains N values
-    void update_system_on_gpu(double *b);
+    void update_system_on_gpu(Scalar* b);

     /// Analyze sparsity pattern to extract parallelism
     /// \return true iff analysis was successful

@@ -114,16 +110,20 @@ private:
     /// Solve linear system
     /// \param[in] wellContribs   WellContributions, to apply them separately, instead of adding them to matrix A
     /// \param[inout] res         summary of solver result
-    void solve_system(WellContributions<double>& wellContribs, BdaResult& res);
+    void solve_system(WellContributions<Scalar>& wellContribs, BdaResult& res);

 public:
-    /// Construct a openclSolver
-    /// \param[in] linear_solver_verbosity    verbosity of openclSolver
-    /// \param[in] maxit                      maximum number of iterations for openclSolver
-    /// \param[in] tolerance                  required relative tolerance for openclSolver
+    /// Construct a rocsparseSolver
+    /// \param[in] linear_solver_verbosity    verbosity of rocsparseSolver
+    /// \param[in] maxit                      maximum number of iterations for rocsparseSolver
+    /// \param[in] tolerance                  required relative tolerance for rocsparseSolver
     /// \param[in] platformID                 the OpenCL platform to be used
     /// \param[in] deviceID                   the device to be used
-    rocsparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID);
+    rocsparseSolverBackend(int linear_solver_verbosity,
+                           int maxit,
+                           Scalar tolerance,
+                           unsigned int platformID,
+                           unsigned int deviceID);

     /// For the CPR coarse solver
     // rocsparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, ILUReorder opencl_ilu_reorder);

@@ -138,10 +138,10 @@ public:
     /// \param[in] wellContribs   WellContributions, to apply them separately, instead of adding them to matrix A
     /// \param[inout] res         summary of solver result
     /// \return                   status code
-    SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
-                              double* b,
-                              std::shared_ptr<BlockedMatrix<double>> jacMatrix,
-                              WellContributions<double>& wellContribs,
+    SolverStatus solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
+                              Scalar* b,
+                              std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
+                              WellContributions<Scalar>& wellContribs,
                               BdaResult& res) override;

     /// Solve scalar linear system, for example a coarse system of an AMG preconditioner

@@ -150,13 +150,10 @@ public:

     /// Get result after linear solve, and perform postprocessing if necessary
     /// \param[inout] x          resulting x vector, caller must guarantee that x points to a valid array
-    void get_result(double *x) override;
+    void get_result(Scalar* x) override;

 }; // end class rocsparseSolverBackend

-} // namespace Accelerator
-} // namespace Opm
+} // namespace Opm::Accelerator

 #endif