rocsparseSolverBackend: template Scalar type
commit 3eed028978
parent e620d9d044
@@ -112,7 +112,9 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
     } else if (accelerator_mode.compare("rocsparse") == 0) {
 #if HAVE_ROCSPARSE
         use_gpu = true; // should be replaced by a 'use_bridge' boolean
-        backend.reset(new Opm::Accelerator::rocsparseSolverBackend<block_size>(linear_solver_verbosity, maxit, tolerance, platformID, deviceID));
+        using ROCS = Accelerator::rocsparseSolverBackend<double,block_size>;
+        backend = std::make_unique<ROCS>(linear_solver_verbosity, maxit,
+                                         tolerance, platformID, deviceID);
 #else
         OPM_THROW(std::logic_error, "Error rocsparseSolver was chosen, but rocsparse/rocblas was not found by CMake");
 #endif
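Note: the new construction path above selects the value type at compile time (Scalar = double for now) and keeps the block size as a second template parameter. A minimal, self-contained sketch of the same pattern follows; the names (ToySolverBackend, ToyBackend) are hypothetical stand-ins, not the actual OPM classes.

#include <iostream>
#include <memory>

// Hypothetical stand-in for a solver backend templated on the value type and
// the matrix block size, mirroring rocsparseSolverBackend<Scalar,block_size>.
template<class Scalar, unsigned int block_size>
class ToySolverBackend {
public:
    ToySolverBackend(int verbosity, int maxit, Scalar tolerance)
        : verbosity_(verbosity), maxit_(maxit), tolerance_(tolerance) {}

    void info() const {
        std::cout << "block_size=" << block_size
                  << " maxit=" << maxit_
                  << " tol=" << tolerance_ << "\n";
    }

private:
    int verbosity_;
    int maxit_;
    Scalar tolerance_;
};

int main() {
    // The bridge picks the value type at compile time; here Scalar = double,
    // matching the 'using ROCS = ...<double,block_size>' line in the diff above.
    using ToyBackend = ToySolverBackend<double, 3>;
    auto backend = std::make_unique<ToyBackend>(/*verbosity=*/1, /*maxit=*/200, /*tolerance=*/1e-3);
    backend->info();
    return 0;
}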
@@ -93,24 +93,20 @@
 extern std::shared_ptr<std::thread> copyThread;
 #endif //HAVE_OPENMP
 
-namespace Opm
-{
-namespace Accelerator
-{
+namespace Opm::Accelerator {
 
-using Opm::OpmLog;
 using Dune::Timer;
 
-template <unsigned int block_size>
-rocsparseSolverBackend<block_size>::
-rocsparseSolverBackend(int verbosity_, int maxit_, double tolerance_,
+template<class Scalar, unsigned int block_size>
+rocsparseSolverBackend<Scalar,block_size>::
+rocsparseSolverBackend(int verbosity_, int maxit_, Scalar tolerance_,
                        unsigned int platformID_, unsigned int deviceID_)
     : Base(verbosity_, maxit_, tolerance_, platformID_, deviceID_)
 {
     int numDevices = 0;
     HIP_CHECK(hipGetDeviceCount(&numDevices));
     if (static_cast<int>(deviceID) >= numDevices) {
-        OPM_THROW(std::runtime_error, "Error chosen too high HIP device ID");
+        OPM_THROW(std::runtime_error, "Invalid HIP device ID");
     }
     HIP_CHECK(hipSetDevice(deviceID));
 
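Note: the "namespace Opm::Accelerator {" form introduced above is the C++17 nested-namespace definition; it is equivalent to the two nested blocks it replaces. A small illustration with purely hypothetical names (Demo::Inner, answer_old, answer_new), not code from the repository:

// Pre-C++17 spelling: two nested namespace blocks, as on the removed lines.
namespace Demo { namespace Inner {
int answer_old() { return 42; }
}} // namespace Demo::Inner

// C++17 nested-namespace definition, as used on the added lines.
namespace Demo::Inner {
int answer_new() { return 42; }
} // namespace Demo::Inner

int main()
{
    // Both functions end up in the same namespace, so this returns 0.
    return Demo::Inner::answer_old() - Demo::Inner::answer_new();
}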
@@ -130,46 +126,45 @@ rocsparseSolverBackend(int verbosity_, int maxit_, double tolerance_,
     ROCBLAS_CHECK(rocblas_set_stream(blas_handle, stream));
 }
 
-template <unsigned int block_size>
-rocsparseSolverBackend<block_size>::~rocsparseSolverBackend() {
+template<class Scalar, unsigned int block_size>
+rocsparseSolverBackend<Scalar,block_size>::~rocsparseSolverBackend()
+{
     hipError_t hipstatus = hipStreamSynchronize(stream);
-    if(hipstatus != hipSuccess){
+    if (hipstatus != hipSuccess) {
         OpmLog::error("Could not synchronize with hipStream");
     }
     hipstatus = hipStreamDestroy(stream);
-    if(hipstatus != hipSuccess){
+    if (hipstatus != hipSuccess) {
         OpmLog::error("Could not destroy hipStream");
     }
     rocsparse_status status1 = rocsparse_destroy_handle(handle);
-    if(status1 != rocsparse_status_success){
+    if (status1 != rocsparse_status_success) {
         OpmLog::error("Could not destroy rocsparse handle");
     }
     rocblas_status status2 = rocblas_destroy_handle(blas_handle);
-    if(status2 != rocblas_status_success){
+    if (status2 != rocblas_status_success) {
         OpmLog::error("Could not destroy rocblas handle");
     }
 }
 
-template <unsigned int block_size>
-void rocsparseSolverBackend<block_size>::
-gpu_pbicgstab([[maybe_unused]] WellContributions<double>& wellContribs,
+template<class Scalar, unsigned int block_size>
+void rocsparseSolverBackend<Scalar,block_size>::
+gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
               BdaResult& res)
 {
     float it = 0.5;
-    double rho, rhop, beta, alpha, nalpha, omega, nomega, tmp1, tmp2;
-    double norm, norm_0;
-    double zero = 0.0;
-    double one = 1.0;
-    double mone = -1.0;
+    Scalar rho, rhop, beta, alpha, nalpha, omega, nomega, tmp1, tmp2;
+    Scalar norm, norm_0;
+    Scalar zero = 0.0;
+    Scalar one = 1.0;
+    Scalar mone = -1.0;
 
     Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false);
 
     // set stream here, the WellContributions object is destroyed every linear solve
     // the number of wells can change every linear solve
     if (wellContribs.getNumWells() > 0) {
-        static_cast<WellContributionsRocsparse<double>&>(wellContribs).setStream(stream);
+        static_cast<WellContributionsRocsparse<Scalar>&>(wellContribs).setStream(stream);
     }
 
     // HIP_VERSION is defined as (HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH)
@@ -259,7 +254,7 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<double>& wellContribs,
 
         // apply wellContributions
         if (wellContribs.getNumWells() > 0) {
-            static_cast<WellContributionsRocsparse<double>&>(wellContribs).apply(d_pw, d_v);
+            static_cast<WellContributionsRocsparse<Scalar>&>(wellContribs).apply(d_pw, d_v);
         }
         if (verbosity >= 3) {
             HIP_CHECK(hipStreamSynchronize(stream));
@@ -325,7 +320,7 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<double>& wellContribs,
 
         // apply wellContributions
         if (wellContribs.getNumWells() > 0) {
-            static_cast<WellContributionsRocsparse<double>&>(wellContribs).apply(d_s, d_t);
+            static_cast<WellContributionsRocsparse<Scalar>&>(wellContribs).apply(d_s, d_t);
         }
         if (verbosity >= 3) {
             HIP_CHECK(hipStreamSynchronize(stream));
@@ -365,8 +360,11 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<double>& wellContribs,
 
     if (verbosity >= 1) {
         std::ostringstream out;
-        out << "=== converged: " << res.converged << ", conv_rate: " << res.conv_rate << ", time: " << res.elapsed << \
-            ", time per iteration: " << res.elapsed / it << ", iterations: " << it;
+        out << "=== converged: " << res.converged
+            << ", conv_rate: " << res.conv_rate
+            << ", time: " << res.elapsed << \
+               ", time per iteration: " << res.elapsed / it
+            << ", iterations: " << it;
         OpmLog::info(out.str());
     }
     if (verbosity >= 3) {
@@ -380,10 +378,10 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<double>& wellContribs,
     }
 }
 
-template <unsigned int block_size>
-void rocsparseSolverBackend<block_size>::
-initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
-           std::shared_ptr<BlockedMatrix<double>> jacMatrix)
+template<class Scalar, unsigned int block_size>
+void rocsparseSolverBackend<Scalar,block_size>::
+initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
+           std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix)
 {
     this->Nb = matrix->Nb;
     this->N = Nb * block_size;
@@ -397,12 +395,14 @@ initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
     }
 
     std::ostringstream out;
-    out << "Initializing GPU, matrix size: " << Nb << " blockrows, nnzb: " << nnzb << "\n";
+    out << "Initializing GPU, matrix size: "
+        << Nb << " blockrows, nnzb: " << nnzb << "\n";
     if (useJacMatrix) {
         out << "Blocks in ILU matrix: " << jacMatrix->nnzbs << "\n";
     }
-    out << "Maxit: " << maxit << std::scientific << ", tolerance: " << tolerance << "\n";
-    out << "PlatformID: " << platformID << ", deviceID: " << deviceID << "\n";
+    out << "Maxit: " << maxit
+        << std::scientific << ", tolerance: " << tolerance << "\n"
+        << "PlatformID: " << platformID << ", deviceID: " << deviceID << "\n";
     OpmLog::info(out.str());
     out.str("");
     out.clear();
@@ -410,26 +410,26 @@ initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
     mat = matrix;
     jacMat = jacMatrix;
 
-    HIP_CHECK(hipMalloc((void**)&d_r, sizeof(double) * N));
-    HIP_CHECK(hipMalloc((void**)&d_rw, sizeof(double) * N));
-    HIP_CHECK(hipMalloc((void**)&d_p, sizeof(double) * N));
-    HIP_CHECK(hipMalloc((void**)&d_pw, sizeof(double) * N));
-    HIP_CHECK(hipMalloc((void**)&d_s, sizeof(double) * N));
-    HIP_CHECK(hipMalloc((void**)&d_t, sizeof(double) * N));
-    HIP_CHECK(hipMalloc((void**)&d_v, sizeof(double) * N));
+    HIP_CHECK(hipMalloc((void**)&d_r, sizeof(Scalar) * N));
+    HIP_CHECK(hipMalloc((void**)&d_rw, sizeof(Scalar) * N));
+    HIP_CHECK(hipMalloc((void**)&d_p, sizeof(Scalar) * N));
+    HIP_CHECK(hipMalloc((void**)&d_pw, sizeof(Scalar) * N));
+    HIP_CHECK(hipMalloc((void**)&d_s, sizeof(Scalar) * N));
+    HIP_CHECK(hipMalloc((void**)&d_t, sizeof(Scalar) * N));
+    HIP_CHECK(hipMalloc((void**)&d_v, sizeof(Scalar) * N));
 
     HIP_CHECK(hipMalloc((void**)&d_Arows, sizeof(rocsparse_int) * (Nb + 1)));
     HIP_CHECK(hipMalloc((void**)&d_Acols, sizeof(rocsparse_int) * nnzb));
-    HIP_CHECK(hipMalloc((void**)&d_Avals, sizeof(double) * nnz));
-    HIP_CHECK(hipMalloc((void**)&d_x, sizeof(double) * N));
-    HIP_CHECK(hipMalloc((void**)&d_b, sizeof(double) * N));
+    HIP_CHECK(hipMalloc((void**)&d_Avals, sizeof(Scalar) * nnz));
+    HIP_CHECK(hipMalloc((void**)&d_x, sizeof(Scalar) * N));
+    HIP_CHECK(hipMalloc((void**)&d_b, sizeof(Scalar) * N));
 
     if (useJacMatrix) {
         HIP_CHECK(hipMalloc((void**)&d_Mrows, sizeof(rocsparse_int) * (Nb + 1)));
         HIP_CHECK(hipMalloc((void**)&d_Mcols, sizeof(rocsparse_int) * nnzbs_prec));
-        HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(double) * nnzbs_prec * block_size * block_size));
+        HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(Scalar) * nnzbs_prec * block_size * block_size));
     } else { // preconditioner matrix is same
-        HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(double) * nnzbs_prec * block_size * block_size));
+        HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(Scalar) * nnzbs_prec * block_size * block_size));
         d_Mcols = d_Acols;
         d_Mrows = d_Arows;
     }
@@ -437,26 +437,43 @@ initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
     initialized = true;
 } // end initialize()
 
-template <unsigned int block_size>
-void rocsparseSolverBackend<block_size>::copy_system_to_gpu(double *b) {
+template<class Scalar, unsigned int block_size>
+void rocsparseSolverBackend<Scalar,block_size>::
+copy_system_to_gpu(Scalar *b)
+{
     Timer t;
 
-    HIP_CHECK(hipMemcpyAsync(d_Arows, mat->rowPointers, sizeof(rocsparse_int) * (Nb + 1), hipMemcpyHostToDevice, stream));
-    HIP_CHECK(hipMemcpyAsync(d_Acols, mat->colIndices, sizeof(rocsparse_int) * nnzb, hipMemcpyHostToDevice, stream));
-    HIP_CHECK(hipMemcpyAsync(d_Avals, mat->nnzValues, sizeof(double) * nnz, hipMemcpyHostToDevice, stream));
-    HIP_CHECK(hipMemsetAsync(d_x, 0, sizeof(double) * N, stream));
-    HIP_CHECK(hipMemcpyAsync(d_b, b, sizeof(double) * N, hipMemcpyHostToDevice, stream));
+    HIP_CHECK(hipMemcpyAsync(d_Arows, mat->rowPointers,
+                             sizeof(rocsparse_int) * (Nb + 1),
+                             hipMemcpyHostToDevice, stream));
+    HIP_CHECK(hipMemcpyAsync(d_Acols, mat->colIndices,
+                             sizeof(rocsparse_int) * nnzb,
+                             hipMemcpyHostToDevice, stream));
+    HIP_CHECK(hipMemcpyAsync(d_Avals, mat->nnzValues,
+                             sizeof(Scalar) * nnz,
+                             hipMemcpyHostToDevice, stream));
+    HIP_CHECK(hipMemsetAsync(d_x, 0, N * sizeof(Scalar), stream));
+    HIP_CHECK(hipMemcpyAsync(d_b, b, N * sizeof(Scalar),
+                             hipMemcpyHostToDevice, stream));
 
     if (useJacMatrix) {
 #if HAVE_OPENMP
-        if(omp_get_max_threads() > 1)
+        if (omp_get_max_threads() > 1) {
             copyThread->join();
+        }
 #endif
-        HIP_CHECK(hipMemcpyAsync(d_Mrows, jacMat->rowPointers, sizeof(rocsparse_int) * (Nb + 1), hipMemcpyHostToDevice, stream));
-        HIP_CHECK(hipMemcpyAsync(d_Mcols, jacMat->colIndices, sizeof(rocsparse_int) * nnzbs_prec, hipMemcpyHostToDevice, stream));
-        HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues, sizeof(double) * nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, stream));
+        HIP_CHECK(hipMemcpyAsync(d_Mrows, jacMat->rowPointers,
+                                 sizeof(rocsparse_int) * (Nb + 1),
+                                 hipMemcpyHostToDevice, stream));
+        HIP_CHECK(hipMemcpyAsync(d_Mcols, jacMat->colIndices,
+                                 sizeof(rocsparse_int) * nnzbs_prec,
+                                 hipMemcpyHostToDevice, stream));
+        HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues,
+                                 sizeof(Scalar) * nnzbs_prec * block_size * block_size,
+                                 hipMemcpyHostToDevice, stream));
     } else {
-        HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(double) * nnz, hipMemcpyDeviceToDevice, stream));
+        HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals,
+                                 sizeof(Scalar) * nnz, hipMemcpyDeviceToDevice, stream));
     }
 
     if (verbosity >= 3) {
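Note: the transfers above all follow one pattern: byte counts are computed from sizeof(Scalar), so the same code path works whichever value type the backend is instantiated with. A minimal sketch of that pattern with a hypothetical helper (copy_to_device_async is not part of the OPM code; it assumes a HIP toolchain and an already-created stream):

#include <hip/hip_runtime.h>
#include <stdexcept>
#include <vector>

// Hypothetical helper: copy a host vector to a freshly allocated device
// buffer on the given stream, sizing everything from sizeof(Scalar).
template<class Scalar>
Scalar* copy_to_device_async(const std::vector<Scalar>& host, hipStream_t stream)
{
    Scalar* dev = nullptr;
    if (hipMalloc((void**)&dev, sizeof(Scalar) * host.size()) != hipSuccess) {
        throw std::runtime_error("hipMalloc failed");
    }
    // Asynchronous host-to-device copy; the caller synchronizes the stream
    // before using the device data, just as the solver does before solving.
    if (hipMemcpyAsync(dev, host.data(), sizeof(Scalar) * host.size(),
                       hipMemcpyHostToDevice, stream) != hipSuccess) {
        throw std::runtime_error("hipMemcpyAsync failed");
    }
    return dev;
}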
@@ -471,24 +488,31 @@ void rocsparseSolverBackend<block_size>::copy_system_to_gpu(double *b) {
 } // end copy_system_to_gpu()
 
 // don't copy rowpointers and colindices, they stay the same
-template <unsigned int block_size>
-void rocsparseSolverBackend<block_size>::update_system_on_gpu(double *b) {
+template<class Scalar, unsigned int block_size>
+void rocsparseSolverBackend<Scalar,block_size>::
+update_system_on_gpu(Scalar* b)
+{
     Timer t;
 
-    HIP_CHECK(hipMemcpyAsync(d_Avals, mat->nnzValues, sizeof(double) * nnz, hipMemcpyHostToDevice, stream));
-    HIP_CHECK(hipMemsetAsync(d_x, 0, sizeof(double) * N, stream));
-    HIP_CHECK(hipMemcpyAsync(d_b, b, sizeof(double) * N, hipMemcpyHostToDevice, stream));
+    HIP_CHECK(hipMemcpyAsync(d_Avals, mat->nnzValues, sizeof(Scalar) * nnz,
+                             hipMemcpyHostToDevice, stream));
+    HIP_CHECK(hipMemsetAsync(d_x, 0, N * sizeof(Scalar), stream));
+    HIP_CHECK(hipMemcpyAsync(d_b, b, N * sizeof(Scalar),
+                             hipMemcpyHostToDevice, stream));
 
     if (useJacMatrix) {
 #if HAVE_OPENMP
-        if (omp_get_max_threads() > 1)
+        if (omp_get_max_threads() > 1) {
             copyThread->join();
+        }
 #endif
-        HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues, sizeof(double) * nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, stream));
+        HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues,
+                                 sizeof(Scalar) * nnzbs_prec * block_size * block_size,
+                                 hipMemcpyHostToDevice, stream));
     } else {
-        HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(double) * nnz, hipMemcpyDeviceToDevice, stream));
+        HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals,
+                                 sizeof(Scalar) * nnz, hipMemcpyDeviceToDevice, stream));
     }
 
     if (verbosity >= 3) {
         HIP_CHECK(hipStreamSynchronize(stream));
 
@@ -500,8 +524,10 @@ void rocsparseSolverBackend<block_size>::update_system_on_gpu(double *b) {
     }
 } // end update_system_on_gpu()
 
-template <unsigned int block_size>
-bool rocsparseSolverBackend<block_size>::analyze_matrix() {
+template<class Scalar, unsigned int block_size>
+bool rocsparseSolverBackend<Scalar,block_size>::
+analyze_matrix()
+{
     std::size_t d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
     Timer t;
 
@@ -530,7 +556,8 @@ bool rocsparseSolverBackend<block_size>::analyze_matrix() {
     ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(handle, dir, operation, Nb, nnzbs_prec,
                                                  descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_U));
 
-    d_bufferSize = std::max(d_bufferSize_M, std::max(d_bufferSize_L, d_bufferSize_U));
+    d_bufferSize = std::max(d_bufferSize_M,
+                            std::max(d_bufferSize_L, d_bufferSize_U));
 
     HIP_CHECK(hipMalloc((void**)&d_buffer, d_bufferSize));
 
@@ -578,9 +605,10 @@ bool rocsparseSolverBackend<block_size>::analyze_matrix() {
     return true;
 } // end analyze_matrix()
 
-template <unsigned int block_size>
-bool rocsparseSolverBackend<block_size>::create_preconditioner() {
+template<class Scalar, unsigned int block_size>
+bool rocsparseSolverBackend<Scalar,block_size>::
+create_preconditioner()
+{
     Timer t;
 
     bool result = true;
@@ -605,10 +633,9 @@ bool rocsparseSolverBackend<block_size>::create_preconditioner() {
     return result;
 } // end create_preconditioner()
 
-template <unsigned int block_size>
-void rocsparseSolverBackend<block_size>::
-solve_system(WellContributions<double>& wellContribs, BdaResult& res)
+template<class Scalar, unsigned int block_size>
+void rocsparseSolverBackend<Scalar,block_size>::
+solve_system(WellContributions<Scalar>& wellContribs, BdaResult& res)
 {
     Timer t;
 
@@ -621,17 +648,18 @@ solve_system(WellContributions<double>& wellContribs, BdaResult& res)
         out << "rocsparseSolver::solve_system(): " << t.stop() << " s";
         OpmLog::info(out.str());
     }
 
 } // end solve_system()
 
 
 // copy result to host memory
 // caller must be sure that x is a valid array
-template <unsigned int block_size>
-void rocsparseSolverBackend<block_size>::get_result(double *x) {
+template<class Scalar, unsigned int block_size>
+void rocsparseSolverBackend<Scalar,block_size>::
+get_result(Scalar* x)
+{
     Timer t;
 
-    HIP_CHECK(hipMemcpyAsync(x, d_x, sizeof(double) * N, hipMemcpyDeviceToHost, stream));
+    HIP_CHECK(hipMemcpyAsync(x, d_x, sizeof(Scalar) * N,
+                             hipMemcpyDeviceToHost, stream));
     HIP_CHECK(hipStreamSynchronize(stream)); // always wait, caller might want to use x immediately
 
     if (verbosity >= 3) {
@@ -641,13 +669,12 @@ void rocsparseSolverBackend<block_size>::get_result(double *x) {
     }
 } // end get_result()
 
-template <unsigned int block_size>
-SolverStatus rocsparseSolverBackend<block_size>::
-solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
-             double *b,
-             std::shared_ptr<BlockedMatrix<double>> jacMatrix,
-             WellContributions<double>& wellContribs,
+template<class Scalar, unsigned int block_size>
+SolverStatus rocsparseSolverBackend<Scalar,block_size>::
+solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
+             Scalar* b,
+             std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
+             WellContributions<Scalar>& wellContribs,
              BdaResult& res)
 {
     if (initialized == false) {
@@ -672,19 +699,14 @@ SolverStatus rocsparseSolverBackend<block_size>::
     return SolverStatus::BDA_SOLVER_SUCCESS;
 }
 
-#define INSTANTIATE_BDA_FUNCTIONS(n) \
-template rocsparseSolverBackend<n>::rocsparseSolverBackend( \
-    int, int, double, unsigned int, unsigned int);
+#define INSTANTIATE_TYPE(T) \
+    template class rocsparseSolverBackend<T,1>; \
+    template class rocsparseSolverBackend<T,2>; \
+    template class rocsparseSolverBackend<T,3>; \
+    template class rocsparseSolverBackend<T,4>; \
+    template class rocsparseSolverBackend<T,5>; \
+    template class rocsparseSolverBackend<T,6>;
 
-INSTANTIATE_BDA_FUNCTIONS(1);
-INSTANTIATE_BDA_FUNCTIONS(2);
-INSTANTIATE_BDA_FUNCTIONS(3);
-INSTANTIATE_BDA_FUNCTIONS(4);
-INSTANTIATE_BDA_FUNCTIONS(5);
-INSTANTIATE_BDA_FUNCTIONS(6);
+INSTANTIATE_TYPE(double)
 
-#undef INSTANTIATE_BDA_FUNCTIONS
-
-} // namespace Accelerator
-} // namespace Opm
+} // namespace Opm::Accelerator
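Note: INSTANTIATE_TYPE(double) above expands to the six explicit class-template instantiations below, one per supported block size, so all member-function definitions are compiled into this translation unit. The expansion is shown only for illustration; it is implied by the macro, not an additional change in the commit.

// Expansion of INSTANTIATE_TYPE(double):
template class rocsparseSolverBackend<double,1>;
template class rocsparseSolverBackend<double,2>;
template class rocsparseSolverBackend<double,3>;
template class rocsparseSolverBackend<double,4>;
template class rocsparseSolverBackend<double,5>;
template class rocsparseSolverBackend<double,6>;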
@@ -31,16 +31,13 @@
 
 #include <hip/hip_version.h>
 
-namespace Opm
-{
-namespace Accelerator
-{
+namespace Opm::Accelerator {
 
 /// This class implements a rocsparse-based ilu0-bicgstab solver on GPU
-template <unsigned int block_size>
-class rocsparseSolverBackend : public BdaSolver<double,block_size>
+template<class Scalar, unsigned int block_size>
+class rocsparseSolverBackend : public BdaSolver<Scalar,block_size>
 {
-    using Base = BdaSolver<double,block_size>;
+    using Base = BdaSolver<Scalar,block_size>;
 
     using Base::N;
     using Base::Nb;
@@ -59,8 +56,8 @@ private:
     bool useJacMatrix = false;
 
     bool analysis_done = false;
-    std::shared_ptr<BlockedMatrix<double>> mat{}; // original matrix
-    std::shared_ptr<BlockedMatrix<double>> jacMat{}; // matrix for preconditioner
+    std::shared_ptr<BlockedMatrix<Scalar>> mat{}; // original matrix
+    std::shared_ptr<BlockedMatrix<Scalar>> jacMat{}; // matrix for preconditioner
     int nnzbs_prec = 0; // number of nnz blocks in preconditioner matrix M
 
     rocsparse_direction dir = rocsparse_direction_row;
@@ -76,32 +73,31 @@ private:
 
     rocsparse_int *d_Arows, *d_Mrows;
     rocsparse_int *d_Acols, *d_Mcols;
-    double *d_Avals, *d_Mvals;
-    double *d_x, *d_b, *d_r, *d_rw, *d_p; // vectors, used during linear solve
-    double *d_pw, *d_s, *d_t, *d_v;
+    Scalar *d_Avals, *d_Mvals;
+    Scalar *d_x, *d_b, *d_r, *d_rw, *d_p; // vectors, used during linear solve
+    Scalar *d_pw, *d_s, *d_t, *d_v;
     void *d_buffer; // buffer space, used by rocsparse ilu0 analysis
     int ver;
     char rev[64];
 
 
     /// Solve linear system using ilu0-bicgstab
     /// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
     /// \param[inout] res summary of solver result
-    void gpu_pbicgstab(WellContributions<double>& wellContribs, BdaResult& res);
+    void gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res);
 
     /// Initialize GPU and allocate memory
     /// \param[in] matrix matrix A
     /// \param[in] jacMatrix matrix for preconditioner
-    void initialize(std::shared_ptr<BlockedMatrix<double>> matrix,
-                    std::shared_ptr<BlockedMatrix<double>> jacMatrix);
+    void initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
+                    std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix);
 
     /// Copy linear system to GPU
     /// \param[in] b input vector, contains N values
-    void copy_system_to_gpu(double *b);
+    void copy_system_to_gpu(Scalar* b);
 
     /// Update linear system to GPU
    /// \param[in] b input vector, contains N values
-    void update_system_on_gpu(double *b);
+    void update_system_on_gpu(Scalar* b);
 
     /// Analyze sparsity pattern to extract parallelism
     /// \return true iff analysis was successful
@@ -114,16 +110,20 @@ private:
     /// Solve linear system
     /// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
     /// \param[inout] res summary of solver result
-    void solve_system(WellContributions<double>& wellContribs, BdaResult& res);
+    void solve_system(WellContributions<Scalar>& wellContribs, BdaResult& res);
 
 public:
-    /// Construct a openclSolver
-    /// \param[in] linear_solver_verbosity verbosity of openclSolver
-    /// \param[in] maxit maximum number of iterations for openclSolver
-    /// \param[in] tolerance required relative tolerance for openclSolver
+    /// Construct a rocsparseSolver
+    /// \param[in] linear_solver_verbosity verbosity of rocsparseSolver
+    /// \param[in] maxit maximum number of iterations for rocsparseSolver
+    /// \param[in] tolerance required relative tolerance for rocsparseSolver
     /// \param[in] platformID the OpenCL platform to be used
     /// \param[in] deviceID the device to be used
-    rocsparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, unsigned int platformID, unsigned int deviceID);
+    rocsparseSolverBackend(int linear_solver_verbosity,
+                           int maxit,
+                           Scalar tolerance,
+                           unsigned int platformID,
+                           unsigned int deviceID);
 
     /// For the CPR coarse solver
     // rocsparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, ILUReorder opencl_ilu_reorder);
@@ -138,10 +138,10 @@ public:
     /// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
     /// \param[inout] res summary of solver result
     /// \return status code
-    SolverStatus solve_system(std::shared_ptr<BlockedMatrix<double>> matrix,
-                              double* b,
-                              std::shared_ptr<BlockedMatrix<double>> jacMatrix,
-                              WellContributions<double>& wellContribs,
+    SolverStatus solve_system(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
+                              Scalar* b,
+                              std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
+                              WellContributions<Scalar>& wellContribs,
                               BdaResult& res) override;
 
     /// Solve scalar linear system, for example a coarse system of an AMG preconditioner
@@ -150,13 +150,10 @@ public:
 
     /// Get result after linear solve, and peform postprocessing if necessary
     /// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
-    void get_result(double *x) override;
+    void get_result(Scalar* x) override;
 
 }; // end class rocsparseSolverBackend
 
-} // namespace Accelerator
-} // namespace Opm
+} // namespace Opm::Accelerator
 
 #endif