mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-11 10:25:34 -06:00
refactor rocsparseSolverBackend to allow flexible preconditioner
This commit is contained in:
parent
071f009bf3
commit
3eb87b7b04
@ -264,6 +264,8 @@ if(USE_BDA_BRIDGE)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocm/rocalutionSolverBackend.cpp)
|
||||
endif()
|
||||
if(rocsparse_FOUND AND rocblas_FOUND)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocm/rocsparseBILU0.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocm/rocsparseSolverBackend.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocm/rocsparseWellContributions.cpp)
|
||||
endif()
|
||||
@ -668,6 +670,8 @@ if (USE_BDA_BRIDGE)
|
||||
opm/simulators/linalg/bda/Matrix.hpp
|
||||
opm/simulators/linalg/bda/MultisegmentWellContribution.hpp
|
||||
opm/simulators/linalg/bda/rocm/rocalutionSolverBackend.hpp
|
||||
opm/simulators/linalg/bda/rocm/rocsparseBILU0.hpp
|
||||
opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.hpp
|
||||
opm/simulators/linalg/bda/rocm/rocsparseSolverBackend.hpp
|
||||
opm/simulators/linalg/bda/rocm/rocsparseWellContributions.hpp
|
||||
opm/simulators/linalg/bda/WellContributions.hpp
|
||||
|
@ -115,7 +115,7 @@ BdaBridge(std::string accelerator_mode_,
|
||||
use_gpu = true; // should be replaced by a 'use_bridge' boolean
|
||||
using ROCS = Accelerator::rocsparseSolverBackend<Scalar,block_size>;
|
||||
backend = std::make_unique<ROCS>(linear_solver_verbosity, maxit,
|
||||
tolerance, platformID, deviceID);
|
||||
tolerance, platformID, deviceID, linsolver);
|
||||
#else
|
||||
OPM_THROW(std::logic_error, "Error rocsparseSolver was chosen, but rocsparse/rocblas was not found by CMake");
|
||||
#endif
|
||||
|
317
opm/simulators/linalg/bda/rocm/rocsparseBILU0.cpp
Normal file
317
opm/simulators/linalg/bda/rocm/rocsparseBILU0.cpp
Normal file
@ -0,0 +1,317 @@
|
||||
/*
|
||||
Copyright 2024 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <config.h>
|
||||
#include <algorithm>
|
||||
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include <dune/common/timer.hh>
|
||||
|
||||
#include <opm/simulators/linalg/bda/rocm/rocsparseBILU0.hpp>
|
||||
#include <opm/simulators/linalg/bda/Reorder.hpp>
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#include <iostream> //Razvan
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
#define HIP_CHECK(STAT) \
|
||||
do { \
|
||||
const hipError_t stat = (STAT); \
|
||||
if(stat != hipSuccess) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseBILU0::hip "; \
|
||||
oss << "error: " << hipGetErrorString(stat); \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define ROCSPARSE_CHECK(STAT) \
|
||||
do { \
|
||||
const rocsparse_status stat = (STAT); \
|
||||
if(stat != rocsparse_status_success) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseBILU0::rocsparse "; \
|
||||
oss << "error: " << stat; \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define ROCBLAS_CHECK(STAT) \
|
||||
do { \
|
||||
const rocblas_status stat = (STAT); \
|
||||
if(stat != rocblas_status_success) \
|
||||
{ \
|
||||
std::ostringstream oss; \
|
||||
oss << "rocsparseBILU0::rocblas "; \
|
||||
oss << "error: " << stat; \
|
||||
OPM_THROW(std::logic_error, oss.str()); \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#if HAVE_OPENMP
|
||||
#include <thread>
|
||||
#include <omp.h>
|
||||
extern std::shared_ptr<std::thread> copyThread;
|
||||
#endif //HAVE_OPENMP
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
using Opm::OpmLog;
|
||||
using Dune::Timer;
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
rocsparseBILU0<Scalar, block_size>::
|
||||
rocsparseBILU0(int verbosity_) :
|
||||
Base(verbosity_)
|
||||
{
|
||||
}
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
bool rocsparseBILU0<Scalar, block_size>::
|
||||
initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix, std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix, rocsparse_int *d_Arows, rocsparse_int *d_Acols) {
|
||||
this->Nb = matrix->Nb;
|
||||
this->N = Nb * block_size;
|
||||
this->nnzb = matrix->nnzbs;
|
||||
this->nnz = nnzb * block_size * block_size;
|
||||
this->nnzbs_prec = this->nnzb;
|
||||
|
||||
if (jacMatrix) {
|
||||
this->useJacMatrix = true;
|
||||
this->nnzbs_prec = jacMatrix->nnzbs;
|
||||
this->jacMat = jacMatrix;
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMalloc((void**)&d_t, sizeof(double) * this->N));
|
||||
|
||||
if (this->useJacMatrix) {
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mrows, sizeof(rocsparse_int) * (Nb + 1)));
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mcols, sizeof(rocsparse_int) * this->nnzbs_prec));
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(double) * this->nnzbs_prec * block_size * block_size));
|
||||
} else { // preconditioner matrix is same
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(double) * this->nnzbs_prec * block_size * block_size));
|
||||
d_Mcols = d_Acols;
|
||||
d_Mrows = d_Arows;
|
||||
}
|
||||
|
||||
return true;
|
||||
} // end initialize()
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
bool rocsparseBILU0<Scalar, block_size>::
|
||||
analyze_matrix(BlockedMatrix<Scalar> *mat) {
|
||||
return analyze_matrix(mat, &(*this->jacMat));
|
||||
}
|
||||
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
bool rocsparseBILU0<Scalar, block_size>::
|
||||
analyze_matrix(BlockedMatrix<Scalar> *mat, BlockedMatrix<Scalar> *jacMat) {
|
||||
std::size_t d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
|
||||
Timer t;
|
||||
|
||||
#if HIP_VERSION >= 50400000
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_info(&spmv_info));
|
||||
#endif
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_M));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_L));
|
||||
ROCSPARSE_CHECK(rocsparse_set_mat_fill_mode(descr_L, rocsparse_fill_mode_lower));
|
||||
ROCSPARSE_CHECK(rocsparse_set_mat_diag_type(descr_L, rocsparse_diag_type_unit));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_U));
|
||||
ROCSPARSE_CHECK(rocsparse_set_mat_fill_mode(descr_U, rocsparse_fill_mode_upper));
|
||||
ROCSPARSE_CHECK(rocsparse_set_mat_diag_type(descr_U, rocsparse_diag_type_non_unit));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrilu0_buffer_size(this->handle, this->dir, Nb, this->nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_M));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(this->handle, this->dir, this->operation, Nb, this->nnzbs_prec,
|
||||
descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_L));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(this->handle, this->dir, this->operation, Nb, this->nnzbs_prec,
|
||||
descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_U));
|
||||
|
||||
d_bufferSize = std::max(d_bufferSize_M, std::max(d_bufferSize_L, d_bufferSize_U));
|
||||
|
||||
HIP_CHECK(hipMalloc((void**)&d_buffer, d_bufferSize));
|
||||
|
||||
// analysis of ilu LU decomposition
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrilu0_analysis(this->handle, this->dir, \
|
||||
Nb, this->nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, \
|
||||
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
|
||||
|
||||
int zero_position = 0;
|
||||
rocsparse_status status = rocsparse_bsrilu0_zero_pivot(this->handle, ilu_info, &zero_position);
|
||||
if (rocsparse_status_success != status) {
|
||||
printf("L has structural and/or numerical zero at L(%d,%d)\n", zero_position, zero_position);
|
||||
return false;
|
||||
}
|
||||
|
||||
// analysis of ilu apply
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(this->handle, this->dir, this->operation, \
|
||||
Nb, this->nnzbs_prec, descr_L, d_Mvals, d_Mrows, d_Mcols, \
|
||||
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(this->handle, this->dir, this->operation, \
|
||||
Nb, this->nnzbs_prec, descr_U, d_Mvals, d_Mrows, d_Mcols, \
|
||||
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
|
||||
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(this->stream));
|
||||
std::ostringstream out;
|
||||
out << "rocsparseBILU0::analyze_matrix(): " << t.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
bool rocsparseBILU0<Scalar, block_size>::
|
||||
create_preconditioner(BlockedMatrix<Scalar> *mat) {
|
||||
return create_preconditioner(mat, &*this->jacMat);
|
||||
}
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
bool rocsparseBILU0<Scalar, block_size>::
|
||||
create_preconditioner(BlockedMatrix<Scalar> *mat, BlockedMatrix<Scalar> *jacMat) {
|
||||
Timer t;
|
||||
bool result = true;
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrilu0(this->handle, this->dir, Nb, this->nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, rocsparse_solve_policy_auto, d_buffer));
|
||||
|
||||
// Check for zero pivot
|
||||
int zero_position = 0;
|
||||
rocsparse_status status = rocsparse_bsrilu0_zero_pivot(this->handle, ilu_info, &zero_position);
|
||||
if(rocsparse_status_success != status)
|
||||
{
|
||||
printf("L has structural and/or numerical zero at L(%d,%d)\n", zero_position, zero_position);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(this->stream));
|
||||
std::ostringstream out;
|
||||
out << "rocsparseBILU0::create_preconditioner(): " << t.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
return result;
|
||||
} // end create_preconditioner()
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void rocsparseBILU0<Scalar, block_size>::
|
||||
copy_system_to_gpu(double *d_Avals) {
|
||||
Timer t;
|
||||
|
||||
if (this->useJacMatrix) {
|
||||
#if HAVE_OPENMP
|
||||
if (omp_get_max_threads() > 1) {
|
||||
copyThread->join();
|
||||
}
|
||||
#endif
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mrows, this->jacMat->rowPointers, sizeof(rocsparse_int) * (Nb + 1), hipMemcpyHostToDevice, this->stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mcols, this->jacMat->colIndices, sizeof(rocsparse_int) * this->nnzbs_prec, hipMemcpyHostToDevice, this->stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, this->jacMat->nnzValues, sizeof(double) * this->nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, this->stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(double) * nnz, hipMemcpyDeviceToDevice, this->stream));
|
||||
}
|
||||
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(this->stream));
|
||||
std::ostringstream out;
|
||||
out << "rocsparseBILU0::copy_system_to_gpu(): " << t.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
} // end copy_system_to_gpu()
|
||||
|
||||
// don't copy rowpointers and colindices, they stay the same
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void rocsparseBILU0<Scalar, block_size>::
|
||||
update_system_on_gpu(double *d_Avals) {
|
||||
Timer t;
|
||||
|
||||
if (this->useJacMatrix) {
|
||||
#if HAVE_OPENMP
|
||||
if (omp_get_max_threads() > 1) {
|
||||
copyThread->join();
|
||||
}
|
||||
#endif
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, this->jacMat->nnzValues, sizeof(double) * this->nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, this->stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(double) * nnz, hipMemcpyDeviceToDevice, this->stream));
|
||||
}
|
||||
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(this->stream));
|
||||
std::ostringstream out;
|
||||
out << "rocsparseSolver::update_system_on_gpu(): " << t.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
} // end update_system_on_gpu()
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void rocsparseBILU0<Scalar, block_size>::
|
||||
apply(double& y, double& x) {
|
||||
double zero = 0.0;
|
||||
double one = 1.0;
|
||||
|
||||
Timer t_apply;
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(this->handle, this->dir, \
|
||||
this->operation, Nb, this->nnzbs_prec, &one, \
|
||||
descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &y, d_t, rocsparse_solve_policy_auto, d_buffer));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(this->handle, this->dir, \
|
||||
this->operation, Nb, this->nnzbs_prec, &one, \
|
||||
descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_t, &x, rocsparse_solve_policy_auto, d_buffer));
|
||||
|
||||
if (verbosity >= 3) {
|
||||
std::ostringstream out;
|
||||
HIP_CHECK(hipStreamSynchronize(this->stream));
|
||||
out << "rocsparseBILU0 apply: " << t_apply.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
}
|
||||
|
||||
#define INSTANCE_TYPE(T) \
|
||||
template class rocsparseBILU0<T,1>; \
|
||||
template class rocsparseBILU0<T,2>; \
|
||||
template class rocsparseBILU0<T,3>; \
|
||||
template class rocsparseBILU0<T,4>; \
|
||||
template class rocsparseBILU0<T,5>; \
|
||||
template class rocsparseBILU0<T,6>;
|
||||
|
||||
INSTANCE_TYPE(double)
|
||||
// #define INSTANTIATE_BDA_FUNCTIONS(n) \
|
||||
// template class rocsparseBILU0<n>;
|
||||
//
|
||||
// INSTANTIATE_BDA_FUNCTIONS(1);
|
||||
// INSTANTIATE_BDA_FUNCTIONS(2);
|
||||
// INSTANTIATE_BDA_FUNCTIONS(3);
|
||||
// INSTANTIATE_BDA_FUNCTIONS(4);
|
||||
// INSTANTIATE_BDA_FUNCTIONS(5);
|
||||
// INSTANTIATE_BDA_FUNCTIONS(6);
|
||||
//
|
||||
// #undef INSTANTIATE_BDA_FUNCTIONS
|
||||
//
|
||||
// } // namespace Accelerator
|
||||
} // namespace Opm
|
97
opm/simulators/linalg/bda/rocm/rocsparseBILU0.hpp
Normal file
97
opm/simulators/linalg/bda/rocm/rocsparseBILU0.hpp
Normal file
@ -0,0 +1,97 @@
|
||||
/*
|
||||
Copyright 2024 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef OPM_ROCSPARSEBILU0_HPP
|
||||
#define OPM_ROCSPARSEBILU0_HPP
|
||||
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
#include <opm/simulators/linalg/bda/BlockedMatrix.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.hpp>
|
||||
|
||||
#include <rocblas/rocblas.h>
|
||||
#include <rocsparse/rocsparse.h>
|
||||
|
||||
#include <hip/hip_version.h>
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
/// This class implements a Blocked ILU0 preconditioner
|
||||
/// The decomposition is done on GPU, using exact decomposition, or ChowPatel decomposition
|
||||
/// The preconditioner is applied via two exact triangular solves
|
||||
template <class Scalar, unsigned int block_size>
|
||||
class rocsparseBILU0 : public rocsparsePreconditioner<Scalar, block_size>
|
||||
{
|
||||
typedef rocsparsePreconditioner<Scalar, block_size> Base;
|
||||
|
||||
using Base::N;
|
||||
using Base::Nb;
|
||||
using Base::nnz;
|
||||
using Base::nnzb;
|
||||
using Base::verbosity;
|
||||
|
||||
private:
|
||||
|
||||
rocsparse_mat_descr descr_M, descr_L, descr_U;
|
||||
rocsparse_mat_info ilu_info;
|
||||
#if HIP_VERSION >= 50400000
|
||||
rocsparse_mat_info spmv_info;
|
||||
#endif
|
||||
|
||||
rocsparse_int *d_Mrows, *d_Mcols;
|
||||
double *d_Mvals, *d_t;
|
||||
void *d_buffer; // buffer space, used by rocsparse ilu0 analysis
|
||||
|
||||
public:
|
||||
|
||||
rocsparseBILU0(int verbosity_);
|
||||
|
||||
/// Initialize GPU and allocate memory
|
||||
/// \param[in] matrix matrix A
|
||||
/// \param[in] jacMatrix matrix for preconditioner
|
||||
bool initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix, std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix, rocsparse_int *d_Arows, rocsparse_int *d_Acols);
|
||||
|
||||
// analysis, extract parallelism if specified
|
||||
bool analyze_matrix(BlockedMatrix<Scalar> *mat);
|
||||
bool analyze_matrix(BlockedMatrix<Scalar> *mat, BlockedMatrix<Scalar> *jacMat);
|
||||
|
||||
// ilu_decomposition
|
||||
bool create_preconditioner(BlockedMatrix<Scalar> *mat) override;
|
||||
bool create_preconditioner(BlockedMatrix<Scalar> *mat, BlockedMatrix<Scalar> *jacMat) override;
|
||||
|
||||
// apply preconditioner, x = prec(y)
|
||||
// via Lz = y
|
||||
// and Ux = z
|
||||
void apply(double& y, double& x) override;
|
||||
|
||||
#if HAVE_OPENCL
|
||||
// apply preconditioner, x = prec(y)
|
||||
void apply(const cl::Buffer& y, cl::Buffer& x) {}
|
||||
#endif
|
||||
|
||||
void copy_system_to_gpu(double *mVals);
|
||||
void update_system(double *vals, double *b);
|
||||
void update_system_on_gpu(double *b);
|
||||
};
|
||||
} // namespace Opm
|
||||
|
||||
#endif
|
||||
|
76
opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.cpp
Normal file
76
opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.cpp
Normal file
@ -0,0 +1,76 @@
|
||||
/*
|
||||
Copyright 2024 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <config.h>
|
||||
#include <memory>
|
||||
#include <opm/common/TimingMacros.hpp>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/rocm/rocsparseBILU0.hpp>
|
||||
// #include <opm/simulators/linalg/bda/rocm/rocsparseCPR.hpp>
|
||||
#include <opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.hpp>
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
std::unique_ptr<rocsparsePreconditioner<Scalar,block_size> > rocsparsePreconditioner<Scalar,block_size>::
|
||||
create(PreconditionerType type, int verbosity) {
|
||||
if (type == PreconditionerType::BILU0) {
|
||||
return std::make_unique<Opm::Accelerator::rocsparseBILU0<Scalar, block_size> >(verbosity);
|
||||
// } else if (type == PreconditionerType::CPR) {
|
||||
// return std::make_unique<Opm::Accelerator::rocsparseCPR<block_size> >(verbosity);
|
||||
} else {
|
||||
OPM_THROW(std::logic_error, "Invalid PreconditionerType");
|
||||
}
|
||||
}
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void rocsparsePreconditioner<Scalar, block_size>::
|
||||
set_matrix_analysis(rocsparse_mat_descr descr_L, rocsparse_mat_descr descr_U) {
|
||||
descr_L = descr_L;
|
||||
descr_U = descr_U;
|
||||
}
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void rocsparsePreconditioner<Scalar, block_size>::
|
||||
set_context(rocsparse_handle handle, rocsparse_direction dir, rocsparse_operation operation, hipStream_t stream) {
|
||||
this->handle = handle;
|
||||
this->dir = dir;
|
||||
this->operation = operation;
|
||||
this->stream = stream;
|
||||
}
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
void rocsparsePreconditioner<Scalar, block_size>::
|
||||
setJacMat(BlockedMatrix<Scalar> jacMat) {
|
||||
this->jacMat = std::make_shared<BlockedMatrix<Scalar>>(jacMat);
|
||||
}
|
||||
|
||||
#define INSTANTIATE_TYPE(T) \
|
||||
template class rocsparsePreconditioner<T,1>; \
|
||||
template class rocsparsePreconditioner<T,2>; \
|
||||
template class rocsparsePreconditioner<T,3>; \
|
||||
template class rocsparsePreconditioner<T,4>; \
|
||||
template class rocsparsePreconditioner<T,5>; \
|
||||
template class rocsparsePreconditioner<T,6>;
|
||||
|
||||
INSTANTIATE_TYPE(double)
|
||||
|
||||
} //namespace Opm
|
||||
|
79
opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.hpp
Normal file
79
opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.hpp
Normal file
@ -0,0 +1,79 @@
|
||||
/*
|
||||
Copyright 2024 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef OPM_ROCSPARSEPRECONDITIONER_HEADER_INCLUDED
|
||||
#define OPM_ROCSPARSEPRECONDITIONER_HEADER_INCLUDED
|
||||
|
||||
#include <opm/simulators/linalg/bda/Preconditioner.hpp>
|
||||
|
||||
#include <rocsparse/rocsparse.h>
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
template<class Scalar> class BlockedMatrix;
|
||||
|
||||
template <class Scalar, unsigned int block_size>
|
||||
class rocsparsePreconditioner : public Preconditioner<Scalar, block_size>
|
||||
{
|
||||
|
||||
protected:
|
||||
rocsparse_handle handle;
|
||||
rocsparse_direction dir = rocsparse_direction_row;
|
||||
rocsparse_operation operation = rocsparse_operation_none;
|
||||
rocsparse_mat_descr descr_L, descr_U;
|
||||
|
||||
hipStream_t stream;
|
||||
|
||||
rocsparsePreconditioner(int verbosity_) :
|
||||
Preconditioner<Scalar, block_size>(verbosity_)
|
||||
{};
|
||||
|
||||
public:
|
||||
|
||||
int nnzbs_prec = 0; // number of nnz blocks in preconditioner matrix M
|
||||
bool useJacMatrix = false;
|
||||
std::shared_ptr<BlockedMatrix<Scalar>> jacMat{}; // matrix for preconditioner
|
||||
|
||||
virtual ~rocsparsePreconditioner() = default;
|
||||
|
||||
static std::unique_ptr<rocsparsePreconditioner<Scalar, block_size>> create(PreconditionerType type, int verbosity);
|
||||
|
||||
// apply preconditioner, x = prec(y)
|
||||
virtual void apply(double& y, double& x) = 0;
|
||||
|
||||
// create/update preconditioner, probably used every linear solve
|
||||
// the version with two params can be overloaded, if not, it will default to using the one param version
|
||||
virtual bool create_preconditioner(BlockedMatrix<Scalar> *mat) = 0;
|
||||
virtual bool create_preconditioner(BlockedMatrix<Scalar> *mat, BlockedMatrix<Scalar> *jacMat) = 0;
|
||||
|
||||
virtual bool initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix, std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix, rocsparse_int *d_Arows, rocsparse_int *d_Acols) = 0;
|
||||
|
||||
virtual void copy_system_to_gpu(double *b)=0;
|
||||
|
||||
/// Update linear system to GPU
|
||||
/// \param[in] b input vector, contains N values
|
||||
virtual void update_system_on_gpu(double *b)=0;
|
||||
|
||||
void set_matrix_analysis(rocsparse_mat_descr descr_L, rocsparse_mat_descr descr_U);
|
||||
void set_context(rocsparse_handle handle, rocsparse_direction dir, rocsparse_operation operation, hipStream_t stream);
|
||||
void setJacMat(BlockedMatrix<Scalar> jacMat);
|
||||
};
|
||||
} //namespace Opm
|
||||
|
||||
#endif
|
@ -41,6 +41,8 @@
|
||||
|
||||
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/Preconditioner.hpp>
|
||||
|
||||
#include <hip/hip_runtime_api.h>
|
||||
#include <hip/hip_version.h>
|
||||
|
||||
@ -87,12 +89,6 @@
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#if HAVE_OPENMP
|
||||
#include <thread>
|
||||
#include <omp.h>
|
||||
extern std::shared_ptr<std::thread> copyThread;
|
||||
#endif //HAVE_OPENMP
|
||||
|
||||
namespace Opm::Accelerator {
|
||||
|
||||
using Dune::Timer;
|
||||
@ -100,10 +96,26 @@ using Dune::Timer;
|
||||
template<class Scalar, unsigned int block_size>
|
||||
rocsparseSolverBackend<Scalar,block_size>::
|
||||
rocsparseSolverBackend(int verbosity_, int maxit_, Scalar tolerance_,
|
||||
unsigned int platformID_, unsigned int deviceID_)
|
||||
unsigned int platformID_, unsigned int deviceID_, std::string linsolver)
|
||||
: Base(verbosity_, maxit_, tolerance_, platformID_, deviceID_)
|
||||
{
|
||||
int numDevices = 0;
|
||||
bool use_cpr, use_isai;
|
||||
|
||||
if (linsolver.compare("ilu0") == 0) {
|
||||
use_cpr = false;
|
||||
use_isai = false;
|
||||
} else if (linsolver.compare("cpr_quasiimpes") == 0) {
|
||||
use_cpr = true;
|
||||
use_isai = false;
|
||||
} else if (linsolver.compare("isai") == 0) {
|
||||
OPM_THROW(std::logic_error, "Error rocsparseSolver does not support --linerar-solver=isai");
|
||||
} else if (linsolver.compare("cpr_trueimpes") == 0) {
|
||||
OPM_THROW(std::logic_error, "Error rocsparseSolver does not support --linerar-solver=cpr_trueimpes");
|
||||
} else {
|
||||
OPM_THROW(std::logic_error, "Error unknown value for argument --linear-solver, " + linsolver);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (static_cast<int>(deviceID) >= numDevices) {
|
||||
OPM_THROW(std::runtime_error, "Invalid HIP device ID");
|
||||
@ -124,6 +136,15 @@ rocsparseSolverBackend(int verbosity_, int maxit_, Scalar tolerance_,
|
||||
HIP_CHECK(hipStreamCreate(&stream));
|
||||
ROCSPARSE_CHECK(rocsparse_set_stream(handle, stream));
|
||||
ROCBLAS_CHECK(rocblas_set_stream(blas_handle, stream));
|
||||
|
||||
using PreconditionerType = typename Opm::Accelerator::PreconditionerType;
|
||||
// if (use_cpr) {
|
||||
// prec = rocsparsePreconditioner<block_size>::create(PreconditionerType::CPR, verbosity);
|
||||
// } else {
|
||||
prec = rocsparsePreconditioner<Scalar, block_size>::create(PreconditionerType::BILU0, verbosity);
|
||||
// }
|
||||
|
||||
prec->set_context(handle, dir, operation, stream);
|
||||
}
|
||||
|
||||
template<class Scalar, unsigned int block_size>
|
||||
@ -170,17 +191,17 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
|
||||
// HIP_VERSION is defined as (HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH)
|
||||
#if HIP_VERSION >= 60000000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_M,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_x, &zero, d_r));
|
||||
#elif HIP_VERSION >= 50400000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_M,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_x, &zero, d_r));
|
||||
#else
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_M,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
d_x, &zero, d_r));
|
||||
#endif
|
||||
@ -216,13 +237,9 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
|
||||
t_prec.start();
|
||||
}
|
||||
|
||||
// apply ilu0
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(handle, dir, \
|
||||
operation, Nb, nnzbs_prec, &one, \
|
||||
descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_p, d_t, rocsparse_solve_policy_auto, d_buffer));
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(handle, dir, \
|
||||
operation, Nb, nnzbs_prec, &one, \
|
||||
descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_t, d_pw, rocsparse_solve_policy_auto, d_buffer));
|
||||
// apply ilu0
|
||||
prec->apply(*d_p, *d_pw);
|
||||
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
t_prec.stop();
|
||||
@ -232,17 +249,17 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
|
||||
// spmv
|
||||
#if HIP_VERSION >= 60000000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_M,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_pw, &zero, d_v));
|
||||
#elif HIP_VERSION >= 50400000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_M,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_pw, &zero, d_v));
|
||||
#else
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_M,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
d_pw, &zero, d_v));
|
||||
#endif
|
||||
@ -283,12 +300,9 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
|
||||
if (verbosity >= 3) {
|
||||
t_prec.start();
|
||||
}
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(handle, dir, \
|
||||
operation, Nb, nnzbs_prec, &one, \
|
||||
descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_r, d_t, rocsparse_solve_policy_auto, d_buffer));
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(handle, dir, \
|
||||
operation, Nb, nnzbs_prec, &one, \
|
||||
descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_t, d_s, rocsparse_solve_policy_auto, d_buffer));
|
||||
|
||||
prec->apply(*d_r, *d_s);
|
||||
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
t_prec.stop();
|
||||
@ -298,17 +312,17 @@ gpu_pbicgstab([[maybe_unused]] WellContributions<Scalar>& wellContribs,
|
||||
// spmv
|
||||
#if HIP_VERSION >= 60000000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_M,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_s, &zero, d_t));
|
||||
#elif HIP_VERSION >= 50400000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_ex(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_M,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
spmv_info, d_s, &zero, d_t));
|
||||
#else
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv(handle, dir, operation,
|
||||
Nb, Nb, nnzb, &one, descr_M,
|
||||
Nb, Nb, nnzb, &one, descr_A,
|
||||
d_Avals, d_Arows, d_Acols, block_size,
|
||||
d_s, &zero, d_t));
|
||||
#endif
|
||||
@ -384,14 +398,18 @@ initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
|
||||
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix)
|
||||
{
|
||||
this->Nb = matrix->Nb;
|
||||
this->N = Nb * block_size;
|
||||
this->N = this->Nb * block_size;
|
||||
this->nnzb = matrix->nnzbs;
|
||||
this->nnz = nnzb * block_size * block_size;
|
||||
nnzbs_prec = nnzb;
|
||||
this->nnz = this->nnzb * block_size * block_size;
|
||||
|
||||
if (jacMatrix) {
|
||||
useJacMatrix = true;
|
||||
nnzbs_prec = jacMatrix->nnzbs;
|
||||
prec->useJacMatrix = true;
|
||||
prec->jacMat = jacMatrix;
|
||||
prec->nnzbs_prec = jacMatrix->nnzbs;
|
||||
} else {
|
||||
prec->useJacMatrix = false;
|
||||
prec->jacMat = matrix;
|
||||
prec->nnzbs_prec = this->nnzb;
|
||||
}
|
||||
|
||||
std::ostringstream out;
|
||||
@ -408,7 +426,6 @@ initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
|
||||
out.clear();
|
||||
|
||||
mat = matrix;
|
||||
jacMat = jacMatrix;
|
||||
|
||||
HIP_CHECK(hipMalloc((void**)&d_r, sizeof(Scalar) * N));
|
||||
HIP_CHECK(hipMalloc((void**)&d_rw, sizeof(Scalar) * N));
|
||||
@ -424,15 +441,7 @@ initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
|
||||
HIP_CHECK(hipMalloc((void**)&d_x, sizeof(Scalar) * N));
|
||||
HIP_CHECK(hipMalloc((void**)&d_b, sizeof(Scalar) * N));
|
||||
|
||||
if (useJacMatrix) {
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mrows, sizeof(rocsparse_int) * (Nb + 1)));
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mcols, sizeof(rocsparse_int) * nnzbs_prec));
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(Scalar) * nnzbs_prec * block_size * block_size));
|
||||
} else { // preconditioner matrix is same
|
||||
HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(Scalar) * nnzbs_prec * block_size * block_size));
|
||||
d_Mcols = d_Acols;
|
||||
d_Mrows = d_Arows;
|
||||
}
|
||||
prec->initialize(matrix, jacMatrix, d_Arows, d_Acols);//TODO: do we need all parameters?
|
||||
|
||||
initialized = true;
|
||||
} // end initialize()
|
||||
@ -456,25 +465,7 @@ copy_system_to_gpu(Scalar *b)
|
||||
HIP_CHECK(hipMemcpyAsync(d_b, b, N * sizeof(Scalar) * N,
|
||||
hipMemcpyHostToDevice, stream));
|
||||
|
||||
if (useJacMatrix) {
|
||||
#if HAVE_OPENMP
|
||||
if (omp_get_max_threads() > 1) {
|
||||
copyThread->join();
|
||||
}
|
||||
#endif
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mrows, jacMat->rowPointers,
|
||||
sizeof(rocsparse_int) * (Nb + 1),
|
||||
hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mcols, jacMat->colIndices,
|
||||
sizeof(rocsparse_int) * nnzbs_prec,
|
||||
hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues,
|
||||
sizeof(Scalar) * nnzbs_prec * block_size * block_size,
|
||||
hipMemcpyHostToDevice, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals,
|
||||
sizeof(Scalar) * nnz, hipMemcpyDeviceToDevice, stream));
|
||||
}
|
||||
prec->copy_system_to_gpu(d_Avals);
|
||||
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
@ -500,19 +491,8 @@ update_system_on_gpu(Scalar* b)
|
||||
HIP_CHECK(hipMemcpyAsync(d_b, b, N* sizeof(Scalar),
|
||||
hipMemcpyHostToDevice, stream));
|
||||
|
||||
if (useJacMatrix) {
|
||||
#if HAVE_OPENMP
|
||||
if (omp_get_max_threads() > 1) {
|
||||
copyThread->join();
|
||||
}
|
||||
#endif
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues,
|
||||
sizeof(Scalar) * nnzbs_prec * block_size * block_size,
|
||||
hipMemcpyHostToDevice, stream));
|
||||
} else {
|
||||
HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals,
|
||||
sizeof(Scalar) * nnz, hipMemcpyDeviceToDevice, stream));
|
||||
}
|
||||
prec->update_system_on_gpu(d_Avals);
|
||||
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
@ -528,58 +508,15 @@ template<class Scalar, unsigned int block_size>
|
||||
bool rocsparseSolverBackend<Scalar,block_size>::
|
||||
analyze_matrix()
|
||||
{
|
||||
std::size_t d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
|
||||
Timer t;
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_set_pointer_mode(handle, rocsparse_pointer_mode_host));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_info(&ilu_info));
|
||||
#if HIP_VERSION >= 50400000
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_info(&spmv_info));
|
||||
#endif
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_A));
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_M));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_L));
|
||||
ROCSPARSE_CHECK(rocsparse_set_mat_fill_mode(descr_L, rocsparse_fill_mode_lower));
|
||||
ROCSPARSE_CHECK(rocsparse_set_mat_diag_type(descr_L, rocsparse_diag_type_unit));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_U));
|
||||
ROCSPARSE_CHECK(rocsparse_set_mat_fill_mode(descr_U, rocsparse_fill_mode_upper));
|
||||
ROCSPARSE_CHECK(rocsparse_set_mat_diag_type(descr_U, rocsparse_diag_type_non_unit));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrilu0_buffer_size(handle, dir, Nb, nnzbs_prec,
|
||||
descr_M, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_M));
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(handle, dir, operation, Nb, nnzbs_prec,
|
||||
descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_L));
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(handle, dir, operation, Nb, nnzbs_prec,
|
||||
descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_U));
|
||||
|
||||
d_bufferSize = std::max(d_bufferSize_M,
|
||||
std::max(d_bufferSize_L, d_bufferSize_U));
|
||||
|
||||
HIP_CHECK(hipMalloc((void**)&d_buffer, d_bufferSize));
|
||||
|
||||
// analysis of ilu LU decomposition
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrilu0_analysis(handle, dir, \
|
||||
Nb, nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, \
|
||||
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
|
||||
|
||||
int zero_position = 0;
|
||||
rocsparse_status status = rocsparse_bsrilu0_zero_pivot(handle, ilu_info, &zero_position);
|
||||
if (rocsparse_status_success != status) {
|
||||
printf("L has structural and/or numerical zero at L(%d,%d)\n", zero_position, zero_position);
|
||||
return false;
|
||||
}
|
||||
|
||||
// analysis of ilu apply
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(handle, dir, operation, \
|
||||
Nb, nnzbs_prec, descr_L, d_Mvals, d_Mrows, d_Mcols, \
|
||||
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(handle, dir, operation, \
|
||||
Nb, nnzbs_prec, descr_U, d_Mvals, d_Mrows, d_Mcols, \
|
||||
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
|
||||
|
||||
#if HIP_VERSION >= 60000000
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrmv_analysis(handle, dir, operation,
|
||||
@ -593,6 +530,13 @@ analyze_matrix()
|
||||
block_size, spmv_info));
|
||||
#endif
|
||||
|
||||
if(!prec->analyze_matrix(&*mat)) {
|
||||
std::ostringstream out;
|
||||
out << "Warning: rocsparseSolver matrix analysis failed!";
|
||||
OpmLog::info(out.str());
|
||||
return false;
|
||||
}
|
||||
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
std::ostringstream out;
|
||||
@ -609,28 +553,7 @@ template<class Scalar, unsigned int block_size>
|
||||
bool rocsparseSolverBackend<Scalar,block_size>::
|
||||
create_preconditioner()
|
||||
{
|
||||
Timer t;
|
||||
|
||||
bool result = true;
|
||||
ROCSPARSE_CHECK(rocsparse_dbsrilu0(handle, dir, Nb, nnzbs_prec, descr_M,
|
||||
d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, rocsparse_solve_policy_auto, d_buffer));
|
||||
|
||||
// Check for zero pivot
|
||||
int zero_position = 0;
|
||||
rocsparse_status status = rocsparse_bsrilu0_zero_pivot(handle, ilu_info, &zero_position);
|
||||
if(rocsparse_status_success != status)
|
||||
{
|
||||
printf("L has structural and/or numerical zero at L(%d,%d)\n", zero_position, zero_position);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
std::ostringstream out;
|
||||
out << "rocsparseSolver::create_preconditioner(): " << t.stop() << " s";
|
||||
OpmLog::info(out.str());
|
||||
}
|
||||
return result;
|
||||
return prec->create_preconditioner(&*mat);
|
||||
} // end create_preconditioner()
|
||||
|
||||
template<class Scalar, unsigned int block_size>
|
||||
|
@ -26,6 +26,8 @@
|
||||
#include <opm/simulators/linalg/bda/BdaSolver.hpp>
|
||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/rocm/rocsparsePreconditioner.hpp>
|
||||
|
||||
#include <rocblas/rocblas.h>
|
||||
#include <rocsparse/rocsparse.h>
|
||||
|
||||
@ -57,29 +59,26 @@ private:
|
||||
|
||||
bool analysis_done = false;
|
||||
std::shared_ptr<BlockedMatrix<Scalar>> mat{}; // original matrix
|
||||
std::shared_ptr<BlockedMatrix<Scalar>> jacMat{}; // matrix for preconditioner
|
||||
int nnzbs_prec = 0; // number of nnz blocks in preconditioner matrix M
|
||||
|
||||
rocsparse_direction dir = rocsparse_direction_row;
|
||||
rocsparse_operation operation = rocsparse_operation_none;
|
||||
rocsparse_handle handle;
|
||||
rocblas_handle blas_handle;
|
||||
rocsparse_mat_descr descr_A, descr_M, descr_L, descr_U;
|
||||
rocsparse_mat_info ilu_info;
|
||||
rocsparse_mat_descr descr_A;
|
||||
#if HIP_VERSION >= 50400000
|
||||
rocsparse_mat_info spmv_info;
|
||||
#endif
|
||||
hipStream_t stream;
|
||||
|
||||
rocsparse_int *d_Arows, *d_Mrows;
|
||||
rocsparse_int *d_Acols, *d_Mcols;
|
||||
Scalar *d_Avals, *d_Mvals;
|
||||
Scalar *d_x, *d_b, *d_r, *d_rw, *d_p; // vectors, used during linear solve
|
||||
rocsparse_int *d_Arows, *d_Acols;
|
||||
Scalar *d_Avals;
|
||||
Scalar *d_x, *d_b, *d_r, *d_rw, *d_p; // vectors, used during linear solve
|
||||
Scalar *d_pw, *d_s, *d_t, *d_v;
|
||||
void *d_buffer; // buffer space, used by rocsparse ilu0 analysis
|
||||
int ver;
|
||||
char rev[64];
|
||||
|
||||
std::unique_ptr<rocsparsePreconditioner<Scalar, block_size> > prec; // can perform blocked ILU0 and AMG on pressure component
|
||||
|
||||
/// Solve linear system using ilu0-bicgstab
|
||||
/// \param[in] wellContribs WellContributions, to apply them separately, instead of adding them to matrix A
|
||||
/// \param[inout] res summary of solver result
|
||||
@ -119,14 +118,16 @@ public:
|
||||
/// \param[in] tolerance required relative tolerance for rocsparseSolver
|
||||
/// \param[in] platformID the OpenCL platform to be used
|
||||
/// \param[in] deviceID the device to be used
|
||||
/// \param[in] linsolver indicating the preconditioner, equal to the --linear-solver cmdline argument
|
||||
rocsparseSolverBackend(int linear_solver_verbosity,
|
||||
int maxit,
|
||||
Scalar tolerance,
|
||||
unsigned int platformID,
|
||||
unsigned int deviceID);
|
||||
unsigned int deviceID,
|
||||
std::string linsolver);
|
||||
|
||||
/// For the CPR coarse solver
|
||||
// rocsparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, ILUReorder opencl_ilu_reorder);
|
||||
rocsparseSolverBackend(int linear_solver_verbosity, int maxit, double tolerance, bool opencl_ilu_reorder);
|
||||
|
||||
/// Destroy a openclSolver, and free memory
|
||||
~rocsparseSolverBackend();
|
||||
@ -144,10 +145,6 @@ public:
|
||||
WellContributions<Scalar>& wellContribs,
|
||||
BdaResult& res) override;
|
||||
|
||||
/// Solve scalar linear system, for example a coarse system of an AMG preconditioner
|
||||
/// Data is already on the GPU
|
||||
// SolverStatus solve_system(BdaResult &res);
|
||||
|
||||
/// Get result after linear solve, and peform postprocessing if necessary
|
||||
/// \param[inout] x resulting x vector, caller must guarantee that x points to a valid array
|
||||
void get_result(Scalar* x) override;
|
||||
|
Loading…
Reference in New Issue
Block a user