mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
287 lines
11 KiB
C++
287 lines
11 KiB
C++
/*
|
|
Copyright 2024 Equinor ASA
|
|
|
|
This file is part of the Open Porous Media project (OPM).
|
|
|
|
OPM is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
OPM is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include <config.h>
|
|
#include <algorithm>
|
|
|
|
#include <opm/common/OpmLog/OpmLog.hpp>
|
|
#include <opm/common/ErrorMacros.hpp>
|
|
#include <dune/common/timer.hh>
|
|
|
|
#include <opm/simulators/linalg/bda/rocm/rocsparseBILU0.hpp>
|
|
#include <opm/simulators/linalg/bda/Reorder.hpp>
|
|
#include <opm/simulators/linalg/bda/Misc.hpp>
|
|
|
|
#include <sstream>
|
|
|
|
#include <thread>
|
|
extern std::shared_ptr<std::thread> copyThread;
|
|
|
|
#if HAVE_OPENMP
|
|
#include <omp.h>
|
|
#endif //HAVE_OPENMP
|
|
|
|
namespace Opm::Accelerator {
|
|
|
|
using Opm::OpmLog;
|
|
using Dune::Timer;
|
|
|
|
template <class Scalar, unsigned int block_size>
|
|
rocsparseBILU0<Scalar, block_size>::
|
|
rocsparseBILU0(int verbosity_) :
|
|
Base(verbosity_)
|
|
{
|
|
}
|
|
|
|
template <class Scalar, unsigned int block_size>
|
|
bool rocsparseBILU0<Scalar, block_size>::
|
|
initialize(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
|
|
std::shared_ptr<BlockedMatrix<Scalar>> jacMatrix,
|
|
rocsparse_int *d_Arows,
|
|
rocsparse_int *d_Acols)
|
|
{
|
|
this->Nb = matrix->Nb;
|
|
this->N = Nb * block_size;
|
|
this->nnzb = matrix->nnzbs;
|
|
this->nnz = nnzb * block_size * block_size;
|
|
this->nnzbs_prec = this->nnzb;
|
|
|
|
if (jacMatrix) {
|
|
this->useJacMatrix = true;
|
|
this->nnzbs_prec = jacMatrix->nnzbs;
|
|
this->jacMat = jacMatrix;
|
|
}
|
|
|
|
HIP_CHECK(hipMalloc((void**)&d_t, sizeof(Scalar) * this->N));
|
|
|
|
if (this->useJacMatrix) {
|
|
HIP_CHECK(hipMalloc((void**)&d_Mrows, sizeof(rocsparse_int) * (Nb + 1)));
|
|
HIP_CHECK(hipMalloc((void**)&d_Mcols, sizeof(rocsparse_int) * this->nnzbs_prec));
|
|
HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(Scalar) * this->nnzbs_prec * block_size * block_size));
|
|
} else { // preconditioner matrix is same
|
|
HIP_CHECK(hipMalloc((void**)&d_Mvals, sizeof(Scalar) * this->nnzbs_prec * block_size * block_size));
|
|
d_Mcols = d_Acols;
|
|
d_Mrows = d_Arows;
|
|
}
|
|
|
|
return true;
|
|
} // end initialize()
|
|
|
|
template <class Scalar, unsigned int block_size>
|
|
bool rocsparseBILU0<Scalar, block_size>::
|
|
analyze_matrix(BlockedMatrix<Scalar> *mat) {
|
|
return analyze_matrix(mat, &(*this->jacMat));
|
|
}
|
|
|
|
|
|
template <class Scalar, unsigned int block_size>
|
|
bool rocsparseBILU0<Scalar, block_size>::
|
|
analyze_matrix(BlockedMatrix<Scalar> *mat,
|
|
BlockedMatrix<Scalar> *jacMat)
|
|
{
|
|
std::size_t d_bufferSize_M, d_bufferSize_L, d_bufferSize_U, d_bufferSize;
|
|
Timer t;
|
|
|
|
ROCSPARSE_CHECK(rocsparse_create_mat_info(&ilu_info));
|
|
#if HIP_VERSION >= 50400000
|
|
ROCSPARSE_CHECK(rocsparse_create_mat_info(&spmv_info));
|
|
#endif
|
|
|
|
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_M));
|
|
|
|
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_L));
|
|
ROCSPARSE_CHECK(rocsparse_set_mat_fill_mode(descr_L, rocsparse_fill_mode_lower));
|
|
ROCSPARSE_CHECK(rocsparse_set_mat_diag_type(descr_L, rocsparse_diag_type_unit));
|
|
|
|
ROCSPARSE_CHECK(rocsparse_create_mat_descr(&descr_U));
|
|
ROCSPARSE_CHECK(rocsparse_set_mat_fill_mode(descr_U, rocsparse_fill_mode_upper));
|
|
ROCSPARSE_CHECK(rocsparse_set_mat_diag_type(descr_U, rocsparse_diag_type_non_unit));
|
|
|
|
ROCSPARSE_CHECK(rocsparse_dbsrilu0_buffer_size(this->handle, this->dir, Nb, this->nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_M));
|
|
|
|
ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(this->handle, this->dir, this->operation, Nb, this->nnzbs_prec,
|
|
descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_L));
|
|
|
|
ROCSPARSE_CHECK(rocsparse_dbsrsv_buffer_size(this->handle, this->dir, this->operation, Nb, this->nnzbs_prec,
|
|
descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &d_bufferSize_U));
|
|
|
|
d_bufferSize = std::max(d_bufferSize_M, std::max(d_bufferSize_L, d_bufferSize_U));
|
|
|
|
HIP_CHECK(hipMalloc((void**)&d_buffer, d_bufferSize));
|
|
|
|
// analysis of ilu LU decomposition
|
|
ROCSPARSE_CHECK(rocsparse_dbsrilu0_analysis(this->handle, this->dir, \
|
|
Nb, this->nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, \
|
|
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
|
|
|
|
int zero_position = 0;
|
|
rocsparse_status status = rocsparse_bsrilu0_zero_pivot(this->handle, ilu_info, &zero_position);
|
|
if (rocsparse_status_success != status) {
|
|
printf("L has structural and/or numerical zero at L(%d,%d)\n", zero_position, zero_position);
|
|
return false;
|
|
}
|
|
|
|
// analysis of ilu apply
|
|
ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(this->handle, this->dir, this->operation, \
|
|
Nb, this->nnzbs_prec, descr_L, d_Mvals, d_Mrows, d_Mcols, \
|
|
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
|
|
ROCSPARSE_CHECK(rocsparse_dbsrsv_analysis(this->handle, this->dir, this->operation, \
|
|
Nb, this->nnzbs_prec, descr_U, d_Mvals, d_Mrows, d_Mcols, \
|
|
block_size, ilu_info, rocsparse_analysis_policy_reuse, rocsparse_solve_policy_auto, d_buffer));
|
|
|
|
if (verbosity >= 3) {
|
|
HIP_CHECK(hipStreamSynchronize(this->stream));
|
|
std::ostringstream out;
|
|
out << "rocsparseBILU0::analyze_matrix(): " << t.stop() << " s";
|
|
OpmLog::info(out.str());
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
template <class Scalar, unsigned int block_size>
|
|
bool rocsparseBILU0<Scalar, block_size>::
|
|
create_preconditioner(BlockedMatrix<Scalar> *mat) {
|
|
return create_preconditioner(mat, &*this->jacMat);
|
|
}
|
|
|
|
template <class Scalar, unsigned int block_size>
|
|
bool rocsparseBILU0<Scalar, block_size>::
|
|
create_preconditioner(BlockedMatrix<Scalar> *mat,
|
|
BlockedMatrix<Scalar> *jacMat)
|
|
{
|
|
Timer t;
|
|
bool result = true;
|
|
|
|
ROCSPARSE_CHECK(rocsparse_dbsrilu0(this->handle, this->dir, Nb, this->nnzbs_prec, descr_M, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, rocsparse_solve_policy_auto, d_buffer));
|
|
|
|
// Check for zero pivot
|
|
int zero_position = 0;
|
|
rocsparse_status status = rocsparse_bsrilu0_zero_pivot(this->handle, ilu_info, &zero_position);
|
|
if(rocsparse_status_success != status)
|
|
{
|
|
printf("L has structural and/or numerical zero at L(%d,%d)\n", zero_position, zero_position);
|
|
return false;
|
|
}
|
|
|
|
if (verbosity >= 3) {
|
|
HIP_CHECK(hipStreamSynchronize(this->stream));
|
|
std::ostringstream out;
|
|
out << "rocsparseBILU0::create_preconditioner(): " << t.stop() << " s";
|
|
OpmLog::info(out.str());
|
|
}
|
|
return result;
|
|
} // end create_preconditioner()
|
|
|
|
template <class Scalar, unsigned int block_size>
|
|
void rocsparseBILU0<Scalar, block_size>::
|
|
copy_system_to_gpu(Scalar *d_Avals) {
|
|
Timer t;
|
|
bool use_multithreading = true;
|
|
|
|
#if HAVE_OPENMP
|
|
if (omp_get_max_threads() == 1)
|
|
use_multithreading = false;
|
|
#endif
|
|
|
|
if (this->useJacMatrix) {
|
|
if (use_multithreading) {
|
|
copyThread->join();
|
|
}
|
|
|
|
HIP_CHECK(hipMemcpyAsync(d_Mrows, this->jacMat->rowPointers, sizeof(rocsparse_int) * (Nb + 1), hipMemcpyHostToDevice, this->stream));
|
|
HIP_CHECK(hipMemcpyAsync(d_Mcols, this->jacMat->colIndices, sizeof(rocsparse_int) * this->nnzbs_prec, hipMemcpyHostToDevice, this->stream));
|
|
HIP_CHECK(hipMemcpyAsync(d_Mvals, this->jacMat->nnzValues, sizeof(Scalar) * this->nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, this->stream));
|
|
} else {
|
|
HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(Scalar) * nnz, hipMemcpyDeviceToDevice, this->stream));
|
|
}
|
|
|
|
if (verbosity >= 3) {
|
|
HIP_CHECK(hipStreamSynchronize(this->stream));
|
|
std::ostringstream out;
|
|
out << "rocsparseBILU0::copy_system_to_gpu(): " << t.stop() << " s";
|
|
OpmLog::info(out.str());
|
|
}
|
|
} // end copy_system_to_gpu()
|
|
|
|
// don't copy rowpointers and colindices, they stay the same
|
|
template <class Scalar, unsigned int block_size>
|
|
void rocsparseBILU0<Scalar, block_size>::
|
|
update_system_on_gpu(Scalar *d_Avals) {
|
|
Timer t;
|
|
bool use_multithreading = true;
|
|
|
|
#if HAVE_OPENMP
|
|
if (omp_get_max_threads() == 1)
|
|
use_multithreading = false;
|
|
#endif
|
|
|
|
if (this->useJacMatrix) {
|
|
if (use_multithreading) {
|
|
copyThread->join();
|
|
}
|
|
|
|
HIP_CHECK(hipMemcpyAsync(d_Mvals, this->jacMat->nnzValues, sizeof(Scalar) * this->nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, this->stream));
|
|
} else {
|
|
HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(Scalar) * nnz, hipMemcpyDeviceToDevice, this->stream));
|
|
}
|
|
|
|
if (verbosity >= 3) {
|
|
HIP_CHECK(hipStreamSynchronize(this->stream));
|
|
std::ostringstream out;
|
|
out << "rocsparseSolver::update_system_on_gpu(): " << t.stop() << " s";
|
|
OpmLog::info(out.str());
|
|
}
|
|
} // end update_system_on_gpu()
|
|
|
|
template <class Scalar, unsigned int block_size>
|
|
void rocsparseBILU0<Scalar, block_size>::
|
|
apply(Scalar& y, Scalar& x) {
|
|
Scalar zero = 0.0;
|
|
Scalar one = 1.0;
|
|
|
|
Timer t_apply;
|
|
|
|
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(this->handle, this->dir, \
|
|
this->operation, Nb, this->nnzbs_prec, &one, \
|
|
descr_L, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, &y, d_t, rocsparse_solve_policy_auto, d_buffer));
|
|
|
|
ROCSPARSE_CHECK(rocsparse_dbsrsv_solve(this->handle, this->dir, \
|
|
this->operation, Nb, this->nnzbs_prec, &one, \
|
|
descr_U, d_Mvals, d_Mrows, d_Mcols, block_size, ilu_info, d_t, &x, rocsparse_solve_policy_auto, d_buffer));
|
|
|
|
if (verbosity >= 3) {
|
|
std::ostringstream out;
|
|
HIP_CHECK(hipStreamSynchronize(this->stream));
|
|
out << "rocsparseBILU0 apply: " << t_apply.stop() << " s";
|
|
OpmLog::info(out.str());
|
|
}
|
|
}
|
|
|
|
#define INSTANCE_TYPE(T) \
|
|
template class rocsparseBILU0<T,1>; \
|
|
template class rocsparseBILU0<T,2>; \
|
|
template class rocsparseBILU0<T,3>; \
|
|
template class rocsparseBILU0<T,4>; \
|
|
template class rocsparseBILU0<T,5>; \
|
|
template class rocsparseBILU0<T,6>;
|
|
|
|
INSTANCE_TYPE(double)
|
|
} // namespace Opm
|