remove dependency of std::thread on openmp

Source repository: https://github.com/OPM/opm-simulators.git
Commit: f063f6776e (parent: 1477a4d76e)
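The change follows one pattern throughout: `#include <thread>` and the shared `copyThread` handle move out of the `#if HAVE_OPENMP` blocks, so the copy thread is plain `std::thread` machinery that compiles without OpenMP. Only `<omp.h>` and the `omp_get_max_threads()` query stay guarded, and each use site gates the thread on a runtime `use_multithreading` flag instead of on the preprocessor. A minimal, self-contained sketch of the resulting shape (names simplified, not the actual OPM sources):

// sketch.cpp -- minimal illustration of the commit's pattern (hypothetical
// names; the real code lives in the OPM linear-solver backends).
#include <memory>
#include <thread>

#if HAVE_OPENMP
#include <omp.h>
#endif

// The copy thread now exists unconditionally; OpenMP is only consulted
// to decide whether it is worth using.
std::shared_ptr<std::thread> copyThread;

int main()
{
    bool use_multithreading = true;
#if HAVE_OPENMP
    // If the user forces a single thread (--threads-per-process=1),
    // skip the extra copy thread.
    if (omp_get_max_threads() == 1)
        use_multithreading = false;
#endif

    int data = 0;
    if (use_multithreading) {
        copyThread = std::make_shared<std::thread>([&]() { data = 42; });
        copyThread->join();   // wait before the main thread reads 'data'
    } else {
        data = 42;            // do the work inline
    }
    return data == 42 ? 0 : 1;
}

Build it plain (g++ -pthread sketch.cpp) or with OpenMP (g++ -fopenmp -DHAVE_OPENMP=1 -pthread sketch.cpp); in the latter case running with OMP_NUM_THREADS=1 disables the copy thread, mirroring the simulator's --threads-per-process=1 behavior.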
@@ -43,11 +43,11 @@
 #include <opm/grid/polyhedralgrid.hh>
 
-#if HAVE_OPENMP
 #include <thread>
-#include <omp.h>
 
 std::shared_ptr<std::thread> copyThread;
+
+#if HAVE_OPENMP
+#include <omp.h>
 #endif // HAVE_OPENMP
 
 namespace Opm::detail {
@@ -113,9 +113,11 @@ apply(Vector& rhs,
         }
 #endif
 
-        bool use_multithreading = false;
+        bool use_multithreading = true;
 #if HAVE_OPENMP
-        use_multithreading = omp_get_max_threads() > 1;
+        // if user manually sets --threads-per-process=1, do not use multithreading
+        if (omp_get_max_threads() == 1)
+            use_multithreading = false;
 #endif // HAVE_OPENMP
 
         if (numJacobiBlocks_ > 1) {
@@ -123,9 +125,9 @@ apply(Vector& rhs,
             //NOTE: copyThread can safely write to jacMat because in solve_system both matrix and *blockJacobiForGPUILU0_ diagonal entries
             //are checked and potentially overwritten in replaceZeroDiagonal() by mainThread. However, no matter the thread writing sequence,
            //the final entry in jacMat is correct.
-#if HAVE_OPENMP
+            //#if HAVE_OPENMP
             copyThread = std::make_shared<std::thread>([&](){this->copyMatToBlockJac(matrix, *blockJacobiForGPUILU0_);});
-#endif // HAVE_OPENMP
+            //#endif // HAVE_OPENMP
         }
         else {
             this->copyMatToBlockJac(matrix, *blockJacobiForGPUILU0_);
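Two things change in these hunks: the flag now defaults to true (so a build without OpenMP still uses the copy thread), and a single OpenMP thread is treated as an explicit opt-out. Commenting out the `#if HAVE_OPENMP` guard around the thread launch is what actually removes the OpenMP dependency, since `std::make_shared<std::thread>` needs only `<thread>`. A hedged sketch of the launch/join ordering that the NOTE above relies on (matrix and copy function are invented stand-ins):

// order_sketch.cpp -- why launching the copy on a std::thread is safe here
// (hypothetical stand-ins for the matrix and copyMatToBlockJac).
#include <memory>
#include <thread>
#include <vector>

std::shared_ptr<std::thread> copyThread;

void copyMatToBlockJac(const std::vector<double>& src, std::vector<double>& dst)
{
    dst = src;   // stands in for the sparse block-Jacobi copy
}

int main()
{
    std::vector<double> matrix{1.0, 2.0, 3.0};
    std::vector<double> blockJac;

    // Launch the copy concurrently with other host-side setup work...
    copyThread = std::make_shared<std::thread>(
        [&]() { copyMatToBlockJac(matrix, blockJac); });

    // ...and join before anyone reads blockJac (the GPU backends do this
    // right before the host-to-device copy of the Jacobi matrix).
    copyThread->join();
    return blockJac.size() == 3 ? 0 : 1;
}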
@@ -1,6 +1,7 @@
 #ifndef OPM_MISC_HPP
 #define OPM_MISC_HPP
 
+#ifdef HAVE_ROCSPARSE
 #include <hip/hip_runtime_api.h>
 #include <hip/hip_version.h>
 #include <sstream>
@@ -40,6 +41,7 @@
         OPM_THROW(std::logic_error, oss.str()); \
     } \
 } while(0)
+#endif
 
 namespace Opm::Accelerator {
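The HIP include and the throwing check macro in this header are now wrapped in `#ifdef HAVE_ROCSPARSE`, presumably so the header can be included in builds without ROCm once the include reshuffle pulls it in more widely. A generic sketch of the guarded-macro pattern (the macro name is invented; the real macro in this header builds its message and uses OPM_THROW):

// guarded_check.hpp -- sketch of a vendor-guarded error-check macro,
// assuming a HAVE_ROCSPARSE configure-time define.
#ifndef GUARDED_CHECK_HPP
#define GUARDED_CHECK_HPP

#ifdef HAVE_ROCSPARSE
#include <hip/hip_runtime_api.h>
#include <sstream>
#include <stdexcept>

#define HIP_CHECK_SKETCH(call)                                \
    do {                                                      \
        hipError_t err_ = (call);                             \
        if (err_ != hipSuccess) {                             \
            std::ostringstream oss;                           \
            oss << "HIP error " << static_cast<int>(err_);    \
            throw std::logic_error(oss.str());                \
        }                                                     \
    } while (0)
#endif // HAVE_ROCSPARSE

#endif // GUARDED_CHECK_HPP

The do { ... } while(0) wrapper, also visible in the second hunk above, makes the macro behave like a single statement after if/else.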
@@ -38,10 +38,11 @@
 // otherwise, the nonzeroes of the matrix are assumed to be in a contiguous array, and a single GPU memcpy is enough
 #define COPY_ROW_BY_ROW 0
 
-#if HAVE_OPENMP
 #include <thread>
-#include <omp.h>
 extern std::shared_ptr<std::thread> copyThread;
+
+#if HAVE_OPENMP
+#include <omp.h>
 #endif // HAVE_OPENMP
 
 namespace Opm::Accelerator {
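The COPY_ROW_BY_ROW toggle above encodes a layout assumption: when the nonzero values already sit in one contiguous array, a single bulk copy replaces a per-row loop. A stand-alone sketch of the two strategies, with memcpy standing in for cudaMemcpyAsync and invented sizes:

// copy_strategies.cpp -- one bulk copy vs. a per-row loop, assuming the
// nonzeroes are stored contiguously (memcpy stands in for cudaMemcpyAsync).
#include <cstring>
#include <vector>

#define COPY_ROW_BY_ROW 0

int main()
{
    const int rows = 100, valsPerRow = 9;            // invented sizes
    std::vector<double> src(rows * valsPerRow, 1.0); // contiguous nonzeroes
    std::vector<double> dst(rows * valsPerRow);

#if COPY_ROW_BY_ROW
    // One copy per row: needed if rows are scattered in host memory.
    for (int r = 0; r < rows; ++r)
        std::memcpy(dst.data() + r * valsPerRow,
                    src.data() + r * valsPerRow,
                    valsPerRow * sizeof(double));
#else
    // Contiguous storage: a single copy is enough.
    std::memcpy(dst.data(), src.data(), src.size() * sizeof(double));
#endif
    return 0;
}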
@@ -342,11 +343,17 @@ copy_system_to_gpu(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
 #else
     cudaMemcpyAsync(d_bVals, matrix->nnzValues,
                     nnz * sizeof(Scalar), cudaMemcpyHostToDevice, stream);
-    if (useJacMatrix) {
+
+    bool use_multithreading = true;
 #if HAVE_OPENMP
-        if(omp_get_max_threads() > 1)
-            copyThread->join();
+    if(omp_get_max_threads() == 1)
+        use_multithreading = false;
 #endif
+
+    if (useJacMatrix) {
+        if(use_multithreading)
+            copyThread->join();
+
         cudaMemcpyAsync(d_mVals, jacMatrix->nnzValues,
                         nnzbs_prec * block_size * block_size * sizeof(Scalar),
                         cudaMemcpyHostToDevice, stream);
@@ -399,12 +406,17 @@ update_system_on_gpu(std::shared_ptr<BlockedMatrix<Scalar>> matrix,
 #else
     cudaMemcpyAsync(d_bVals, matrix->nnzValues,
                     nnz * sizeof(Scalar), cudaMemcpyHostToDevice, stream);
-    if (useJacMatrix) {
+
+    bool use_multithreading = true;
 #if HAVE_OPENMP
-        if (omp_get_max_threads() > 1) {
-            copyThread->join();
-        }
+    if (omp_get_max_threads() == 1)
+        use_multithreading = false;
 #endif
+
+    if (useJacMatrix) {
+        if (use_multithreading)
+            copyThread->join();
+
         cudaMemcpyAsync(d_mVals, jacMatrix->nnzValues,
                         nnzbs_prec * block_size * block_size * sizeof(Scalar),
                         cudaMemcpyHostToDevice, stream);
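In both CUDA paths the ordering matters: cudaMemcpyAsync reads jacMatrix->nnzValues from host memory, so the thread filling that buffer must be joined before the copy is enqueued. A portable sketch of the same ordering, with a plain memcpy standing in for the async device copy and invented buffer names:

// join_before_copy.cpp -- the host buffer must be complete before the
// (asynchronous) host-to-device copy is enqueued; memcpy stands in for
// cudaMemcpyAsync so the sketch runs without CUDA.
#include <cstring>
#include <thread>
#include <vector>

int main()
{
    std::vector<double> hostVals(1024);    // like jacMatrix->nnzValues
    std::vector<double> deviceVals(1024);  // stand-in for d_mVals

    std::thread copyThread([&]() {
        for (std::size_t i = 0; i < hostVals.size(); ++i)
            hostVals[i] = static_cast<double>(i);  // produce the data
    });

    copyThread.join();                             // <-- must happen first
    std::memcpy(deviceVals.data(), hostVals.data(),
                hostVals.size() * sizeof(double)); // then enqueue the copy
    return 0;
}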
@@ -31,10 +31,11 @@
 
 #include <sstream>
 
-#if HAVE_OPENMP
 #include <thread>
-#include <omp.h>
 extern std::shared_ptr<std::thread> copyThread;
+
+#if HAVE_OPENMP
+#include <omp.h>
 #endif //HAVE_OPENMP
 
 namespace Opm::Accelerator {
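copyThread follows the classic one-definition pattern: a single definition in the solver bridge translation unit (first hunk of this commit) and an extern declaration in every GPU backend that joins it. A two-file sketch of the idiom (file names and the helper are invented):

// --- bridge.cpp (sketch): the one definition --------------------------
#include <memory>
#include <thread>

std::shared_ptr<std::thread> copyThread;        // defined exactly once

void wait_for_copy();                           // implemented in backend.cpp

int main()
{
    copyThread = std::make_shared<std::thread>([] { /* copy work */ });
    wait_for_copy();
    return 0;
}

// --- backend.cpp (sketch): users declare, never define ----------------
#include <memory>
#include <thread>

extern std::shared_ptr<std::thread> copyThread; // declaration only

void wait_for_copy()
{
    if (copyThread && copyThread->joinable())
        copyThread->join();
}

Compile both files together (g++ bridge.cpp backend.cpp -pthread); the linker resolves every extern declaration against the single definition.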
@@ -194,13 +195,15 @@ create_preconditioner(BlockedMatrix<Scalar>* mat, BlockedMatrix<Scalar>* jacMat)
     const unsigned int bs = block_size;
 
     auto *matToDecompose = jacMat ? jacMat : mat;
+    bool use_multithreading = true;
 
-    if (jacMat) {
 #if HAVE_OPENMP
-        if (omp_get_max_threads() > 1) {
-            copyThread->join();
-        }
+    if (omp_get_max_threads() == 1)
+        use_multithreading = false;
 #endif
+
+    if (jacMat && use_multithreading) {
+        copyThread->join();
     }
 
     // TODO: remove this copy by replacing inplace ilu decomp by out-of-place ilu decomp
@@ -73,11 +73,11 @@ openclSolverBackend(int verbosity_,
 
     using PreconditionerType = typename Opm::Accelerator::PreconditionerType;
     if (use_cpr) {
-        prec = openclPreconditioner<Scalar,block_size>::create(PreconditionerType::CPR,verbosity,opencl_ilu_parallel);
+        prec = openclPreconditioner<Scalar, block_size>::create(PreconditionerType::CPR, verbosity, opencl_ilu_parallel);
     } else if (use_isai) {
-        prec = openclPreconditioner<Scalar,block_size>::create(PreconditionerType::BISAI,verbosity,opencl_ilu_parallel);
+        prec = openclPreconditioner<Scalar, block_size>::create(PreconditionerType::BISAI, verbosity, opencl_ilu_parallel);
     } else {
-        prec = openclPreconditioner<Scalar,block_size>::create(PreconditionerType::BILU0,verbosity,opencl_ilu_parallel);
+        prec = openclPreconditioner<Scalar, block_size>::create(PreconditionerType::BILU0, verbosity, opencl_ilu_parallel);
     }
 
     std::ostringstream out;
@@ -225,8 +225,10 @@ openclSolverBackend(int verbosity_,
 
 template<class Scalar, unsigned int block_size>
 openclSolverBackend<Scalar,block_size>::
-openclSolverBackend(int verbosity_, int maxit_,
-                    Scalar tolerance_, bool opencl_ilu_parallel_)
+openclSolverBackend(int verbosity_,
+                    int maxit_,
+                    Scalar tolerance_,
+                    bool opencl_ilu_parallel_)
     : Base(verbosity_, maxit_, tolerance_)
     , opencl_ilu_parallel(opencl_ilu_parallel_)
 {
@@ -245,7 +247,8 @@ setOpencl(std::shared_ptr<cl::Context>& context_,
 
 template<class Scalar, unsigned int block_size>
 void openclSolverBackend<Scalar,block_size>::
-gpu_pbicgstab(WellContributions<Scalar>& wellContribs, BdaResult& res)
+gpu_pbicgstab(WellContributions<Scalar>& wellContribs,
+              BdaResult& res)
 {
     float it;
     Scalar rho, rhop, beta, alpha, omega, tmp1, tmp2;
@@ -30,11 +30,11 @@
 
 #include <sstream>
 
+#include <thread>
+extern std::shared_ptr<std::thread> copyThread;
+
 #if HAVE_OPENMP
-#include <thread>
 #include <omp.h>
-extern std::shared_ptr<std::thread> copyThread;
 #endif //HAVE_OPENMP
 
 namespace Opm::Accelerator {
@@ -193,13 +193,18 @@ template <class Scalar, unsigned int block_size>
 void rocsparseBILU0<Scalar, block_size>::
 copy_system_to_gpu(Scalar *d_Avals) {
     Timer t;
+    bool use_multithreading = true;
+
+#if HAVE_OPENMP
+    if (omp_get_max_threads() == 1)
+        use_multithreading = false;
+#endif
 
     if (this->useJacMatrix) {
-#if HAVE_OPENMP
-        if (omp_get_max_threads() > 1) {
+        if (use_multithreading) {
             copyThread->join();
         }
-#endif
 
         HIP_CHECK(hipMemcpyAsync(d_Mrows, this->jacMat->rowPointers, sizeof(rocsparse_int) * (Nb + 1), hipMemcpyHostToDevice, this->stream));
         HIP_CHECK(hipMemcpyAsync(d_Mcols, this->jacMat->colIndices, sizeof(rocsparse_int) * this->nnzbs_prec, hipMemcpyHostToDevice, this->stream));
         HIP_CHECK(hipMemcpyAsync(d_Mvals, this->jacMat->nnzValues, sizeof(Scalar) * this->nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, this->stream));
@@ -220,13 +225,18 @@ template <class Scalar, unsigned int block_size>
 void rocsparseBILU0<Scalar, block_size>::
 update_system_on_gpu(Scalar *d_Avals) {
     Timer t;
+    bool use_multithreading = true;
+
+#if HAVE_OPENMP
+    if (omp_get_max_threads() == 1)
+        use_multithreading = false;
+#endif
 
     if (this->useJacMatrix) {
-#if HAVE_OPENMP
-        if (omp_get_max_threads() > 1) {
+        if (use_multithreading) {
             copyThread->join();
         }
-#endif
 
         HIP_CHECK(hipMemcpyAsync(d_Mvals, this->jacMat->nnzValues, sizeof(Scalar) * this->nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, this->stream));
     } else {
         HIP_CHECK(hipMemcpyAsync(d_Mvals, d_Avals, sizeof(Scalar) * nnz, hipMemcpyDeviceToDevice, this->stream));
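A detail visible in the two rocsparse hunks: copy_system_to_gpu uploads row pointers, column indices, and values, while update_system_on_gpu re-uploads only the values, evidently because the sparsity pattern is assumed not to change between system updates. A plain-C++ sketch of that split, with host vectors standing in for the device arrays d_Mrows/d_Mcols/d_Mvals:

// static_pattern.cpp -- upload structure once, refresh values many times
// (host vectors stand in for the GPU-resident CSR arrays).
#include <vector>

struct DeviceCsr {                 // stand-in for the device-side matrix
    std::vector<int> rows, cols;
    std::vector<double> vals;
};

// First call: structure + values (cf. copy_system_to_gpu).
void copy_system(DeviceCsr& d, const std::vector<int>& rows,
                 const std::vector<int>& cols, const std::vector<double>& vals)
{
    d.rows = rows;
    d.cols = cols;
    d.vals = vals;
}

// Subsequent updates: values only (cf. update_system_on_gpu).
void update_system(DeviceCsr& d, const std::vector<double>& vals)
{
    d.vals = vals;
}

int main()
{
    DeviceCsr d;
    copy_system(d, {0, 2, 3}, {0, 1, 1}, {4.0, 1.0, 3.0});
    update_system(d, {4.5, 0.9, 3.1});   // pattern unchanged, new values
    return 0;
}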