mirror of https://github.com/OPM/opm-simulators.git (synced 2025-02-25 18:55:30 -06:00)

commit cc1dfca9e0 (parent b0157def17)

add support for single thread copy

Previously, when numJacobiBlocks_ > 1 and OpenMP was compiled in, the block-Jacobi matrix for the GPU preconditioner was always copied on a separate std::thread. With this change the copy runs inline on the main thread whenever omp_get_max_threads() returns 1, and the matching join() calls in the cusparse and rocsparse backends are guarded by the same condition.
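The change follows one pattern across all three touched files: decide once whether a worker thread is worth spawning, then guard both the spawn and the later join with the same omp_get_max_threads() > 1 test. The sketch below shows that pattern in isolation. It is illustrative only: startCopy, finishCopy, and the vector stand-in for the block-Jacobi matrix are hypothetical scaffolding, and it tests the compiler-defined _OPENMP where the OPM build uses its own HAVE_OPENMP macro.

// Minimal sketch of the guarded spawn/join pattern introduced by this commit.
#include <memory>
#include <thread>
#include <vector>
#ifdef _OPENMP
#include <omp.h>
#endif

std::shared_ptr<std::thread> copyThread;

// Stand-in for copyMatToBlockJac(matrix, *blockJacobiForGPUILU0_).
void copyMatToBlockJac(const std::vector<double>& src, std::vector<double>& dst) {
    dst = src;
}

void startCopy(const std::vector<double>& src, std::vector<double>& dst) {
    bool use_multithreading = false;
#ifdef _OPENMP
    // Overlapping the copy with other work only pays off when more than
    // one thread is available.
    use_multithreading = omp_get_max_threads() > 1;
#endif
    if (use_multithreading)
        copyThread = std::make_shared<std::thread>([&]() { copyMatToBlockJac(src, dst); });
    else
        copyMatToBlockJac(src, dst); // single-thread fallback: copy inline
}

void finishCopy() {
#ifdef _OPENMP
    // Must mirror the spawn condition exactly: joining when no thread was
    // spawned would dereference a null copyThread.
    if (omp_get_max_threads() > 1)
        copyThread->join();
#endif
}

int main() {
    std::vector<double> src{1.0, 2.0, 3.0};
    std::vector<double> dst;
    startCopy(src, dst);
    finishCopy(); // after this, dst holds a complete copy of src
}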
@@ -45,6 +45,7 @@
 
 #if HAVE_OPENMP
 #include <thread>
+#include <omp.h>
 std::shared_ptr<std::thread> copyThread;
 #endif // HAVE_OPENMP
 
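copyThread is defined once in this translation unit; the cusparse and rocsparse backends further down declare it extern and join it before uploading the block-Jacobi matrix to the GPU.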
@@ -113,22 +114,28 @@ apply(Vector& rhs,
         }
 #endif
 
-        if (numJacobiBlocks_ > 1) {
+        bool use_multithreading = false;
 #if HAVE_OPENMP
-            //NOTE: copyThread can safely write to jacMat because in solve_system both matrix and *blockJacobiForGPUILU0_ diagonal entries
-            //are checked and potentially overwritten in replaceZeroDiagonal() by mainThread. However, no matter the thread writing sequence,
-            //the final entry in jacMat is correct.
-            copyThread = std::make_shared<std::thread>([&](){this->copyMatToBlockJac(matrix, *blockJacobiForGPUILU0_);});
-#else
-            this->copyMatToBlockJac(matrix, *blockJacobiForGPUILU0_);
-#endif
+        use_multithreading = omp_get_max_threads() > 1;
+#endif // HAVE_OPENMP
+
+        if (numJacobiBlocks_ > 1) {
+            if(use_multithreading) {
+                //NOTE: copyThread can safely write to jacMat because in solve_system both matrix and *blockJacobiForGPUILU0_ diagonal entries
+                //are checked and potentially overwritten in replaceZeroDiagonal() by mainThread. However, no matter the thread writing sequence,
+                //the final entry in jacMat is correct.
+                copyThread = std::make_shared<std::thread>([&](){this->copyMatToBlockJac(matrix, *blockJacobiForGPUILU0_);});
+            }
+            else {
+                this->copyMatToBlockJac(matrix, *blockJacobiForGPUILU0_);
+            }
 
             // Const_cast needed since the CUDA stuff overwrites values for better matrix condition..
             bridge_->solve_system(&matrix, blockJacobiForGPUILU0_.get(),
                                   numJacobiBlocks_, rhs, *wellContribs, result);
         }
         else
             bridge_->solve_system(&matrix, &matrix,
                                   numJacobiBlocks_, rhs, *wellContribs, result);
         if (result.converged) {
             // get result vector x from non-Dune backend, iff solve was successful
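Note that apply() computes the decision once into use_multithreading, while the backend join sites below re-evaluate omp_get_max_threads() > 1 at upload time. As long as the thread count does not change between the two calls the conditions agree; if they ever diverged, a join could be attempted on a null copyThread, or a spawned thread could be left unjoined.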
@@ -40,6 +40,7 @@
 
 #if HAVE_OPENMP
 #include <thread>
+#include <omp.h>
 extern std::shared_ptr<std::thread> copyThread;
 #endif // HAVE_OPENMP
 
@@ -328,7 +329,8 @@ void cusparseSolverBackend<block_size>::copy_system_to_gpu(std::shared_ptr<Block
     cudaMemcpyAsync(d_bVals, matrix->nnzValues, nnz * sizeof(double), cudaMemcpyHostToDevice, stream);
     if (useJacMatrix) {
 #if HAVE_OPENMP
-        copyThread->join();
+        if(omp_get_max_threads() > 1)
+            copyThread->join();
 #endif
         cudaMemcpyAsync(d_mVals, jacMatrix->nnzValues, nnzbs_prec * block_size * block_size * sizeof(double), cudaMemcpyHostToDevice, stream);
     } else {
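In both cusparse upload paths the join sits immediately before the cudaMemcpyAsync that reads jacMatrix->nnzValues, so the host-side buffer written by copyThread is complete before the host-to-device transfer is enqueued. The same guard is applied to update_system_on_gpu in the next hunk.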
@@ -372,7 +374,8 @@ void cusparseSolverBackend<block_size>::update_system_on_gpu(std::shared_ptr<Blo
     cudaMemcpyAsync(d_bVals, matrix->nnzValues, nnz * sizeof(double), cudaMemcpyHostToDevice, stream);
     if (useJacMatrix) {
 #if HAVE_OPENMP
-        copyThread->join();
+        if(omp_get_max_threads() > 1)
+            copyThread->join();
 #endif
         cudaMemcpyAsync(d_mVals, jacMatrix->nnzValues, nnzbs_prec * block_size * block_size * sizeof(double), cudaMemcpyHostToDevice, stream);
     } else {
@@ -89,6 +89,7 @@
 
 #if HAVE_OPENMP
 #include <thread>
+#include <omp.h>
 extern std::shared_ptr<std::thread> copyThread;
 #endif //HAVE_OPENMP
 
@@ -441,7 +442,8 @@ void rocsparseSolverBackend<block_size>::copy_system_to_gpu(double *b) {
 
     if (useJacMatrix) {
 #if HAVE_OPENMP
-        copyThread->join();
+        if(omp_get_max_threads() > 1)
+            copyThread->join();
 #endif
         HIP_CHECK(hipMemcpyAsync(d_Mrows, jacMat->rowPointers, sizeof(rocsparse_int) * (Nb + 1), hipMemcpyHostToDevice, stream));
         HIP_CHECK(hipMemcpyAsync(d_Mcols, jacMat->colIndices, sizeof(rocsparse_int) * nnzbs_prec, hipMemcpyHostToDevice, stream));
@@ -472,7 +474,8 @@ void rocsparseSolverBackend<block_size>::update_system_on_gpu(double *b) {
 
     if (useJacMatrix) {
 #if HAVE_OPENMP
-        copyThread->join();
+        if (omp_get_max_threads() > 1)
+            copyThread->join();
 #endif
         HIP_CHECK(hipMemcpyAsync(d_Mvals, jacMat->nnzValues, sizeof(double) * nnzbs_prec * block_size * block_size, hipMemcpyHostToDevice, stream));
     } else {
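The rocsparse backend mirrors the cusparse change. The new inline path can be exercised by running with OMP_NUM_THREADS=1, which makes omp_get_max_threads() return 1, so the copy happens on the main thread and no join is attempted.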