mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Merge pull request #4582 from Tongdongq/rocsparse-separate-wells
RocsparseSolver separate wells
This commit is contained in:
commit
e9bb5d0539
@ -312,6 +312,11 @@ macro (files_hook)
|
||||
set(HDF5_FOUND OFF)
|
||||
unset(HAVE_HDF5)
|
||||
endif()
|
||||
if(HAVE_ROCSPARSE AND HAVE_CUDA)
|
||||
message(WARNING "WARNING! Using CUDA and ROCm at the same time is not allowed. Please choose only one of them by setting CMAKE_DISABLE_FIND_PACKAGE_<rocsparse|CUDA>=<ON|OFF>. Disabling CUDA...\n")
|
||||
set(CUDA_FOUND OFF)
|
||||
unset(HAVE_CUDA)
|
||||
endif()
|
||||
# read the list of components from this file (in the project directory);
|
||||
# it should set various lists with the names of the files to include
|
||||
# include needs to be here to make reset HDF5_FOUND available in
|
||||
|
@ -151,11 +151,6 @@ if (Damaris_FOUND AND MPI_FOUND)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/utils/initDamarisXmlFile.cpp)
|
||||
endif()
|
||||
if(CUDA_FOUND)
|
||||
if(USE_BDA_BRIDGE)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cuda/cusparseSolverBackend.cu)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cuda/cuWellContributions.cu)
|
||||
endif()
|
||||
|
||||
# CUISTL SOURCE
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/CuBlasHandle.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/cuistl/detail/CuSparseHandle.cpp)
|
||||
@ -195,6 +190,7 @@ if(CUDA_FOUND)
|
||||
list (APPEND PUBLIC_HEADER_FILES opm/simulators/linalg/cuistl/set_device.hpp)
|
||||
|
||||
endif()
|
||||
|
||||
if(USE_BDA_BRIDGE)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp
|
||||
opm/simulators/linalg/bda/WellContributions.cpp
|
||||
@ -219,6 +215,11 @@ if(USE_BDA_BRIDGE)
|
||||
endif()
|
||||
if(rocsparse_FOUND AND rocblas_FOUND)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocsparseSolverBackend.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/rocsparseWellContributions.cpp)
|
||||
endif()
|
||||
if(CUDA_FOUND)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cuda/cusparseSolverBackend.cu)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cuda/cuWellContributions.cu)
|
||||
endif()
|
||||
if(amgcl_FOUND)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/amgclSolverBackend.cpp)
|
||||
@ -473,6 +474,7 @@ list (APPEND PUBLIC_HEADER_FILES
|
||||
opm/simulators/linalg/bda/MultisegmentWellContribution.hpp
|
||||
opm/simulators/linalg/bda/rocalutionSolverBackend.hpp
|
||||
opm/simulators/linalg/bda/rocsparseSolverBackend.hpp
|
||||
opm/simulators/linalg/bda/rocsparseWellContributions.hpp
|
||||
opm/simulators/linalg/bda/WellContributions.hpp
|
||||
opm/simulators/linalg/amgcpr.hh
|
||||
opm/simulators/linalg/twolevelmethodcpr.hh
|
||||
|
@ -311,7 +311,7 @@ namespace Opm
|
||||
EWOMS_REGISTER_PARAM(TypeTag, bool, LinearSolverPrintJsonDefinition, "Write the JSON definition of the linear solver setup to the DBG file.");
|
||||
EWOMS_REGISTER_PARAM(TypeTag, int, CprReuseSetup, "Reuse preconditioner setup. Valid options are 0: recreate the preconditioner for every linear solve, 1: recreate once every timestep, 2: recreate if last linear solve took more than 10 iterations, 3: never recreate, 4: recreated every CprReuseInterval");
|
||||
EWOMS_REGISTER_PARAM(TypeTag, int, CprReuseInterval, "Reuse preconditioner interval. Used when CprReuseSetup is set to 4, then the preconditioner will be fully recreated instead of reused every N linear solve, where N is this parameter.");
|
||||
EWOMS_REGISTER_PARAM(TypeTag, std::string, AcceleratorMode, "Choose a linear solver, usage: '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution]'");
|
||||
EWOMS_REGISTER_PARAM(TypeTag, std::string, AcceleratorMode, "Choose a linear solver, usage: '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution|rocsparse]'");
|
||||
EWOMS_REGISTER_PARAM(TypeTag, int, BdaDeviceId, "Choose device ID for cusparseSolver or openclSolver, use 'nvidia-smi' or 'clinfo' to determine valid IDs");
|
||||
EWOMS_REGISTER_PARAM(TypeTag, int, OpenclPlatformId, "Choose platform ID for openclSolver, use 'clinfo' to determine valid platform IDs");
|
||||
EWOMS_REGISTER_PARAM(TypeTag, bool, OpenclIluParallel, "Parallelize ILU decomposition and application on GPU");
|
||||
|
@ -100,8 +100,8 @@ apply(Vector& rhs,
|
||||
auto wellContribs = WellContributions::create(accelerator_mode_, useWellConn);
|
||||
bridge_->initWellContributions(*wellContribs, x.N() * x[0].N());
|
||||
|
||||
// the WellContributions can only be applied separately with CUDA or OpenCL, not with amgcl or rocalution
|
||||
#if HAVE_CUDA || HAVE_OPENCL
|
||||
// the WellContributions can only be applied separately with CUDA, OpenCL or rocsparse, not with amgcl or rocalution
|
||||
#if HAVE_CUDA || HAVE_OPENCL || HAVE_ROCSPARSE
|
||||
if (!useWellConn) {
|
||||
getContribs(*wellContribs);
|
||||
}
|
||||
|
@ -108,7 +108,7 @@ BdaBridge<BridgeMatrix, BridgeVector, block_size>::BdaBridge(std::string acceler
|
||||
} else if (accelerator_mode.compare("none") == 0) {
|
||||
use_gpu = false;
|
||||
} else {
|
||||
OPM_THROW(std::logic_error, "Error unknown value for parameter 'AcceleratorMode', should be passed like '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution]");
|
||||
OPM_THROW(std::logic_error, "Error unknown value for parameter 'AcceleratorMode', should be passed like '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution|rocsparse]");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -49,7 +49,7 @@ private:
|
||||
|
||||
public:
|
||||
/// Construct a BdaBridge
|
||||
/// \param[in] accelerator_mode to select if an accelerated solver is used, is passed via command-line: '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution]'
|
||||
/// \param[in] accelerator_mode to select if an accelerated solver is used, is passed via command-line: '--accelerator-mode=[none|cusparse|opencl|amgcl|rocalution|rocsparse]'
|
||||
/// \param[in] linear_solver_verbosity verbosity of BdaSolver
|
||||
/// \param[in] maxit maximum number of iterations for BdaSolver
|
||||
/// \param[in] tolerance required relative tolerance for BdaSolver
|
||||
|
@ -32,6 +32,10 @@
|
||||
#include <opm/simulators/linalg/bda/cuda/cuWellContributions.hpp>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_ROCSPARSE
|
||||
#include <opm/simulators/linalg/bda/rocsparseWellContributions.hpp>
|
||||
#endif
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
@ -54,9 +58,14 @@ WellContributions::create(const std::string& accelerator_mode, bool useWellConn)
|
||||
}
|
||||
else if(accelerator_mode.compare("rocsparse") == 0){
|
||||
if (!useWellConn) {
|
||||
OPM_THROW(std::logic_error, "Error rocsparse requires --matrix-add-well-contributions=true");
|
||||
#if HAVE_ROCSPARSE
|
||||
return std::make_unique<WellContributionsRocsparse>();
|
||||
#else
|
||||
OPM_THROW(std::runtime_error, "Cannot initialize well contributions: rocsparse is not enabled");
|
||||
#endif
|
||||
}
|
||||
return std::make_unique<WellContributions>();
|
||||
|
||||
}
|
||||
else if(accelerator_mode.compare("amgcl") == 0){
|
||||
if (!useWellConn) {
|
||||
|
@ -37,6 +37,7 @@
|
||||
#undef HAVE_CUDA
|
||||
|
||||
#include <opm/simulators/linalg/bda/rocsparseSolverBackend.hpp>
|
||||
#include <opm/simulators/linalg/bda/rocsparseWellContributions.hpp>
|
||||
|
||||
#include <opm/simulators/linalg/bda/BdaResult.hpp>
|
||||
|
||||
@ -96,11 +97,12 @@ using Dune::Timer;
|
||||
|
||||
template <unsigned int block_size>
|
||||
rocsparseSolverBackend<block_size>::rocsparseSolverBackend(int verbosity_, int maxit_, double tolerance_, unsigned int platformID_, unsigned int deviceID_) : BdaSolver<block_size>(verbosity_, maxit_, tolerance_, platformID_, deviceID_) {
|
||||
hipDevice_t device;
|
||||
if(hipDeviceGet(&device, deviceID) != hipSuccess)
|
||||
{
|
||||
OPM_THROW(std::logic_error, "HIP Error: could not get device");
|
||||
int numDevices = 0;
|
||||
HIP_CHECK(hipGetDeviceCount(&numDevices));
|
||||
if (static_cast<int>(deviceID) >= numDevices) {
|
||||
OPM_THROW(std::runtime_error, "Error chosen too high HIP device ID");
|
||||
}
|
||||
HIP_CHECK(hipSetDevice(deviceID));
|
||||
|
||||
ROCSPARSE_CHECK(rocsparse_create_handle(&handle));
|
||||
ROCBLAS_CHECK(rocblas_create_handle(&blas_handle));
|
||||
@ -151,7 +153,13 @@ void rocsparseSolverBackend<block_size>::gpu_pbicgstab([[maybe_unused]] WellCont
|
||||
double one = 1.0;
|
||||
double mone = -1.0;
|
||||
|
||||
Timer t_total, t_prec(false), t_spmv(false), t_rest(false);
|
||||
Timer t_total, t_prec(false), t_spmv(false), t_well(false), t_rest(false);
|
||||
|
||||
// set stream here, the WellContributions object is destroyed every linear solve
|
||||
// the number of wells can change every linear solve
|
||||
if(wellContribs.getNumWells() > 0){
|
||||
static_cast<WellContributionsRocsparse&>(wellContribs).setStream(stream);
|
||||
}
|
||||
|
||||
// HIP_VERSION is defined as (HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR * 100000 + HIP_VERSION_PATCH)
|
||||
#if HIP_VERSION >= 50400000
|
||||
@ -225,10 +233,18 @@ void rocsparseSolverBackend<block_size>::gpu_pbicgstab([[maybe_unused]] WellCont
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
t_spmv.stop();
|
||||
t_rest.start();
|
||||
t_well.start();
|
||||
}
|
||||
|
||||
// apply wellContributions
|
||||
if(wellContribs.getNumWells() > 0){
|
||||
static_cast<WellContributionsRocsparse&>(wellContribs).apply(d_pw, d_v);
|
||||
}
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
t_well.stop();
|
||||
t_rest.start();
|
||||
}
|
||||
|
||||
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_rw, 1, d_v, 1, &tmp1));
|
||||
alpha = rho / tmp1;
|
||||
@ -278,10 +294,18 @@ void rocsparseSolverBackend<block_size>::gpu_pbicgstab([[maybe_unused]] WellCont
|
||||
if(verbosity >= 3){
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
t_spmv.stop();
|
||||
t_rest.start();
|
||||
t_well.start();
|
||||
}
|
||||
|
||||
// apply wellContributions
|
||||
if(wellContribs.getNumWells() > 0){
|
||||
static_cast<WellContributionsRocsparse&>(wellContribs).apply(d_s, d_t);
|
||||
}
|
||||
if (verbosity >= 3) {
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
t_well.stop();
|
||||
t_rest.start();
|
||||
}
|
||||
|
||||
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_r, 1, &tmp1));
|
||||
ROCBLAS_CHECK(rocblas_ddot(blas_handle, N, d_t, 1, d_t, 1, &tmp2));
|
||||
@ -323,6 +347,7 @@ void rocsparseSolverBackend<block_size>::gpu_pbicgstab([[maybe_unused]] WellCont
|
||||
std::ostringstream out;
|
||||
out << "rocsparseSolver::prec_apply: " << t_prec.elapsed() << " s\n";
|
||||
out << "rocsparseSolver::spmv: " << t_spmv.elapsed() << " s\n";
|
||||
out << "rocsparseSolver::well: " << t_well.elapsed() << " s\n";
|
||||
out << "rocsparseSolver::rest: " << t_rest.elapsed() << " s\n";
|
||||
out << "rocsparseSolver::total_solve: " << res.elapsed << " s\n";
|
||||
OpmLog::info(out.str());
|
||||
|
239
opm/simulators/linalg/bda/rocsparseWellContributions.cpp
Normal file
239
opm/simulators/linalg/bda/rocsparseWellContributions.cpp
Normal file
@ -0,0 +1,239 @@
|
||||
/*
|
||||
Copyright 2023 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <config.h> // CMake
|
||||
|
||||
// MultisegmentWellContribution includes the cuda runtime if found by CMake
|
||||
// this leads to inclusion of both amd_hip_vector_types.h and vector_types.h
|
||||
// which both define vector types like uchar2, short3 and double4.
|
||||
// Restore the value (if defined) afterwards.
|
||||
#ifdef HAVE_CUDA
|
||||
#define HIP_HAVE_CUDA_DEFINED HAVE_CUDA
|
||||
#endif
|
||||
|
||||
#undef HAVE_CUDA
|
||||
|
||||
#include <opm/simulators/linalg/bda/rocsparseWellContributions.hpp>
|
||||
|
||||
#ifdef HIP_HAVE_CUDA_DEFINED
|
||||
#define HAVE_CUDA HIP_HAVE_CUDA_DEFINED
|
||||
#undef HIP_HAVE_CUDA_DEFINED
|
||||
#endif
|
||||
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
|
||||
|
||||
#include <hip/hip_runtime.h>
|
||||
|
||||
|
||||
#define HIP_CHECK(stat) \
|
||||
{ \
|
||||
if(stat != hipSuccess) \
|
||||
{ \
|
||||
OPM_THROW(std::logic_error, "HIP error"); \
|
||||
} \
|
||||
}
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
#ifdef __HIP__
|
||||
/// HIP kernel to apply the standard wellcontributions
|
||||
__global__ void stdwell_apply(
|
||||
const double *Cnnzs,
|
||||
const double *Dnnzs,
|
||||
const double *Bnnzs,
|
||||
const unsigned *Ccols,
|
||||
const unsigned *Bcols,
|
||||
const double *x,
|
||||
double *y,
|
||||
const unsigned dim,
|
||||
const unsigned dim_wells,
|
||||
const unsigned *val_pointers)
|
||||
{
|
||||
unsigned wgId = blockIdx.x;
|
||||
unsigned wiId = threadIdx.x;
|
||||
unsigned valSize = val_pointers[wgId + 1] - val_pointers[wgId];
|
||||
unsigned valsPerBlock = dim*dim_wells;
|
||||
unsigned numActiveWorkItems = (blockDim.x/valsPerBlock)*valsPerBlock;
|
||||
unsigned numBlocksPerWarp = blockDim.x/valsPerBlock;
|
||||
unsigned c = wiId % dim;
|
||||
unsigned r = (wiId/dim) % dim_wells;
|
||||
double temp;
|
||||
|
||||
extern __shared__ double localSum[];
|
||||
double *z1 = localSum + gridDim.x;
|
||||
double *z2 = z1 + dim_wells;
|
||||
|
||||
localSum[wiId] = 0;
|
||||
if(wiId < numActiveWorkItems){
|
||||
unsigned b = wiId/valsPerBlock + val_pointers[wgId];
|
||||
while(b < valSize + val_pointers[wgId]){
|
||||
int colIdx = Bcols[b];
|
||||
localSum[wiId] += Bnnzs[b*dim*dim_wells + r*dim + c]*x[colIdx*dim + c];
|
||||
b += numBlocksPerWarp;
|
||||
}
|
||||
|
||||
// merge all blocks in this workgroup into 1 block
|
||||
// if numBlocksPerWarp >= 3, should use loop
|
||||
// block 1: block 2:
|
||||
// 0 1 2 12 13 14
|
||||
// 3 4 5 15 16 17
|
||||
// 6 7 8 18 19 20
|
||||
// 9 10 11 21 22 23
|
||||
// workitem i will hold the sum of workitems i and i + valsPerBlock
|
||||
if(wiId < valsPerBlock){
|
||||
for (unsigned i = 1; i < numBlocksPerWarp; ++i) {
|
||||
localSum[wiId] += localSum[wiId + i*valsPerBlock];
|
||||
}
|
||||
}
|
||||
|
||||
if(c == 0 && wiId < valsPerBlock){
|
||||
for(unsigned i = dim - 1; i > 0; --i){
|
||||
localSum[wiId] += localSum[wiId + i];
|
||||
}
|
||||
z1[r] = localSum[wiId];
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if(wiId < dim_wells){
|
||||
temp = 0.0;
|
||||
for(unsigned i = 0; i < dim_wells; ++i){
|
||||
temp += Dnnzs[wgId*dim_wells*dim_wells + wiId*dim_wells + i]*z1[i];
|
||||
}
|
||||
z2[wiId] = temp;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if(wiId < dim*valSize){
|
||||
temp = 0.0;
|
||||
unsigned bb = wiId/dim + val_pointers[wgId];
|
||||
for (unsigned j = 0; j < dim_wells; ++j){
|
||||
temp += Cnnzs[bb*dim*dim_wells + j*dim + c]*z2[j];
|
||||
}
|
||||
|
||||
int colIdx = Ccols[bb];
|
||||
y[colIdx*dim + c] -= temp;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void WellContributionsRocsparse::apply_stdwells([[maybe_unused]] double *d_x,
|
||||
[[maybe_unused]] double *d_y){
|
||||
#ifdef __HIP__
|
||||
unsigned gridDim = num_std_wells;
|
||||
unsigned blockDim = 64;
|
||||
unsigned shared_mem_size = (blockDim + 2 * dim_wells) * sizeof(double); // shared memory for localSum, z1 and z2
|
||||
// dim3(N) will create a vector {N, 1, 1}
|
||||
stdwell_apply<<<dim3(gridDim), dim3(blockDim), shared_mem_size, stream>>>(
|
||||
d_Cnnzs_hip, d_Dnnzs_hip, d_Bnnzs_hip, d_Ccols_hip, d_Bcols_hip,
|
||||
d_x, d_y, dim, dim_wells, d_val_pointers_hip
|
||||
);
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
#else
|
||||
OPM_THROW(std::logic_error, "Error separate wellcontributions for rocsparse only supported when compiling with hipcc");
|
||||
#endif
|
||||
}
|
||||
|
||||
void WellContributionsRocsparse::apply_mswells(double *d_x, double *d_y){
|
||||
if (h_x.empty()) {
|
||||
h_x.resize(N);
|
||||
h_y.resize(N);
|
||||
}
|
||||
|
||||
HIP_CHECK(hipMemcpyAsync(h_x.data(), d_x, sizeof(double) * N, hipMemcpyDeviceToHost, stream));
|
||||
HIP_CHECK(hipMemcpyAsync(h_y.data(), d_y, sizeof(double) * N, hipMemcpyDeviceToHost, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
|
||||
// actually apply MultisegmentWells
|
||||
for (auto& well : multisegments) {
|
||||
well->apply(h_x.data(), h_y.data());
|
||||
}
|
||||
|
||||
// copy vector y from CPU to GPU
|
||||
HIP_CHECK(hipMemcpyAsync(d_y, h_y.data(), sizeof(double) * N, hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
|
||||
void WellContributionsRocsparse::apply(double *d_x, double *d_y){
|
||||
if(num_std_wells > 0){
|
||||
apply_stdwells(d_x, d_y);
|
||||
}
|
||||
|
||||
if(num_ms_wells > 0){
|
||||
apply_mswells(d_x, d_y);
|
||||
}
|
||||
}
|
||||
|
||||
void WellContributionsRocsparse::setStream(hipStream_t stream_){
|
||||
stream = stream_;
|
||||
}
|
||||
|
||||
void WellContributionsRocsparse::APIaddMatrix(MatrixType type,
|
||||
int* colIndices,
|
||||
double* values,
|
||||
unsigned int val_size)
|
||||
{
|
||||
if (!allocated) {
|
||||
OPM_THROW(std::logic_error, "Error cannot add wellcontribution before allocating memory in WellContributions");
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case MatrixType::C:
|
||||
HIP_CHECK(hipMemcpyAsync(d_Cnnzs_hip + num_blocks_so_far * dim * dim_wells, values, sizeof(d_Cnnzs_hip) * val_size * dim * dim_wells, hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_Ccols_hip + num_blocks_so_far, colIndices, sizeof(d_Ccols_hip) * val_size, hipMemcpyHostToDevice, stream));
|
||||
break;
|
||||
|
||||
case MatrixType::D:
|
||||
HIP_CHECK(hipMemcpyAsync(d_Dnnzs_hip + num_std_wells_so_far * dim_wells * dim_wells, values, sizeof(d_Dnnzs_hip) * dim_wells * dim_wells, hipMemcpyHostToDevice, stream));
|
||||
break;
|
||||
|
||||
case MatrixType::B:
|
||||
HIP_CHECK(hipMemcpyAsync(d_Bnnzs_hip + num_blocks_so_far * dim * dim_wells, values, sizeof(d_Bnnzs_hip) * val_size * dim * dim_wells, hipMemcpyHostToDevice, stream));
|
||||
HIP_CHECK(hipMemcpyAsync(d_Bcols_hip + num_blocks_so_far, colIndices, sizeof(d_Bcols_hip) * val_size, hipMemcpyHostToDevice, stream));
|
||||
|
||||
val_pointers[num_std_wells_so_far] = num_blocks_so_far;
|
||||
if (num_std_wells_so_far == num_std_wells - 1) {
|
||||
val_pointers[num_std_wells] = num_blocks;
|
||||
HIP_CHECK(hipMemcpyAsync(d_val_pointers_hip, val_pointers.data(), sizeof(d_val_pointers_hip) * (num_std_wells + 1), hipMemcpyHostToDevice, stream));
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
OPM_THROW(std::logic_error, "Error unsupported matrix ID for WellContributionsRocsparse::addMatrix()");
|
||||
}
|
||||
HIP_CHECK(hipStreamSynchronize(stream));
|
||||
}
|
||||
|
||||
void WellContributionsRocsparse::APIalloc()
|
||||
{
|
||||
HIP_CHECK(hipMalloc((void**)&d_Cnnzs_hip, sizeof(d_Cnnzs_hip) * num_blocks * dim * dim_wells));
|
||||
HIP_CHECK(hipMalloc((void**)&d_Dnnzs_hip, sizeof(d_Dnnzs_hip) * num_std_wells * dim_wells * dim_wells));
|
||||
HIP_CHECK(hipMalloc((void**)&d_Bnnzs_hip, sizeof(d_Bnnzs_hip) * num_blocks * dim * dim_wells));
|
||||
HIP_CHECK(hipMalloc((void**)&d_Ccols_hip, sizeof(d_Ccols_hip) * num_blocks));
|
||||
HIP_CHECK(hipMalloc((void**)&d_Bcols_hip, sizeof(d_Bcols_hip) * num_blocks));
|
||||
HIP_CHECK(hipMalloc((void**)&d_val_pointers_hip, sizeof(d_val_pointers_hip) * (num_std_wells + 1)));
|
||||
}
|
||||
|
||||
} //namespace Opm
|
60
opm/simulators/linalg/bda/rocsparseWellContributions.hpp
Normal file
60
opm/simulators/linalg/bda/rocsparseWellContributions.hpp
Normal file
@ -0,0 +1,60 @@
|
||||
/*
|
||||
Copyright 2023 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef WELLCONTRIBUTIONS_ROCSPARSE_HEADER_INCLUDED
|
||||
#define WELLCONTRIBUTIONS_ROCSPARSE_HEADER_INCLUDED
|
||||
|
||||
#include <opm/simulators/linalg/bda/WellContributions.hpp>
|
||||
|
||||
#include <hip/hip_runtime_api.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
class WellContributionsRocsparse : public WellContributions
|
||||
{
|
||||
private:
|
||||
hipStream_t stream;
|
||||
|
||||
public:
|
||||
void apply_stdwells(double *d_x, double *d_y);
|
||||
void apply_mswells(double *d_x, double *d_y);
|
||||
void apply(double *d_x, double *d_y);
|
||||
void setStream(hipStream_t stream);
|
||||
|
||||
protected:
|
||||
/// Allocate memory for the StandardWells
|
||||
void APIalloc() override;
|
||||
|
||||
void APIaddMatrix(MatrixType type, int *colIndices, double *values, unsigned int val_size) override;
|
||||
|
||||
double *d_Cnnzs_hip, *d_Dnnzs_hip, *d_Bnnzs_hip;
|
||||
unsigned *d_Ccols_hip, *d_Bcols_hip;
|
||||
unsigned *d_val_pointers_hip;
|
||||
|
||||
std::vector<double> h_x;
|
||||
std::vector<double> h_y;
|
||||
};
|
||||
|
||||
} //namespace Opm
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user