mirror of
https://github.com/OPM/opm-simulators.git
synced 2025-02-25 18:55:30 -06:00
Split WellContributions into .cpp and .cu
This commit is contained in:
parent
f90bb85960
commit
5971a7ae9e
@ -42,6 +42,7 @@ list (APPEND MAIN_SOURCE_FILES
|
||||
|
||||
if(CUDA_FOUND)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/cusparseSolverBackend.cu)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cu)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/MultisegmentWellContribution.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp)
|
||||
@ -53,7 +54,7 @@ if(OPENCL_FOUND)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/opencl.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/openclSolverBackend.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cu)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cpp)
|
||||
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/MultisegmentWellContribution.cpp)
|
||||
endif()
|
||||
if(MPI_FOUND)
|
||||
|
@ -332,7 +332,7 @@ protected:
|
||||
#else
|
||||
const std::string gpu_mode = EWOMS_GET_PARAM(TypeTag, std::string, GpuMode);
|
||||
if (gpu_mode.compare("none") != 0) {
|
||||
OPM_THROW(std::logic_error,"Error cannot use GPU solver since CUDA was not found during compilation");
|
||||
OPM_THROW(std::logic_error,"Error cannot use GPU solver since neither CUDA nor OpenCL was not found by cmake");
|
||||
}
|
||||
#endif
|
||||
extractParallelGridInformationToISTL(simulator_.vanguard().grid(), parallelInformation_);
|
||||
|
150
opm/simulators/linalg/bda/WellContributions.cpp
Normal file
150
opm/simulators/linalg/bda/WellContributions.cpp
Normal file
@ -0,0 +1,150 @@
|
||||
/*
|
||||
Copyright 2020 Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include <config.h> // CMake
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
#include "opm/simulators/linalg/bda/WellContributions.hpp"
|
||||
|
||||
namespace Opm
|
||||
{
|
||||
|
||||
|
||||
void WellContributions::alloc()
|
||||
{
|
||||
if (num_std_wells > 0) {
|
||||
#if HAVE_CUDA
|
||||
allocStandardWells();
|
||||
#else
|
||||
OPM_THROW(std::logic_error, "Error cannot allocate on GPU for StandardWells because CUDA was not found by cmake");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
WellContributions::~WellContributions()
|
||||
{
|
||||
if (h_x) {
|
||||
delete[] h_x;
|
||||
delete[] h_y;
|
||||
}
|
||||
|
||||
// delete MultisegmentWellContributions
|
||||
for (auto ms : multisegments) {
|
||||
delete ms;
|
||||
}
|
||||
multisegments.clear();
|
||||
|
||||
#if HAVE_CUDA
|
||||
freeStandardWells();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#if HAVE_OPENCL
|
||||
void WellContributions::apply(cl::Buffer& d_x, cl::Buffer& d_y) {
|
||||
|
||||
// apply MultisegmentWells
|
||||
if (num_ms_wells > 0) {
|
||||
// allocate pinned memory on host if not yet done
|
||||
if (h_x == nullptr) {
|
||||
h_x = new double[N];
|
||||
h_y = new double[N];
|
||||
}
|
||||
|
||||
// copy vectors x and y from GPU to CPU
|
||||
queue->enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(double) * N, h_x);
|
||||
queue->enqueueReadBuffer(d_y, CL_TRUE, 0, sizeof(double) * N, h_y);
|
||||
|
||||
// actually apply MultisegmentWells
|
||||
for (MultisegmentWellContribution *well : multisegments) {
|
||||
well->apply(h_x, h_y);
|
||||
}
|
||||
|
||||
// copy vector y from CPU to GPU
|
||||
queue->enqueueWriteBuffer(d_y, CL_TRUE, 0, sizeof(double) * N, h_y);
|
||||
}
|
||||
|
||||
|
||||
// apply StandardWells
|
||||
if (num_std_wells > 0) {
|
||||
OPM_THROW(std::logic_error, "Error StandardWells are not supported by openclSolver");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void WellContributions::addMatrix(MatrixType type, int *colIndices, double *values, unsigned int val_size)
|
||||
{
|
||||
#if HAVE_CUDA
|
||||
addMatrixGpu(type, colIndices, values, val_size);
|
||||
#else
|
||||
OPM_THROW(std::logic_error, "Error cannot add StandardWell matrix on GPU because CUDA was not found by cmake");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void WellContributions::setBlockSize(unsigned int dim_, unsigned int dim_wells_)
|
||||
{
|
||||
dim = dim_;
|
||||
dim_wells = dim_wells_;
|
||||
}
|
||||
|
||||
void WellContributions::addNumBlocks(unsigned int nnz)
|
||||
{
|
||||
if (allocated) {
|
||||
OPM_THROW(std::logic_error, "Error cannot add more sizes after allocated in WellContributions");
|
||||
}
|
||||
num_blocks += nnz;
|
||||
num_std_wells++;
|
||||
}
|
||||
|
||||
void WellContributions::addMultisegmentWellContribution(unsigned int dim, unsigned int dim_wells,
|
||||
unsigned int Nb, unsigned int Mb,
|
||||
unsigned int BnumBlocks, std::vector<double> &Bvalues, std::vector<unsigned int> &BcolIndices, std::vector<unsigned int> &BrowPointers,
|
||||
unsigned int DnumBlocks, double *Dvalues, int *DcolPointers, int *DrowIndices,
|
||||
std::vector<double> &Cvalues)
|
||||
{
|
||||
this->N = Nb * dim;
|
||||
MultisegmentWellContribution *well = new MultisegmentWellContribution(dim, dim_wells, Nb, Mb, BnumBlocks, Bvalues, BcolIndices, BrowPointers, DnumBlocks, Dvalues, DcolPointers, DrowIndices, Cvalues);
|
||||
multisegments.emplace_back(well);
|
||||
++num_ms_wells;
|
||||
}
|
||||
|
||||
|
||||
void WellContributions::setReordering(int *toOrder_, bool reorder_)
|
||||
{
|
||||
this->toOrder = toOrder_;
|
||||
this->reorder = reorder_;
|
||||
for (auto& ms : multisegments) {
|
||||
ms->setReordering(toOrder_, reorder_);
|
||||
}
|
||||
}
|
||||
|
||||
#if HAVE_OPENCL
|
||||
void WellContributions::setOpenCLQueue(cl::CommandQueue *queue_)
|
||||
{
|
||||
this->queue = queue_;
|
||||
}
|
||||
#endif
|
||||
|
||||
} //namespace Opm
|
||||
|
@ -22,10 +22,8 @@
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#if HAVE_CUDA
|
||||
#include "opm/simulators/linalg/bda/cuda_header.hpp"
|
||||
#include <cuda_runtime.h>
|
||||
#endif
|
||||
|
||||
#include <opm/common/OpmLog/OpmLog.hpp>
|
||||
#include <opm/common/ErrorMacros.hpp>
|
||||
@ -35,7 +33,6 @@ namespace Opm
|
||||
{
|
||||
|
||||
// apply WellContributions using y -= C^T * (D^-1 * (B * x))
|
||||
#if HAVE_CUDA
|
||||
__global__ void apply_well_contributions(
|
||||
const double * __restrict__ Cnnzs,
|
||||
const double * __restrict__ Dnnzs,
|
||||
@ -127,12 +124,10 @@ __global__ void apply_well_contributions(
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
void WellContributions::alloc()
|
||||
void WellContributions::allocStandardWells()
|
||||
{
|
||||
if (num_std_wells > 0) {
|
||||
#if HAVE_CUDA
|
||||
cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_std_wells * dim_wells * dim_wells);
|
||||
cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells);
|
||||
@ -142,34 +137,12 @@ void WellContributions::alloc()
|
||||
cudaMalloc((void**)&d_val_pointers, sizeof(int) * (num_std_wells + 1));
|
||||
cudaCheckLastError("apply_gpu malloc failed");
|
||||
allocated = true;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
WellContributions::~WellContributions()
|
||||
{
|
||||
// free pinned memory for MultisegmentWellContributions
|
||||
if (h_x) {
|
||||
if (host_mem_cuda) {
|
||||
#if HAVE_CUDA
|
||||
cudaFreeHost(h_x);
|
||||
cudaFreeHost(h_y);
|
||||
#endif
|
||||
} else {
|
||||
delete[] h_x;
|
||||
delete[] h_y;
|
||||
}
|
||||
}
|
||||
|
||||
// delete MultisegmentWellContributions
|
||||
for (auto ms : multisegments) {
|
||||
delete ms;
|
||||
}
|
||||
multisegments.clear();
|
||||
|
||||
void WellContributions::freeStandardWells() {
|
||||
// delete data for StandardWell
|
||||
if (num_std_wells > 0) {
|
||||
#if HAVE_CUDA
|
||||
cudaFree(d_Cnnzs);
|
||||
cudaFree(d_Dnnzs);
|
||||
cudaFree(d_Bnnzs);
|
||||
@ -177,14 +150,12 @@ WellContributions::~WellContributions()
|
||||
cudaFree(d_Bcols);
|
||||
delete[] val_pointers;
|
||||
cudaFree(d_val_pointers);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Apply the WellContributions, similar to StandardWell::apply()
|
||||
// y -= (C^T *(D^-1*( B*x)))
|
||||
#if HAVE_CUDA
|
||||
void WellContributions::apply(double *d_x, double *d_y)
|
||||
{
|
||||
// apply MultisegmentWells
|
||||
@ -221,48 +192,14 @@ void WellContributions::apply(double *d_x, double *d_y)
|
||||
apply_well_contributions <<< num_std_wells, 32, smem_size, stream>>>(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_x, d_y, dim, dim_wells, d_val_pointers);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if HAVE_OPENCL
|
||||
void WellContributions::apply(cl::Buffer& d_x, cl::Buffer& d_y) {
|
||||
|
||||
// apply MultisegmentWells
|
||||
if (num_ms_wells > 0) {
|
||||
// allocate pinned memory on host if not yet done
|
||||
if (h_x == nullptr) {
|
||||
h_x = new double[N];
|
||||
h_y = new double[N];
|
||||
}
|
||||
|
||||
// copy vectors x and y from GPU to CPU
|
||||
queue->enqueueReadBuffer(d_x, CL_TRUE, 0, sizeof(double) * N, h_x);
|
||||
queue->enqueueReadBuffer(d_y, CL_TRUE, 0, sizeof(double) * N, h_y);
|
||||
|
||||
// actually apply MultisegmentWells
|
||||
for (MultisegmentWellContribution *well : multisegments) {
|
||||
well->apply(h_x, h_y);
|
||||
}
|
||||
|
||||
// copy vector y from CPU to GPU
|
||||
queue->enqueueWriteBuffer(d_y, CL_TRUE, 0, sizeof(double) * N, h_y);
|
||||
}
|
||||
|
||||
|
||||
// apply StandardWells
|
||||
if (num_std_wells > 0) {
|
||||
OPM_THROW(std::logic_error, "Error StandardWells are not supported by openclSolver");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void WellContributions::addMatrix(MatrixType type, int *colIndices, double *values, unsigned int val_size)
|
||||
void WellContributions::addMatrixGpu(MatrixType type, int *colIndices, double *values, unsigned int val_size)
|
||||
{
|
||||
if (!allocated) {
|
||||
OPM_THROW(std::logic_error, "Error cannot add wellcontribution before allocating memory in WellContributions");
|
||||
}
|
||||
|
||||
#if HAVE_CUDA
|
||||
switch (type) {
|
||||
case MatrixType::C:
|
||||
cudaMemcpy(d_Cnnzs + num_blocks_so_far * dim * dim_wells, values, sizeof(double) * val_size * dim * dim_wells, cudaMemcpyHostToDevice);
|
||||
@ -288,12 +225,8 @@ void WellContributions::addMatrix(MatrixType type, int *colIndices, double *valu
|
||||
num_blocks_so_far += val_size;
|
||||
num_std_wells_so_far++;
|
||||
}
|
||||
#else
|
||||
OPM_THROW(std::logic_error, "Error cannot use StandardWells since CUDA was not found");
|
||||
#endif
|
||||
}
|
||||
|
||||
#if HAVE_CUDA
|
||||
void WellContributions::setCudaStream(cudaStream_t stream_)
|
||||
{
|
||||
this->stream = stream_;
|
||||
@ -301,51 +234,6 @@ void WellContributions::setCudaStream(cudaStream_t stream_)
|
||||
well->setCudaStream(stream_);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void WellContributions::setBlockSize(unsigned int dim_, unsigned int dim_wells_)
|
||||
{
|
||||
dim = dim_;
|
||||
dim_wells = dim_wells_;
|
||||
}
|
||||
|
||||
void WellContributions::addNumBlocks(unsigned int nnz)
|
||||
{
|
||||
if (allocated) {
|
||||
OPM_THROW(std::logic_error, "Error cannot add more sizes after allocated in WellContributions");
|
||||
}
|
||||
num_blocks += nnz;
|
||||
num_std_wells++;
|
||||
}
|
||||
|
||||
void WellContributions::addMultisegmentWellContribution(unsigned int dim, unsigned int dim_wells,
|
||||
unsigned int Nb, unsigned int Mb,
|
||||
unsigned int BnumBlocks, std::vector<double> &Bvalues, std::vector<unsigned int> &BcolIndices, std::vector<unsigned int> &BrowPointers,
|
||||
unsigned int DnumBlocks, double *Dvalues, int *DcolPointers, int *DrowIndices,
|
||||
std::vector<double> &Cvalues)
|
||||
{
|
||||
this->N = Nb * dim;
|
||||
MultisegmentWellContribution *well = new MultisegmentWellContribution(dim, dim_wells, Nb, Mb, BnumBlocks, Bvalues, BcolIndices, BrowPointers, DnumBlocks, Dvalues, DcolPointers, DrowIndices, Cvalues);
|
||||
multisegments.emplace_back(well);
|
||||
++num_ms_wells;
|
||||
}
|
||||
|
||||
|
||||
void WellContributions::setReordering(int *toOrder_, bool reorder_)
|
||||
{
|
||||
this->toOrder = toOrder_;
|
||||
this->reorder = reorder_;
|
||||
for (auto& ms : multisegments) {
|
||||
ms->setReordering(toOrder_, reorder_);
|
||||
}
|
||||
}
|
||||
|
||||
#if HAVE_OPENCL
|
||||
void WellContributions::setOpenCLQueue(cl::CommandQueue *queue_)
|
||||
{
|
||||
this->queue = queue_;
|
||||
}
|
||||
#endif
|
||||
|
||||
} //namespace Opm
|
||||
|
||||
|
@ -39,7 +39,8 @@ namespace Opm
|
||||
/// This class serves to eliminate the need to include the WellContributions into the matrix (with --matrix-add-well-contributions=true) for the cusparseSolver
|
||||
/// If the --matrix-add-well-contributions commandline parameter is true, this class should not be used
|
||||
/// So far, StandardWell and MultisegmentWell are supported
|
||||
/// A single instance (or pointer) of this class is passed to the cusparseSolver.
|
||||
/// StandardWells are only supported for cusparseSolver (CUDA), MultisegmentWells are supported for both cusparseSolver and openclSolver
|
||||
/// A single instance (or pointer) of this class is passed to the BdaSolver.
|
||||
/// For StandardWell, this class contains all the data and handles the computation. For MultisegmentWell, the vector 'multisegments' contains all the data. For more information, check the MultisegmentWellContribution class.
|
||||
|
||||
/// A StandardWell uses C, D and B and performs y -= (C^T * (D^-1 * (B*x)))
|
||||
@ -55,6 +56,15 @@ namespace Opm
|
||||
class WellContributions
|
||||
{
|
||||
|
||||
public:
|
||||
|
||||
/// StandardWell has C, D and B matrices that need to be copied
|
||||
enum class MatrixType {
|
||||
C,
|
||||
D,
|
||||
B
|
||||
};
|
||||
|
||||
private:
|
||||
unsigned int num_blocks = 0; // total number of blocks in all wells
|
||||
unsigned int dim; // number of columns in blocks in B and C, equal to StandardWell::numEq
|
||||
@ -76,7 +86,9 @@ private:
|
||||
cl::CommandQueue *queue = nullptr;
|
||||
#endif
|
||||
|
||||
#if HAVE_CUDA
|
||||
// data for StandardWells, could remain nullptrs if not used
|
||||
// StandardWells are only supported for cusparseSolver now
|
||||
double *d_Cnnzs = nullptr;
|
||||
double *d_Dnnzs = nullptr;
|
||||
double *d_Bnnzs = nullptr;
|
||||
@ -85,6 +97,7 @@ private:
|
||||
double *d_z1 = nullptr;
|
||||
double *d_z2 = nullptr;
|
||||
unsigned int *d_val_pointers = nullptr;
|
||||
#endif
|
||||
|
||||
double *h_x = nullptr, *h_y = nullptr; // CUDA pinned memory for GPU memcpy
|
||||
bool host_mem_cuda = false; // true iff h_x and h_y are allocated by cudaMallocHost(), so they need to be freed using cudaFreeHost()
|
||||
@ -92,14 +105,20 @@ private:
|
||||
int *toOrder = nullptr;
|
||||
bool reorder = false;
|
||||
|
||||
public:
|
||||
/// Store a matrix in this object, in blocked csr format, can only be called after alloc() is called
|
||||
/// \param[in] type indicate if C, D or B is sent
|
||||
/// \param[in] colIndices columnindices of blocks in C or B, ignored for D
|
||||
/// \param[in] values array of nonzeroes
|
||||
/// \param[in] val_size number of blocks in C or B, ignored for D
|
||||
void addMatrixGpu(MatrixType type, int *colIndices, double *values, unsigned int val_size);
|
||||
|
||||
/// StandardWell has C, D and B matrices that need to be copied
|
||||
enum class MatrixType {
|
||||
C,
|
||||
D,
|
||||
B
|
||||
};
|
||||
/// Allocate GPU memory for StandardWells
|
||||
void allocStandardWells();
|
||||
|
||||
/// Free GPU memory from StandardWells
|
||||
void freeStandardWells();
|
||||
|
||||
public:
|
||||
|
||||
/// Set a cudaStream to be used
|
||||
/// \param[in] stream the cudaStream that is used to launch the kernel in
|
||||
|
Loading…
Reference in New Issue
Block a user