Make sure OpenCL can be used without CUDA

This commit is contained in:
T.D. (Tongdong) Qiu 2020-06-25 18:44:49 +02:00
parent b7f47c9787
commit 8913e1d057
14 changed files with 44 additions and 12 deletions

View File

@ -138,8 +138,16 @@ endif()
find_package(OpenCL)
if(OpenCL_FOUND)
# the current OpenCL implementation relies on cl.hpp, not cl2.hpp
# make sure it is available, otherwise disable OpenCL
find_file(CL_HPP CL/cl.hpp HINTS ${OpenCL_INCLUDE_DIRS})
if(CL_HPP)
set(HAVE_OPENCL 1)
include_directories(${OpenCL_INCLUDE_DIRS})
else()
message(WARNING " OpenCL was found, but this version of opm-simulators relies on CL/cl.hpp, which implements OpenCL 1.0, 1.1 and 1.2.\n Deactivating OpenCL")
set(OpenCL_FOUND OFF)
endif()
endif()
# read the list of components from this file (in the project directory);

View File

@ -52,6 +52,8 @@ if(OPENCL_FOUND)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/Reorder.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/openclSolverBackend.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cu)
list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/MultisegmentWellContribution.cpp)
endif()
if(MPI_FOUND)
list(APPEND MAIN_SOURCE_FILES opm/simulators/utils/ParallelEclipseState.cpp

View File

@ -17,6 +17,7 @@
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <opm/common/OpmLog/OpmLog.hpp>
#include <opm/common/ErrorMacros.hpp>

View File

@ -112,10 +112,12 @@ void MultisegmentWellContribution::apply(double *h_x, double *h_y)
}
}
#if HAVE_CUDA
void MultisegmentWellContribution::setCudaStream(cudaStream_t stream_)
{
stream = stream_;
}
#endif
unsigned int MultisegmentWellContribution::getColIdx(unsigned int idx)
{

View File

@ -22,7 +22,7 @@
#include <cstdlib>
#include <cstring>
#ifdef __NVCC__
#if HAVE_CUDA
#include "opm/simulators/linalg/bda/cuda_header.hpp"
#include <cuda_runtime.h>
#endif
@ -35,6 +35,7 @@ namespace Opm
{
// apply WellContributions using y -= C^T * (D^-1 * (B * x))
#if HAVE_CUDA
__global__ void apply_well_contributions(
const double * __restrict__ Cnnzs,
const double * __restrict__ Dnnzs,
@ -126,10 +127,12 @@ __global__ void apply_well_contributions(
}
}
#endif
void WellContributions::alloc()
{
if (num_std_wells > 0) {
#if HAVE_CUDA
cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells);
cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_std_wells * dim_wells * dim_wells);
cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells);
@ -139,6 +142,7 @@ void WellContributions::alloc()
cudaMalloc((void**)&d_val_pointers, sizeof(int) * (num_std_wells + 1));
cudaCheckLastError("apply_gpu malloc failed");
allocated = true;
#endif
}
}
@ -147,8 +151,10 @@ WellContributions::~WellContributions()
// free pinned memory for MultisegmentWellContributions
if (h_x) {
if (host_mem_cuda) {
#if HAVE_CUDA
cudaFreeHost(h_x);
cudaFreeHost(h_y);
#endif
} else {
delete[] h_x;
delete[] h_y;
@ -163,6 +169,7 @@ WellContributions::~WellContributions()
// delete data for StandardWell
if (num_std_wells > 0) {
#if HAVE_CUDA
cudaFree(d_Cnnzs);
cudaFree(d_Dnnzs);
cudaFree(d_Bnnzs);
@ -170,12 +177,14 @@ WellContributions::~WellContributions()
cudaFree(d_Bcols);
delete[] val_pointers;
cudaFree(d_val_pointers);
#endif
}
}
// Apply the WellContributions, similar to StandardWell::apply()
// y -= (C^T *(D^-1*( B*x)))
#if HAVE_CUDA
void WellContributions::apply(double *d_x, double *d_y)
{
// apply MultisegmentWells
@ -212,6 +221,7 @@ void WellContributions::apply(double *d_x, double *d_y)
apply_well_contributions <<< num_std_wells, 32, smem_size, stream>>>(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_x, d_y, dim, dim_wells, d_val_pointers);
}
}
#endif
#if HAVE_OPENCL
void WellContributions::apply(cl::Buffer& d_x, cl::Buffer& d_y) {
@ -252,6 +262,7 @@ void WellContributions::addMatrix(MatrixType type, int *colIndices, double *valu
OPM_THROW(std::logic_error, "Error cannot add wellcontribution before allocating memory in WellContributions");
}
#if HAVE_CUDA
switch (type) {
case MatrixType::C:
cudaMemcpy(d_Cnnzs + num_blocks_so_far * dim * dim_wells, values, sizeof(double) * val_size * dim * dim_wells, cudaMemcpyHostToDevice);
@ -277,8 +288,12 @@ void WellContributions::addMatrix(MatrixType type, int *colIndices, double *valu
num_blocks_so_far += val_size;
num_std_wells_so_far++;
}
#else
OPM_THROW(std::logic_error, "Error cannot use StandardWells since CUDA was not found");
#endif
}
#if HAVE_CUDA
void WellContributions::setCudaStream(cudaStream_t stream_)
{
this->stream = stream_;
@ -286,6 +301,7 @@ void WellContributions::setCudaStream(cudaStream_t stream_)
well->setCudaStream(stream_);
}
}
#endif
void WellContributions::setBlockSize(unsigned int dim_, unsigned int dim_wells_)
{

View File

@ -104,7 +104,9 @@ public:
/// Set a cudaStream to be used
/// \param[in] stream the cudaStream that is used to launch the kernel in
#if HAVE_CUDA
void setCudaStream(cudaStream_t stream);
#endif
/// Create a new WellContributions, implementation is empty
WellContributions() {};
@ -116,7 +118,9 @@ public:
/// performs y -= (C^T * (D^-1 * (B*x))) for all Wells
/// \param[in] d_x vector x, must be on GPU
/// \param[inout] d_y vector y, must be on GPU
#if HAVE_CUDA
void apply(double *d_x, double *d_y);
#endif
#if HAVE_OPENCL
void apply(cl::Buffer& x, cl::Buffer& y);
#endif

View File

@ -17,9 +17,7 @@
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __NVCC__
#error "Cannot compile for cusparse: NVIDIA compiler not found"
#endif
#include <config.h>
#include <cuda_runtime.h>
#include <sstream>

View File

@ -17,6 +17,7 @@
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <cmath>
#include <sstream>

View File

@ -235,7 +235,7 @@ namespace Opm {
// subtract B*inv(D)*C * x from A*x
void apply(const BVector& x, BVector& Ax) const;
#if HAVE_CUDA
#if HAVE_CUDA || HAVE_OPENCL
// accumulate the contributions of all Wells in the WellContributions object
void getWellContributions(WellContributions& x) const;
#endif

View File

@ -901,7 +901,7 @@ namespace Opm {
}
}
#if HAVE_CUDA
#if HAVE_CUDA || HAVE_OPENCL
template<typename TypeTag>
void
BlackoilWellModel<TypeTag>::

View File

@ -135,7 +135,7 @@ namespace Opm
/// r = r - C D^-1 Rw
virtual void apply(BVector& r) const override;
#if HAVE_CUDA
#if HAVE_CUDA || HAVE_OPENCL
/// add the contribution (C, D, B matrices) of this Well to the WellContributions object
void addWellContribution(WellContributions& wellContribs) const;
#endif

View File

@ -640,7 +640,7 @@ namespace Opm
#if HAVE_CUDA
#if HAVE_CUDA || HAVE_OPENCL
template<typename TypeTag>
void
MultisegmentWell<TypeTag>::

View File

@ -185,7 +185,7 @@ namespace Opm
/// r = r - C D^-1 Rw
virtual void apply(BVector& r) const override;
#if HAVE_CUDA
#if HAVE_CUDA || HAVE_OPENCL
/// add the contribution (C, D^-1, B matrices) of this Well to the WellContributions object
void addWellContribution(WellContributions& wellContribs) const;

View File

@ -2293,7 +2293,7 @@ namespace Opm
duneC_.mmtv(invDrw_, r);
}
#if HAVE_CUDA
#if HAVE_CUDA || HAVE_OPENCL
template<typename TypeTag>
void
StandardWell<TypeTag>::