Make sure OpenCL can be used without CUDA

2025-02-25 18:55:30 -06:00 · 2020-06-25 18:44:49 +02:00 · 2020-06-25 18:44:49 +02:00 · 8913e1d057
commit 8913e1d057
parent b7f47c9787
14 changed files with 44 additions and 12 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -138,8 +138,16 @@ endif()
 find_package(OpenCL)

 if(OpenCL_FOUND)
+  # the current OpenCL implementation relies on cl.hpp, not cl2.hpp
+  # make sure it is available, otherwise disable OpenCL
+  find_file(CL_HPP CL/cl.hpp HINTS ${OpenCL_INCLUDE_DIRS})
+  if(CL_HPP)
    set(HAVE_OPENCL 1)
    include_directories(${OpenCL_INCLUDE_DIRS})
+  else()
+    message(WARNING " OpenCL was found, but this version of opm-simulators relies on CL/cl.hpp, which implements OpenCL 1.0, 1.1 and 1.2.\n Deactivating OpenCL")
+    set(OpenCL_FOUND OFF)
+  endif()
 endif()

 # read the list of components from this file (in the project directory);
--- a/CMakeLists_files.cmake
+++ b/CMakeLists_files.cmake
@ -52,6 +52,8 @@ if(OPENCL_FOUND)
  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/Reorder.cpp)
  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/openclSolverBackend.cpp)
  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/BdaBridge.cpp)
+  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/WellContributions.cu)
+  list (APPEND MAIN_SOURCE_FILES opm/simulators/linalg/bda/MultisegmentWellContribution.cpp)
 endif()
 if(MPI_FOUND)
  list(APPEND MAIN_SOURCE_FILES opm/simulators/utils/ParallelEclipseState.cpp
--- a/opm/simulators/linalg/bda/BILU0.cpp
+++ b/opm/simulators/linalg/bda/BILU0.cpp
@ -17,6 +17,7 @@
  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
 */

+#include <config.h>
 #include <opm/common/OpmLog/OpmLog.hpp>
 #include <opm/common/ErrorMacros.hpp>

--- a/opm/simulators/linalg/bda/MultisegmentWellContribution.cpp
+++ b/opm/simulators/linalg/bda/MultisegmentWellContribution.cpp
@ -112,10 +112,12 @@ void MultisegmentWellContribution::apply(double *h_x, double *h_y)
    }
 }

+#if HAVE_CUDA
 void MultisegmentWellContribution::setCudaStream(cudaStream_t stream_)
 {
    stream = stream_;
 }
+#endif

 unsigned int MultisegmentWellContribution::getColIdx(unsigned int idx)
 {
--- a/opm/simulators/linalg/bda/WellContributions.cu
+++ b/opm/simulators/linalg/bda/WellContributions.cu
@ -22,7 +22,7 @@
 #include <cstdlib>
 #include <cstring>

-#ifdef __NVCC__
+#if HAVE_CUDA
 #include "opm/simulators/linalg/bda/cuda_header.hpp"
 #include <cuda_runtime.h>
 #endif
@ -35,6 +35,7 @@ namespace Opm
 {

 // apply WellContributions using y -= C^T * (D^-1 * (B * x))
+#if HAVE_CUDA
 __global__ void apply_well_contributions(
    const double * __restrict__ Cnnzs,
    const double * __restrict__ Dnnzs,
@ -126,10 +127,12 @@ __global__ void apply_well_contributions(
    }

 }
+#endif

 void WellContributions::alloc()
 {
    if (num_std_wells > 0) {
+#if HAVE_CUDA
        cudaMalloc((void**)&d_Cnnzs, sizeof(double) * num_blocks * dim * dim_wells);
        cudaMalloc((void**)&d_Dnnzs, sizeof(double) * num_std_wells * dim_wells * dim_wells);
        cudaMalloc((void**)&d_Bnnzs, sizeof(double) * num_blocks * dim * dim_wells);
@ -139,6 +142,7 @@ void WellContributions::alloc()
        cudaMalloc((void**)&d_val_pointers, sizeof(int) * (num_std_wells + 1));
        cudaCheckLastError("apply_gpu malloc failed");
        allocated = true;
+#endif
    }
 }

@ -147,8 +151,10 @@ WellContributions::~WellContributions()
    // free pinned memory for MultisegmentWellContributions
    if (h_x) {
        if (host_mem_cuda) {
+#if HAVE_CUDA
            cudaFreeHost(h_x);
            cudaFreeHost(h_y);
+#endif
        } else {
            delete[] h_x;
            delete[] h_y;
@ -163,6 +169,7 @@ WellContributions::~WellContributions()

    // delete data for StandardWell
    if (num_std_wells > 0) {
+#if HAVE_CUDA
        cudaFree(d_Cnnzs);
        cudaFree(d_Dnnzs);
        cudaFree(d_Bnnzs);
@ -170,12 +177,14 @@ WellContributions::~WellContributions()
        cudaFree(d_Bcols);
        delete[] val_pointers;
        cudaFree(d_val_pointers);
+#endif
    }
 }


 // Apply the WellContributions, similar to StandardWell::apply()
 // y -= (C^T *(D^-1*(   B*x)))
+#if HAVE_CUDA
 void WellContributions::apply(double *d_x, double *d_y)
 {
    // apply MultisegmentWells
@ -212,6 +221,7 @@ void WellContributions::apply(double *d_x, double *d_y)
        apply_well_contributions <<< num_std_wells, 32, smem_size, stream>>>(d_Cnnzs, d_Dnnzs, d_Bnnzs, d_Ccols, d_Bcols, d_x, d_y, dim, dim_wells, d_val_pointers);
    }
 }
+#endif

 #if HAVE_OPENCL
 void WellContributions::apply(cl::Buffer& d_x, cl::Buffer& d_y) {
@ -252,6 +262,7 @@ void WellContributions::addMatrix(MatrixType type, int *colIndices, double *valu
        OPM_THROW(std::logic_error, "Error cannot add wellcontribution before allocating memory in WellContributions");
    }

+#if HAVE_CUDA
    switch (type) {
    case MatrixType::C:
        cudaMemcpy(d_Cnnzs + num_blocks_so_far * dim * dim_wells, values, sizeof(double) * val_size * dim * dim_wells, cudaMemcpyHostToDevice);
@ -277,8 +288,12 @@ void WellContributions::addMatrix(MatrixType type, int *colIndices, double *valu
        num_blocks_so_far += val_size;
        num_std_wells_so_far++;
    }
+#else
+    OPM_THROW(std::logic_error, "Error cannot use StandardWells since CUDA was not found");
+#endif
 }

+#if HAVE_CUDA
 void WellContributions::setCudaStream(cudaStream_t stream_)
 {
    this->stream = stream_;
@ -286,6 +301,7 @@ void WellContributions::setCudaStream(cudaStream_t stream_)
        well->setCudaStream(stream_);
    }
 }
+#endif

 void WellContributions::setBlockSize(unsigned int dim_, unsigned int dim_wells_)
 {
--- a/opm/simulators/linalg/bda/WellContributions.hpp
+++ b/opm/simulators/linalg/bda/WellContributions.hpp
@ -104,7 +104,9 @@ public:

    /// Set a cudaStream to be used
    /// \param[in] stream           the cudaStream that is used to launch the kernel in
+#if HAVE_CUDA
    void setCudaStream(cudaStream_t stream);
+#endif

    /// Create a new WellContributions, implementation is empty
    WellContributions() {};
@ -116,7 +118,9 @@ public:
    /// performs y -= (C^T * (D^-1 * (B*x))) for all Wells
    /// \param[in] d_x        vector x, must be on GPU
    /// \param[inout] d_y     vector y, must be on GPU
+#if HAVE_CUDA
    void apply(double *d_x, double *d_y);
+#endif
 #if HAVE_OPENCL
    void apply(cl::Buffer& x, cl::Buffer& y);
 #endif
--- a/opm/simulators/linalg/bda/cusparseSolverBackend.cu
+++ b/opm/simulators/linalg/bda/cusparseSolverBackend.cu
@ -17,9 +17,7 @@
  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
 */

-#ifndef __NVCC__
-#error "Cannot compile for cusparse: NVIDIA compiler not found"
-#endif
+#include <config.h>

 #include <cuda_runtime.h>
 #include <sstream>
--- a/opm/simulators/linalg/bda/openclSolverBackend.cpp
+++ b/opm/simulators/linalg/bda/openclSolverBackend.cpp
@ -17,6 +17,7 @@
  along with OPM.  If not, see <http://www.gnu.org/licenses/>.
 */

+#include <config.h>
 #include <cmath>
 #include <sstream>

--- a/opm/simulators/wells/BlackoilWellModel.hpp
+++ b/opm/simulators/wells/BlackoilWellModel.hpp
@ -235,7 +235,7 @@ namespace Opm {
            // subtract B*inv(D)*C * x from A*x
            void apply(const BVector& x, BVector& Ax) const;

-#if HAVE_CUDA
+#if HAVE_CUDA || HAVE_OPENCL
            // accumulate the contributions of all Wells in the WellContributions object
            void getWellContributions(WellContributions& x) const;
 #endif
--- a/opm/simulators/wells/BlackoilWellModel_impl.hpp
+++ b/opm/simulators/wells/BlackoilWellModel_impl.hpp
@ -901,7 +901,7 @@ namespace Opm {
        }
    }

-#if HAVE_CUDA
+#if HAVE_CUDA || HAVE_OPENCL
    template<typename TypeTag>
    void
    BlackoilWellModel<TypeTag>::
--- a/opm/simulators/wells/MultisegmentWell.hpp
+++ b/opm/simulators/wells/MultisegmentWell.hpp
@ -135,7 +135,7 @@ namespace Opm
        /// r = r - C D^-1 Rw
        virtual void apply(BVector& r) const override;

-#if HAVE_CUDA
+#if HAVE_CUDA || HAVE_OPENCL
        /// add the contribution (C, D, B matrices) of this Well to the WellContributions object
        void addWellContribution(WellContributions& wellContribs) const;
 #endif
--- a/opm/simulators/wells/MultisegmentWell_impl.hpp
+++ b/opm/simulators/wells/MultisegmentWell_impl.hpp
@ -640,7 +640,7 @@ namespace Opm



-#if HAVE_CUDA
+#if HAVE_CUDA || HAVE_OPENCL
    template<typename TypeTag>
    void
    MultisegmentWell<TypeTag>::
--- a/opm/simulators/wells/StandardWell.hpp
+++ b/opm/simulators/wells/StandardWell.hpp
@ -185,7 +185,7 @@ namespace Opm
        /// r = r - C D^-1 Rw
        virtual void apply(BVector& r) const override;

-#if HAVE_CUDA
+#if HAVE_CUDA || HAVE_OPENCL
        /// add the contribution (C, D^-1, B matrices) of this Well to the WellContributions object
        void addWellContribution(WellContributions& wellContribs) const;

--- a/opm/simulators/wells/StandardWell_impl.hpp
+++ b/opm/simulators/wells/StandardWell_impl.hpp
@ -2293,7 +2293,7 @@ namespace Opm
        duneC_.mmtv(invDrw_, r);
    }

-#if HAVE_CUDA
+#if HAVE_CUDA || HAVE_OPENCL
    template<typename TypeTag>
    void
    StandardWell<TypeTag>::