From c28a12636ac56c24bb77945e2147d27cb9e0ce0d Mon Sep 17 00:00:00 2001
From: Markus Blatt <markus@dr-blatt.de>
Date: Tue, 4 Aug 2020 12:49:35 +0200
Subject: [PATCH] Free memory allocated with CUDA using CUDA as well

With multisegment wells we allocate WellContributions::h_x and h_y with
`cudaMallocHost`. Yet we tried to deallocate them with
`delete[]`. This caused segmentation faults, e.g. for
model1/MSW_MODEL_1. Now we use `cudaFreeHost` for freeing them if we
used CUDA.

Closes #2719
---
 opm/simulators/linalg/bda/WellContributions.cpp | 7 +++----
 opm/simulators/linalg/bda/WellContributions.cu  | 9 +++++++--
 opm/simulators/linalg/bda/WellContributions.hpp | 5 ++---
 3 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/opm/simulators/linalg/bda/WellContributions.cpp b/opm/simulators/linalg/bda/WellContributions.cpp
index 1289bcce5..1533003de 100644
--- a/opm/simulators/linalg/bda/WellContributions.cpp
+++ b/opm/simulators/linalg/bda/WellContributions.cpp
@@ -43,6 +43,9 @@ void WellContributions::alloc()
 
 WellContributions::~WellContributions()
 {
+#if HAVE_CUDA
+    freeCudaMemory();
+#endif
     if (h_x) {
         delete[] h_x;
         delete[] h_y;
@@ -53,10 +56,6 @@ WellContributions::~WellContributions()
         delete ms;
     }
     multisegments.clear();
-
-#if HAVE_CUDA
-    freeStandardWells();
-#endif
 }
 
 
diff --git a/opm/simulators/linalg/bda/WellContributions.cu b/opm/simulators/linalg/bda/WellContributions.cu
index 60cf44b95..bf9417a98 100644
--- a/opm/simulators/linalg/bda/WellContributions.cu
+++ b/opm/simulators/linalg/bda/WellContributions.cu
@@ -140,7 +140,7 @@ void WellContributions::allocStandardWells()
     }
 }
 
-void WellContributions::freeStandardWells() {
+void WellContributions::freeCudaMemory() {
     // delete data for StandardWell
     if (num_std_wells > 0) {
         cudaFree(d_Cnnzs);
@@ -151,6 +151,12 @@ void WellContributions::freeStandardWells() {
         delete[] val_pointers;
         cudaFree(d_val_pointers);
     }
+
+    if (num_ms_wells > 0 && h_x) {
+        cudaFreeHost(h_x);
+	cudaFreeHost(h_y);
+	h_x = h_y = nullptr; // Mark as freed so the destructor does not delete[] them again
+    }
 }
 
 
@@ -168,7 +174,6 @@ void WellContributions::apply(double *d_x, double *d_y)
         if (h_x == nullptr) {
             cudaMallocHost(&h_x, sizeof(double) * N);
             cudaMallocHost(&h_y, sizeof(double) * N);
-            host_mem_cuda = true;
         }
 
         // copy vectors x and y from GPU to CPU
diff --git a/opm/simulators/linalg/bda/WellContributions.hpp b/opm/simulators/linalg/bda/WellContributions.hpp
index 54943e943..cd8b9064e 100644
--- a/opm/simulators/linalg/bda/WellContributions.hpp
+++ b/opm/simulators/linalg/bda/WellContributions.hpp
@@ -100,7 +100,6 @@ private:
 #endif
 
     double *h_x = nullptr, *h_y = nullptr;  // CUDA pinned memory for GPU memcpy
-    bool host_mem_cuda = false;             // true iff h_x and h_y are allocated by cudaMallocHost(), so they need to be freed using cudaFreeHost()
 
     int *toOrder = nullptr;
     bool reorder = false;
@@ -115,8 +114,8 @@ private:
     /// Allocate GPU memory for StandardWells
     void allocStandardWells();
 
-    /// Free GPU memory from StandardWells
-    void freeStandardWells();
+    /// Free memory allocated with CUDA (device buffers and pinned host buffers).
+    void freeCudaMemory();
 
 public: