From d17c59d7728ec9c848982b4613badcdc8e41951f Mon Sep 17 00:00:00 2001
From: James E McClure <mcclurej@vt.edu>
Date: Tue, 28 Aug 2018 20:45:12 -0400
Subject: [PATCH] atomic add for GPU <6.0

---
 gpu/D3Q19.cu | 14 ++++++++++++++
 gpu/dfh.cu   | 14 ++++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/gpu/D3Q19.cu b/gpu/D3Q19.cu
index d4c7e65c..d816c4c0 100644
--- a/gpu/D3Q19.cu
+++ b/gpu/D3Q19.cu
@@ -30,6 +30,20 @@ __constant__ __device__ double mrt_V12=0.04166666666666666;
 //__shared__ double Transform[722]=
 //	   {};
 
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
+#else
+__device__ double atomicAdd(double* address, double val) { 
+   unsigned long long int* address_as_ull = (unsigned long long int*)address;
+   unsigned long long int old = *address_as_ull, assumed;
+
+   do {
+      assumed = old;
+      old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val+__longlong_as_double(assumed)));
+   } while (assumed != old);
+   return __longlong_as_double(old);
+}
+#endif
+
 using namespace cooperative_groups;
 __device__ double reduce_sum(thread_group g, double *temp, double val)
 {
diff --git a/gpu/dfh.cu b/gpu/dfh.cu
index 0a19d9f7..dc0ab0af 100644
--- a/gpu/dfh.cu
+++ b/gpu/dfh.cu
@@ -5,6 +5,20 @@
 #define NBLOCKS 1024
 #define NTHREADS 256
 
+#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
+#else
+__device__ double atomicAdd(double* address, double val) { 
+   unsigned long long int* address_as_ull = (unsigned long long int*)address;
+   unsigned long long int old = *address_as_ull, assumed;
+
+   do {
+      assumed = old;
+      old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val+__longlong_as_double(assumed)));
+   } while (assumed != old);
+   return __longlong_as_double(old);
+}
+#endif
+
 __global__ void dvc_ScaLBL_Gradient_Unpack(double weight, double Cqx, double Cqy, double Cqz, 
 		int *list, int start, int count, double *recvbuf, double *phi, double *grad, int N){
 	//....................................................................................