Added atomicAdd for double, needed on Titan (can be reverted for later GPUs)
commit 54b8a6bf0a
parent 0b77f9b027
 gpu/D3Q19.cu | 13 +++++++++++++
@@ -7,6 +7,19 @@
 // functionality for parallel reduction in Flux BC routines -- probably should be re-factored to another location
 // functions copied from https://devblogs.nvidia.com/parallelforall/faster-parallel-reductions-kepler/
 
+__device__ double atomicAdd(double* address, double val)
+{
+	unsigned long long int* address_as_ull = (unsigned long long int*) address;
+
+	unsigned long long int old = *address_as_ull, assumed;
+
+	do{ assumed = old;
+		old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
+	} while (assumed != old);
+
+	return __longlong_as_double(old);
+}
+
 __inline__ __device__
 double warpReduceSum(double val) {
 	for (int offset = warpSize/2; offset > 0; offset /= 2)
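Note on the "can be reverted for later GPUs" remark: Titan's Kepler GPUs (compute capability 3.5) have no native double-precision atomicAdd, while CUDA 8.0+ declares one for compute capability 6.0 and newer, so this hand-rolled definition would collide with the built-in there. A minimal sketch of how the definition could be wrapped in an architecture guard instead of being reverted by hand (the guard is a suggestion, not part of this commit):

// Provide double-precision atomicAdd only where the hardware lacks it;
// on compute capability >= 6.0 the CUDA built-in is used instead.
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 600
__device__ double atomicAdd(double* address, double val)
{
	unsigned long long int* address_as_ull = (unsigned long long int*) address;
	unsigned long long int old = *address_as_ull, assumed;
	do {
		assumed = old;
		old = atomicCAS(address_as_ull, assumed,
		                __double_as_longlong(val + __longlong_as_double(assumed)));
		// integer comparison avoids an endless loop if *address holds a NaN
	} while (assumed != old);
	return __longlong_as_double(old);
}
#endif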
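For context, the linked blog post combines warpReduceSum with atomicAdd to finish a grid-wide sum. The body of warpReduceSum is cut off by this hunk, so the sketch below follows the blog's pattern rather than the exact Flux BC code in this file: it uses the CUDA 9+ __shfl_down_sync intrinsic (older toolkits used __shfl_down), and the kernel name deviceReduceWarpAtomicKernel comes from the blog, not from D3Q19.cu.

__inline__ __device__
double warpReduceSum(double val) {
	// butterfly-style reduction within a warp: each step halves the stride
	for (int offset = warpSize/2; offset > 0; offset /= 2)
		val += __shfl_down_sync(0xffffffff, val, offset);
	return val;
}

__global__ void deviceReduceWarpAtomicKernel(const double *in, double *out, int N) {
	double sum = 0.0;
	// grid-stride loop: each thread accumulates a private partial sum
	for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; i += blockDim.x * gridDim.x)
		sum += in[i];
	sum = warpReduceSum(sum);
	// lane 0 of every warp publishes its partial sum with one atomicAdd
	if ((threadIdx.x & (warpSize - 1)) == 0)
		atomicAdd(out, sum);
}

Here *out is assumed to be zeroed before launch; on pre-6.0 hardware the atomicAdd call resolves to the custom definition added by this commit.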