From d17c59d7728ec9c848982b4613badcdc8e41951f Mon Sep 17 00:00:00 2001 From: James E McClure Date: Tue, 28 Aug 2018 20:45:12 -0400 Subject: [PATCH] atomic add for GPU <6.0 --- gpu/D3Q19.cu | 14 ++++++++++++++ gpu/dfh.cu | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/gpu/D3Q19.cu b/gpu/D3Q19.cu index d4c7e65c..d816c4c0 100644 --- a/gpu/D3Q19.cu +++ b/gpu/D3Q19.cu @@ -30,6 +30,20 @@ __constant__ __device__ double mrt_V12=0.04166666666666666; //__shared__ double Transform[722]= // {}; +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 +#else +__device__ double atomicAdd(double* address, double val) { + unsigned long long int* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val+__longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); +} +#endif + using namespace cooperative_groups; __device__ double reduce_sum(thread_group g, double *temp, double val) { diff --git a/gpu/dfh.cu b/gpu/dfh.cu index 0a19d9f7..dc0ab0af 100644 --- a/gpu/dfh.cu +++ b/gpu/dfh.cu @@ -5,6 +5,20 @@ #define NBLOCKS 1024 #define NTHREADS 256 +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 +#else +__device__ double atomicAdd(double* address, double val) { + unsigned long long int* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull, assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val+__longlong_as_double(assumed))); + } while (assumed != old); + return __longlong_as_double(old); +} +#endif + __global__ void dvc_ScaLBL_Gradient_Unpack(double weight, double Cqx, double Cqy, double Cqz, int *list, int start, int count, double *recvbuf, double *phi, double *grad, int N){ //....................................................................................