// Basic CUDA functions callable from C/C++ code
#include <cuda.h>
#include <cuda_runtime.h> // runtime API: cudaMalloc, cudaMemcpy, cudaMemset, cudaDeviceSynchronize
extern "C" void dvc_AllocateDeviceMemory(void** address, size_t size){
|
2013-08-26 15:12:25 -04:00
|
|
|
cudaMalloc(address,size);
|
2015-09-01 21:51:41 -04:00
|
|
|
cudaMemset(*address,0,size);
|
2013-08-26 15:12:25 -04:00
|
|
|
}
extern "C" void dvc_CopyToDevice(void* dest, void* source, size_t size){
|
2013-08-26 15:12:25 -04:00
|
|
|
cudaMemcpy(dest,source,size,cudaMemcpyHostToDevice);
|
|
|
|
|
}
extern "C" void dvc_CopyToHost(void* dest, void* source, size_t size){
|
2013-08-26 15:12:25 -04:00
|
|
|
cudaMemcpy(dest,source,size,cudaMemcpyDeviceToHost);
|
|
|
|
|
}
extern "C" void dvc_Barrier(){
|
|
|
|
|
cudaDeviceSynchronize();
|
|
|
|
|
}
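
/*
  Usage sketch (illustration only, not part of the original interface): how the
  wrappers above could be combined from host code. The buffer size N, the
  example_roundtrip name, and the direct call to cudaFree for cleanup are
  assumptions made for this example.

	void example_roundtrip(){
		const size_t N = 1024;
		double host_in[N], host_out[N];
		for (size_t i=0; i<N; i++) host_in[i] = (double) i;

		void *dev_buf = NULL;
		dvc_AllocateDeviceMemory(&dev_buf, N*sizeof(double)); // allocate and zero a device buffer
		dvc_CopyToDevice(dev_buf, host_in, N*sizeof(double)); // host -> device
		// ... launch device kernels operating on dev_buf here ...
		dvc_Barrier();                                        // wait for the device to finish
		dvc_CopyToHost(host_out, dev_buf, N*sizeof(double));  // device -> host
		cudaFree(dev_buf);                                    // no dvc_ wrapper exists for freeing
	}
*/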
/*
#if __CUDA_ARCH__ < 600
__device__ double atomicAdd(double* address, double val) {
	unsigned long long int* address_as_ull = (unsigned long long int*)address;
	unsigned long long int old = *address_as_ull, assumed;
	do {
		assumed = old;
		old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val + __longlong_as_double(assumed)));
		// Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN)
	} while (assumed != old);
	return __longlong_as_double(old);
}
#endif
*/
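
/*
  Sketch (illustration only): the commented-out block above is the standard
  CAS-loop emulation of atomicAdd for doubles, needed only on devices with
  compute capability below 6.0; newer GPUs provide atomicAdd(double*, double)
  in hardware. A kernel that would rely on it might accumulate a global sum
  as below; the name dvc_SumKernel is invented for this example.

	__global__ void dvc_SumKernel(double *sum, const double *data, int n){
		int i = blockIdx.x*blockDim.x + threadIdx.x;
		if (i < n) atomicAdd(sum, data[i]); // double-precision atomic accumulation
	}
*/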