Fixing compile errors with HIP
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
SET( HIP_SEPERABLE_COMPILATION ON )
|
||||
SET_SOURCE_FILES_PROPERTIES( BGK.cu Color.cu CudaExtras.cu D3Q19.cu D3Q7.cu dfh.cu Extras.cu MRT.hip PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 )
|
||||
HIP_ADD_LIBRARY( lbpm-hip BGK.cu Color.cu CudaExtras.cu D3Q19.cu D3Q7.cu dfh.cu Extras.cu MRT.cu SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} )
|
||||
FILE( GLOB HIP_SOURCES "*.cu" )
|
||||
SET_SOURCE_FILES_PROPERTIES( ${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 )
|
||||
HIP_ADD_LIBRARY( lbpm-hip ${HIP_SOURCES} SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} )
|
||||
#TARGET_LINK_LIBRARIES( lbpm-hip /opt/rocm-3.3.0/lib/libhip_hcc.so )
|
||||
#TARGET_LINK_LIBRARIES( lbpm-wia lbpm-hip )
|
||||
#ADD_DEPENDENCIES( lbpm-hip copy-include )
|
||||
|
||||
72
hip/D3Q19.cu
72
hip/D3Q19.cu
@@ -89,9 +89,25 @@ __global__ void sum_kernel_block(double *sum, double *input, int n)
|
||||
|
||||
__inline__ __device__
|
||||
double warpReduceSum(double val) {
|
||||
#if 0
|
||||
for (int offset = warpSize/2; offset > 0; offset /= 2)
|
||||
val += __shfl_down_sync(0xFFFFFFFF, val, offset, 32);
|
||||
return val;
|
||||
#else
|
||||
short int id = threadIdx.x % warpSize;
|
||||
__shared__ double tmp[64];
|
||||
tmp[id] = val;
|
||||
__syncthreads();
|
||||
if ( warpSize == 64) {
|
||||
tmp[id] += tmp[id+32]; __syncthreads();
|
||||
}
|
||||
tmp[id] += tmp[id+16]; __syncthreads();
|
||||
tmp[id] += tmp[id+8]; __syncthreads();
|
||||
tmp[id] += tmp[id+4]; __syncthreads();
|
||||
tmp[id] += tmp[id+2]; __syncthreads();
|
||||
tmp[id] += tmp[id+1]; __syncthreads();
|
||||
return tmp[0];
|
||||
#endif
|
||||
}
|
||||
|
||||
__inline__ __device__
|
||||
@@ -1730,6 +1746,44 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Pressure_BC_Z(int *list, double *dist,
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){
|
||||
int idx, n;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
double f5 = 0.111111111111111111111111 - dist[6*Np+n];
|
||||
double f11 = 0.05555555555555555555556 - dist[12*Np+n];
|
||||
double f14 = 0.05555555555555555555556 - dist[13*Np+n];
|
||||
double f15 = 0.05555555555555555555556 - dist[16*Np+n];
|
||||
double f18 = 0.05555555555555555555556 - dist[17*Np+n];
|
||||
|
||||
dist[6*Np+n] = f5;
|
||||
dist[12*Np+n] = f11;
|
||||
dist[13*Np+n] = f14;
|
||||
dist[16*Np+n] = f15;
|
||||
dist[17*Np+n] = f18;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){
|
||||
int idx, n;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
double f6 = 0.111111111111111111111111 - dist[5*Np+n];
|
||||
double f12 = 0.05555555555555555555556 - dist[11*Np+n];
|
||||
double f13 = 0.05555555555555555555556 - dist[14*Np+n] ;
|
||||
double f16 = 0.05555555555555555555556 - dist[15*Np+n];
|
||||
double f17 = 0.05555555555555555555556 - dist[18*Np+n];
|
||||
|
||||
dist[5*Np+n] = f6;
|
||||
dist[11*Np+n] = f12;
|
||||
dist[14*Np+n] = f13;
|
||||
dist[15*Np+n] = f16;
|
||||
dist[18*Np+n] = f17;
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_AAodd_Pressure_BC_z(int *d_neighborList, int *list, double *dist, double din, int count, int Np)
|
||||
{
|
||||
int idx, n;
|
||||
@@ -2605,6 +2659,24 @@ extern "C" double ScaLBL_D3Q19_Flux_BC_Z(double *disteven, double *distodd, doub
|
||||
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_Reflection_BC_z(int *list, double *dist, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q19_Reflection_BC_z<<<GRID,512>>>(list, dist, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("HIP error in ScaLBL_D3Q19_Reflection_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_Reflection_BC_Z(int *list, double *dist, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q19_Reflection_BC_Z<<<GRID,512>>>(list, dist, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("HIP error in ScaLBL_D3Q19_Reflection_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" double deviceReduce(double *in, double* out, int N) {
|
||||
int threads = 512;
|
||||
int blocks = min((N + threads - 1) / threads, 1024);
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
/* Implement Mixed Gradient (Lee et al. JCP 2016)*/
|
||||
#include <cuda.h>
|
||||
#include <stdio.h>
|
||||
//#include <cuda_profiler_api.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
@@ -10,7 +9,7 @@
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz)
|
||||
{
|
||||
static int D3Q19[18][3]={{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1},
|
||||
static const int D3Q19[18][3]={{1,0,0},{-1,0,0},{0,1,0},{0,-1,0},{0,0,1},{0,0,-1},
|
||||
{1,1,0},{-1,-1,0},{1,-1,0},{-1,1,0},
|
||||
{1,0,1},{-1,0,-1},{1,0,-1},{-1,0,1},
|
||||
{0,1,1},{0,-1,-1},{0,1,-1},{0,-1,1}};
|
||||
@@ -66,13 +65,13 @@ __global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gr
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz)
|
||||
{
|
||||
cudaProfilerStart();
|
||||
hipProfilerStart();
|
||||
dvc_ScaLBL_D3Q19_MixedGradient<<<NBLOCKS,NTHREADS >>>(Map, Phi, Gradient, start, finish, Np, Nx, Ny, Nz);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q19_MixedGradient: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
cudaProfilerStop();
|
||||
hipProfilerStop();
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user