update gpu routines first try

This commit is contained in:
James McClure 2022-05-12 06:54:55 -04:00
parent ad8c5f6e26
commit 0e769186a5
2 changed files with 93 additions and 111 deletions

View File

@ -91,7 +91,7 @@ __global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
// swap rule means that the distributions in recvbuf are OPPOSITE of q // swap rule means that the distributions in recvbuf are OPPOSITE of q
// dist may be even or odd distributions stored by stream layout // dist may be even or odd distributions stored by stream layout
//.................................................................................... //....................................................................................
int n, idx, link, nqm, npm, i, j, k; int n, idx, nqm, npm, i, j, k;
double distanceLocal, distanceNonlocal; double distanceLocal, distanceNonlocal;
double psiLocal, psiNonlocal, membranePotential; double psiLocal, psiNonlocal, membranePotential;
double ap,aq; // coefficient double ap,aq; // coefficient
@ -100,15 +100,15 @@ __global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
int S = (count-nlinks)/NBLOCKS/NTHREADS + 1; int S = (count-nlinks)/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){ for (int s=0; s<S; s++){
//........Get 1-D index for this thread.................... //........Get 1-D index for this thread....................
link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + nlinks; idx = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (link < count) { if (idx < count) {
// get the index for the recv list (deal with reordering of links) n = d3q7_recvlist[idx];
idx = d3q7_linkList[link]; // THINK start NEEDS TO BE HERE label = d3q7_linkList[idx];
// get the distribution index ap = 1.0; // regular streaming rule
n = d3q7_recvlist[start+idx]; aq = 1.0;
// get the index in strided layout if (label > 0 && !(n < 0)){
nqm = Map[n]; nqm = Map[n];
distanceLocal = Distance[nqm]; distanceLocal = Distance[nqm];
psiLocal = Psi[nqm]; psiLocal = Psi[nqm];
@ -140,17 +140,16 @@ __global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
aq = ThresholdMassFractionIn; aq = ThresholdMassFractionIn;
ap = ThresholdMassFractionOut; ap = ThresholdMassFractionOut;
} }
}
// update link based on mass transfer coefficients coef[2*idx]=aq;
coef[2*(link-nlinks)] = aq; coef[2*idx+1]=ap;
coef[2*(link-nlinks)+1] = ap;
} }
} }
} }
__global__ void dvc_ScaLBL_D3Q7_Membrane_Unpack(int q, __global__ void dvc_ScaLBL_D3Q7_Membrane_Unpack(int q,
int *d3q7_recvlist, int *d3q7_linkList, int start, int nlinks, int count, int *d3q7_recvlist, double *recvbuf, int count,
double *recvbuf, double *dist, int N, double *coef) { double *dist, int N, double *coef) {
//.................................................................................... //....................................................................................
// Unpack distribution from the recv buffer // Unpack distribution from the recv buffer
// Distribution q matches Cqx, Cqy, Cqz // Distribution q matches Cqx, Cqy, Cqz
@ -164,31 +163,15 @@ __global__ void dvc_ScaLBL_D3Q7_Membrane_Unpack(int q,
int S = count/NBLOCKS/NTHREADS + 1; int S = count/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){ for (int s=0; s<S; s++){
//........Get 1-D index for this thread.................... //........Get 1-D index for this thread....................
link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x; idx = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (idx < count){
/* First unpack the regular links */ n = d3q7_recvlist[idx];
if (link < nlinks) {
// get the index for the recv list (deal with reordering of links)
idx = d3q7_linkList[link];
// get the distribution index
n = d3q7_recvlist[start+idx];
if (!(n < 0)){
fp = recvbuf[start + idx];
dist[q * N + n] = fp;
}
}
else if (link < count){
/* second enforce custom rule for membrane links */
// get the index for the recv list (deal with reordering of links)
idx = d3q7_linkList[link];
// get the distribution index
n = d3q7_recvlist[start+idx];
// update link based on mass transfer coefficients // update link based on mass transfer coefficients
if (!(n < 0)){ if (!(n < 0)){
aq = coef[2*(link-nlinks)]; aq = coef[2*idx];
ap = coef[2*(link-nlinks)+1]; ap = coef[2*idx+1];
fq = dist[q * N + n]; fq = dist[q * N + n];
fp = recvbuf[start + idx]; fp = recvbuf[idx];
fqq = (1-aq)*fq+ap*fp; fqq = (1-aq)*fq+ap*fp;
dist[q * N + n] = fqq; dist[q * N + n] = fqq;
} }
@ -972,13 +955,13 @@ extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q, extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q,
int *d3q7_recvlist, int *d3q7_linkList, int start, int nlinks, int count, int *d3q7_recvlist, double *recvbuf, int count,
double *recvbuf, double *dist, int N, double *coef) { double *dist, int N, double *coef){
int GRID = count / NTHREADS + 1; int GRID = count / NTHREADS + 1;
dvc_ScaLBL_D3Q7_Membrane_Unpack<<<GRID,NTHREADS >>>(q, d3q7_recvlist, d3q7_linkList, start, nlinks, count, dvc_ScaLBL_D3Q7_Membrane_Unpack<<<GRID,NTHREADS >>>(q, d3q7_recvlist, recvbuf,count,
recvbuf, dist, N, coef) ; dist, N, coef);
cudaError_t err = cudaGetLastError(); cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){ if (cudaSuccess != err){

View File

@ -91,7 +91,7 @@ __global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
// swap rule means that the distributions in recvbuf are OPPOSITE of q // swap rule means that the distributions in recvbuf are OPPOSITE of q
// dist may be even or odd distributions stored by stream layout // dist may be even or odd distributions stored by stream layout
//.................................................................................... //....................................................................................
int n, idx, link, nqm, npm, i, j, k; int n, idx, nqm, npm, i, j, k;
double distanceLocal, distanceNonlocal; double distanceLocal, distanceNonlocal;
double psiLocal, psiNonlocal, membranePotential; double psiLocal, psiNonlocal, membranePotential;
double ap,aq; // coefficient double ap,aq; // coefficient
@ -100,15 +100,15 @@ __global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
int S = (count-nlinks)/NBLOCKS/NTHREADS + 1; int S = (count-nlinks)/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){ for (int s=0; s<S; s++){
//........Get 1-D index for this thread.................... //........Get 1-D index for this thread....................
link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + nlinks; idx = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (link < count) { if (idx < count) {
// get the index for the recv list (deal with reordering of links) n = d3q7_recvlist[idx];
idx = d3q7_linkList[link]; // THINK start NEEDS TO BE HERE label = d3q7_linkList[idx];
// get the distribution index ap = 1.0; // regular streaming rule
n = d3q7_recvlist[start+idx]; aq = 1.0;
// get the index in strided layout if (label > 0 && !(n < 0)){
nqm = Map[n]; nqm = Map[n];
distanceLocal = Distance[nqm]; distanceLocal = Distance[nqm];
psiLocal = Psi[nqm]; psiLocal = Psi[nqm];
@ -140,10 +140,9 @@ __global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
aq = ThresholdMassFractionIn; aq = ThresholdMassFractionIn;
ap = ThresholdMassFractionOut; ap = ThresholdMassFractionOut;
} }
}
// update link based on mass transfer coefficients coef[2*idx]=aq;
coef[2*(link-nlinks)] = aq; coef[2*idx+1]=ap;
coef[2*(link-nlinks)+1] = ap;
} }
} }
} }