update gpu routines first try
This commit is contained in:
parent
ad8c5f6e26
commit
0e769186a5
125
cuda/Ion.cu
125
cuda/Ion.cu
@ -91,7 +91,7 @@ __global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
|
|||||||
// swap rule means that the distributions in recvbuf are OPPOSITE of q
|
// swap rule means that the distributions in recvbuf are OPPOSITE of q
|
||||||
// dist may be even or odd distributions stored by stream layout
|
// dist may be even or odd distributions stored by stream layout
|
||||||
//....................................................................................
|
//....................................................................................
|
||||||
int n, idx, link, nqm, npm, i, j, k;
|
int n, idx, nqm, npm, i, j, k;
|
||||||
double distanceLocal, distanceNonlocal;
|
double distanceLocal, distanceNonlocal;
|
||||||
double psiLocal, psiNonlocal, membranePotential;
|
double psiLocal, psiNonlocal, membranePotential;
|
||||||
double ap,aq; // coefficient
|
double ap,aq; // coefficient
|
||||||
@ -100,57 +100,56 @@ __global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
|
|||||||
int S = (count-nlinks)/NBLOCKS/NTHREADS + 1;
|
int S = (count-nlinks)/NBLOCKS/NTHREADS + 1;
|
||||||
for (int s=0; s<S; s++){
|
for (int s=0; s<S; s++){
|
||||||
//........Get 1-D index for this thread....................
|
//........Get 1-D index for this thread....................
|
||||||
link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + nlinks;
|
idx = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
|
||||||
|
|
||||||
if (link < count) {
|
if (idx < count) {
|
||||||
|
|
||||||
// get the index for the recv list (deal with reordering of links)
|
n = d3q7_recvlist[idx];
|
||||||
idx = d3q7_linkList[link]; // THINK start NEEDS TO BE HERE
|
label = d3q7_linkList[idx];
|
||||||
// get the distribution index
|
ap = 1.0; // regular streaming rule
|
||||||
n = d3q7_recvlist[start+idx];
|
aq = 1.0;
|
||||||
// get the index in strided layout
|
if (label > 0 && !(n < 0)){
|
||||||
nqm = Map[n];
|
nqm = Map[n];
|
||||||
distanceLocal = Distance[nqm];
|
distanceLocal = Distance[nqm];
|
||||||
psiLocal = Psi[nqm];
|
psiLocal = Psi[nqm];
|
||||||
|
|
||||||
// Get the 3-D indices from the send process
|
// Get the 3-D indices from the send process
|
||||||
k = nqm/(Nx*Ny); j = (nqm-Nx*Ny*k)/Nx; i = nqm-Nx*Ny*k-Nx*j;
|
k = nqm/(Nx*Ny); j = (nqm-Nx*Ny*k)/Nx; i = nqm-Nx*Ny*k-Nx*j;
|
||||||
// Streaming link the non-local distribution
|
// Streaming link the non-local distribution
|
||||||
i -= Cqx; j -= Cqy; k -= Cqz;
|
i -= Cqx; j -= Cqy; k -= Cqz;
|
||||||
npm = k*Nx*Ny + j*Nx + i;
|
npm = k*Nx*Ny + j*Nx + i;
|
||||||
distanceNonlocal = Distance[npm];
|
distanceNonlocal = Distance[npm];
|
||||||
psiNonlocal = Psi[npm];
|
psiNonlocal = Psi[npm];
|
||||||
|
|
||||||
membranePotential = psiLocal - psiNonlocal;
|
membranePotential = psiLocal - psiNonlocal;
|
||||||
aq = MassFractionIn;
|
aq = MassFractionIn;
|
||||||
ap = MassFractionOut;
|
ap = MassFractionOut;
|
||||||
|
|
||||||
/* link is inside membrane */
|
/* link is inside membrane */
|
||||||
if (distanceLocal > 0.0){
|
if (distanceLocal > 0.0){
|
||||||
if (membranePotential < Threshold*(-1.0)){
|
if (membranePotential < Threshold*(-1.0)){
|
||||||
ap = MassFractionIn;
|
ap = MassFractionIn;
|
||||||
aq = MassFractionOut;
|
aq = MassFractionOut;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
ap = ThresholdMassFractionIn;
|
||||||
|
aq = ThresholdMassFractionOut;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
else if (membranePotential > Threshold){
|
||||||
ap = ThresholdMassFractionIn;
|
aq = ThresholdMassFractionIn;
|
||||||
aq = ThresholdMassFractionOut;
|
ap = ThresholdMassFractionOut;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (membranePotential > Threshold){
|
coef[2*idx]=aq;
|
||||||
aq = ThresholdMassFractionIn;
|
coef[2*idx+1]=ap;
|
||||||
ap = ThresholdMassFractionOut;
|
|
||||||
}
|
|
||||||
|
|
||||||
// update link based on mass transfer coefficients
|
|
||||||
coef[2*(link-nlinks)] = aq;
|
|
||||||
coef[2*(link-nlinks)+1] = ap;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void dvc_ScaLBL_D3Q7_Membrane_Unpack(int q,
|
__global__ void dvc_ScaLBL_D3Q7_Membrane_Unpack(int q,
|
||||||
int *d3q7_recvlist, int *d3q7_linkList, int start, int nlinks, int count,
|
int *d3q7_recvlist, double *recvbuf, int count,
|
||||||
double *recvbuf, double *dist, int N, double *coef) {
|
double *dist, int N, double *coef) {
|
||||||
//....................................................................................
|
//....................................................................................
|
||||||
// Unpack distribution from the recv buffer
|
// Unpack distribution from the recv buffer
|
||||||
// Distribution q matches Cqx, Cqy, Cqz
|
// Distribution q matches Cqx, Cqy, Cqz
|
||||||
@ -164,34 +163,18 @@ __global__ void dvc_ScaLBL_D3Q7_Membrane_Unpack(int q,
|
|||||||
int S = count/NBLOCKS/NTHREADS + 1;
|
int S = count/NBLOCKS/NTHREADS + 1;
|
||||||
for (int s=0; s<S; s++){
|
for (int s=0; s<S; s++){
|
||||||
//........Get 1-D index for this thread....................
|
//........Get 1-D index for this thread....................
|
||||||
link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
|
idx = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
|
||||||
|
if (idx < count){
|
||||||
/* First unpack the regular links */
|
n = d3q7_recvlist[idx];
|
||||||
if (link < nlinks) {
|
// update link based on mass transfer coefficients
|
||||||
// get the index for the recv list (deal with reordering of links)
|
if (!(n < 0)){
|
||||||
idx = d3q7_linkList[link];
|
aq = coef[2*idx];
|
||||||
// get the distribution index
|
ap = coef[2*idx+1];
|
||||||
n = d3q7_recvlist[start+idx];
|
fq = dist[q * N + n];
|
||||||
if (!(n < 0)){
|
fp = recvbuf[idx];
|
||||||
fp = recvbuf[start + idx];
|
fqq = (1-aq)*fq+ap*fp;
|
||||||
dist[q * N + n] = fp;
|
dist[q * N + n] = fqq;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else if (link < count){
|
|
||||||
/* second enforce custom rule for membrane links */
|
|
||||||
// get the index for the recv list (deal with reordering of links)
|
|
||||||
idx = d3q7_linkList[link];
|
|
||||||
// get the distribution index
|
|
||||||
n = d3q7_recvlist[start+idx];
|
|
||||||
// update link based on mass transfer coefficients
|
|
||||||
if (!(n < 0)){
|
|
||||||
aq = coef[2*(link-nlinks)];
|
|
||||||
ap = coef[2*(link-nlinks)+1];
|
|
||||||
fq = dist[q * N + n];
|
|
||||||
fp = recvbuf[start + idx];
|
|
||||||
fqq = (1-aq)*fq+ap*fp;
|
|
||||||
dist[q * N + n] = fqq;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -972,13 +955,13 @@ extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
|
|||||||
|
|
||||||
|
|
||||||
extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q,
|
extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q,
|
||||||
int *d3q7_recvlist, int *d3q7_linkList, int start, int nlinks, int count,
|
int *d3q7_recvlist, double *recvbuf, int count,
|
||||||
double *recvbuf, double *dist, int N, double *coef) {
|
double *dist, int N, double *coef){
|
||||||
|
|
||||||
int GRID = count / NTHREADS + 1;
|
int GRID = count / NTHREADS + 1;
|
||||||
|
|
||||||
dvc_ScaLBL_D3Q7_Membrane_Unpack<<<GRID,NTHREADS >>>(q, d3q7_recvlist, d3q7_linkList, start, nlinks, count,
|
dvc_ScaLBL_D3Q7_Membrane_Unpack<<<GRID,NTHREADS >>>(q, d3q7_recvlist, recvbuf,count,
|
||||||
recvbuf, dist, N, coef) ;
|
dist, N, coef);
|
||||||
|
|
||||||
cudaError_t err = cudaGetLastError();
|
cudaError_t err = cudaGetLastError();
|
||||||
if (cudaSuccess != err){
|
if (cudaSuccess != err){
|
||||||
|
79
hip/Ion.hip
79
hip/Ion.hip
@ -91,7 +91,7 @@ __global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
|
|||||||
// swap rule means that the distributions in recvbuf are OPPOSITE of q
|
// swap rule means that the distributions in recvbuf are OPPOSITE of q
|
||||||
// dist may be even or odd distributions stored by stream layout
|
// dist may be even or odd distributions stored by stream layout
|
||||||
//....................................................................................
|
//....................................................................................
|
||||||
int n, idx, link, nqm, npm, i, j, k;
|
int n, idx, nqm, npm, i, j, k;
|
||||||
double distanceLocal, distanceNonlocal;
|
double distanceLocal, distanceNonlocal;
|
||||||
double psiLocal, psiNonlocal, membranePotential;
|
double psiLocal, psiNonlocal, membranePotential;
|
||||||
double ap,aq; // coefficient
|
double ap,aq; // coefficient
|
||||||
@ -100,50 +100,49 @@ __global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
|
|||||||
int S = (count-nlinks)/NBLOCKS/NTHREADS + 1;
|
int S = (count-nlinks)/NBLOCKS/NTHREADS + 1;
|
||||||
for (int s=0; s<S; s++){
|
for (int s=0; s<S; s++){
|
||||||
//........Get 1-D index for this thread....................
|
//........Get 1-D index for this thread....................
|
||||||
link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + nlinks;
|
idx = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
|
||||||
|
|
||||||
if (link < count) {
|
if (idx < count) {
|
||||||
|
|
||||||
// get the index for the recv list (deal with reordering of links)
|
n = d3q7_recvlist[idx];
|
||||||
idx = d3q7_linkList[link]; // THINK start NEEDS TO BE HERE
|
label = d3q7_linkList[idx];
|
||||||
// get the distribution index
|
ap = 1.0; // regular streaming rule
|
||||||
n = d3q7_recvlist[start+idx];
|
aq = 1.0;
|
||||||
// get the index in strided layout
|
if (label > 0 && !(n < 0)){
|
||||||
nqm = Map[n];
|
nqm = Map[n];
|
||||||
distanceLocal = Distance[nqm];
|
distanceLocal = Distance[nqm];
|
||||||
psiLocal = Psi[nqm];
|
psiLocal = Psi[nqm];
|
||||||
|
|
||||||
// Get the 3-D indices from the send process
|
// Get the 3-D indices from the send process
|
||||||
k = nqm/(Nx*Ny); j = (nqm-Nx*Ny*k)/Nx; i = nqm-Nx*Ny*k-Nx*j;
|
k = nqm/(Nx*Ny); j = (nqm-Nx*Ny*k)/Nx; i = nqm-Nx*Ny*k-Nx*j;
|
||||||
// Streaming link the non-local distribution
|
// Streaming link the non-local distribution
|
||||||
i -= Cqx; j -= Cqy; k -= Cqz;
|
i -= Cqx; j -= Cqy; k -= Cqz;
|
||||||
npm = k*Nx*Ny + j*Nx + i;
|
npm = k*Nx*Ny + j*Nx + i;
|
||||||
distanceNonlocal = Distance[npm];
|
distanceNonlocal = Distance[npm];
|
||||||
psiNonlocal = Psi[npm];
|
psiNonlocal = Psi[npm];
|
||||||
|
|
||||||
membranePotential = psiLocal - psiNonlocal;
|
membranePotential = psiLocal - psiNonlocal;
|
||||||
aq = MassFractionIn;
|
aq = MassFractionIn;
|
||||||
ap = MassFractionOut;
|
ap = MassFractionOut;
|
||||||
|
|
||||||
/* link is inside membrane */
|
/* link is inside membrane */
|
||||||
if (distanceLocal > 0.0){
|
if (distanceLocal > 0.0){
|
||||||
if (membranePotential < Threshold*(-1.0)){
|
if (membranePotential < Threshold*(-1.0)){
|
||||||
ap = MassFractionIn;
|
ap = MassFractionIn;
|
||||||
aq = MassFractionOut;
|
aq = MassFractionOut;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
ap = ThresholdMassFractionIn;
|
ap = ThresholdMassFractionIn;
|
||||||
aq = ThresholdMassFractionOut;
|
aq = ThresholdMassFractionOut;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (membranePotential > Threshold){
|
else if (membranePotential > Threshold){
|
||||||
aq = ThresholdMassFractionIn;
|
aq = ThresholdMassFractionIn;
|
||||||
ap = ThresholdMassFractionOut;
|
ap = ThresholdMassFractionOut;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
// update link based on mass transfer coefficients
|
coef[2*idx]=aq;
|
||||||
coef[2*(link-nlinks)] = aq;
|
coef[2*idx+1]=ap;
|
||||||
coef[2*(link-nlinks)+1] = ap;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user