/* Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University Copyright Equnior ASA This file is part of the Open Porous Media project (OPM). OPM is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OPM is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OPM. If not, see . */ #include #include #include #define NBLOCKS 1024 #define NTHREADS 256 __global__ void dvc_ScaLBL_Color_Init(char *ID, double *Den, double *Phi, double das, double dbs, int Nx, int Ny, int Nz) { //int i,j,k; int n,N; char id; N = Nx*Ny*Nz; int S = N/NBLOCKS/NTHREADS + 1; for (int s=0; s 0){ // Retrieve the color gradient nx = ColorGrad[n]; ny = ColorGrad[N+n]; nz = ColorGrad[2*N+n]; //...........Normalize the Color Gradient................................. C = sqrt(nx*nx+ny*ny+nz*nz); nx = nx/C; ny = ny/C; nz = nz/C; //......No color gradient at z-boundary if pressure BC are set............. // if (pBC && k==0) nx = ny = nz = 0.f; // if (pBC && k==Nz-1) nx = ny = nz = 0.f; //........................................................................ // READ THE DISTRIBUTIONS // (read from opposite array due to previous swap operation) //........................................................................ f2 = distodd[n]; f4 = distodd[N+n]; f6 = distodd[2*N+n]; f8 = distodd[3*N+n]; f10 = distodd[4*N+n]; f12 = distodd[5*N+n]; f14 = distodd[6*N+n]; f16 = distodd[7*N+n]; f18 = distodd[8*N+n]; //........................................................................ f0 = disteven[n]; f1 = disteven[N+n]; f3 = disteven[2*N+n]; f5 = disteven[3*N+n]; f7 = disteven[4*N+n]; f9 = disteven[5*N+n]; f11 = disteven[6*N+n]; f13 = disteven[7*N+n]; f15 = disteven[8*N+n]; f17 = disteven[9*N+n]; //........................................................................ // PERFORM RELAXATION PROCESS //........................................................................ //....................compute the moments............................................... rho = f0+f2+f1+f4+f3+f6+f5+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; m1 = -30*f0-11*(f2+f1+f4+f3+f6+f5)+8*(f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18 +f17); m2 = 12*f0-4*(f2+f1 +f4+f3+f6 +f5)+f8+f7+f10+f9+f12+f11+f14+f13+f16+f15+f18+f17; jx = f1-f2+f7-f8+f9-f10+f11-f12+f13-f14; m4 = 4*(-f1+f2)+f7-f8+f9-f10+f11-f12+f13-f14; jy = f3-f4+f7-f8-f9+f10+f15-f16+f17-f18; m6 = -4*(f3-f4)+f7-f8-f9+f10+f15-f16+f17-f18; jz = f5-f6+f11-f12-f13+f14+f15-f16-f17+f18; m8 = -4*(f5-f6)+f11-f12-f13+f14+f15-f16-f17+f18; m9 = 2*(f1+f2)-f3-f4-f5-f6+f7+f8+f9+f10+f11+f12+f13+f14-2*(f15+f16+f17+f18); m10 = -4*(f1+f2)+2*(f4+f3+f6+f5)+f8+f7+f10+f9+f12+f11+f14+f13-2*(f16+f15+f18+f17); m11 = f4+f3-f6-f5+f8+f7+f10+f9-f12-f11-f14-f13; m12 = -2*(f4+f3-f6-f5)+f8+f7+f10+f9-f12-f11-f14-f13; m13 = f8+f7-f10-f9; m14 = f16+f15-f18-f17; m15 = f12+f11-f14-f13; m16 = f7-f8+f9-f10-f11+f12-f13+f14; m17 = -f7+f8+f9-f10+f15-f16+f17-f18; m18 = f11-f12-f13+f14-f15+f16+f17-f18; //..........Toelke, Fruediger et. al. 2006............... if (C == 0.0) nx = ny = nz = 1.0; m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho - 11*rho) -alpha*C - m1); m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho)- m2); m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); m10 = m10 + rlx_setA*(-0.5*((2*jx*jx-jy*jy-jz*jz)/rho) - m10); m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); m12 = m12 + rlx_setA*( -0.5*((jy*jy-jz*jz)/rho) - m12); m13 = m13 + rlx_setA*( (jx*jy/rho) + 0.5*alpha*C*nx*ny - m13); m14 = m14 + rlx_setA*( (jy*jz/rho) + 0.5*alpha*C*ny*nz - m14); m15 = m15 + rlx_setA*( (jx*jz/rho) + 0.5*alpha*C*nx*nz - m15); m16 = m16 + rlx_setB*( - m16); m17 = m17 + rlx_setB*( - m17); m18 = m18 + rlx_setB*( - m18); //.................inverse transformation...................................................... f0 = 0.05263157894736842*rho-0.012531328320802*m1+0.04761904761904762*m2; f1 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 +0.1*(jx-m4)+0.0555555555555555555555555*(m9-m10); f2 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 +0.1*(m4-jx)+0.0555555555555555555555555*(m9-m10); f3 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 +0.1*(jy-m6)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12); f4 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 +0.1*(m6-jy)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m11-m12); f5 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 +0.1*(jz-m8)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11); f6 = 0.05263157894736842*rho-0.004594820384294068*m1-0.01587301587301587*m2 +0.1*(m8-jz)+0.02777777777777778*(m10-m9)+0.08333333333333333*(m12-m11); f7 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx+jy)+0.025*(m4+m6) +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 +0.04166666666666666*m12+0.25*m13+0.125*(m16-m17); f8 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2-0.1*(jx+jy)-0.025*(m4+m6) +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 +0.04166666666666666*m12+0.25*m13+0.125*(m17-m16); f9 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jx-jy)+0.025*(m4-m6) +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 +0.04166666666666666*m12-0.25*m13+0.125*(m16+m17); f10 = 0.05263157894736842*rho+0.003341687552213868*m1+0.003968253968253968*m2+0.1*(jy-jx)+0.025*(m6-m4) +0.02777777777777778*m9+0.01388888888888889*m10+0.08333333333333333*m11 +0.04166666666666666*m12-0.25*m13-0.125*(m16+m17); f11 = 0.05263157894736842*rho+0.003341687552213868*m1 +0.003968253968253968*m2+0.1*(jx+jz)+0.025*(m4+m8) +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 -0.04166666666666666*m12+0.25*m15+0.125*(m18-m16); f12 = 0.05263157894736842*rho+0.003341687552213868*m1 +0.003968253968253968*m2-0.1*(jx+jz)-0.025*(m4+m8) +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 -0.04166666666666666*m12+0.25*m15+0.125*(m16-m18); f13 = 0.05263157894736842*rho+0.003341687552213868*m1 +0.003968253968253968*m2+0.1*(jx-jz)+0.025*(m4-m8) +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 -0.04166666666666666*m12-0.25*m15-0.125*(m16+m18); f14 = 0.05263157894736842*rho+0.003341687552213868*m1 +0.003968253968253968*m2+0.1*(jz-jx)+0.025*(m8-m4) +0.02777777777777778*m9+0.01388888888888889*m10-0.08333333333333333*m11 -0.04166666666666666*m12-0.25*m15+0.125*(m16+m18); f15 = 0.05263157894736842*rho+0.003341687552213868*m1 +0.003968253968253968*m2+0.1*(jy+jz)+0.025*(m6+m8) -0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m17-m18); f16 = 0.05263157894736842*rho+0.003341687552213868*m1 +0.003968253968253968*m2-0.1*(jy+jz)-0.025*(m6+m8) -0.0555555555555555555555555*m9-0.02777777777777778*m10+0.25*m14+0.125*(m18-m17); f17 = 0.05263157894736842*rho+0.003341687552213868*m1 +0.003968253968253968*m2+0.1*(jy-jz)+0.025*(m6-m8) -0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14+0.125*(m17+m18); f18 = 0.05263157894736842*rho+0.003341687552213868*m1 +0.003968253968253968*m2+0.1*(jz-jy)+0.025*(m8-m6) -0.0555555555555555555555555*m9-0.02777777777777778*m10-0.25*m14-0.125*(m17+m18); //....................................................................................................... // incorporate external force f1 += 0.16666666*Fx; f2 -= 0.16666666*Fx; f3 += 0.16666666*Fy; f4 -= 0.16666666*Fy; f5 += 0.16666666*Fz; f6 -= 0.16666666*Fz; f7 += 0.08333333333*(Fx+Fy); f8 -= 0.08333333333*(Fx+Fy); f9 += 0.08333333333*(Fx-Fy); f10 -= 0.08333333333*(Fx-Fy); f11 += 0.08333333333*(Fx+Fz); f12 -= 0.08333333333*(Fx+Fz); f13 += 0.08333333333*(Fx-Fz); f14 -= 0.08333333333*(Fx-Fz); f15 += 0.08333333333*(Fy+Fz); f16 -= 0.08333333333*(Fy+Fz); f17 += 0.08333333333*(Fy-Fz); f18 -= 0.08333333333*(Fy-Fz); //*********** WRITE UPDATED VALUES TO MEMORY ****************** // Write the updated distributions //....EVEN..................................... disteven[n] = f0; disteven[N+n] = f2; disteven[2*N+n] = f4; disteven[3*N+n] = f6; disteven[4*N+n] = f8; disteven[5*N+n] = f10; disteven[6*N+n] = f12; disteven[7*N+n] = f14; disteven[8*N+n] = f16; disteven[9*N+n] = f18; //....ODD...................................... distodd[n] = f1; distodd[N+n] = f3; distodd[2*N+n] = f5; distodd[3*N+n] = f7; distodd[4*N+n] = f9; distodd[5*N+n] = f11; distodd[6*N+n] = f13; distodd[7*N+n] = f15; distodd[8*N+n] = f17; //...Store the Velocity.......................... Velocity[n] = jx; Velocity[N+n] = jy; Velocity[2*N+n] = jz; /* Velocity[3*n] = jx; Velocity[3*n+1] = jy; Velocity[3*n+2] = jz; */ //...Store the Color Gradient.................... // ColorGrad[3*n] = nx*C; // ColorGrad[3*n+1] = ny*C; // ColorGrad[3*n+2] = nz*C; //............................................... //*************************************************************** } // check if n is in the solid } // loop over n } __global__ void __launch_bounds__(512,2) dvc_ScaLBL_D3Q19_ColorCollide( char *ID, double *disteven, double *distodd, double *phi, double *ColorGrad, double *Velocity, int Nx, int Ny, int Nz, double rlx_setA, double rlx_setB, double alpha, double beta, double Fx, double Fy, double Fz) { int i,j,k,n,nn,N; // distributions double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9; double f10,f11,f12,f13,f14,f15,f16,f17,f18; // non-conserved moments double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; // additional variables needed for computations double rho,jx,jy,jz,C,nx,ny,nz; char id; N = Nx*Ny*Nz; int S = N/NBLOCKS/NTHREADS + 1; for (int s=0; s 0){ //.......Back out the 3-D indices for node n.............. k = n/(Nx*Ny); j = (n-Nx*Ny*k)/Nx; i = n-Nx*Ny*k-Nx*j; //........................................................................ //........Get 1-D index for this thread.................... // n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x; //........................................................................ // COMPUTE THE COLOR GRADIENT //........................................................................ //.................Read Phase Indicator Values............................ //........................................................................ nn = n-1; // neighbor index (get convention) if (i-1<0) nn += Nx; // periodic BC along the x-boundary f1 = phi[nn]; // get neighbor for phi - 1 //........................................................................ nn = n+1; // neighbor index (get convention) if (!(i+10)) delta=0; a1 = na*(0.1111111111111111*(1+4.5*ux))+delta; b1 = nb*(0.1111111111111111*(1+4.5*ux))-delta; a2 = na*(0.1111111111111111*(1-4.5*ux))-delta; b2 = nb*(0.1111111111111111*(1-4.5*ux))+delta; A_odd[n] = a1; A_even[N+n] = a2; B_odd[n] = b1; B_even[N+n] = b2; //............................................... // q = 2 // Cq = {0,1,0} delta = beta*na*nb*nab*0.1111111111111111*ny; if (!(na*nb*nab>0)) delta=0; a1 = na*(0.1111111111111111*(1+4.5*uy))+delta; b1 = nb*(0.1111111111111111*(1+4.5*uy))-delta; a2 = na*(0.1111111111111111*(1-4.5*uy))-delta; b2 = nb*(0.1111111111111111*(1-4.5*uy))+delta; A_odd[N+n] = a1; A_even[2*N+n] = a2; B_odd[N+n] = b1; B_even[2*N+n] = b2; //............................................... // q = 4 // Cq = {0,0,1} delta = beta*na*nb*nab*0.1111111111111111*nz; if (!(na*nb*nab>0)) delta=0; a1 = na*(0.1111111111111111*(1+4.5*uz))+delta; b1 = nb*(0.1111111111111111*(1+4.5*uz))-delta; a2 = na*(0.1111111111111111*(1-4.5*uz))-delta; b2 = nb*(0.1111111111111111*(1-4.5*uz))+delta; A_odd[2*N+n] = a1; A_even[3*N+n] = a2; B_odd[2*N+n] = b1; B_even[3*N+n] = b2; } } } //************************************************************************* __global__ void dvc_DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity, double beta, int Nx, int Ny, int Nz, bool pBC) { char id; int idx; int in,jn,kn,n,nn,N; int q,Cqx,Cqy,Cqz; // int sendLoc; double na,nb; // density values double ux,uy,uz; // flow velocity double nx,ny,nz,C; // color gradient components double a1,a2,b1,b2; double sp,delta; double feq[6]; // equilibrium distributions // Set of Discrete velocities for the D3Q19 Model int D3Q7[3][3]={{1,0,0},{0,1,0},{0,0,1}}; N = Nx*Ny*Nz; int S = N/NBLOCKS/NTHREADS + 1; for (int s=0; s 0 && na+nb > 0.0){ //.......Back out the 3-D indices for node n.............. int k = n/(Nx*Ny); int j = (n-Nx*Ny*k)/Nx; int i = n-Nx*Ny*k-Nx*j; //.....Load the Color gradient......... nx = ColorGrad[n]; ny = ColorGrad[N+n]; nz = ColorGrad[2*N+n]; C = sqrt(nx*nx+ny*ny+nz*nz); if (C == 0.0) C=1.0; nx = nx/C; ny = ny/C; nz = nz/C; //....Load the flow velocity........... ux = Velocity[n]; uy = Velocity[N+n]; uz = Velocity[2*N+n]; //....Instantiate the density distributions // Generate Equilibrium Distributions and stream // Stationary value - distribution 0 // Den[2*n] += 0.3333333333333333*na; // Den[2*n+1] += 0.3333333333333333*nb; Den[2*n] += 0.3333333333333333*na; Den[2*n+1] += 0.3333333333333333*nb; // Non-Stationary equilibrium distributions feq[0] = 0.1111111111111111*(1+3*ux); feq[1] = 0.1111111111111111*(1-3*ux); feq[2] = 0.1111111111111111*(1+3*uy); feq[3] = 0.1111111111111111*(1-3*uy); feq[4] = 0.1111111111111111*(1+3*uz); feq[5] = 0.1111111111111111*(1-3*uz); // Construction and streaming for the components for (idx=0; idx<3; idx++){ // Distribution index q = 2*idx; // Associated discrete velocity Cqx = D3Q7[idx][0]; Cqy = D3Q7[idx][1]; Cqz = D3Q7[idx][2]; // Generate the Equilibrium Distribution a1 = na*feq[q]; b1 = nb*feq[q]; a2 = na*feq[q+1]; b2 = nb*feq[q+1]; // Recolor the distributions if (C > 0.0){ sp = nx*double(Cqx)+ny*double(Cqy)+nz*double(Cqz); //if (idx > 2) sp = 0.7071067811865475*sp; //delta = sp*min( min(a1,a2), min(b1,b2) ); delta = na*nb/(na+nb)*0.1111111111111111*sp; //if (a1>0 && b1>0){ a1 += beta*delta; a2 -= beta*delta; b1 -= beta*delta; b2 += beta*delta; } // .......Get the neighbor node.............. //nn = n + Stride[idx]; in = i+Cqx; jn = j+Cqy; kn = k+Cqz; // Adjust for periodic BC, if necessary // if (in<0) in+= Nx; // if (jn<0) jn+= Ny; // if (kn<0) kn+= Nz; // if (!(in 0){ // Get the density value (Streaming already performed) Na = Den[n]; Nb = Den[N+n]; Phi[n] = (Na-Nb)/(Na+Nb); } } } //................................................................... } __global__ void dvc_ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice) { int n = Slice*Nx*Ny + blockIdx.x*blockDim.x + threadIdx.x; if (n < (Slice+1)*Nx*Ny){ Phi[n] = value; } } __global__ void dvc_ScaLBL_CopySlice_z(double *Phi, int Nx, int Ny, int Nz, int Source, int Dest){ double value; int n = blockIdx.x*blockDim.x + threadIdx.x; if (n < Nx*Ny){ value = Phi[Source*Nx*Ny+n]; Phi[Dest*Nx*Ny+n] = value; } } __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi, double *Velocity, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ int ijk,nn,n; double fq; // conserved momemnts double rho,jx,jy,jz; // non-conserved moments double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; double m3,m5,m7; double nA,nB; // number density double a1,b1,a2,b2,nAB,delta; double C,nx,ny,nz; //color gradient magnitude and direction double ux,uy,uz; double phi,tau,rho0,rlx_setA,rlx_setB; const double mrt_V1=0.05263157894736842; const double mrt_V2=0.012531328320802; const double mrt_V3=0.04761904761904762; const double mrt_V4=0.004594820384294068; const double mrt_V5=0.01587301587301587; const double mrt_V6=0.0555555555555555555555555; const double mrt_V7=0.02777777777777778; const double mrt_V8=0.08333333333333333; const double mrt_V9=0.003341687552213868; const double mrt_V10=0.003968253968253968; const double mrt_V11=0.01388888888888889; const double mrt_V12=0.04166666666666666; int S = Np/NBLOCKS/NTHREADS + 1; for (int s=0; s0)) delta=0; a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; Aq[1*Np+n] = a1; Bq[1*Np+n] = b1; Aq[2*Np+n] = a2; Bq[2*Np+n] = b2; //............................................... // q = 2 // Cq = {0,1,0} delta = beta*nA*nB*nAB*0.1111111111111111*ny; if (!(nA*nB*nAB>0)) delta=0; a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; Aq[3*Np+n] = a1; Bq[3*Np+n] = b1; Aq[4*Np+n] = a2; Bq[4*Np+n] = b2; //............................................... // q = 4 // Cq = {0,0,1} delta = beta*nA*nB*nAB*0.1111111111111111*nz; if (!(nA*nB*nAB>0)) delta=0; a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; Aq[5*Np+n] = a1; Bq[5*Np+n] = b1; Aq[6*Np+n] = a2; Bq[6*Np+n] = b2; //............................................... } } } __global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi, double *Velocity, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ int n,nn,ijk,nread; int nr1,nr2,nr3,nr4,nr5,nr6; int nr7,nr8,nr9,nr10; int nr11,nr12,nr13,nr14; //int nr15,nr16,nr17,nr18; double fq; // conserved momemnts double rho,jx,jy,jz; // non-conserved moments double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; double m3,m5,m7; double nA,nB; // number density double a1,b1,a2,b2,nAB,delta; double C,nx,ny,nz; //color gradient magnitude and direction double ux,uy,uz; double phi,tau,rho0,rlx_setA,rlx_setB; const double mrt_V1=0.05263157894736842; const double mrt_V2=0.012531328320802; const double mrt_V3=0.04761904761904762; const double mrt_V4=0.004594820384294068; const double mrt_V5=0.01587301587301587; const double mrt_V6=0.0555555555555555555555555; const double mrt_V7=0.02777777777777778; const double mrt_V8=0.08333333333333333; const double mrt_V9=0.003341687552213868; const double mrt_V10=0.003968253968253968; const double mrt_V11=0.01388888888888889; const double mrt_V12=0.04166666666666666; int S = Np/NBLOCKS/NTHREADS + 1; for (int s=0; s even part of dist) //fq = dist[nread]; // reading the f2 data into register fq nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) fq = dist[nr2]; // reading the f2 data into register fq rho += fq; m1 -= 11.0*(fq); m2 -= 4.0*(fq); jx -= fq; m4 += 4.0*(fq); m9 += 2.0*(fq); m10 -= 4.0*(fq); // q=3 //nread = neighborList[n+2*Np]; // neighbor 4 //fq = dist[nread]; nr3 = neighborList[n+2*Np]; // neighbor 4 fq = dist[nr3]; rho += fq; m1 -= 11.0*fq; m2 -= 4.0*fq; jy = fq; m6 = -4.0*fq; m9 -= fq; m10 += 2.0*fq; m11 = fq; m12 = -2.0*fq; // q = 4 //nread = neighborList[n+3*Np]; // neighbor 3 //fq = dist[nread]; nr4 = neighborList[n+3*Np]; // neighbor 3 fq = dist[nr4]; rho+= fq; m1 -= 11.0*fq; m2 -= 4.0*fq; jy -= fq; m6 += 4.0*fq; m9 -= fq; m10 += 2.0*fq; m11 += fq; m12 -= 2.0*fq; // q=5 //nread = neighborList[n+4*Np]; //fq = dist[nread]; nr5 = neighborList[n+4*Np]; fq = dist[nr5]; rho += fq; m1 -= 11.0*fq; m2 -= 4.0*fq; jz = fq; m8 = -4.0*fq; m9 -= fq; m10 += 2.0*fq; m11 -= fq; m12 += 2.0*fq; // q = 6 //nread = neighborList[n+5*Np]; //fq = dist[nread]; nr6 = neighborList[n+5*Np]; fq = dist[nr6]; rho+= fq; m1 -= 11.0*fq; m2 -= 4.0*fq; jz -= fq; m8 += 4.0*fq; m9 -= fq; m10 += 2.0*fq; m11 -= fq; m12 += 2.0*fq; // q=7 //nread = neighborList[n+6*Np]; //fq = dist[nread]; nr7 = neighborList[n+6*Np]; fq = dist[nr7]; rho += fq; m1 += 8.0*fq; m2 += fq; jx += fq; m4 += fq; jy += fq; m6 += fq; m9 += fq; m10 += fq; m11 += fq; m12 += fq; m13 = fq; m16 = fq; m17 = -fq; // q = 8 //nread = neighborList[n+7*Np]; //fq = dist[nread]; nr8 = neighborList[n+7*Np]; fq = dist[nr8]; rho += fq; m1 += 8.0*fq; m2 += fq; jx -= fq; m4 -= fq; jy -= fq; m6 -= fq; m9 += fq; m10 += fq; m11 += fq; m12 += fq; m13 += fq; m16 -= fq; m17 += fq; // q=9 //nread = neighborList[n+8*Np]; //fq = dist[nread]; nr9 = neighborList[n+8*Np]; fq = dist[nr9]; rho += fq; m1 += 8.0*fq; m2 += fq; jx += fq; m4 += fq; jy -= fq; m6 -= fq; m9 += fq; m10 += fq; m11 += fq; m12 += fq; m13 -= fq; m16 += fq; m17 += fq; // q = 10 //nread = neighborList[n+9*Np]; //fq = dist[nread]; nr10 = neighborList[n+9*Np]; fq = dist[nr10]; rho += fq; m1 += 8.0*fq; m2 += fq; jx -= fq; m4 -= fq; jy += fq; m6 += fq; m9 += fq; m10 += fq; m11 += fq; m12 += fq; m13 -= fq; m16 -= fq; m17 -= fq; // q=11 //nread = neighborList[n+10*Np]; //fq = dist[nread]; nr11 = neighborList[n+10*Np]; fq = dist[nr11]; rho += fq; m1 += 8.0*fq; m2 += fq; jx += fq; m4 += fq; jz += fq; m8 += fq; m9 += fq; m10 += fq; m11 -= fq; m12 -= fq; m15 = fq; m16 -= fq; m18 = fq; // q=12 //nread = neighborList[n+11*Np]; //fq = dist[nread]; nr12 = neighborList[n+11*Np]; fq = dist[nr12]; rho += fq; m1 += 8.0*fq; m2 += fq; jx -= fq; m4 -= fq; jz -= fq; m8 -= fq; m9 += fq; m10 += fq; m11 -= fq; m12 -= fq; m15 += fq; m16 += fq; m18 -= fq; // q=13 //nread = neighborList[n+12*Np]; //fq = dist[nread]; nr13 = neighborList[n+12*Np]; fq = dist[nr13]; rho += fq; m1 += 8.0*fq; m2 += fq; jx += fq; m4 += fq; jz -= fq; m8 -= fq; m9 += fq; m10 += fq; m11 -= fq; m12 -= fq; m15 -= fq; m16 -= fq; m18 -= fq; // q=14 //nread = neighborList[n+13*Np]; //fq = dist[nread]; nr14 = neighborList[n+13*Np]; fq = dist[nr14]; rho += fq; m1 += 8.0*fq; m2 += fq; jx -= fq; m4 -= fq; jz += fq; m8 += fq; m9 += fq; m10 += fq; m11 -= fq; m12 -= fq; m15 -= fq; m16 += fq; m18 += fq; // q=15 nread = neighborList[n+14*Np]; fq = dist[nread]; //fq = dist[17*Np+n]; rho += fq; m1 += 8.0*fq; m2 += fq; jy += fq; m6 += fq; jz += fq; m8 += fq; m9 -= 2.0*fq; m10 -= 2.0*fq; m14 = fq; m17 += fq; m18 -= fq; // q=16 nread = neighborList[n+15*Np]; fq = dist[nread]; //fq = dist[8*Np+n]; rho += fq; m1 += 8.0*fq; m2 += fq; jy -= fq; m6 -= fq; jz -= fq; m8 -= fq; m9 -= 2.0*fq; m10 -= 2.0*fq; m14 += fq; m17 -= fq; m18 += fq; // q=17 //fq = dist[18*Np+n]; nread = neighborList[n+16*Np]; fq = dist[nread]; rho += fq; m1 += 8.0*fq; m2 += fq; jy += fq; m6 += fq; jz -= fq; m8 -= fq; m9 -= 2.0*fq; m10 -= 2.0*fq; m14 -= fq; m17 += fq; m18 += fq; // q=18 nread = neighborList[n+17*Np]; fq = dist[nread]; //fq = dist[9*Np+n]; rho += fq; m1 += 8.0*fq; m2 += fq; jy -= fq; m6 -= fq; jz += fq; m8 += fq; m9 -= 2.0*fq; m10 -= 2.0*fq; m14 -= fq; m17 -= fq; m18 -= fq; //........................................................................ //..............carry out relaxation process.............................. //..........Toelke, Fruediger et. al. 2006................................ if (C == 0.0) nx = ny = nz = 0.0; m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) -19*alpha*C - m1); m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0)- m2); m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); m10 = m10 + rlx_setA*( - m10); m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); m12 = m12 + rlx_setA*( - m12); m13 = m13 + rlx_setA*( (jx*jy/rho0) + 0.5*alpha*C*nx*ny - m13); m14 = m14 + rlx_setA*( (jy*jz/rho0) + 0.5*alpha*C*ny*nz - m14); m15 = m15 + rlx_setA*( (jx*jz/rho0) + 0.5*alpha*C*nx*nz - m15); m16 = m16 + rlx_setB*( - m16); m17 = m17 + rlx_setB*( - m17); m18 = m18 + rlx_setB*( - m18); //.................inverse transformation...................................................... // q=0 fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; dist[n] = fq; // q = 1 fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx; //nread = neighborList[n+Np]; dist[nr2] = fq; // q=2 fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; //nread = neighborList[n]; dist[nr1] = fq; // q = 3 fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; //nread = neighborList[n+3*Np]; dist[nr4] = fq; // q = 4 fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; //nread = neighborList[n+2*Np]; dist[nr3] = fq; // q = 5 fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; //nread = neighborList[n+5*Np]; dist[nr6] = fq; // q = 6 fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; //nread = neighborList[n+4*Np]; dist[nr5] = fq; // q = 7 fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); //nread = neighborList[n+7*Np]; dist[nr8] = fq; // q = 8 fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); //nread = neighborList[n+6*Np]; dist[nr7] = fq; // q = 9 fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); //nread = neighborList[n+9*Np]; dist[nr10] = fq; // q = 10 fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); //nread = neighborList[n+8*Np]; dist[nr9] = fq; // q = 11 fq = mrt_V1*rho+mrt_V9*m1 +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); //nread = neighborList[n+11*Np]; dist[nr12] = fq; // q = 12 fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); //nread = neighborList[n+10*Np]; dist[nr11]= fq; // q = 13 fq = mrt_V1*rho+mrt_V9*m1 +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); //nread = neighborList[n+13*Np]; dist[nr14] = fq; // q= 14 fq = mrt_V1*rho+mrt_V9*m1 +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); //nread = neighborList[n+12*Np]; dist[nr13] = fq; // q = 15 fq = mrt_V1*rho+mrt_V9*m1 +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz); nread = neighborList[n+15*Np]; dist[nread] = fq; // q = 16 fq = mrt_V1*rho+mrt_V9*m1 +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); nread = neighborList[n+14*Np]; dist[nread] = fq; // q = 17 fq = mrt_V1*rho+mrt_V9*m1 +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); nread = neighborList[n+17*Np]; dist[nread] = fq; // q = 18 fq = mrt_V1*rho+mrt_V9*m1 +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); nread = neighborList[n+16*Np]; dist[nread] = fq; // write the velocity ux = jx / rho0; uy = jy / rho0; uz = jz / rho0; Velocity[n] = ux; Velocity[Np+n] = uy; Velocity[2*Np+n] = uz; // Instantiate mass transport distributions // Stationary value - distribution 0 nAB = 1.0/(nA+nB); Aq[n] = 0.3333333333333333*nA; Bq[n] = 0.3333333333333333*nB; //............................................... // q = 0,2,4 // Cq = {1,0,0}, {0,1,0}, {0,0,1} delta = beta*nA*nB*nAB*0.1111111111111111*nx; if (!(nA*nB*nAB>0)) delta=0; a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; // q = 1 //nread = neighborList[n+Np]; Aq[nr2] = a1; Bq[nr2] = b1; // q=2 //nread = neighborList[n]; Aq[nr1] = a2; Bq[nr1] = b2; //............................................... // Cq = {0,1,0} delta = beta*nA*nB*nAB*0.1111111111111111*ny; if (!(nA*nB*nAB>0)) delta=0; a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; // q = 3 //nread = neighborList[n+3*Np]; Aq[nr4] = a1; Bq[nr4] = b1; // q = 4 //nread = neighborList[n+2*Np]; Aq[nr3] = a2; Bq[nr3] = b2; //............................................... // q = 4 // Cq = {0,0,1} delta = beta*nA*nB*nAB*0.1111111111111111*nz; if (!(nA*nB*nAB>0)) delta=0; a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; // q = 5 //nread = neighborList[n+5*Np]; Aq[nr6] = a1; Bq[nr6] = b1; // q = 6 //nread = neighborList[n+4*Np]; Aq[nr5] = a2; Bq[nr5] = b2; //............................................... } } } __global__ void dvc_ScaLBL_D3Q19_AAodd_ColorMomentum(int *neighborList, double *dist, double *Den, double *Velocity, double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int start, int finish, int Np){ int n,nread; double fq; // conserved momemnts double rho,jx,jy,jz; // non-conserved moments double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; double nA,nB; // number density double C,nx,ny,nz; //color gradient magnitude and direction double ux,uy,uz; double phi,tau,rho0,rlx_setA,rlx_setB; const double mrt_V1=0.05263157894736842; const double mrt_V2=0.012531328320802; const double mrt_V3=0.04761904761904762; const double mrt_V4=0.004594820384294068; const double mrt_V5=0.01587301587301587; const double mrt_V6=0.0555555555555555555555555; const double mrt_V7=0.02777777777777778; const double mrt_V8=0.08333333333333333; const double mrt_V9=0.003341687552213868; const double mrt_V10=0.003968253968253968; const double mrt_V11=0.01388888888888889; const double mrt_V12=0.04166666666666666; int S = Np/NBLOCKS/NTHREADS + 1; for (int s=0; s 10Np => odd part of dist) fq = dist[nread]; // reading the f1 data into register fq //fp = dist[10*Np+n]; rho += fq; m1 -= 11.0*fq; m2 -= 4.0*fq; jx = fq; m4 = -4.0*fq; m9 = 2.0*fq; m10 = -4.0*fq; // f2 = dist[10*Np+n]; nread = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist) fq = dist[nread]; // reading the f2 data into register fq //fq = dist[Np+n]; rho += fq; m1 -= 11.0*(fq); m2 -= 4.0*(fq); jx -= fq; m4 += 4.0*(fq); m9 += 2.0*(fq); m10 -= 4.0*(fq); // q=3 nread = neighborList[n+2*Np]; // neighbor 4 fq = dist[nread]; //fq = dist[11*Np+n]; rho += fq; m1 -= 11.0*fq; m2 -= 4.0*fq; jy = fq; m6 = -4.0*fq; m9 -= fq; m10 += 2.0*fq; m11 = fq; m12 = -2.0*fq; // q = 4 nread = neighborList[n+3*Np]; // neighbor 3 fq = dist[nread]; //fq = dist[2*Np+n]; rho+= fq; m1 -= 11.0*fq; m2 -= 4.0*fq; jy -= fq; m6 += 4.0*fq; m9 -= fq; m10 += 2.0*fq; m11 += fq; m12 -= 2.0*fq; // q=5 nread = neighborList[n+4*Np]; fq = dist[nread]; //fq = dist[12*Np+n]; rho += fq; m1 -= 11.0*fq; m2 -= 4.0*fq; jz = fq; m8 = -4.0*fq; m9 -= fq; m10 += 2.0*fq; m11 -= fq; m12 += 2.0*fq; // q = 6 nread = neighborList[n+5*Np]; fq = dist[nread]; //fq = dist[3*Np+n]; rho+= fq; m1 -= 11.0*fq; m2 -= 4.0*fq; jz -= fq; m8 += 4.0*fq; m9 -= fq; m10 += 2.0*fq; m11 -= fq; m12 += 2.0*fq; // q=7 nread = neighborList[n+6*Np]; fq = dist[nread]; //fq = dist[13*Np+n]; rho += fq; m1 += 8.0*fq; m2 += fq; jx += fq; m4 += fq; jy += fq; m6 += fq; m9 += fq; m10 += fq; m11 += fq; m12 += fq; m13 = fq; m16 = fq; m17 = -fq; // q = 8 nread = neighborList[n+7*Np]; fq = dist[nread]; //fq = dist[4*Np+n]; rho += fq; m1 += 8.0*fq; m2 += fq; jx -= fq; m4 -= fq; jy -= fq; m6 -= fq; m9 += fq; m10 += fq; m11 += fq; m12 += fq; m13 += fq; m16 -= fq; m17 += fq; // q=9 nread = neighborList[n+8*Np]; fq = dist[nread]; //fq = dist[14*Np+n]; rho += fq; m1 += 8.0*fq; m2 += fq; jx += fq; m4 += fq; jy -= fq; m6 -= fq; m9 += fq; m10 += fq; m11 += fq; m12 += fq; m13 -= fq; m16 += fq; m17 += fq; // q = 10 nread = neighborList[n+9*Np]; fq = dist[nread]; //fq = dist[5*Np+n]; rho += fq; m1 += 8.0*fq; m2 += fq; jx -= fq; m4 -= fq; jy += fq; m6 += fq; m9 += fq; m10 += fq; m11 += fq; m12 += fq; m13 -= fq; m16 -= fq; m17 -= fq; // q=11 nread = neighborList[n+10*Np]; fq = dist[nread]; //fq = dist[15*Np+n]; rho += fq; m1 += 8.0*fq; m2 += fq; jx += fq; m4 += fq; jz += fq; m8 += fq; m9 += fq; m10 += fq; m11 -= fq; m12 -= fq; m15 = fq; m16 -= fq; m18 = fq; // q=12 nread = neighborList[n+11*Np]; fq = dist[nread]; //fq = dist[6*Np+n]; rho += fq; m1 += 8.0*fq; m2 += fq; jx -= fq; m4 -= fq; jz -= fq; m8 -= fq; m9 += fq; m10 += fq; m11 -= fq; m12 -= fq; m15 += fq; m16 += fq; m18 -= fq; // q=13 nread = neighborList[n+12*Np]; fq = dist[nread]; //fq = dist[16*Np+n]; rho += fq; m1 += 8.0*fq; m2 += fq; jx += fq; m4 += fq; jz -= fq; m8 -= fq; m9 += fq; m10 += fq; m11 -= fq; m12 -= fq; m15 -= fq; m16 -= fq; m18 -= fq; // q=14 nread = neighborList[n+13*Np]; fq = dist[nread]; //fq = dist[7*Np+n]; rho += fq; m1 += 8.0*fq; m2 += fq; jx -= fq; m4 -= fq; jz += fq; m8 += fq; m9 += fq; m10 += fq; m11 -= fq; m12 -= fq; m15 -= fq; m16 += fq; m18 += fq; // q=15 nread = neighborList[n+14*Np]; fq = dist[nread]; //fq = dist[17*Np+n]; rho += fq; m1 += 8.0*fq; m2 += fq; jy += fq; m6 += fq; jz += fq; m8 += fq; m9 -= 2.0*fq; m10 -= 2.0*fq; m14 = fq; m17 += fq; m18 -= fq; // q=16 nread = neighborList[n+15*Np]; fq = dist[nread]; //fq = dist[8*Np+n]; rho += fq; m1 += 8.0*fq; m2 += fq; jy -= fq; m6 -= fq; jz -= fq; m8 -= fq; m9 -= 2.0*fq; m10 -= 2.0*fq; m14 += fq; m17 -= fq; m18 += fq; // q=17 //fq = dist[18*Np+n]; nread = neighborList[n+16*Np]; fq = dist[nread]; rho += fq; m1 += 8.0*fq; m2 += fq; jy += fq; m6 += fq; jz -= fq; m8 -= fq; m9 -= 2.0*fq; m10 -= 2.0*fq; m14 -= fq; m17 += fq; m18 += fq; // q=18 nread = neighborList[n+17*Np]; fq = dist[nread]; //fq = dist[9*Np+n]; rho += fq; m1 += 8.0*fq; m2 += fq; jy -= fq; m6 -= fq; jz += fq; m8 += fq; m9 -= 2.0*fq; m10 -= 2.0*fq; m14 -= fq; m17 -= fq; m18 -= fq; //........................................................................ //..............carry out relaxation process.............................. //..........Toelke, Fruediger et. al. 2006................................ if (C == 0.0) nx = ny = nz = 0.0; m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) -alpha*C - m1); m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0)- m2); m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4); m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6); m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8); m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9); m10 = m10 + rlx_setA*( - m10); m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11); m12 = m12 + rlx_setA*( - m12); m13 = m13 + rlx_setA*( (jx*jy/rho0) + 0.5*alpha*C*nx*ny - m13); m14 = m14 + rlx_setA*( (jy*jz/rho0) + 0.5*alpha*C*ny*nz - m14); m15 = m15 + rlx_setA*( (jx*jz/rho0) + 0.5*alpha*C*nx*nz - m15); m16 = m16 + rlx_setB*( - m16); m17 = m17 + rlx_setB*( - m17); m18 = m18 + rlx_setB*( - m18); //.................inverse transformation...................................................... // q=0 fq = mrt_V1*rho-mrt_V2*m1+mrt_V3*m2; dist[n] = fq; // q = 1 fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jx-m4)+mrt_V6*(m9-m10)+0.16666666*Fx; nread = neighborList[n+Np]; dist[nread] = fq; // q=2 fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m4-jx)+mrt_V6*(m9-m10) - 0.16666666*Fx; nread = neighborList[n]; dist[nread] = fq; // q = 3 fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jy-m6)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) + 0.16666666*Fy; nread = neighborList[n+3*Np]; dist[nread] = fq; // q = 4 fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m6-jy)+mrt_V7*(m10-m9)+mrt_V8*(m11-m12) - 0.16666666*Fy; nread = neighborList[n+2*Np]; dist[nread] = fq; // q = 5 fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(jz-m8)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) + 0.16666666*Fz; nread = neighborList[n+5*Np]; dist[nread] = fq; // q = 6 fq = mrt_V1*rho-mrt_V4*m1-mrt_V5*m2+0.1*(m8-jz)+mrt_V7*(m10-m9)+mrt_V8*(m12-m11) - 0.16666666*Fz; nread = neighborList[n+4*Np]; dist[nread] = fq; // q = 7 fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx+jy)+0.025*(m4+m6)+ mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12+0.25*m13+0.125*(m16-m17) + 0.08333333333*(Fx+Fy); nread = neighborList[n+7*Np]; dist[nread] = fq; // q = 8 fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jy)-0.025*(m4+m6) +mrt_V7*m9+mrt_V11*m10+mrt_V8*m11 +mrt_V12*m12+0.25*m13+0.125*(m17-m16) - 0.08333333333*(Fx+Fy); nread = neighborList[n+6*Np]; dist[nread] = fq; // q = 9 fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jx-jy)+0.025*(m4-m6)+ mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13+0.125*(m16+m17) + 0.08333333333*(Fx-Fy); nread = neighborList[n+9*Np]; dist[nread] = fq; // q = 10 fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2+0.1*(jy-jx)+0.025*(m6-m4)+ mrt_V7*m9+mrt_V11*m10+mrt_V8*m11+mrt_V12*m12-0.25*m13-0.125*(m16+m17)- 0.08333333333*(Fx-Fy); nread = neighborList[n+8*Np]; dist[nread] = fq; // q = 11 fq = mrt_V1*rho+mrt_V9*m1 +mrt_V10*m2+0.1*(jx+jz)+0.025*(m4+m8) +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 -mrt_V12*m12+0.25*m15+0.125*(m18-m16) + 0.08333333333*(Fx+Fz); nread = neighborList[n+11*Np]; dist[nread] = fq; // q = 12 fq = mrt_V1*rho+mrt_V9*m1+mrt_V10*m2-0.1*(jx+jz)-0.025*(m4+m8)+ mrt_V7*m9+mrt_V11*m10-mrt_V8*m11-mrt_V12*m12+0.25*m15+0.125*(m16-m18) - 0.08333333333*(Fx+Fz); nread = neighborList[n+10*Np]; dist[nread]= fq; // q = 13 fq = mrt_V1*rho+mrt_V9*m1 +mrt_V10*m2+0.1*(jx-jz)+0.025*(m4-m8) +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 -mrt_V12*m12-0.25*m15-0.125*(m16+m18) + 0.08333333333*(Fx-Fz); nread = neighborList[n+13*Np]; dist[nread] = fq; // q= 14 fq = mrt_V1*rho+mrt_V9*m1 +mrt_V10*m2+0.1*(jz-jx)+0.025*(m8-m4) +mrt_V7*m9+mrt_V11*m10-mrt_V8*m11 -mrt_V12*m12-0.25*m15+0.125*(m16+m18) - 0.08333333333*(Fx-Fz); nread = neighborList[n+12*Np]; dist[nread] = fq; // q = 15 fq = mrt_V1*rho+mrt_V9*m1 +mrt_V10*m2+0.1*(jy+jz)+0.025*(m6+m8) -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m17-m18) + 0.08333333333*(Fy+Fz); nread = neighborList[n+15*Np]; dist[nread] = fq; // q = 16 fq = mrt_V1*rho+mrt_V9*m1 +mrt_V10*m2-0.1*(jy+jz)-0.025*(m6+m8) -mrt_V6*m9-mrt_V7*m10+0.25*m14+0.125*(m18-m17)- 0.08333333333*(Fy+Fz); nread = neighborList[n+14*Np]; dist[nread] = fq; // q = 17 fq = mrt_V1*rho+mrt_V9*m1 +mrt_V10*m2+0.1*(jy-jz)+0.025*(m6-m8) -mrt_V6*m9-mrt_V7*m10-0.25*m14+0.125*(m17+m18) + 0.08333333333*(Fy-Fz); nread = neighborList[n+17*Np]; dist[nread] = fq; // q = 18 fq = mrt_V1*rho+mrt_V9*m1 +mrt_V10*m2+0.1*(jz-jy)+0.025*(m8-m6) -mrt_V6*m9-mrt_V7*m10-0.25*m14-0.125*(m17+m18) - 0.08333333333*(Fy-Fz); nread = neighborList[n+16*Np]; dist[nread] = fq; // write the velocity ux = jx / rho0; uy = jy / rho0; uz = jz / rho0; Velocity[n] = ux; Velocity[Np+n] = uy; Velocity[2*Np+n] = uz; } } } __global__ void dvc_ScaLBL_D3Q19_AAeven_ColorMomentum(double *dist, double *Den, double *Velocity, double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int start, int finish, int Np){ int n; double fq; // conserved momemnts double rho,jx,jy,jz; // non-conserved moments double m1,m2,m4,m6,m8,m9,m10,m11,m12,m13,m14,m15,m16,m17,m18; double nA,nB; // number density double C,nx,ny,nz; //color gradient magnitude and direction double ux,uy,uz; double phi,tau,rho0,rlx_setA,rlx_setB; const double mrt_V1=0.05263157894736842; const double mrt_V2=0.012531328320802; const double mrt_V3=0.04761904761904762; const double mrt_V4=0.004594820384294068; const double mrt_V5=0.01587301587301587; const double mrt_V6=0.0555555555555555555555555; const double mrt_V7=0.02777777777777778; const double mrt_V8=0.08333333333333333; const double mrt_V9=0.003341687552213868; const double mrt_V10=0.003968253968253968; const double mrt_V11=0.01388888888888889; const double mrt_V12=0.04166666666666666; int S = Np/NBLOCKS/NTHREADS + 1; for (int s=0; s0)) delta=0; a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; Aq[1*Np+n] = a1; Bq[1*Np+n] = b1; Aq[2*Np+n] = a2; Bq[2*Np+n] = b2; //............................................... // q = 2 // Cq = {0,1,0} delta = beta*nA*nB*nAB*0.1111111111111111*ny; if (!(nA*nB*nAB>0)) delta=0; a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; Aq[3*Np+n] = a1; Bq[3*Np+n] = b1; Aq[4*Np+n] = a2; Bq[4*Np+n] = b2; //............................................... // q = 4 // Cq = {0,0,1} delta = beta*nA*nB*nAB*0.1111111111111111*nz; if (!(nA*nB*nAB>0)) delta=0; a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; Aq[5*Np+n] = a1; Bq[5*Np+n] = b1; Aq[6*Np+n] = a2; Bq[6*Np+n] = b2; //............................................... } } } __global__ void dvc_ScaLBL_D3Q19_AAodd_ColorMass(int *neighborList, double *Aq, double *Bq, double *Den, double *Velocity, double *ColorGrad, double beta, int start, int finish, int Np){ int n,nread; // non-conserved moments double nA,nB; // number density double a1,b1,a2,b2,nAB,delta; double C,nx,ny,nz; //color gradient magnitude and direction double ux,uy,uz; int S = Np/NBLOCKS/NTHREADS + 1; for (int s=0; s0)) delta=0; a1 = nA*(0.1111111111111111*(1+4.5*ux))+delta; b1 = nB*(0.1111111111111111*(1+4.5*ux))-delta; a2 = nA*(0.1111111111111111*(1-4.5*ux))-delta; b2 = nB*(0.1111111111111111*(1-4.5*ux))+delta; // q = 1 nread = neighborList[n+Np]; Aq[nread] = a1; Bq[nread] = b1; // q=2 nread = neighborList[n]; Aq[nread] = a2; Bq[nread] = b2; //............................................... // Cq = {0,1,0} delta = beta*nA*nB*nAB*0.1111111111111111*ny; if (!(nA*nB*nAB>0)) delta=0; a1 = nA*(0.1111111111111111*(1+4.5*uy))+delta; b1 = nB*(0.1111111111111111*(1+4.5*uy))-delta; a2 = nA*(0.1111111111111111*(1-4.5*uy))-delta; b2 = nB*(0.1111111111111111*(1-4.5*uy))+delta; // q = 3 nread = neighborList[n+3*Np]; Aq[nread] = a1; Bq[nread] = b1; // q = 4 nread = neighborList[n+2*Np]; Aq[nread] = a2; Bq[nread] = b2; //............................................... // q = 4 // Cq = {0,0,1} delta = beta*nA*nB*nAB*0.1111111111111111*nz; if (!(nA*nB*nAB>0)) delta=0; a1 = nA*(0.1111111111111111*(1+4.5*uz))+delta; b1 = nB*(0.1111111111111111*(1+4.5*uz))-delta; a2 = nA*(0.1111111111111111*(1-4.5*uz))-delta; b2 = nB*(0.1111111111111111*(1-4.5*uz))+delta; // q = 5 nread = neighborList[n+5*Np]; Aq[nread] = a1; Bq[nread] = b1; // q = 6 nread = neighborList[n+4*Np]; Aq[nread] = a2; Bq[nread] = b2; //............................................... } } } __global__ void dvc_ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq, double *Den, double *Phi, int start, int finish, int Np){ int idx,n,nread; double fq,nA,nB; int S = Np/NBLOCKS/NTHREADS + 1; for (int s=0; s 1.f){ nA = 1.0; nB = 0.f; } else if (phi < -1.f){ nB = 1.0; nA = 0.f; } else{ nA=0.5*(phi+1.f); nB=0.5*(1.f-phi); } Den[idx] = nA; Den[Np+idx] = nB; Aq[idx]=0.3333333333333333*nA; Aq[Np+idx]=0.1111111111111111*nA; Aq[2*Np+idx]=0.1111111111111111*nA; Aq[3*Np+idx]=0.1111111111111111*nA; Aq[4*Np+idx]=0.1111111111111111*nA; Aq[5*Np+idx]=0.1111111111111111*nA; Aq[6*Np+idx]=0.1111111111111111*nA; Bq[idx]=0.3333333333333333*nB; Bq[Np+idx]=0.1111111111111111*nB; Bq[2*Np+idx]=0.1111111111111111*nB; Bq[3*Np+idx]=0.1111111111111111*nB; Bq[4*Np+idx]=0.1111111111111111*nB; Bq[5*Np+idx]=0.1111111111111111*nB; Bq[6*Np+idx]=0.1111111111111111*nB; } } } extern "C" void ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny, int Nz, int Slice){ int GRID = Nx*Ny / 512 + 1; dvc_ScaLBL_SetSlice_z<<>>(Phi,value,Nx,Ny,Nz,Slice); } extern "C" void ScaLBL_Color_Init(char *ID, double *Den, double *Phi, double das, double dbs, int Nx, int Ny, int Nz){ dvc_ScaLBL_Color_Init<<>>(ID, Den, Phi, das, dbs, Nx, Ny, Nz); } extern "C" void ScaLBL_Color_InitDistance(char *ID, double *Den, double *Phi, double *Distance, double das, double dbs, double beta, double xp, int Nx, int Ny, int Nz){ dvc_ScaLBL_Color_InitDistance<<>>(ID, Den, Phi, Distance, das, dbs, beta, xp, Nx, Ny, Nz); } extern "C" void ScaLBL_D3Q19_ColorGradient(char *ID, double *phi, double *ColorGrad, int Nx, int Ny, int Nz){ dvc_ScaLBL_D3Q19_ColorGradient<<>>(ID, phi, ColorGrad, Nx, Ny, Nz); } extern "C" void ColorCollide( char *ID, double *disteven, double *distodd, double *ColorGrad, double *Velocity, int Nx, int Ny, int Nz,double rlx_setA, double rlx_setB, double alpha, double beta, double Fx, double Fy, double Fz, bool pBC){ dvc_ColorCollide<<>>( ID, disteven, distodd, ColorGrad,Velocity, Nx, Ny, Nz,rlx_setA, rlx_setB, alpha, beta, Fx, Fy, Fz, pBC); } extern "C" void ScaLBL_D3Q19_ColorCollide( char *ID, double *disteven, double *distodd, double *phi, double *ColorGrad, double *Velocity, int Nx, int Ny, int Nz,double rlx_setA, double rlx_setB, double alpha, double beta, double Fx, double Fy, double Fz){ dvc_ScaLBL_D3Q19_ColorCollide<<>>(ID, disteven, distodd, phi, ColorGrad, Velocity, Nx, Ny, Nz, rlx_setA, rlx_setB, alpha, beta, Fx, Fy, Fz); } extern "C" void DensityStreamD3Q7(char *ID, double *Den, double *Copy, double *Phi, double *ColorGrad, double *Velocity, double beta, int Nx, int Ny, int Nz, bool pBC){ dvc_DensityStreamD3Q7<<>>(ID, Den, Copy, Phi, ColorGrad, Velocity, beta, Nx, Ny, Nz, pBC); } extern "C" void ScaLBL_ComputePhaseField(char *ID, double *Phi, double *Den, int N){ dvc_ScaLBL_ComputePhaseField<<>>(ID, Phi, Den, N); } extern "C" void ScaLBL_D3Q7_ColorCollideMass(char *ID, double *A_even, double *A_odd, double *B_even, double *B_odd, double *Den, double *Phi, double *ColorGrad, double *Velocity, double beta, int N, bool pBC){ dvc_ScaLBL_D3Q7_ColorCollideMass<<>>(ID, A_even, A_odd, B_even, B_odd, Den, Phi, ColorGrad, Velocity, beta, N, pBC); } // Pressure Boundary Conditions Functions extern "C" void ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi, double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ cudaProfilerStart(); cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_Color, cudaFuncCachePreferL1); dvc_ScaLBL_D3Q19_AAeven_Color<<>>(Map, dist, Aq, Bq, Den, Phi, Vel, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_AAeven_Color: %s \n",cudaGetErrorString(err)); } cudaProfilerStop(); } extern "C" void ScaLBL_D3Q19_AAodd_Color(int *d_neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi, double *Vel, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){ cudaProfilerStart(); cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_Color, cudaFuncCachePreferL1); dvc_ScaLBL_D3Q19_AAodd_Color<<>>(d_neighborList, Map, dist, Aq, Bq, Den, Phi, Vel, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, strideY, strideZ, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_AAodd_Color: %s \n",cudaGetErrorString(err)); } cudaProfilerStop(); } extern "C" void ScaLBL_D3Q7_AAodd_PhaseField(int *NeighborList, int *Map, double *Aq, double *Bq, double *Den, double *Phi, int start, int finish, int Np){ cudaProfilerStart(); dvc_ScaLBL_D3Q7_AAodd_PhaseField<<>>(NeighborList, Map, Aq, Bq, Den, Phi, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q7_AAodd_PhaseField: %s \n",cudaGetErrorString(err)); } cudaProfilerStop(); } extern "C" void ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, double *Den, double *Phi, int start, int finish, int Np){ cudaProfilerStart(); dvc_ScaLBL_D3Q7_AAeven_PhaseField<<>>(Map, Aq, Bq, Den, Phi, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q7_AAeven_PhaseField: %s \n",cudaGetErrorString(err)); } cudaProfilerStop(); } extern "C" void ScaLBL_D3Q19_Gradient(int *Map, double *Phi, double *ColorGrad, int start, int finish, int Np, int Nx, int Ny, int Nz){ int strideY=Nx; int strideZ=Nx*Ny; dvc_ScaLBL_D3Q19_Gradient<<>>(Map, Phi, ColorGrad, start, finish, Np, strideY, strideZ); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_ColorGrad: %s \n",cudaGetErrorString(err)); } } extern "C" void ScaLBL_PhaseField_Init(int *Map, double *Phi, double *Den, double *Aq, double *Bq, int start, int finish, int Np){ dvc_ScaLBL_PhaseField_Init<<>>(Map, Phi, Den, Aq, Bq, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_PhaseField_Init: %s \n",cudaGetErrorString(err)); } } extern "C" void ScaLBL_D3Q19_AAeven_ColorMomentum(double *dist, double *Den, double *Vel, double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int start, int finish, int Np){ cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_ColorMomentum, cudaFuncCachePreferL1); dvc_ScaLBL_D3Q19_AAeven_ColorMomentum<<>>(dist, Den, Vel, ColorGrad, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_AAeven_ColorMomentum: %s \n",cudaGetErrorString(err)); } } extern "C" void ScaLBL_D3Q19_AAodd_ColorMomentum(int *d_neighborList, double *dist, double *Den, double *Vel, double *ColorGrad, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta, double Fx, double Fy, double Fz, int start, int finish, int Np){ cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_ColorMomentum, cudaFuncCachePreferL1); dvc_ScaLBL_D3Q19_AAodd_ColorMomentum<<>>(d_neighborList, dist, Den, Vel, ColorGrad, rhoA, rhoB, tauA, tauB, alpha, beta, Fx, Fy, Fz, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_AAodd_ColorMomentum: %s \n",cudaGetErrorString(err)); } } extern "C" void ScaLBL_D3Q19_AAeven_ColorMass(double *Aq, double *Bq, double *Den, double *Vel, double *ColorGrad, double beta, int start, int finish, int Np){ cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_ColorMass, cudaFuncCachePreferL1); dvc_ScaLBL_D3Q19_AAeven_ColorMass<<>>(Aq, Bq, Den, Vel, ColorGrad, beta, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_AAeven_Color: %s \n",cudaGetErrorString(err)); } } extern "C" void ScaLBL_D3Q19_AAodd_ColorMass(int *d_neighborList, double *Aq, double *Bq, double *Den, double *Vel, double *ColorGrad, double beta, int start, int finish, int Np){ cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_ColorMass, cudaFuncCachePreferL1); dvc_ScaLBL_D3Q19_AAodd_ColorMass<<>>(d_neighborList, Aq, Bq, Den, Vel, ColorGrad, beta, start, finish, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_D3Q19_AAodd_Color: %s \n",cudaGetErrorString(err)); } } extern "C" void ScaLBL_Color_BC_z(int *list, int *Map, double *Phi, double *Den, double vA, double vB, int count, int Np){ int GRID = count / 512 + 1; dvc_ScaLBL_Color_BC_z<<>>(list, Map, Phi, Den, vA, vB, count, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_Color_BC_z: %s \n",cudaGetErrorString(err)); } } extern "C" void ScaLBL_Color_BC_Z(int *list, int *Map, double *Phi, double *Den, double vA, double vB, int count, int Np){ int GRID = count / 512 + 1; dvc_ScaLBL_Color_BC_Z<<>>(list, Map, Phi, Den, vA, vB, count, Np); cudaError_t err = cudaGetLastError(); if (cudaSuccess != err){ printf("CUDA error in ScaLBL_Color_BC_Z: %s \n",cudaGetErrorString(err)); } } extern "C" void ScaLBL_CopySlice_z(double *Phi, int Nx, int Ny, int Nz, int Source, int Dest){ int GRID = Nx*Ny / 512 + 1; dvc_ScaLBL_CopySlice_z<<>>(Phi,Nx,Ny,Nz,Source,Dest); }