merge the latest membrane into test_poisson
This commit is contained in:
@@ -124,21 +124,7 @@ IF ( USE_CUDA )
|
||||
ADD_DEFINITIONS( -DUSE_CUDA )
|
||||
ENABLE_LANGUAGE( CUDA )
|
||||
ELSEIF ( USE_HIP )
|
||||
IF ( NOT DEFINED HIP_PATH )
|
||||
IF ( NOT DEFINED ENV{HIP_PATH} )
|
||||
SET( HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed" )
|
||||
ELSE()
|
||||
SET( HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed" )
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
SET( CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH} )
|
||||
FIND_PACKAGE( HIP REQUIRED )
|
||||
FIND_PACKAGE( CUDA QUIET )
|
||||
MESSAGE( "HIP Found")
|
||||
MESSAGE( " HIP version: ${HIP_VERSION_STRING}")
|
||||
MESSAGE( " HIP platform: ${HIP_PLATFORM}")
|
||||
MESSAGE( " HIP Include Path: ${HIP_INCLUDE_DIRS}")
|
||||
MESSAGE( " HIP Libraries: ${HIP_LIBRARIES}")
|
||||
ENABLE_LANGUAGE( HIP )
|
||||
ADD_DEFINITIONS( -DUSE_HIP )
|
||||
ENDIF()
|
||||
|
||||
@@ -180,8 +166,7 @@ IF ( NOT ONLY_BUILD_DOCS )
|
||||
IF ( USE_CUDA )
|
||||
ADD_PACKAGE_SUBDIRECTORY( cuda )
|
||||
ELSEIF ( USE_HIP )
|
||||
ADD_SUBDIRECTORY( hip )
|
||||
SET( LBPM_LIBRARIES lbpm-hip lbpm-wia )
|
||||
ADD_PACKAGE_SUBDIRECTORY( hip )
|
||||
ELSE()
|
||||
ADD_PACKAGE_SUBDIRECTORY( cpu )
|
||||
ENDIF()
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#include "IO/PackData.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
//! Template function to return the buffer size required to pack a class
|
||||
template<class TYPE>
|
||||
|
||||
@@ -1263,7 +1263,7 @@ static int backtrace_thread(
|
||||
if ( tid == pthread_self() ) {
|
||||
count = ::backtrace( buffer, size );
|
||||
} else {
|
||||
// Note: this will get the backtrace, but terminates the thread in the process!!!
|
||||
// Send a signal to the desired thread to get the call stack
|
||||
StackTrace_mutex.lock();
|
||||
struct sigaction sa;
|
||||
sigfillset( &sa.sa_mask );
|
||||
|
||||
@@ -193,6 +193,9 @@ MACRO( FIND_FILES )
|
||||
# Find the CUDA sources
|
||||
SET( T_CUDASOURCES "" )
|
||||
FILE( GLOB T_CUDASOURCES "*.cu" )
|
||||
# Find the HIP sources
|
||||
SET( T_HIPSOURCES "" )
|
||||
FILE( GLOB T_HIPSOURCES "*.hip" )
|
||||
# Find the C sources
|
||||
SET( T_CSOURCES "" )
|
||||
FILE( GLOB T_CSOURCES "*.c" )
|
||||
@@ -212,10 +215,11 @@ MACRO( FIND_FILES )
|
||||
SET( HEADERS ${HEADERS} ${T_HEADERS} )
|
||||
SET( CXXSOURCES ${CXXSOURCES} ${T_CXXSOURCES} )
|
||||
SET( CUDASOURCES ${CUDASOURCES} ${T_CUDASOURCES} )
|
||||
SET( HIPSOURCES ${HIPSOURCES} ${T_HIPSOURCES} )
|
||||
SET( CSOURCES ${CSOURCES} ${T_CSOURCES} )
|
||||
SET( FSOURCES ${FSOURCES} ${T_FSOURCES} )
|
||||
SET( M4FSOURCES ${M4FSOURCES} ${T_M4FSOURCES} )
|
||||
SET( SOURCES ${SOURCES} ${T_CXXSOURCES} ${T_CSOURCES} ${T_FSOURCES} ${T_M4FSOURCES} ${CUDASOURCES} )
|
||||
SET( SOURCES ${SOURCES} ${T_CXXSOURCES} ${T_CSOURCES} ${T_FSOURCES} ${T_M4FSOURCES} ${CUDASOURCES} ${HIPSOURCES} )
|
||||
ENDMACRO()
|
||||
|
||||
|
||||
@@ -227,6 +231,9 @@ MACRO( FIND_FILES_PATH IN_PATH )
|
||||
# Find the CUDA sources
|
||||
SET( T_CUDASOURCES "" )
|
||||
FILE( GLOB T_CUDASOURCES "${IN_PATH}/*.cu" )
|
||||
# Find the HIP sources
|
||||
SET( T_HIPSOURCES "" )
|
||||
FILE( GLOB T_HIPSOURCES "${IN_PATH}/*.hip" )
|
||||
# Find the C sources
|
||||
SET( T_CSOURCES "" )
|
||||
FILE( GLOB T_CSOURCES "${IN_PATH}/*.c" )
|
||||
@@ -246,9 +253,10 @@ MACRO( FIND_FILES_PATH IN_PATH )
|
||||
SET( HEADERS ${HEADERS} ${T_HEADERS} )
|
||||
SET( CXXSOURCES ${CXXSOURCES} ${T_CXXSOURCES} )
|
||||
SET( CUDASOURCES ${CUDASOURCES} ${T_CUDASOURCES} )
|
||||
SET( HIPSOURCES ${HIPSOURCES} ${T_HIPSOURCES} )
|
||||
SET( CSOURCES ${CSOURCES} ${T_CSOURCES} )
|
||||
SET( FSOURCES ${FSOURCES} ${T_FSOURCES} )
|
||||
SET( SOURCES ${SOURCES} ${T_CXXSOURCES} ${T_CSOURCES} ${T_FSOURCES} ${CUDASOURCES} )
|
||||
SET( SOURCES ${SOURCES} ${T_CXXSOURCES} ${T_CSOURCES} ${T_FSOURCES} ${CUDASOURCES} ${HIPSOURCES} )
|
||||
ENDMACRO()
|
||||
|
||||
|
||||
|
||||
@@ -20,10 +20,12 @@
|
||||
#include "common/ArraySize.h"
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <initializer_list>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
|
||||
@@ -4,11 +4,13 @@
|
||||
#include "common/Utilities.h"
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <initializer_list>
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
|
||||
#if defined(__CUDA_ARCH__)
|
||||
|
||||
@@ -208,72 +208,68 @@ inline void CommunicateSendRecvCounts(
|
||||
}
|
||||
|
||||
//***************************************************************************************
|
||||
inline void CommunicateRecvLists(
|
||||
const Utilities::MPI &comm, int sendtag, int recvtag, int *sendList_x,
|
||||
int *sendList_y, int *sendList_z, int *sendList_X, int *sendList_Y,
|
||||
int *sendList_Z, int *sendList_xy, int *sendList_XY, int *sendList_xY,
|
||||
int *sendList_Xy, int *sendList_xz, int *sendList_XZ, int *sendList_xZ,
|
||||
int *sendList_Xz, int *sendList_yz, int *sendList_YZ, int *sendList_yZ,
|
||||
int *sendList_Yz, int sendCount_x, int sendCount_y, int sendCount_z,
|
||||
int sendCount_X, int sendCount_Y, int sendCount_Z, int sendCount_xy,
|
||||
int sendCount_XY, int sendCount_xY, int sendCount_Xy, int sendCount_xz,
|
||||
int sendCount_XZ, int sendCount_xZ, int sendCount_Xz, int sendCount_yz,
|
||||
int sendCount_YZ, int sendCount_yZ, int sendCount_Yz, int *recvList_x,
|
||||
int *recvList_y, int *recvList_z, int *recvList_X, int *recvList_Y,
|
||||
int *recvList_Z, int *recvList_xy, int *recvList_XY, int *recvList_xY,
|
||||
int *recvList_Xy, int *recvList_xz, int *recvList_XZ, int *recvList_xZ,
|
||||
int *recvList_Xz, int *recvList_yz, int *recvList_YZ, int *recvList_yZ,
|
||||
int *recvList_Yz, int recvCount_x, int recvCount_y, int recvCount_z,
|
||||
int recvCount_X, int recvCount_Y, int recvCount_Z, int recvCount_xy,
|
||||
int recvCount_XY, int recvCount_xY, int recvCount_Xy, int recvCount_xz,
|
||||
int recvCount_XZ, int recvCount_xZ, int recvCount_Xz, int recvCount_yz,
|
||||
int recvCount_YZ, int recvCount_yZ, int recvCount_Yz, int rank_x,
|
||||
int rank_y, int rank_z, int rank_X, int rank_Y, int rank_Z, int rank_xy,
|
||||
int rank_XY, int rank_xY, int rank_Xy, int rank_xz, int rank_XZ,
|
||||
int rank_xZ, int rank_Xz, int rank_yz, int rank_YZ, int rank_yZ,
|
||||
int rank_Yz) {
|
||||
MPI_Request req1[18], req2[18];
|
||||
req1[0] = comm.Isend(sendList_x, sendCount_x, rank_x, sendtag);
|
||||
req2[0] = comm.Irecv(recvList_X, recvCount_X, rank_X, recvtag);
|
||||
req1[1] = comm.Isend(sendList_X, sendCount_X, rank_X, sendtag);
|
||||
req2[1] = comm.Irecv(recvList_x, recvCount_x, rank_x, recvtag);
|
||||
req1[2] = comm.Isend(sendList_y, sendCount_y, rank_y, sendtag);
|
||||
req2[2] = comm.Irecv(recvList_Y, recvCount_Y, rank_Y, recvtag);
|
||||
req1[3] = comm.Isend(sendList_Y, sendCount_Y, rank_Y, sendtag);
|
||||
req2[3] = comm.Irecv(recvList_y, recvCount_y, rank_y, recvtag);
|
||||
req1[4] = comm.Isend(sendList_z, sendCount_z, rank_z, sendtag);
|
||||
req2[4] = comm.Irecv(recvList_Z, recvCount_Z, rank_Z, recvtag);
|
||||
req1[5] = comm.Isend(sendList_Z, sendCount_Z, rank_Z, sendtag);
|
||||
req2[5] = comm.Irecv(recvList_z, recvCount_z, rank_z, recvtag);
|
||||
inline void CommunicateRecvLists( const Utilities::MPI& comm, int sendtag, int recvtag,
|
||||
int *sendList_x, int *sendList_y, int *sendList_z, int *sendList_X, int *sendList_Y, int *sendList_Z,
|
||||
int *sendList_xy, int *sendList_XY, int *sendList_xY, int *sendList_Xy,
|
||||
int *sendList_xz, int *sendList_XZ, int *sendList_xZ, int *sendList_Xz,
|
||||
int *sendList_yz, int *sendList_YZ, int *sendList_yZ, int *sendList_Yz,
|
||||
int sendCount_x, int sendCount_y, int sendCount_z, int sendCount_X, int sendCount_Y, int sendCount_Z,
|
||||
int sendCount_xy, int sendCount_XY, int sendCount_xY, int sendCount_Xy,
|
||||
int sendCount_xz, int sendCount_XZ, int sendCount_xZ, int sendCount_Xz,
|
||||
int sendCount_yz, int sendCount_YZ, int sendCount_yZ, int sendCount_Yz,
|
||||
int *recvList_x, int *recvList_y, int *recvList_z, int *recvList_X, int *recvList_Y, int *recvList_Z,
|
||||
int *recvList_xy, int *recvList_XY, int *recvList_xY, int *recvList_Xy,
|
||||
int *recvList_xz, int *recvList_XZ, int *recvList_xZ, int *recvList_Xz,
|
||||
int *recvList_yz, int *recvList_YZ, int *recvList_yZ, int *recvList_Yz,
|
||||
int recvCount_x, int recvCount_y, int recvCount_z, int recvCount_X, int recvCount_Y, int recvCount_Z,
|
||||
int recvCount_xy, int recvCount_XY, int recvCount_xY, int recvCount_Xy,
|
||||
int recvCount_xz, int recvCount_XZ, int recvCount_xZ, int recvCount_Xz,
|
||||
int recvCount_yz, int recvCount_YZ, int recvCount_yZ, int recvCount_Yz,
|
||||
int rank_x, int rank_y, int rank_z, int rank_X, int rank_Y, int rank_Z, int rank_xy, int rank_XY, int rank_xY,
|
||||
int rank_Xy, int rank_xz, int rank_XZ, int rank_xZ, int rank_Xz, int rank_yz, int rank_YZ, int rank_yZ, int rank_Yz)
|
||||
{
|
||||
MPI_Request req1[18], req2[18];
|
||||
req1[0] = comm.Isend(sendList_x,sendCount_x,rank_x,sendtag+0);
|
||||
req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_X,recvtag+0);
|
||||
req1[1] = comm.Isend(sendList_X,sendCount_X,rank_X,sendtag+1);
|
||||
req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_x,recvtag+1);
|
||||
req1[2] = comm.Isend(sendList_y,sendCount_y,rank_y,sendtag+2);
|
||||
req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_Y,recvtag+2);
|
||||
req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_Y,sendtag+3);
|
||||
req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_y,recvtag+3);
|
||||
req1[4] = comm.Isend(sendList_z,sendCount_z,rank_z,sendtag+4);
|
||||
req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_Z,recvtag+4);
|
||||
req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_Z,sendtag+5);
|
||||
req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_z,recvtag+5);
|
||||
|
||||
req1[6] = comm.Isend(sendList_xy, sendCount_xy, rank_xy, sendtag);
|
||||
req2[6] = comm.Irecv(recvList_XY, recvCount_XY, rank_XY, recvtag);
|
||||
req1[7] = comm.Isend(sendList_XY, sendCount_XY, rank_XY, sendtag);
|
||||
req2[7] = comm.Irecv(recvList_xy, recvCount_xy, rank_xy, recvtag);
|
||||
req1[8] = comm.Isend(sendList_Xy, sendCount_Xy, rank_Xy, sendtag);
|
||||
req2[8] = comm.Irecv(recvList_xY, recvCount_xY, rank_xY, recvtag);
|
||||
req1[9] = comm.Isend(sendList_xY, sendCount_xY, rank_xY, sendtag);
|
||||
req2[9] = comm.Irecv(recvList_Xy, recvCount_Xy, rank_Xy, recvtag);
|
||||
req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_xy,sendtag+6);
|
||||
req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_XY,recvtag+6);
|
||||
req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_XY,sendtag+7);
|
||||
req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_xy,recvtag+7);
|
||||
req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_Xy,sendtag+8);
|
||||
req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_xY,recvtag+8);
|
||||
req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_xY,sendtag+9);
|
||||
req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_Xy,recvtag+9);
|
||||
|
||||
req1[10] = comm.Isend(sendList_xz, sendCount_xz, rank_xz, sendtag);
|
||||
req2[10] = comm.Irecv(recvList_XZ, recvCount_XZ, rank_XZ, recvtag);
|
||||
req1[11] = comm.Isend(sendList_XZ, sendCount_XZ, rank_XZ, sendtag);
|
||||
req2[11] = comm.Irecv(recvList_xz, recvCount_xz, rank_xz, recvtag);
|
||||
req1[12] = comm.Isend(sendList_Xz, sendCount_Xz, rank_Xz, sendtag);
|
||||
req2[12] = comm.Irecv(recvList_xZ, recvCount_xZ, rank_xZ, recvtag);
|
||||
req1[13] = comm.Isend(sendList_xZ, sendCount_xZ, rank_xZ, sendtag);
|
||||
req2[13] = comm.Irecv(recvList_Xz, recvCount_Xz, rank_Xz, recvtag);
|
||||
req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_xz,sendtag+10);
|
||||
req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_XZ,recvtag+10);
|
||||
req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_XZ,sendtag+11);
|
||||
req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_xz,recvtag+11);
|
||||
req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_Xz,sendtag+12);
|
||||
req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_xZ,recvtag+12);
|
||||
req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_xZ,sendtag+13);
|
||||
req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_Xz,recvtag+13);
|
||||
|
||||
req1[14] = comm.Isend(sendList_yz, sendCount_yz, rank_yz, sendtag);
|
||||
req2[14] = comm.Irecv(recvList_YZ, recvCount_YZ, rank_YZ, recvtag);
|
||||
req1[15] = comm.Isend(sendList_YZ, sendCount_YZ, rank_YZ, sendtag);
|
||||
req2[15] = comm.Irecv(recvList_yz, recvCount_yz, rank_yz, recvtag);
|
||||
req1[16] = comm.Isend(sendList_Yz, sendCount_Yz, rank_Yz, sendtag);
|
||||
req2[16] = comm.Irecv(recvList_yZ, recvCount_yZ, rank_yZ, recvtag);
|
||||
req1[17] = comm.Isend(sendList_yZ, sendCount_yZ, rank_yZ, sendtag);
|
||||
req2[17] = comm.Irecv(recvList_Yz, recvCount_Yz, rank_Yz, recvtag);
|
||||
comm.waitAll(18, req1);
|
||||
comm.waitAll(18, req2);
|
||||
req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_yz,sendtag+14);
|
||||
req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_YZ,recvtag+14);
|
||||
req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_YZ,sendtag+15);
|
||||
req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_yz,recvtag+15);
|
||||
req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_Yz,sendtag+16);
|
||||
req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_yZ,recvtag+16);
|
||||
req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_yZ,sendtag+17);
|
||||
req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_Yz,recvtag+17);
|
||||
comm.waitAll( 18, req1 );
|
||||
comm.waitAll( 18, req2 );
|
||||
}
|
||||
|
||||
//***************************************************************************************
|
||||
|
||||
@@ -1543,7 +1543,7 @@ void Domain::ReadFromFile(const std::string &Filename,
|
||||
} else {
|
||||
// Receive the subdomain from rank = 0
|
||||
//printf("Ready to recieve data %i at process %i \n", N,rank);
|
||||
Comm.recv(id.data(), N, 0, 15);
|
||||
Comm.recv(UserData, N, 0, 15);
|
||||
}
|
||||
Comm.barrier();
|
||||
}
|
||||
|
||||
@@ -93,12 +93,11 @@ template<> long double genRand<long double>()
|
||||
* axpy *
|
||||
********************************************************/
|
||||
template <>
|
||||
void call_axpy<float>(size_t N, const float alpha, const float *x, float *y) {
|
||||
void call_axpy<float>(size_t, const float, const float*, float*) {
|
||||
ERROR("Not finished");
|
||||
}
|
||||
template <>
|
||||
void call_axpy<double>(size_t N, const double alpha, const double *x,
|
||||
double *y) {
|
||||
void call_axpy<double>(size_t, const double, const double*, double*) {
|
||||
ERROR("Not finished");
|
||||
}
|
||||
|
||||
@@ -106,22 +105,22 @@ void call_axpy<double>(size_t N, const double alpha, const double *x,
|
||||
* Multiply two arrays *
|
||||
********************************************************/
|
||||
template <>
|
||||
void call_gemv<double>(size_t M, size_t N, double alpha, double beta,
|
||||
const double *A, const double *x, double *y) {
|
||||
void call_gemv<double>(size_t, size_t, double, double,
|
||||
const double*, const double*, double*) {
|
||||
ERROR("Not finished");
|
||||
}
|
||||
template <>
|
||||
void call_gemv<float>(size_t M, size_t N, float alpha, float beta,
|
||||
const float *A, const float *x, float *y) {
|
||||
void call_gemv<float>(size_t, size_t, float, float,
|
||||
const float*, const float*, float*) {
|
||||
ERROR("Not finished");
|
||||
}
|
||||
template <>
|
||||
void call_gemm<double>(size_t M, size_t N, size_t K, double alpha, double beta,
|
||||
const double *A, const double *B, double *C) {
|
||||
void call_gemm<double>(size_t, size_t, size_t, double, double,
|
||||
const double*, const double*, double*) {
|
||||
ERROR("Not finished");
|
||||
}
|
||||
template <>
|
||||
void call_gemm<float>(size_t M, size_t N, size_t K, float alpha, float beta,
|
||||
const float *A, const float *B, float *C) {
|
||||
void call_gemm<float>(size_t, size_t, size_t, float, float,
|
||||
const float*, const float*, float*) {
|
||||
ERROR("Not finished");
|
||||
}
|
||||
|
||||
@@ -297,10 +297,10 @@ TYPE FunctionTable::sum(const Array<TYPE, FUN, ALLOC> &A) {
|
||||
}
|
||||
|
||||
template <class TYPE>
|
||||
inline void FunctionTable::gemmWrapper(char TRANSA, char TRANSB, int M, int N,
|
||||
int K, TYPE alpha, const TYPE *A,
|
||||
int LDA, const TYPE *B, int LDB,
|
||||
TYPE beta, TYPE *C, int LDC) {
|
||||
inline void FunctionTable::gemmWrapper(char, char, int, int,
|
||||
int, TYPE, const TYPE*,
|
||||
int, const TYPE*, int,
|
||||
TYPE, TYPE*, int) {
|
||||
ERROR("Not finished");
|
||||
}
|
||||
|
||||
|
||||
1597
common/MPI.I
1597
common/MPI.I
File diff suppressed because it is too large
Load Diff
284
common/MPI.cpp
284
common/MPI.cpp
@@ -1115,15 +1115,14 @@ bool MPI_CLASS::anyReduce(const bool value) const {
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<unsigned char>(const unsigned char *send,
|
||||
unsigned char *recv,
|
||||
const int n) const {
|
||||
int n) const {
|
||||
PROFILE_START("sumReduce1<unsigned char>", profile_level);
|
||||
MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED_CHAR, MPI_SUM,
|
||||
communicator);
|
||||
PROFILE_STOP("sumReduce1<unsigned char>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<unsigned char>(unsigned char *x,
|
||||
const int n) const {
|
||||
void MPI_CLASS::call_sumReduce<unsigned char>(unsigned char *x, int n) const {
|
||||
PROFILE_START("sumReduce2<unsigned char>", profile_level);
|
||||
auto send = x;
|
||||
auto recv = new unsigned char[n];
|
||||
@@ -1136,13 +1135,13 @@ void MPI_CLASS::call_sumReduce<unsigned char>(unsigned char *x,
|
||||
// char
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<char>(const char *send, char *recv,
|
||||
const int n) const {
|
||||
int n) const {
|
||||
PROFILE_START("sumReduce1<char>", profile_level);
|
||||
MPI_Allreduce((void *)send, (void *)recv, n, MPI_SIGNED_CHAR, MPI_SUM,
|
||||
communicator);
|
||||
PROFILE_STOP("sumReduce1<char>", profile_level);
|
||||
}
|
||||
template <> void MPI_CLASS::call_sumReduce<char>(char *x, const int n) const {
|
||||
template <> void MPI_CLASS::call_sumReduce<char>(char *x, int n) const {
|
||||
PROFILE_START("sumReduce2<char>", profile_level);
|
||||
auto send = x;
|
||||
auto recv = new char[n];
|
||||
@@ -1155,16 +1154,14 @@ template <> void MPI_CLASS::call_sumReduce<char>(char *x, const int n) const {
|
||||
// unsigned int
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<unsigned int>(const unsigned int *send,
|
||||
unsigned int *recv,
|
||||
const int n) const {
|
||||
unsigned int *recv, int n) const {
|
||||
PROFILE_START("sumReduce1<unsigned int>", profile_level);
|
||||
MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED, MPI_SUM,
|
||||
communicator);
|
||||
PROFILE_STOP("sumReduce1<unsigned int>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<unsigned int>(unsigned int *x,
|
||||
const int n) const {
|
||||
void MPI_CLASS::call_sumReduce<unsigned int>(unsigned int *x, int n) const {
|
||||
PROFILE_START("sumReduce2<unsigned int>", profile_level);
|
||||
auto send = x;
|
||||
auto recv = new unsigned int[n];
|
||||
@@ -1176,14 +1173,13 @@ void MPI_CLASS::call_sumReduce<unsigned int>(unsigned int *x,
|
||||
}
|
||||
// int
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<int>(const int *send, int *recv,
|
||||
const int n) const {
|
||||
void MPI_CLASS::call_sumReduce<int>(const int *send, int *recv, int n) const {
|
||||
PROFILE_START("sumReduce1<int>", profile_level);
|
||||
MPI_Allreduce((void *)send, (void *)recv, n, MPI_INT, MPI_SUM,
|
||||
communicator);
|
||||
PROFILE_STOP("sumReduce1<int>", profile_level);
|
||||
}
|
||||
template <> void MPI_CLASS::call_sumReduce<int>(int *x, const int n) const {
|
||||
template <> void MPI_CLASS::call_sumReduce<int>(int *x, int n) const {
|
||||
PROFILE_START("sumReduce2<int>", profile_level);
|
||||
auto send = x;
|
||||
auto recv = new int[n];
|
||||
@@ -1196,14 +1192,13 @@ template <> void MPI_CLASS::call_sumReduce<int>(int *x, const int n) const {
|
||||
// long int
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<long int>(const long int *send, long int *recv,
|
||||
const int n) const {
|
||||
int n) const {
|
||||
PROFILE_START("sumReduce1<long int>", profile_level);
|
||||
MPI_Allreduce((void *)send, (void *)recv, n, MPI_LONG, MPI_SUM,
|
||||
communicator);
|
||||
PROFILE_STOP("sumReduce1<long int>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<long int>(long int *x, const int n) const {
|
||||
template <> void MPI_CLASS::call_sumReduce<long int>(long int *x, int n) const {
|
||||
PROFILE_START("sumReduce2<long int>", profile_level);
|
||||
auto send = x;
|
||||
auto recv = new long int[n];
|
||||
@@ -1217,15 +1212,14 @@ void MPI_CLASS::call_sumReduce<long int>(long int *x, const int n) const {
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<unsigned long>(const unsigned long *send,
|
||||
unsigned long *recv,
|
||||
const int n) const {
|
||||
int n) const {
|
||||
PROFILE_START("sumReduce1<unsigned long>", profile_level);
|
||||
MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED_LONG, MPI_SUM,
|
||||
communicator);
|
||||
PROFILE_STOP("sumReduce1<unsigned long>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<unsigned long>(unsigned long *x,
|
||||
const int n) const {
|
||||
void MPI_CLASS::call_sumReduce<unsigned long>(unsigned long *x, int n) const {
|
||||
PROFILE_START("sumReduce2<unsigned long>", profile_level);
|
||||
auto send = x;
|
||||
auto recv = new unsigned long int[n];
|
||||
@@ -1239,15 +1233,14 @@ void MPI_CLASS::call_sumReduce<unsigned long>(unsigned long *x,
|
||||
#ifdef USE_WINDOWS
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<size_t>(const size_t *send, size_t *recv,
|
||||
const int n) const {
|
||||
int n) const {
|
||||
MPI_ASSERT(MPI_SIZE_T != 0);
|
||||
PROFILE_START("sumReduce1<size_t>", profile_level);
|
||||
MPI_Allreduce((void *)send, (void *)recv, n, MPI_SIZE_T, MPI_SUM,
|
||||
communicator);
|
||||
PROFILE_STOP("sumReduce1<size_t>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<size_t>(size_t *x, const int n) const {
|
||||
template <> void MPI_CLASS::call_sumReduce<size_t>(size_t *x, int n) const {
|
||||
MPI_ASSERT(MPI_SIZE_T != 0);
|
||||
PROFILE_START("sumReduce2<size_t>", profile_level);
|
||||
auto send = x;
|
||||
@@ -1263,13 +1256,13 @@ void MPI_CLASS::call_sumReduce<size_t>(size_t *x, const int n) const {
|
||||
// float
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<float>(const float *send, float *recv,
|
||||
const int n) const {
|
||||
int n) const {
|
||||
PROFILE_START("sumReduce1<float>", profile_level);
|
||||
MPI_Allreduce((void *)send, (void *)recv, n, MPI_FLOAT, MPI_SUM,
|
||||
communicator);
|
||||
PROFILE_STOP("sumReduce1<float>", profile_level);
|
||||
}
|
||||
template <> void MPI_CLASS::call_sumReduce<float>(float *x, const int n) const {
|
||||
template <> void MPI_CLASS::call_sumReduce<float>(float *x, int n) const {
|
||||
PROFILE_START("sumReduce2<float>", profile_level);
|
||||
auto send = x;
|
||||
auto recv = new float[n];
|
||||
@@ -1282,14 +1275,13 @@ template <> void MPI_CLASS::call_sumReduce<float>(float *x, const int n) const {
|
||||
// double
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<double>(const double *send, double *recv,
|
||||
const int n) const {
|
||||
int n) const {
|
||||
PROFILE_START("sumReduce1<double>", profile_level);
|
||||
MPI_Allreduce((void *)send, (void *)recv, n, MPI_DOUBLE, MPI_SUM,
|
||||
communicator);
|
||||
PROFILE_STOP("sumReduce1<double>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<double>(double *x, const int n) const {
|
||||
template <> void MPI_CLASS::call_sumReduce<double>(double *x, int n) const {
|
||||
PROFILE_START("sumReduce2<double>", profile_level);
|
||||
auto send = x;
|
||||
auto recv = new double[n];
|
||||
@@ -1302,7 +1294,7 @@ void MPI_CLASS::call_sumReduce<double>(double *x, const int n) const {
|
||||
// std::complex<double>
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<std::complex<double>>(
|
||||
const std::complex<double> *x, std::complex<double> *y, const int n) const {
|
||||
const std::complex<double> *x, std::complex<double> *y, int n) const {
|
||||
PROFILE_START("sumReduce1<complex double>", profile_level);
|
||||
auto send = new double[2 * n];
|
||||
auto recv = new double[2 * n];
|
||||
@@ -1320,7 +1312,7 @@ void MPI_CLASS::call_sumReduce<std::complex<double>>(
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_sumReduce<std::complex<double>>(std::complex<double> *x,
|
||||
const int n) const {
|
||||
int n) const {
|
||||
PROFILE_START("sumReduce2<complex double>", profile_level);
|
||||
auto send = new double[2 * n];
|
||||
auto recv = new double[2 * n];
|
||||
@@ -1345,7 +1337,7 @@ void MPI_CLASS::call_sumReduce<std::complex<double>>(std::complex<double> *x,
|
||||
// unsigned char
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<unsigned char>(const unsigned char *send,
|
||||
unsigned char *recv, const int n,
|
||||
unsigned char *recv, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
PROFILE_START("minReduce1<unsigned char>", profile_level);
|
||||
@@ -1363,7 +1355,7 @@ void MPI_CLASS::call_minReduce<unsigned char>(const unsigned char *send,
|
||||
}
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<unsigned char>(unsigned char *x, const int n,
|
||||
void MPI_CLASS::call_minReduce<unsigned char>(unsigned char *x, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
PROFILE_START("minReduce2<unsigned char>", profile_level);
|
||||
@@ -1386,7 +1378,7 @@ void MPI_CLASS::call_minReduce<unsigned char>(unsigned char *x, const int n,
|
||||
}
|
||||
// char
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<char>(const char *send, char *recv, const int n,
|
||||
void MPI_CLASS::call_minReduce<char>(const char *send, char *recv, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
PROFILE_START("minReduce1<char>", profile_level);
|
||||
@@ -1404,7 +1396,7 @@ void MPI_CLASS::call_minReduce<char>(const char *send, char *recv, const int n,
|
||||
}
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<char>(char *x, const int n,
|
||||
void MPI_CLASS::call_minReduce<char>(char *x, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
PROFILE_START("minReduce2<char>", profile_level);
|
||||
@@ -1428,7 +1420,7 @@ void MPI_CLASS::call_minReduce<char>(char *x, const int n,
|
||||
// unsigned int
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<unsigned int>(const unsigned int *send,
|
||||
unsigned int *recv, const int n,
|
||||
unsigned int *recv, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
PROFILE_START("minReduce1<unsigned int>", profile_level);
|
||||
@@ -1446,7 +1438,7 @@ void MPI_CLASS::call_minReduce<unsigned int>(const unsigned int *send,
|
||||
}
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<unsigned int>(unsigned int *x, const int n,
|
||||
void MPI_CLASS::call_minReduce<unsigned int>(unsigned int *x, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
PROFILE_START("minReduce2<unsigned int>", profile_level);
|
||||
@@ -1469,7 +1461,7 @@ void MPI_CLASS::call_minReduce<unsigned int>(unsigned int *x, const int n,
|
||||
}
|
||||
// int
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<int>(const int *x, int *y, const int n,
|
||||
void MPI_CLASS::call_minReduce<int>(const int *x, int *y, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
PROFILE_START("minReduce1<int>", profile_level);
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
@@ -1492,7 +1484,7 @@ void MPI_CLASS::call_minReduce<int>(const int *x, int *y, const int n,
|
||||
PROFILE_STOP("minReduce1<int>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<int>(int *x, const int n,
|
||||
void MPI_CLASS::call_minReduce<int>(int *x, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
PROFILE_START("minReduce2<int>", profile_level);
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
@@ -1523,7 +1515,7 @@ void MPI_CLASS::call_minReduce<int>(int *x, const int n,
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<unsigned long int>(const unsigned long int *send,
|
||||
unsigned long int *recv,
|
||||
const int n,
|
||||
int n,
|
||||
int *comm_rank_of_min) const {
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
PROFILE_START("minReduce1<unsigned long>", profile_level);
|
||||
@@ -1541,8 +1533,7 @@ void MPI_CLASS::call_minReduce<unsigned long int>(const unsigned long int *send,
|
||||
}
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<unsigned long int>(unsigned long int *x,
|
||||
const int n,
|
||||
void MPI_CLASS::call_minReduce<unsigned long int>(unsigned long int *x, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
PROFILE_START("minReduce2<unsigned long>", profile_level);
|
||||
@@ -1565,8 +1556,7 @@ void MPI_CLASS::call_minReduce<unsigned long int>(unsigned long int *x,
|
||||
}
|
||||
// long int
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<long int>(const long int *x, long int *y,
|
||||
const int n,
|
||||
void MPI_CLASS::call_minReduce<long int>(const long int *x, long int *y, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
PROFILE_START("minReduce1<long int>", profile_level);
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
@@ -1589,7 +1579,7 @@ void MPI_CLASS::call_minReduce<long int>(const long int *x, long int *y,
|
||||
PROFILE_STOP("minReduce1<long int>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<long int>(long int *x, const int n,
|
||||
void MPI_CLASS::call_minReduce<long int>(long int *x, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
PROFILE_START("minReduce2<long int>", profile_level);
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
@@ -1619,8 +1609,8 @@ void MPI_CLASS::call_minReduce<long int>(long int *x, const int n,
|
||||
// unsigned long long int
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<unsigned long long int>(
|
||||
const unsigned long long int *send, unsigned long long int *recv,
|
||||
const int n, int *comm_rank_of_min) const {
|
||||
const unsigned long long int *send, unsigned long long int *recv, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
PROFILE_START("minReduce1<long int>", profile_level);
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
auto x = new long long int[n];
|
||||
@@ -1647,7 +1637,7 @@ void MPI_CLASS::call_minReduce<unsigned long long int>(
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<unsigned long long int>(
|
||||
unsigned long long int *x, const int n, int *comm_rank_of_min) const {
|
||||
unsigned long long int *x, int n, int *comm_rank_of_min) const {
|
||||
auto recv = new unsigned long long int[n];
|
||||
call_minReduce<unsigned long long int>(x, recv, n, comm_rank_of_min);
|
||||
for (int i = 0; i < n; i++)
|
||||
@@ -1657,7 +1647,7 @@ void MPI_CLASS::call_minReduce<unsigned long long int>(
|
||||
// long long int
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<long long int>(const long long int *x,
|
||||
long long int *y, const int n,
|
||||
long long int *y, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
PROFILE_START("minReduce1<long int>", profile_level);
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
@@ -1676,7 +1666,7 @@ void MPI_CLASS::call_minReduce<long long int>(const long long int *x,
|
||||
PROFILE_STOP("minReduce1<long int>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<long long int>(long long int *x, const int n,
|
||||
void MPI_CLASS::call_minReduce<long long int>(long long int *x, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
auto recv = new long long int[n];
|
||||
call_minReduce<long long int>(x, recv, n, comm_rank_of_min);
|
||||
@@ -1686,7 +1676,7 @@ void MPI_CLASS::call_minReduce<long long int>(long long int *x, const int n,
|
||||
}
|
||||
// float
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<float>(const float *x, float *y, const int n,
|
||||
void MPI_CLASS::call_minReduce<float>(const float *x, float *y, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
PROFILE_START("minReduce1<float>", profile_level);
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
@@ -1709,7 +1699,7 @@ void MPI_CLASS::call_minReduce<float>(const float *x, float *y, const int n,
|
||||
PROFILE_STOP("minReduce1<float>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<float>(float *x, const int n,
|
||||
void MPI_CLASS::call_minReduce<float>(float *x, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
PROFILE_START("minReduce2<float>", profile_level);
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
@@ -1738,7 +1728,7 @@ void MPI_CLASS::call_minReduce<float>(float *x, const int n,
|
||||
}
|
||||
// double
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<double>(const double *x, double *y, const int n,
|
||||
void MPI_CLASS::call_minReduce<double>(const double *x, double *y, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
PROFILE_START("minReduce1<double>", profile_level);
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
@@ -1762,7 +1752,7 @@ void MPI_CLASS::call_minReduce<double>(const double *x, double *y, const int n,
|
||||
PROFILE_STOP("minReduce1<double>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_minReduce<double>(double *x, const int n,
|
||||
void MPI_CLASS::call_minReduce<double>(double *x, int n,
|
||||
int *comm_rank_of_min) const {
|
||||
PROFILE_START("minReduce2<double>", profile_level);
|
||||
if (comm_rank_of_min == nullptr) {
|
||||
@@ -1799,7 +1789,7 @@ void MPI_CLASS::call_minReduce<double>(double *x, const int n,
|
||||
// unsigned char
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<unsigned char>(const unsigned char *send,
|
||||
unsigned char *recv, const int n,
|
||||
unsigned char *recv, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
PROFILE_START("maxReduce1<unsigned char>", profile_level);
|
||||
@@ -1817,7 +1807,7 @@ void MPI_CLASS::call_maxReduce<unsigned char>(const unsigned char *send,
|
||||
}
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<unsigned char>(unsigned char *x, const int n,
|
||||
void MPI_CLASS::call_maxReduce<unsigned char>(unsigned char *x, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
PROFILE_START("maxReduce2<unsigned char>", profile_level);
|
||||
@@ -1840,7 +1830,7 @@ void MPI_CLASS::call_maxReduce<unsigned char>(unsigned char *x, const int n,
|
||||
}
|
||||
// char
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<char>(const char *send, char *recv, const int n,
|
||||
void MPI_CLASS::call_maxReduce<char>(const char *send, char *recv, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
PROFILE_START("maxReduce1<char>", profile_level);
|
||||
@@ -1858,7 +1848,7 @@ void MPI_CLASS::call_maxReduce<char>(const char *send, char *recv, const int n,
|
||||
}
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<char>(char *x, const int n,
|
||||
void MPI_CLASS::call_maxReduce<char>(char *x, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
PROFILE_START("maxReduce2<char>", profile_level);
|
||||
@@ -1882,7 +1872,7 @@ void MPI_CLASS::call_maxReduce<char>(char *x, const int n,
|
||||
// unsigned int
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<unsigned int>(const unsigned int *send,
|
||||
unsigned int *recv, const int n,
|
||||
unsigned int *recv, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
PROFILE_START("maxReduce1<unsigned int>", profile_level);
|
||||
@@ -1900,7 +1890,7 @@ void MPI_CLASS::call_maxReduce<unsigned int>(const unsigned int *send,
|
||||
}
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<unsigned int>(unsigned int *x, const int n,
|
||||
void MPI_CLASS::call_maxReduce<unsigned int>(unsigned int *x, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
PROFILE_START("maxReduce2<unsigned int>", profile_level);
|
||||
@@ -1923,7 +1913,7 @@ void MPI_CLASS::call_maxReduce<unsigned int>(unsigned int *x, const int n,
|
||||
}
|
||||
// int
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<int>(const int *x, int *y, const int n,
|
||||
void MPI_CLASS::call_maxReduce<int>(const int *x, int *y, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
PROFILE_START("maxReduce1<int>", profile_level);
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
@@ -1946,7 +1936,7 @@ void MPI_CLASS::call_maxReduce<int>(const int *x, int *y, const int n,
|
||||
PROFILE_STOP("maxReduce1<int>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<int>(int *x, const int n,
|
||||
void MPI_CLASS::call_maxReduce<int>(int *x, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
PROFILE_START("maxReduce2<int>", profile_level);
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
@@ -1975,8 +1965,7 @@ void MPI_CLASS::call_maxReduce<int>(int *x, const int n,
|
||||
}
|
||||
// long int
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<long int>(const long int *x, long int *y,
|
||||
const int n,
|
||||
void MPI_CLASS::call_maxReduce<long int>(const long int *x, long int *y, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
PROFILE_START("maxReduce1<lond int>", profile_level);
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
@@ -1999,7 +1988,7 @@ void MPI_CLASS::call_maxReduce<long int>(const long int *x, long int *y,
|
||||
PROFILE_STOP("maxReduce1<lond int>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<long int>(long int *x, const int n,
|
||||
void MPI_CLASS::call_maxReduce<long int>(long int *x, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
PROFILE_START("maxReduce2<lond int>", profile_level);
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
@@ -2030,7 +2019,7 @@ void MPI_CLASS::call_maxReduce<long int>(long int *x, const int n,
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<unsigned long int>(const unsigned long int *send,
|
||||
unsigned long int *recv,
|
||||
const int n,
|
||||
int n,
|
||||
int *comm_rank_of_max) const {
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
PROFILE_START("maxReduce1<unsigned long>", profile_level);
|
||||
@@ -2048,8 +2037,7 @@ void MPI_CLASS::call_maxReduce<unsigned long int>(const unsigned long int *send,
|
||||
}
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<unsigned long int>(unsigned long int *x,
|
||||
const int n,
|
||||
void MPI_CLASS::call_maxReduce<unsigned long int>(unsigned long int *x, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
PROFILE_START("maxReduce2<unsigned long>", profile_level);
|
||||
@@ -2073,8 +2061,8 @@ void MPI_CLASS::call_maxReduce<unsigned long int>(unsigned long int *x,
|
||||
// unsigned long long int
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<unsigned long long int>(
|
||||
const unsigned long long int *send, unsigned long long int *recv,
|
||||
const int n, int *comm_rank_of_max) const {
|
||||
const unsigned long long int *send, unsigned long long int *recv, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
PROFILE_START("maxReduce1<long int>", profile_level);
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
auto x = new long long int[n];
|
||||
@@ -2101,7 +2089,7 @@ void MPI_CLASS::call_maxReduce<unsigned long long int>(
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<unsigned long long int>(
|
||||
unsigned long long int *x, const int n, int *comm_rank_of_max) const {
|
||||
unsigned long long int *x, int n, int *comm_rank_of_max) const {
|
||||
auto recv = new unsigned long long int[n];
|
||||
call_maxReduce<unsigned long long int>(x, recv, n, comm_rank_of_max);
|
||||
for (int i = 0; i < n; i++)
|
||||
@@ -2111,7 +2099,7 @@ void MPI_CLASS::call_maxReduce<unsigned long long int>(
|
||||
// long long int
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<long long int>(const long long int *x,
|
||||
long long int *y, const int n,
|
||||
long long int *y, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
PROFILE_START("maxReduce1<long int>", profile_level);
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
@@ -2130,7 +2118,7 @@ void MPI_CLASS::call_maxReduce<long long int>(const long long int *x,
|
||||
PROFILE_STOP("maxReduce1<long int>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<long long int>(long long int *x, const int n,
|
||||
void MPI_CLASS::call_maxReduce<long long int>(long long int *x, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
auto recv = new long long int[n];
|
||||
call_maxReduce<long long int>(x, recv, n, comm_rank_of_max);
|
||||
@@ -2140,7 +2128,7 @@ void MPI_CLASS::call_maxReduce<long long int>(long long int *x, const int n,
|
||||
}
|
||||
// float
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<float>(const float *x, float *y, const int n,
|
||||
void MPI_CLASS::call_maxReduce<float>(const float *x, float *y, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
PROFILE_START("maxReduce1<float>", profile_level);
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
@@ -2164,7 +2152,7 @@ void MPI_CLASS::call_maxReduce<float>(const float *x, float *y, const int n,
|
||||
PROFILE_STOP("maxReduce1<float>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<float>(float *x, const int n,
|
||||
void MPI_CLASS::call_maxReduce<float>(float *x, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
PROFILE_START("maxReduce2<float>", profile_level);
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
@@ -2193,7 +2181,7 @@ void MPI_CLASS::call_maxReduce<float>(float *x, const int n,
|
||||
}
|
||||
// double
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<double>(const double *x, double *y, const int n,
|
||||
void MPI_CLASS::call_maxReduce<double>(const double *x, double *y, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
PROFILE_START("maxReduce1<double>", profile_level);
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
@@ -2217,7 +2205,7 @@ void MPI_CLASS::call_maxReduce<double>(const double *x, double *y, const int n,
|
||||
PROFILE_STOP("maxReduce1<double>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_maxReduce<double>(double *x, const int n,
|
||||
void MPI_CLASS::call_maxReduce<double>(double *x, int n,
|
||||
int *comm_rank_of_max) const {
|
||||
PROFILE_START("maxReduce2<double>", profile_level);
|
||||
if (comm_rank_of_max == nullptr) {
|
||||
@@ -2253,51 +2241,46 @@ void MPI_CLASS::call_maxReduce<double>(double *x, const int n,
|
||||
#ifdef USE_MPI
|
||||
// char
|
||||
template <>
|
||||
void MPI_CLASS::call_bcast<unsigned char>(unsigned char *x, const int n,
|
||||
const int root) const {
|
||||
void MPI_CLASS::call_bcast<unsigned char>(unsigned char *x, int n,
|
||||
int root) const {
|
||||
PROFILE_START("bcast<unsigned char>", profile_level);
|
||||
MPI_Bcast(x, n, MPI_UNSIGNED_CHAR, root, communicator);
|
||||
PROFILE_STOP("bcast<unsigned char>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_bcast<char>(char *x, const int n, const int root) const {
|
||||
template <> void MPI_CLASS::call_bcast<char>(char *x, int n, int root) const {
|
||||
PROFILE_START("bcast<char>", profile_level);
|
||||
MPI_Bcast(x, n, MPI_CHAR, root, communicator);
|
||||
PROFILE_STOP("bcast<char>", profile_level);
|
||||
}
|
||||
// int
|
||||
template <>
|
||||
void MPI_CLASS::call_bcast<unsigned int>(unsigned int *x, const int n,
|
||||
const int root) const {
|
||||
void MPI_CLASS::call_bcast<unsigned int>(unsigned int *x, int n,
|
||||
int root) const {
|
||||
PROFILE_START("bcast<unsigned int>", profile_level);
|
||||
MPI_Bcast(x, n, MPI_UNSIGNED, root, communicator);
|
||||
PROFILE_STOP("bcast<unsigned int>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::call_bcast<int>(int *x, const int n, const int root) const {
|
||||
template <> void MPI_CLASS::call_bcast<int>(int *x, int n, int root) const {
|
||||
PROFILE_START("bcast<int>", profile_level);
|
||||
MPI_Bcast(x, n, MPI_INT, root, communicator);
|
||||
PROFILE_STOP("bcast<int>", profile_level);
|
||||
}
|
||||
// float
|
||||
template <>
|
||||
void MPI_CLASS::call_bcast<float>(float *x, const int n, const int root) const {
|
||||
template <> void MPI_CLASS::call_bcast<float>(float *x, int n, int root) const {
|
||||
PROFILE_START("bcast<float>", profile_level);
|
||||
MPI_Bcast(x, n, MPI_FLOAT, root, communicator);
|
||||
PROFILE_STOP("bcast<float>", profile_level);
|
||||
}
|
||||
// double
|
||||
template <>
|
||||
void MPI_CLASS::call_bcast<double>(double *x, const int n,
|
||||
const int root) const {
|
||||
void MPI_CLASS::call_bcast<double>(double *x, int n, int root) const {
|
||||
PROFILE_START("bcast<double>", profile_level);
|
||||
MPI_Bcast(x, n, MPI_DOUBLE, root, communicator);
|
||||
PROFILE_STOP("bcast<double>", profile_level);
|
||||
}
|
||||
#else
|
||||
// We need a concrete instantiation of bcast<char>(x,n,root);
|
||||
template <>
|
||||
void MPI_CLASS::call_bcast<char>(char *, const int, const int) const {}
|
||||
template <> void MPI_CLASS::call_bcast<char>(char *, int, int) const {}
|
||||
#endif
|
||||
|
||||
/************************************************************************
|
||||
@@ -2316,8 +2299,8 @@ void MPI_CLASS::barrier() const {
|
||||
#ifdef USE_MPI
|
||||
// char
|
||||
template <>
|
||||
void MPI_CLASS::send<char>(const char *buf, const int length,
|
||||
const int recv_proc_number, int tag) const {
|
||||
void MPI_CLASS::send<char>(const char *buf, int length, int recv_proc_number,
|
||||
int tag) const {
|
||||
// Set the tag to 0 if it is < 0
|
||||
tag = (tag >= 0) ? tag : 0;
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
@@ -2329,8 +2312,8 @@ void MPI_CLASS::send<char>(const char *buf, const int length,
|
||||
}
|
||||
// int
|
||||
template <>
|
||||
void MPI_CLASS::send<int>(const int *buf, const int length,
|
||||
const int recv_proc_number, int tag) const {
|
||||
void MPI_CLASS::send<int>(const int *buf, int length, int recv_proc_number,
|
||||
int tag) const {
|
||||
// Set the tag to 0 if it is < 0
|
||||
tag = (tag >= 0) ? tag : 0;
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
@@ -2341,8 +2324,8 @@ void MPI_CLASS::send<int>(const int *buf, const int length,
|
||||
}
|
||||
// float
|
||||
template <>
|
||||
void MPI_CLASS::send<float>(const float *buf, const int length,
|
||||
const int recv_proc_number, int tag) const {
|
||||
void MPI_CLASS::send<float>(const float *buf, int length, int recv_proc_number,
|
||||
int tag) const {
|
||||
// Set the tag to 0 if it is < 0
|
||||
tag = (tag >= 0) ? tag : 0;
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
@@ -2354,8 +2337,8 @@ void MPI_CLASS::send<float>(const float *buf, const int length,
|
||||
}
|
||||
// double
|
||||
template <>
|
||||
void MPI_CLASS::send<double>(const double *buf, const int length,
|
||||
const int recv_proc_number, int tag) const {
|
||||
void MPI_CLASS::send<double>(const double *buf, int length,
|
||||
int recv_proc_number, int tag) const {
|
||||
// Set the tag to 0 if it is < 0
|
||||
tag = (tag >= 0) ? tag : 0;
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
@@ -2368,8 +2351,7 @@ void MPI_CLASS::send<double>(const double *buf, const int length,
|
||||
#else
|
||||
// We need a concrete instantiation of send for use without MPI
|
||||
template <>
|
||||
void MPI_CLASS::send<char>(const char *buf, const int length, const int,
|
||||
int tag) const {
|
||||
void MPI_CLASS::send<char>(const char *buf, int length, int, int tag) const {
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
MPI_INSIST(tag >= 0, "tag must be >= 0");
|
||||
PROFILE_START("send<char>", profile_level);
|
||||
@@ -2391,8 +2373,8 @@ void MPI_CLASS::send<char>(const char *buf, const int length, const int,
|
||||
#ifdef USE_MPI
|
||||
// char
|
||||
template <>
|
||||
MPI_Request MPI_CLASS::Isend<char>(const char *buf, const int length,
|
||||
const int recv_proc, const int tag) const {
|
||||
MPI_Request MPI_CLASS::Isend<char>(const char *buf, int length, int recv_proc,
|
||||
int tag) const {
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
MPI_INSIST(tag >= 0, "tag must be >= 0");
|
||||
MPI_Request request;
|
||||
@@ -2404,8 +2386,8 @@ MPI_Request MPI_CLASS::Isend<char>(const char *buf, const int length,
|
||||
}
|
||||
// int
|
||||
template <>
|
||||
MPI_Request MPI_CLASS::Isend<int>(const int *buf, const int length,
|
||||
const int recv_proc, const int tag) const {
|
||||
MPI_Request MPI_CLASS::Isend<int>(const int *buf, int length, int recv_proc,
|
||||
int tag) const {
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
MPI_INSIST(tag >= 0, "tag must be >= 0");
|
||||
MPI_Request request;
|
||||
@@ -2417,8 +2399,8 @@ MPI_Request MPI_CLASS::Isend<int>(const int *buf, const int length,
|
||||
}
|
||||
// float
|
||||
template <>
|
||||
MPI_Request MPI_CLASS::Isend<float>(const float *buf, const int length,
|
||||
const int recv_proc, const int tag) const {
|
||||
MPI_Request MPI_CLASS::Isend<float>(const float *buf, int length, int recv_proc,
|
||||
int tag) const {
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
MPI_INSIST(tag >= 0, "tag must be >= 0");
|
||||
MPI_Request request;
|
||||
@@ -2430,8 +2412,8 @@ MPI_Request MPI_CLASS::Isend<float>(const float *buf, const int length,
|
||||
}
|
||||
// double
|
||||
template <>
|
||||
MPI_Request MPI_CLASS::Isend<double>(const double *buf, const int length,
|
||||
const int recv_proc, const int tag) const {
|
||||
MPI_Request MPI_CLASS::Isend<double>(const double *buf, int length,
|
||||
int recv_proc, int tag) const {
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
MPI_INSIST(tag >= 0, "tag must be >= 0");
|
||||
MPI_Request request;
|
||||
@@ -2444,8 +2426,8 @@ MPI_Request MPI_CLASS::Isend<double>(const double *buf, const int length,
|
||||
#else
|
||||
// We need a concrete instantiation of Isend for use without MPI
|
||||
template <>
|
||||
MPI_Request MPI_CLASS::Isend<char>(const char *buf, const int length, const int,
|
||||
const int tag) const {
|
||||
MPI_Request MPI_CLASS::Isend<char>(const char *buf, int length, int,
|
||||
int tag) const {
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
MPI_INSIST(tag >= 0, "tag must be >= 0");
|
||||
PROFILE_START("Isend<char>", profile_level);
|
||||
@@ -2472,8 +2454,8 @@ MPI_Request MPI_CLASS::Isend<char>(const char *buf, const int length, const int,
|
||||
/************************************************************************
|
||||
* Send byte array to another processor. *
|
||||
************************************************************************/
|
||||
void MPI_CLASS::sendBytes(const void *buf, const int number_bytes,
|
||||
const int recv_proc_number, int tag) const {
|
||||
void MPI_CLASS::sendBytes(const void *buf, int number_bytes,
|
||||
int recv_proc_number, int tag) const {
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
MPI_INSIST(tag >= 0, "tag must be >= 0");
|
||||
send<char>((const char *)buf, number_bytes, recv_proc_number, tag);
|
||||
@@ -2482,7 +2464,7 @@ void MPI_CLASS::sendBytes(const void *buf, const int number_bytes,
|
||||
/************************************************************************
|
||||
* Non-blocking send byte array to another processor. *
|
||||
************************************************************************/
|
||||
MPI_Request MPI_CLASS::IsendBytes(const void *buf, const int number_bytes,
|
||||
MPI_Request MPI_CLASS::IsendBytes(const void *buf, int number_bytes,
|
||||
const int recv_proc, const int tag) const {
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
MPI_INSIST(tag >= 0, "tag must be >= 0");
|
||||
@@ -2496,7 +2478,7 @@ MPI_Request MPI_CLASS::IsendBytes(const void *buf, const int number_bytes,
|
||||
#ifdef USE_MPI
|
||||
// char
|
||||
template <>
|
||||
void MPI_CLASS::recv<char>(char *buf, int &length, const int send_proc_number,
|
||||
void MPI_CLASS::recv<char>(char *buf, int &length, int send_proc_number,
|
||||
const bool get_length, int tag) const {
|
||||
// Set the tag to 0 if it is < 0
|
||||
tag = (tag >= 0) ? tag : 0;
|
||||
@@ -2518,7 +2500,7 @@ void MPI_CLASS::recv<char>(char *buf, int &length, const int send_proc_number,
|
||||
}
|
||||
// int
|
||||
template <>
|
||||
void MPI_CLASS::recv<int>(int *buf, int &length, const int send_proc_number,
|
||||
void MPI_CLASS::recv<int>(int *buf, int &length, int send_proc_number,
|
||||
const bool get_length, int tag) const {
|
||||
// Set the tag to 0 if it is < 0
|
||||
tag = (tag >= 0) ? tag : 0;
|
||||
@@ -2540,7 +2522,7 @@ void MPI_CLASS::recv<int>(int *buf, int &length, const int send_proc_number,
|
||||
}
|
||||
// float
|
||||
template <>
|
||||
void MPI_CLASS::recv<float>(float *buf, int &length, const int send_proc_number,
|
||||
void MPI_CLASS::recv<float>(float *buf, int &length, int send_proc_number,
|
||||
const bool get_length, int tag) const {
|
||||
// Set the tag to 0 if it is < 0
|
||||
tag = (tag >= 0) ? tag : 0;
|
||||
@@ -2562,9 +2544,8 @@ void MPI_CLASS::recv<float>(float *buf, int &length, const int send_proc_number,
|
||||
}
|
||||
// double
|
||||
template <>
|
||||
void MPI_CLASS::recv<double>(double *buf, int &length,
|
||||
const int send_proc_number, const bool get_length,
|
||||
int tag) const {
|
||||
void MPI_CLASS::recv<double>(double *buf, int &length, int send_proc_number,
|
||||
const bool get_length, int tag) const {
|
||||
// Set the tag to 0 if it is < 0
|
||||
tag = (tag >= 0) ? tag : 0;
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
@@ -2586,7 +2567,7 @@ void MPI_CLASS::recv<double>(double *buf, int &length,
|
||||
#else
|
||||
// We need a concrete instantiation of recv for use without mpi
|
||||
template <>
|
||||
void MPI_CLASS::recv<char>(char *buf, int &length, const int, const bool,
|
||||
void MPI_CLASS::recv<char>(char *buf, int &length, int, const bool,
|
||||
int tag) const {
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
MPI_INSIST(tag >= 0, "tag must be >= 0");
|
||||
@@ -2609,8 +2590,8 @@ void MPI_CLASS::recv<char>(char *buf, int &length, const int, const bool,
|
||||
#ifdef USE_MPI
|
||||
// char
|
||||
template <>
|
||||
MPI_Request MPI_CLASS::Irecv<char>(char *buf, const int length,
|
||||
const int send_proc, const int tag) const {
|
||||
MPI_Request MPI_CLASS::Irecv<char>(char *buf, int length, int send_proc,
|
||||
int tag) const {
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
MPI_INSIST(tag >= 0, "tag must be >= 0");
|
||||
MPI_Request request;
|
||||
@@ -2622,8 +2603,8 @@ MPI_Request MPI_CLASS::Irecv<char>(char *buf, const int length,
|
||||
}
|
||||
// int
|
||||
template <>
|
||||
MPI_Request MPI_CLASS::Irecv<int>(int *buf, const int length,
|
||||
const int send_proc, const int tag) const {
|
||||
MPI_Request MPI_CLASS::Irecv<int>(int *buf, int length, int send_proc,
|
||||
int tag) const {
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
MPI_INSIST(tag >= 0, "tag must be >= 0");
|
||||
MPI_Request request;
|
||||
@@ -2635,8 +2616,8 @@ MPI_Request MPI_CLASS::Irecv<int>(int *buf, const int length,
|
||||
}
|
||||
// float
|
||||
template <>
|
||||
MPI_Request MPI_CLASS::Irecv<float>(float *buf, const int length,
|
||||
const int send_proc, const int tag) const {
|
||||
MPI_Request MPI_CLASS::Irecv<float>(float *buf, int length, int send_proc,
|
||||
int tag) const {
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
MPI_INSIST(tag >= 0, "tag must be >= 0");
|
||||
MPI_Request request;
|
||||
@@ -2648,8 +2629,8 @@ MPI_Request MPI_CLASS::Irecv<float>(float *buf, const int length,
|
||||
}
|
||||
// double
|
||||
template <>
|
||||
MPI_Request MPI_CLASS::Irecv<double>(double *buf, const int length,
|
||||
const int send_proc, const int tag) const {
|
||||
MPI_Request MPI_CLASS::Irecv<double>(double *buf, int length, int send_proc,
|
||||
int tag) const {
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
MPI_INSIST(tag >= 0, "tag must be >= 0");
|
||||
MPI_Request request;
|
||||
@@ -2662,8 +2643,7 @@ MPI_Request MPI_CLASS::Irecv<double>(double *buf, const int length,
|
||||
#else
|
||||
// We need a concrete instantiation of irecv for use without mpi
|
||||
template <>
|
||||
MPI_Request MPI_CLASS::Irecv<char>(char *buf, const int length, const int,
|
||||
const int tag) const {
|
||||
MPI_Request MPI_CLASS::Irecv<char>(char *buf, int length, int, int tag) const {
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
MPI_INSIST(tag >= 0, "tag must be >= 0");
|
||||
PROFILE_START("Irecv<char>", profile_level);
|
||||
@@ -2690,7 +2670,7 @@ MPI_Request MPI_CLASS::Irecv<char>(char *buf, const int length, const int,
|
||||
/************************************************************************
|
||||
* Receive byte array from another processor. *
|
||||
************************************************************************/
|
||||
void MPI_CLASS::recvBytes(void *buf, int &number_bytes, const int send_proc,
|
||||
void MPI_CLASS::recvBytes(void *buf, int &number_bytes, int send_proc,
|
||||
int tag) const {
|
||||
recv<char>((char *)buf, number_bytes, send_proc, false, tag);
|
||||
}
|
||||
@@ -2698,8 +2678,8 @@ void MPI_CLASS::recvBytes(void *buf, int &number_bytes, const int send_proc,
|
||||
/************************************************************************
|
||||
* Non-blocking receive of a byte array from another processor. *
|
||||
************************************************************************/
|
||||
MPI_Request MPI_CLASS::IrecvBytes(void *buf, const int number_bytes,
|
||||
const int send_proc, const int tag) const {
|
||||
MPI_Request MPI_CLASS::IrecvBytes(void *buf, int number_bytes, int send_proc,
|
||||
int tag) const {
|
||||
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
|
||||
MPI_INSIST(tag >= 0, "tag must be >= 0");
|
||||
return Irecv<char>((char *)buf, number_bytes, send_proc, tag);
|
||||
@@ -2913,7 +2893,7 @@ void MPI_CLASS::call_allGather<char>(const char *, int, char *, int *,
|
||||
************************************************************************/
|
||||
#ifdef USE_MPI
|
||||
template <>
|
||||
void MPI_CLASS::allToAll<unsigned char>(const int n, const unsigned char *send,
|
||||
void MPI_CLASS::allToAll<unsigned char>(int n, const unsigned char *send,
|
||||
unsigned char *recv) const {
|
||||
PROFILE_START("allToAll<unsigned char>", profile_level);
|
||||
MPI_Alltoall((void *)send, n, MPI_UNSIGNED_CHAR, (void *)recv, n,
|
||||
@@ -2921,15 +2901,14 @@ void MPI_CLASS::allToAll<unsigned char>(const int n, const unsigned char *send,
|
||||
PROFILE_STOP("allToAll<unsigned char>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::allToAll<char>(const int n, const char *send,
|
||||
char *recv) const {
|
||||
void MPI_CLASS::allToAll<char>(int n, const char *send, char *recv) const {
|
||||
PROFILE_START("allToAll<char>", profile_level);
|
||||
MPI_Alltoall((void *)send, n, MPI_CHAR, (void *)recv, n, MPI_CHAR,
|
||||
communicator);
|
||||
PROFILE_STOP("allToAll<char>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::allToAll<unsigned int>(const int n, const unsigned int *send,
|
||||
void MPI_CLASS::allToAll<unsigned int>(int n, const unsigned int *send,
|
||||
unsigned int *recv) const {
|
||||
PROFILE_START("allToAll<unsigned int>", profile_level);
|
||||
MPI_Alltoall((void *)send, n, MPI_UNSIGNED, (void *)recv, n, MPI_UNSIGNED,
|
||||
@@ -2937,14 +2916,14 @@ void MPI_CLASS::allToAll<unsigned int>(const int n, const unsigned int *send,
|
||||
PROFILE_STOP("allToAll<unsigned int>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::allToAll<int>(const int n, const int *send, int *recv) const {
|
||||
void MPI_CLASS::allToAll<int>(int n, const int *send, int *recv) const {
|
||||
PROFILE_START("allToAll<int>", profile_level);
|
||||
MPI_Alltoall((void *)send, n, MPI_INT, (void *)recv, n, MPI_INT,
|
||||
communicator);
|
||||
PROFILE_STOP("allToAll<int>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::allToAll<unsigned long int>(const int n,
|
||||
void MPI_CLASS::allToAll<unsigned long int>(int n,
|
||||
const unsigned long int *send,
|
||||
unsigned long int *recv) const {
|
||||
PROFILE_START("allToAll<unsigned long>", profile_level);
|
||||
@@ -2953,7 +2932,7 @@ void MPI_CLASS::allToAll<unsigned long int>(const int n,
|
||||
PROFILE_STOP("allToAll<unsigned long>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::allToAll<long int>(const int n, const long int *send,
|
||||
void MPI_CLASS::allToAll<long int>(int n, const long int *send,
|
||||
long int *recv) const {
|
||||
PROFILE_START("allToAll<long int>", profile_level);
|
||||
MPI_Alltoall((void *)send, n, MPI_LONG, (void *)recv, n, MPI_LONG,
|
||||
@@ -2961,15 +2940,14 @@ void MPI_CLASS::allToAll<long int>(const int n, const long int *send,
|
||||
PROFILE_STOP("allToAll<long int>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::allToAll<float>(const int n, const float *send,
|
||||
float *recv) const {
|
||||
void MPI_CLASS::allToAll<float>(int n, const float *send, float *recv) const {
|
||||
PROFILE_START("allToAll<float>", profile_level);
|
||||
MPI_Alltoall((void *)send, n, MPI_FLOAT, (void *)recv, n, MPI_FLOAT,
|
||||
communicator);
|
||||
PROFILE_STOP("allToAll<float>", profile_level);
|
||||
}
|
||||
template <>
|
||||
void MPI_CLASS::allToAll<double>(const int n, const double *send,
|
||||
void MPI_CLASS::allToAll<double>(int n, const double *send,
|
||||
double *recv) const {
|
||||
PROFILE_START("allToAll<double>", profile_level);
|
||||
MPI_Alltoall((void *)send, n, MPI_DOUBLE, (void *)recv, n, MPI_DOUBLE,
|
||||
@@ -3713,4 +3691,28 @@ MPI MPI::loadBalance(double local, std::vector<double> work) {
|
||||
return split(0, key[getRank()]);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/****************************************************************************
|
||||
* Function Persistent Communication *
|
||||
****************************************************************************/
|
||||
template <>
|
||||
std::shared_ptr<MPI_Request> MPI::Isend_init<double>(const double *buf, int N, int proc, int tag) const
|
||||
{
|
||||
std::shared_ptr<MPI_Request> obj(new MPI_Request, []( MPI_Request *req ) { MPI_Request_free( req ); } );
|
||||
MPI_Send_init( buf, N, MPI_DOUBLE, proc, tag, communicator, obj.get() );
|
||||
return obj;
|
||||
}
|
||||
template<>
|
||||
std::shared_ptr<MPI_Request> MPI::Irecv_init<double>(double *buf, int N, int proc, int tag) const
|
||||
{
|
||||
std::shared_ptr<MPI_Request> obj(new MPI_Request, []( MPI_Request *req ) { MPI_Request_free( req ); } );
|
||||
MPI_Recv_init( buf, N, MPI_DOUBLE, proc, tag, communicator, obj.get() );
|
||||
return obj;
|
||||
}
|
||||
/*!
 * Start the communication described by a request by forwarding it to MPI_Start.
 * @param request  Request handle; its address is passed to MPI_Start
 */
void MPI::Start( MPI_Request &request )
{
    MPI_Request *handle = &request;
    MPI_Start( handle );
}
|
||||
|
||||
} // namespace Utilities
|
||||
|
||||
115
common/MPI.h
115
common/MPI.h
@@ -26,6 +26,7 @@ redistribution is prohibited.
|
||||
#include <atomic>
|
||||
#include <complex>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
@@ -173,10 +174,9 @@ public: // Member functions
|
||||
*
|
||||
*/
|
||||
static void
|
||||
balanceProcesses(const MPI &comm = MPI(MPI_COMM_WORLD),
|
||||
const int method = 1,
|
||||
balanceProcesses(const MPI &comm = MPI(MPI_COMM_WORLD), int method = 1,
|
||||
const std::vector<int> &procs = std::vector<int>(),
|
||||
const int N_min = 1, const int N_max = -1);
|
||||
int N_min = 1, int N_max = -1);
|
||||
|
||||
//! Query the level of thread support
|
||||
static ThreadSupport queryThreadSupport();
|
||||
@@ -420,7 +420,7 @@ public: // Member functions
|
||||
* \param x The input/output array for the reduce
|
||||
* \param n The number of values in the array (must match on all nodes)
|
||||
*/
|
||||
template <class type> void sumReduce(type *x, const int n = 1) const;
|
||||
template <class type> void sumReduce(type *x, int n = 1) const;
|
||||
|
||||
/**
|
||||
* \brief Sum Reduce
|
||||
@@ -432,7 +432,7 @@ public: // Member functions
|
||||
* \param n The number of values in the array (must match on all nodes)
|
||||
*/
|
||||
template <class type>
|
||||
void sumReduce(const type *x, type *y, const int n = 1) const;
|
||||
void sumReduce(const type *x, type *y, int n = 1) const;
|
||||
|
||||
/**
|
||||
* \brief Min Reduce
|
||||
@@ -457,7 +457,7 @@ public: // Member functions
|
||||
* minimum value
|
||||
*/
|
||||
template <class type>
|
||||
void minReduce(type *x, const int n = 1, int *rank_of_min = nullptr) const;
|
||||
void minReduce(type *x, int n = 1, int *rank_of_min = nullptr) const;
|
||||
|
||||
/**
|
||||
* \brief Sum Reduce
|
||||
@@ -475,7 +475,7 @@ public: // Member functions
|
||||
* minimum value
|
||||
*/
|
||||
template <class type>
|
||||
void minReduce(const type *x, type *y, const int n = 1,
|
||||
void minReduce(const type *x, type *y, int n = 1,
|
||||
int *rank_of_min = nullptr) const;
|
||||
|
||||
/**
|
||||
@@ -501,7 +501,7 @@ public: // Member functions
|
||||
* minimum value
|
||||
*/
|
||||
template <class type>
|
||||
void maxReduce(type *x, const int n = 1, int *rank_of_max = nullptr) const;
|
||||
void maxReduce(type *x, int n = 1, int *rank_of_max = nullptr) const;
|
||||
|
||||
/**
|
||||
* \brief Sum Reduce
|
||||
@@ -519,7 +519,7 @@ public: // Member functions
|
||||
* minimum value
|
||||
*/
|
||||
template <class type>
|
||||
void maxReduce(const type *x, type *y, const int n = 1,
|
||||
void maxReduce(const type *x, type *y, int n = 1,
|
||||
int *rank_of_max = nullptr) const;
|
||||
|
||||
/**
|
||||
@@ -530,8 +530,7 @@ public: // Member functions
|
||||
* \param y The output array for the scan
|
||||
* \param n The number of values in the array (must match on all nodes)
|
||||
*/
|
||||
template <class type>
|
||||
void sumScan(const type *x, type *y, const int n = 1) const;
|
||||
template <class type> void sumScan(const type *x, type *y, int n = 1) const;
|
||||
|
||||
/**
|
||||
* \brief Scan Min Reduce
|
||||
@@ -541,8 +540,7 @@ public: // Member functions
|
||||
* \param y The output array for the scan
|
||||
* \param n The number of values in the array (must match on all nodes)
|
||||
*/
|
||||
template <class type>
|
||||
void minScan(const type *x, type *y, const int n = 1) const;
|
||||
template <class type> void minScan(const type *x, type *y, int n = 1) const;
|
||||
|
||||
/**
|
||||
* \brief Scan Max Reduce
|
||||
@@ -552,8 +550,7 @@ public: // Member functions
|
||||
* \param y The output array for the scan
|
||||
* \param n The number of values in the array (must match on all nodes)
|
||||
*/
|
||||
template <class type>
|
||||
void maxScan(const type *x, type *y, const int n = 1) const;
|
||||
template <class type> void maxScan(const type *x, type *y, int n = 1) const;
|
||||
|
||||
/**
|
||||
* \brief Broadcast
|
||||
@@ -561,7 +558,7 @@ public: // Member functions
|
||||
* \param value The input value for the broadcast.
|
||||
* \param root The processor performing the broadcast
|
||||
*/
|
||||
template <class type> type bcast(const type &value, const int root) const;
|
||||
template <class type> type bcast(const type &value, int root) const;
|
||||
|
||||
/**
|
||||
* \brief Broadcast
|
||||
@@ -570,8 +567,7 @@ public: // Member functions
|
||||
* \param n The number of values in the array (must match on all nodes)
|
||||
* \param root The processor performing the broadcast
|
||||
*/
|
||||
template <class type>
|
||||
void bcast(type *value, const int n, const int root) const;
|
||||
template <class type> void bcast(type *value, int n, int root) const;
|
||||
|
||||
/**
|
||||
* Perform a global barrier across all processors.
|
||||
@@ -595,8 +591,7 @@ public: // Member functions
|
||||
* The matching recv must share this tag.
|
||||
*/
|
||||
template <class type>
|
||||
void send(const type *buf, const int length, const int recv,
|
||||
int tag = 0) const;
|
||||
void send(const type *buf, int length, int recv, int tag = 0) const;
|
||||
|
||||
/*!
|
||||
* @brief This function sends an MPI message with an array of bytes
|
||||
@@ -611,8 +606,7 @@ public: // Member functions
|
||||
* to be sent with this message. Default tag is 0.
|
||||
* The matching recv must share this tag.
|
||||
*/
|
||||
void sendBytes(const void *buf, const int N_bytes, const int recv,
|
||||
int tag = 0) const;
|
||||
void sendBytes(const void *buf, int N_bytes, int recv, int tag = 0) const;
|
||||
|
||||
/*!
|
||||
* @brief This function sends an MPI message with an array
|
||||
@@ -627,8 +621,8 @@ public: // Member functions
|
||||
* to be sent with this message.
|
||||
*/
|
||||
template <class type>
|
||||
MPI_Request Isend(const type *buf, const int length, const int recv_proc,
|
||||
const int tag) const;
|
||||
MPI_Request Isend(const type *buf, int length, int recv_proc,
|
||||
int tag) const;
|
||||
|
||||
/*!
|
||||
* @brief This function sends an MPI message with an array of bytes
|
||||
@@ -642,8 +636,8 @@ public: // Member functions
|
||||
* @param tag Integer argument specifying an integer tag
|
||||
* to be sent with this message.
|
||||
*/
|
||||
MPI_Request IsendBytes(const void *buf, const int N_bytes,
|
||||
const int recv_proc, const int tag) const;
|
||||
MPI_Request IsendBytes(const void *buf, int N_bytes, int recv_proc,
|
||||
int tag) const;
|
||||
|
||||
/*!
|
||||
* @brief This function receives an MPI message with a data
|
||||
@@ -662,7 +656,7 @@ public: // Member functions
|
||||
* by the tag of the incoming message. Default tag is 0.
|
||||
*/
|
||||
template <class type>
|
||||
inline void recv(type *buf, int length, const int send, int tag) const {
|
||||
inline void recv(type *buf, int length, int send, int tag) const {
|
||||
int length2 = length;
|
||||
recv(buf, length2, send, false, tag);
|
||||
}
|
||||
@@ -687,7 +681,7 @@ public: // Member functions
|
||||
* by the tag of the incoming message. Default tag is 0.
|
||||
*/
|
||||
template <class type>
|
||||
void recv(type *buf, int &length, const int send, const bool get_length,
|
||||
void recv(type *buf, int &length, int send, const bool get_length,
|
||||
int tag) const;
|
||||
|
||||
/*!
|
||||
@@ -703,7 +697,7 @@ public: // Member functions
|
||||
* must be matched by the tag of the incoming message. Default
|
||||
* tag is 0.
|
||||
*/
|
||||
void recvBytes(void *buf, int &N_bytes, const int send, int tag = 0) const;
|
||||
void recvBytes(void *buf, int &N_bytes, int send, int tag = 0) const;
|
||||
|
||||
/*!
|
||||
* @brief This function receives an MPI message with a data
|
||||
@@ -716,8 +710,7 @@ public: // Member functions
|
||||
* be matched by the tag of the incoming message.
|
||||
*/
|
||||
template <class type>
|
||||
MPI_Request Irecv(type *buf, const int length, const int send_proc,
|
||||
const int tag) const;
|
||||
MPI_Request Irecv(type *buf, int length, int send_proc, int tag) const;
|
||||
|
||||
/*!
|
||||
* @brief This function receives an MPI message with an array of
|
||||
@@ -731,8 +724,8 @@ public: // Member functions
|
||||
* @param tag Integer argument specifying a tag which must
|
||||
* be matched by the tag of the incoming message.
|
||||
*/
|
||||
MPI_Request IrecvBytes(void *buf, const int N_bytes, const int send_proc,
|
||||
const int tag) const;
|
||||
MPI_Request IrecvBytes(void *buf, int N_bytes, int send_proc,
|
||||
int tag) const;
|
||||
|
||||
/*!
|
||||
* @brief This function sends and receives data using a blocking call
|
||||
@@ -741,6 +734,39 @@ public: // Member functions
|
||||
void sendrecv(const type *sendbuf, int sendcount, int dest, int sendtag,
|
||||
type *recvbuf, int recvcount, int source, int recvtag) const;
|
||||
|
||||
/*!
|
||||
* @brief This function sets up an Isend call (see MPI_Send_init)
|
||||
* @param buf Pointer to array buffer with length integers.
|
||||
* @param length Number of integers in buf that we want to send.
|
||||
* @param recv_proc Receiving processor number.
|
||||
* @param tag Tag to send
|
||||
* @return Returns an MPI_Request.
|
||||
* Note this returns a shared pointer (std::shared_ptr) so the user does not
|
||||
* need to manually free the request
|
||||
*/
|
||||
template <class type>
|
||||
std::shared_ptr<MPI_Request> Isend_init(const type *buf, int length, int recv_proc,
|
||||
int tag) const;
|
||||
|
||||
/*!
|
||||
* @brief This function sets up an Irecv call (see MPI_Recv_init)
|
||||
* @param buf Pointer to integer array buffer with capacity of length integers.
|
||||
* @param length Maximum number of values that can be stored in buf.
|
||||
* @param send_proc Processor number of sender.
|
||||
* @param tag Tag to match
|
||||
* @return Returns an MPI_Request.
|
||||
* Note this returns a shared pointer (std::shared_ptr) so the user does not
|
||||
* need to manually free the request
|
||||
*/
|
||||
template <class type>
|
||||
std::shared_ptr<MPI_Request> Irecv_init(type *buf, int length, int send_proc, int tag) const;
|
||||
|
||||
/*!
|
||||
* @brief Start the MPI communication
|
||||
* @param request Request to start
|
||||
*/
|
||||
void Start( MPI_Request &request );
|
||||
|
||||
/*!
|
||||
* Each processor sends every other processor a single value.
|
||||
* @param[in] x Input value for allGather
|
||||
@@ -792,7 +818,7 @@ public: // Member functions
|
||||
* and the sizes and displacements will be returned (if desired).
|
||||
*/
|
||||
template <class type>
|
||||
int allGather(const type *send_data, const int send_cnt, type *recv_data,
|
||||
int allGather(const type *send_data, int send_cnt, type *recv_data,
|
||||
int *recv_cnt = nullptr, int *recv_disp = nullptr,
|
||||
bool known_recv = false) const;
|
||||
|
||||
@@ -822,7 +848,7 @@ public: // Member functions
|
||||
* @param recv_data Output array of received values (nxN)
|
||||
*/
|
||||
template <class type>
|
||||
void allToAll(const int n, const type *send_data, type *recv_data) const;
|
||||
void allToAll(int n, const type *send_data, type *recv_data) const;
|
||||
|
||||
/*!
|
||||
* Each processor sends an array of data to the different processors.
|
||||
@@ -995,23 +1021,20 @@ public: // Member functions
|
||||
MPI loadBalance(double localPerformance, std::vector<double> work);
|
||||
|
||||
private: // Private helper functions for templated MPI operations;
|
||||
template <class type> void call_sumReduce(type *x, const int n = 1) const;
|
||||
template <class type> void call_sumReduce(type *x, int n = 1) const;
|
||||
template <class type>
|
||||
void call_sumReduce(const type *x, type *y, const int n = 1) const;
|
||||
void call_sumReduce(const type *x, type *y, int n = 1) const;
|
||||
template <class type>
|
||||
void call_minReduce(type *x, const int n = 1,
|
||||
void call_minReduce(type *x, int n = 1, int *rank_of_min = nullptr) const;
|
||||
template <class type>
|
||||
void call_minReduce(const type *x, type *y, int n = 1,
|
||||
int *rank_of_min = nullptr) const;
|
||||
template <class type>
|
||||
void call_minReduce(const type *x, type *y, const int n = 1,
|
||||
int *rank_of_min = nullptr) const;
|
||||
void call_maxReduce(type *x, int n = 1, int *rank_of_max = nullptr) const;
|
||||
template <class type>
|
||||
void call_maxReduce(type *x, const int n = 1,
|
||||
void call_maxReduce(const type *x, type *y, int n = 1,
|
||||
int *rank_of_max = nullptr) const;
|
||||
template <class type>
|
||||
void call_maxReduce(const type *x, type *y, const int n = 1,
|
||||
int *rank_of_max = nullptr) const;
|
||||
template <class type>
|
||||
void call_bcast(type *x, const int n, const int root) const;
|
||||
template <class type> void call_bcast(type *x, int n, int root) const;
|
||||
template <class type>
|
||||
void call_allGather(const type &x_in, type *x_out) const;
|
||||
template <class type>
|
||||
|
||||
1429
common/Membrane.cpp
Normal file
1429
common/Membrane.cpp
Normal file
File diff suppressed because it is too large
Load Diff
186
common/Membrane.h
Normal file
186
common/Membrane.h
Normal file
@@ -0,0 +1,186 @@
|
||||
/* Membrane class: membrane links and MPI communication buffers built on ScaLBL data structures */
|
||||
|
||||
#ifndef ScaLBL_Membrane_INC
|
||||
#define ScaLBL_Membrane_INC
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
#include <iostream>
|
||||
#include <exception>
|
||||
#include <stdexcept>
|
||||
#include <fstream>
|
||||
|
||||
#include "common/ScaLBL.h"
|
||||
|
||||
/**
|
||||
* \brief Unpack D3Q19 distributions after communication using links determined based on membrane location
|
||||
* @param q - index for distribution based on D3Q19 discrete velocity structure
|
||||
* @param list - list of distributions to communicate
|
||||
* @param links - list of active links based on the membrane location
|
||||
* @param start - index to start parsing the list
|
||||
* @param count - number of values to unpack
|
||||
* @param recvbuf - memory buffer where received values have been stored
|
||||
* @param dist - memory buffer to hold the distributions
|
||||
* @param N - size of the distributions (derived from Domain structure)
|
||||
*/
|
||||
extern "C" void Membrane_D3Q19_Unpack(int q, int *list, int *links, int start, int count, double *recvbuf, double *dist, int N);
|
||||
|
||||
|
||||
/**
|
||||
* \brief Set custom link rules for D3Q19 distribution based on membrane location
|
||||
* @param q - index for distribution based on D3Q19 discrete velocity structure
|
||||
* @param list - list of distributions to communicate
|
||||
* @param links - list of active links based on the membrane location
|
||||
* @param coef - coefficient to determine the local mass transport for each membrane link
|
||||
* @param start - index to start parsing the list
|
||||
* @param offset - offset to start reading membrane links
|
||||
* @param linkCount - number of values to unpack (NOTE(review): previously documented as "count"; confirm it counts membrane links)
|
||||
* @param recvbuf - memory buffer where received values have been stored
|
||||
* @param dist - memory buffer to hold the distributions
|
||||
* @param N - size of the distributions (derived from Domain structure)
|
||||
*/
|
||||
extern "C" void Membrane_D3Q19_Transport(int q, int *list, int *links, double *coef, int start, int offset,
|
||||
int linkCount, double *recvbuf, double *dist, int N);
|
||||
|
||||
/**
|
||||
* \class Membrane
|
||||
* @brief
|
||||
* The Membrane class operates on ScaLBL data structures to insert membrane
|
||||
* links. As declared here it holds the original and modified neighbor lists,
* the membrane link arrays, the MPI send/receive buffers, and the per-neighbor
* ranks, counts and lists used for communication.
|
||||
*/
|
||||
|
||||
class Membrane {
|
||||
public:
|
||||
int Np;
|
||||
int Nx,Ny,Nz,N;
|
||||
int membraneLinkCount;
|
||||
|
||||
int *initialNeighborList; // original neighborlist
|
||||
int *NeighborList; // modified neighborlist
|
||||
|
||||
/* host data structures */
|
||||
int *membraneLinks; // D3Q19 links that cross membrane
|
||||
int *membraneTag; // label each link in the membrane
|
||||
double *membraneDist; // distance to membrane for each linked site
|
||||
double *membraneOrientation; // NOTE(review): comment was duplicated from membraneDist; presumably the membrane orientation for each linked site -- confirm
|
||||
|
||||
/*
|
||||
* Device data structures
|
||||
*/
|
||||
int *MembraneLinks;
|
||||
double *MembraneCoef; // mass transport coefficient for the membrane
|
||||
double *MembraneDistance;
|
||||
double *MembraneOrientation;
|
||||
|
||||
/**
|
||||
* \brief Construct a Membrane object
|
||||
* @param Dm - domain structure
|
||||
* @param initialNeighborList - list of neighbors for each site (stored as the original neighbor list)
|
||||
* @param Nsites - NOTE(review): presumably the number of lattice sites -- confirm against Membrane.cpp
|
||||
*/
|
||||
Membrane(std::shared_ptr <Domain> Dm, int *initialNeighborList, int Nsites);
|
||||
|
||||
/**
|
||||
* \brief Destructor
|
||||
*/
|
||||
~Membrane();
|
||||
|
||||
/**
|
||||
* \brief Create membrane
|
||||
* \details Create membrane structure from signed distance function
|
||||
* @param Dm - domain structure
|
||||
* @param Distance - signed distance to membrane
|
||||
* @param Map - mapping between regular layout and compact layout
|
||||
* @return int -- NOTE(review): presumably the number of membrane links; confirm against Membrane.cpp
*/
|
||||
int Create(std::shared_ptr <Domain> Dm, DoubleArray &Distance, IntArray &Map);
|
||||
|
||||
// NOTE(review): the Send*/Recv* pairs below presumably start and complete the
// exchange of distributions with neighboring ranks, mirroring the same-named
// ScaLBL_Communicator methods -- confirm against Membrane.cpp
void SendD3Q19AA(double *dist);
|
||||
void RecvD3Q19AA(double *dist);
|
||||
void SendD3Q7AA(double *dist);
|
||||
void RecvD3Q7AA(double *dist);
|
||||
// NOTE(review): implementations are not visible from this header; semantics of
// Map, Psi, method and den should be documented from Membrane.cpp
void AssignCoefficients(int *Map, double *Psi, std::string method);
|
||||
void IonTransport(double *dist, double *den);
|
||||
//......................................................................................
|
||||
// Buffers to store data sent and received by this MPI process
|
||||
double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, *sendbuf_Z;
|
||||
double *sendbuf_xy, *sendbuf_yz, *sendbuf_xz, *sendbuf_Xy, *sendbuf_Yz, *sendbuf_xZ;
|
||||
double *sendbuf_xY, *sendbuf_yZ, *sendbuf_Xz, *sendbuf_XY, *sendbuf_YZ, *sendbuf_XZ;
|
||||
double *recvbuf_x, *recvbuf_y, *recvbuf_z, *recvbuf_X, *recvbuf_Y, *recvbuf_Z;
|
||||
double *recvbuf_xy, *recvbuf_yz, *recvbuf_xz, *recvbuf_Xy, *recvbuf_Yz, *recvbuf_xZ;
|
||||
double *recvbuf_xY, *recvbuf_yZ, *recvbuf_Xz, *recvbuf_XY, *recvbuf_YZ, *recvbuf_XZ;
|
||||
//......................................................................................
|
||||
|
||||
private:
|
||||
bool Lock; // use Lock to make sure only one call at a time to protect data in transit
|
||||
int sendtag, recvtag;
|
||||
int iproc,jproc,kproc;
|
||||
int nprocx,nprocy,nprocz;
|
||||
// Give the object its own MPI communicator
|
||||
RankInfoStruct rank_info;
|
||||
Utilities::MPI MPI_COMM_SCALBL; // MPI Communicator for this domain
|
||||
MPI_Request req1[18],req2[18];
|
||||
/**
|
||||
* \brief Set up membrane communication
|
||||
* \details associate p2p communication links to membrane where necessary
|
||||
* returns the number of membrane links
|
||||
* regular communications are stored in the first part of the list
|
||||
* membrane communications are stored in the last part of the list
|
||||
* @param Cqx - discrete velocity (x)
|
||||
* @param Cqy - discrete velocity (y)
|
||||
* @param Cqz - discrete velocity (z)
|
||||
* @param list - list of received values
|
||||
* @param start - NOTE(review): previously undocumented; presumably the starting offset into the list -- confirm
* @param count - number of received values
|
||||
* @param d3q19_recvlist - device array with the saved list
|
||||
* @param d3q19_linkList - sorted list with regular and membrane links
|
||||
* @param Distance - signed distance to membrane
|
||||
* @param Map - data structure used to define mapping between dense and sparse representation
|
||||
* */
|
||||
int D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, const int *list, int start, int count,
|
||||
int *d3q19_recvlist, int *d3q19_linkList, DoubleArray &Distance, IntArray &Map);
|
||||
//......................................................................................
|
||||
// MPI ranks for all 18 neighbors
|
||||
//......................................................................................
|
||||
// These variables are all private to prevent external things from modifying them!!
|
||||
//......................................................................................
|
||||
int rank;
|
||||
int rank_x,rank_y,rank_z,rank_X,rank_Y,rank_Z;
|
||||
int rank_xy,rank_XY,rank_xY,rank_Xy;
|
||||
int rank_xz,rank_XZ,rank_xZ,rank_Xz;
|
||||
int rank_yz,rank_YZ,rank_yZ,rank_Yz;
|
||||
//......................................................................................
|
||||
int SendCount, RecvCount, CommunicationCount;
|
||||
//......................................................................................
|
||||
int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z;
|
||||
int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ;
|
||||
int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ;
|
||||
//......................................................................................
|
||||
int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z;
|
||||
int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ;
|
||||
int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ;
|
||||
//......................................................................................
|
||||
// Link counts: the six face directions hold 5 entries each, the edge directions a single count
int linkCount_x[5], linkCount_y[5], linkCount_z[5], linkCount_X[5], linkCount_Y[5], linkCount_Z[5];
|
||||
int linkCount_xy, linkCount_yz, linkCount_xz, linkCount_Xy, linkCount_Yz, linkCount_xZ;
|
||||
int linkCount_xY, linkCount_yZ, linkCount_Xz, linkCount_XY, linkCount_YZ, linkCount_XZ;
|
||||
//......................................................................................
|
||||
// Send lists that reside on the compute device
|
||||
int *dvcSendList_x, *dvcSendList_y, *dvcSendList_z, *dvcSendList_X, *dvcSendList_Y, *dvcSendList_Z;
|
||||
int *dvcSendList_xy, *dvcSendList_yz, *dvcSendList_xz, *dvcSendList_Xy, *dvcSendList_Yz, *dvcSendList_xZ;
|
||||
int *dvcSendList_xY, *dvcSendList_yZ, *dvcSendList_Xz, *dvcSendList_XY, *dvcSendList_YZ, *dvcSendList_XZ;
|
||||
// Receive lists that reside on the compute device
|
||||
int *dvcRecvList_x, *dvcRecvList_y, *dvcRecvList_z, *dvcRecvList_X, *dvcRecvList_Y, *dvcRecvList_Z;
|
||||
int *dvcRecvList_xy, *dvcRecvList_yz, *dvcRecvList_xz, *dvcRecvList_Xy, *dvcRecvList_Yz, *dvcRecvList_xZ;
|
||||
int *dvcRecvList_xY, *dvcRecvList_yZ, *dvcRecvList_Xz, *dvcRecvList_XY, *dvcRecvList_YZ, *dvcRecvList_XZ;
|
||||
// Link lists that reside on the compute device
|
||||
int *dvcRecvLinks_x, *dvcRecvLinks_y, *dvcRecvLinks_z, *dvcRecvLinks_X, *dvcRecvLinks_Y, *dvcRecvLinks_Z;
|
||||
int *dvcRecvLinks_xy, *dvcRecvLinks_yz, *dvcRecvLinks_xz, *dvcRecvLinks_Xy, *dvcRecvLinks_Yz, *dvcRecvLinks_xZ;
|
||||
int *dvcRecvLinks_xY, *dvcRecvLinks_yZ, *dvcRecvLinks_Xz, *dvcRecvLinks_XY, *dvcRecvLinks_YZ, *dvcRecvLinks_XZ;
|
||||
// Receive index lists for the distributions
|
||||
int *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, *dvcRecvDist_Y, *dvcRecvDist_Z;
|
||||
int *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ;
|
||||
int *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, *dvcRecvDist_YZ, *dvcRecvDist_XZ;
|
||||
//......................................................................................
|
||||
// mass transfer coefficient arrays
|
||||
double *coefficient_x, *coefficient_X, *coefficient_y, *coefficient_Y, *coefficient_z, *coefficient_Z;
|
||||
//......................................................................................
|
||||
|
||||
};
|
||||
#endif
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University
|
||||
Copyright 2013--2022 James E. McClure, Virginia Polytechnic & State University
|
||||
Copyright Equnior ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
@@ -15,10 +15,8 @@
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include "common/ScaLBL.h"
|
||||
|
||||
#include <chrono>
|
||||
|
||||
|
||||
ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr <Domain> Dm){
|
||||
//......................................................................................
|
||||
Lock=false; // unlock the communicator
|
||||
@@ -309,12 +307,12 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr <Domain> Dm){
|
||||
MPI_COMM_SCALBL.barrier();
|
||||
ScaLBL_DeviceBarrier();
|
||||
//......................................................................................
|
||||
SendCount = sendCount_x+sendCount_X+sendCount_y+sendCount_Y+sendCount_z+sendCount_Z+
|
||||
SendCount = 5*(sendCount_x+sendCount_X+sendCount_y+sendCount_Y+sendCount_z+sendCount_Z)+
|
||||
sendCount_xy+sendCount_Xy+sendCount_xY+sendCount_XY+
|
||||
sendCount_xZ+sendCount_Xz+sendCount_xZ+sendCount_XZ+
|
||||
sendCount_yz+sendCount_Yz+sendCount_yZ+sendCount_YZ;
|
||||
|
||||
RecvCount = recvCount_x+recvCount_X+recvCount_y+recvCount_Y+recvCount_z+recvCount_Z+
|
||||
RecvCount = 5*(recvCount_x+recvCount_X+recvCount_y+recvCount_Y+recvCount_z+recvCount_Z)+
|
||||
recvCount_xy+recvCount_Xy+recvCount_xY+recvCount_XY+
|
||||
recvCount_xZ+recvCount_Xz+recvCount_xZ+recvCount_XZ+
|
||||
recvCount_yz+recvCount_Yz+recvCount_yZ+recvCount_YZ;
|
||||
@@ -322,8 +320,49 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr <Domain> Dm){
|
||||
CommunicationCount = SendCount+RecvCount;
|
||||
//......................................................................................
|
||||
|
||||
}
|
||||
|
||||
//...................................................................................
|
||||
// Set up the persistent communication for D3Q19AA (use tags 130-147)
|
||||
//...................................................................................
|
||||
req_D3Q19AA.clear();
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_x, 5*sendCount_x, rank_x, 130 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_X, 5*recvCount_X, rank_X, 130 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_X, 5*sendCount_X, rank_X, 131 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_x, 5*recvCount_x, rank_x, 131 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_y, 5*sendCount_y, rank_y, 132 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Y, 5*recvCount_Y, rank_Y, 132 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Y, 5*sendCount_Y, rank_Y, 133 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_y, 5*recvCount_y, rank_y, 133 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_z, 5*sendCount_z, rank_z, 134 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Z, 5*recvCount_Z, rank_Z, 134 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Z, 5*sendCount_Z, rank_Z, 135 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_z, 5*recvCount_z, rank_z, 135 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xy, sendCount_xy, rank_xy, 136 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_XY, recvCount_XY, rank_XY, 136 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_XY, sendCount_XY, rank_XY, 137 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xy, recvCount_xy, rank_xy, 137 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Xy, sendCount_Xy, rank_Xy, 138 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xY, recvCount_xY, rank_xY, 138 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xY, sendCount_xY, rank_xY, 139 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Xy, recvCount_Xy, rank_Xy, 139 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xz, sendCount_xz, rank_xz, 140 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_XZ, recvCount_XZ, rank_XZ, 140 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xZ, sendCount_xZ, rank_xZ, 143 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Xz, recvCount_Xz, rank_Xz, 143 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Xz, sendCount_Xz, rank_Xz, 142 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xZ, recvCount_xZ, rank_xZ, 142 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_XZ, sendCount_XZ, rank_XZ, 141 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xz, recvCount_xz, rank_xz, 141 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_yz, sendCount_yz, rank_yz, 144 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_YZ, recvCount_YZ, rank_YZ, 144 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_yZ, sendCount_yZ, rank_yZ, 147 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Yz, recvCount_Yz, rank_Yz, 147 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Yz, sendCount_Yz, rank_Yz, 146 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_yZ, recvCount_yZ, rank_yZ, 146 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_YZ, sendCount_YZ, rank_YZ, 145 ) );
|
||||
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_yz, recvCount_yz, rank_yz, 145 ) );
|
||||
|
||||
}
|
||||
|
||||
ScaLBL_Communicator::~ScaLBL_Communicator()
|
||||
{
|
||||
@@ -419,6 +458,22 @@ ScaLBL_Communicator::~ScaLBL_Communicator()
|
||||
ScaLBL_FreeDeviceMemory( dvcRecvDist_Yz );
|
||||
ScaLBL_FreeDeviceMemory( dvcRecvDist_YZ );
|
||||
}
|
||||
|
||||
|
||||
void ScaLBL_Communicator::start( std::vector<std::shared_ptr<MPI_Request>>& requests )
|
||||
{
|
||||
for ( auto& req : requests )
|
||||
MPI_COMM_SCALBL.Start( *req );
|
||||
}
|
||||
void ScaLBL_Communicator::wait( std::vector<std::shared_ptr<MPI_Request>>& requests )
|
||||
{
|
||||
std::vector<MPI_Request> request2;
|
||||
for ( auto& req : requests )
|
||||
request2.push_back( *req );
|
||||
MPI_COMM_SCALBL.waitAll( request2.size(), request2.data() );
|
||||
}
|
||||
|
||||
|
||||
double ScaLBL_Communicator::GetPerformance(int *NeighborList, double *fq, int Np){
|
||||
/* EACH MPI PROCESS GETS ITS OWN MEASUREMENT*/
|
||||
/* use MRT kernels to check performance without communication / synchronization */
|
||||
@@ -830,8 +885,6 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis
|
||||
idx=Map(n);
|
||||
//if (rank == 0) printf("r: mapped n=%d\n",idx);
|
||||
TempBuffer[i]=idx;
|
||||
|
||||
|
||||
}
|
||||
ScaLBL_CopyToDevice(dvcRecvDist_x,TempBuffer,5*recvCount_x*sizeof(int));
|
||||
|
||||
@@ -988,7 +1041,6 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis
|
||||
return(Np);
|
||||
}
|
||||
|
||||
|
||||
void ScaLBL_Communicator::SetupBounceBackList(IntArray &Map, signed char *id, int Np, bool SlippingVelBC)
|
||||
{
|
||||
|
||||
@@ -1390,15 +1442,12 @@ void ScaLBL_Communicator::SolidSlippingVelocityBCD3Q19(double *fq, double *zeta_
|
||||
|
||||
void ScaLBL_Communicator::SendD3Q19AA(double *dist){
|
||||
|
||||
// NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2
|
||||
if (Lock==true){
|
||||
ERROR("ScaLBL Error (SendD3Q19): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?");
|
||||
}
|
||||
else{
|
||||
Lock=true;
|
||||
}
|
||||
// assign tag of 19 to D3Q19 communication
|
||||
sendtag = recvtag = 19;
|
||||
ScaLBL_DeviceBarrier();
|
||||
// Pack the distributions
|
||||
//...Packing for x face(2,8,10,12,14)................................
|
||||
@@ -1408,8 +1457,6 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){
|
||||
ScaLBL_D3Q19_Pack(12,dvcSendList_x,3*sendCount_x,sendCount_x,sendbuf_x,dist,N);
|
||||
ScaLBL_D3Q19_Pack(14,dvcSendList_x,4*sendCount_x,sendCount_x,sendbuf_x,dist,N);
|
||||
|
||||
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 5*sendCount_x,rank_x,sendtag);
|
||||
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 5*recvCount_X,rank_X,recvtag);
|
||||
//...Packing for X face(1,7,9,11,13)................................
|
||||
ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,dist,N);
|
||||
ScaLBL_D3Q19_Pack(7,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,dist,N);
|
||||
@@ -1417,8 +1464,6 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){
|
||||
ScaLBL_D3Q19_Pack(11,dvcSendList_X,3*sendCount_X,sendCount_X,sendbuf_X,dist,N);
|
||||
ScaLBL_D3Q19_Pack(13,dvcSendList_X,4*sendCount_X,sendCount_X,sendbuf_X,dist,N);
|
||||
|
||||
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 5*sendCount_X,rank_X,sendtag);
|
||||
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 5*recvCount_x,rank_x,recvtag);
|
||||
//...Packing for y face(4,8,9,16,18).................................
|
||||
ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,dist,N);
|
||||
ScaLBL_D3Q19_Pack(8,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,dist,N);
|
||||
@@ -1426,8 +1471,6 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){
|
||||
ScaLBL_D3Q19_Pack(16,dvcSendList_y,3*sendCount_y,sendCount_y,sendbuf_y,dist,N);
|
||||
ScaLBL_D3Q19_Pack(18,dvcSendList_y,4*sendCount_y,sendCount_y,sendbuf_y,dist,N);
|
||||
|
||||
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 5*sendCount_y,rank_y,sendtag);
|
||||
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 5*recvCount_Y,rank_Y,recvtag);
|
||||
//...Packing for Y face(3,7,10,15,17).................................
|
||||
ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,dist,N);
|
||||
ScaLBL_D3Q19_Pack(7,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,dist,N);
|
||||
@@ -1435,78 +1478,52 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){
|
||||
ScaLBL_D3Q19_Pack(15,dvcSendList_Y,3*sendCount_Y,sendCount_Y,sendbuf_Y,dist,N);
|
||||
ScaLBL_D3Q19_Pack(17,dvcSendList_Y,4*sendCount_Y,sendCount_Y,sendbuf_Y,dist,N);
|
||||
|
||||
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 5*sendCount_Y,rank_Y,sendtag);
|
||||
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 5*recvCount_y,rank_y,recvtag);
|
||||
//...Packing for z face(6,12,13,16,17)................................
|
||||
ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,dist,N);
|
||||
ScaLBL_D3Q19_Pack(12,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,dist,N);
|
||||
ScaLBL_D3Q19_Pack(13,dvcSendList_z,2*sendCount_z,sendCount_z,sendbuf_z,dist,N);
|
||||
ScaLBL_D3Q19_Pack(16,dvcSendList_z,3*sendCount_z,sendCount_z,sendbuf_z,dist,N);
|
||||
ScaLBL_D3Q19_Pack(17,dvcSendList_z,4*sendCount_z,sendCount_z,sendbuf_z,dist,N);
|
||||
|
||||
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 5*sendCount_z,rank_z,sendtag);
|
||||
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 5*recvCount_Z,rank_Z,recvtag);
|
||||
|
||||
|
||||
//...Packing for Z face(5,11,14,15,18)................................
|
||||
ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,dist,N);
|
||||
ScaLBL_D3Q19_Pack(11,dvcSendList_Z,sendCount_Z,sendCount_Z,sendbuf_Z,dist,N);
|
||||
ScaLBL_D3Q19_Pack(14,dvcSendList_Z,2*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N);
|
||||
ScaLBL_D3Q19_Pack(15,dvcSendList_Z,3*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N);
|
||||
ScaLBL_D3Q19_Pack(18,dvcSendList_Z,4*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N);
|
||||
|
||||
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 5*sendCount_Z,rank_Z,sendtag);
|
||||
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 5*recvCount_z,rank_z,recvtag);
|
||||
|
||||
//...Pack the xy edge (8)................................
|
||||
|
||||
//...Pack the xy edge (8)................................
|
||||
ScaLBL_D3Q19_Pack(8,dvcSendList_xy,0,sendCount_xy,sendbuf_xy,dist,N);
|
||||
req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag);
|
||||
req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag);
|
||||
//...Pack the Xy edge (9)................................
|
||||
ScaLBL_D3Q19_Pack(9,dvcSendList_Xy,0,sendCount_Xy,sendbuf_Xy,dist,N);
|
||||
req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag);
|
||||
req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag);
|
||||
//...Pack the xY edge (10)................................
|
||||
ScaLBL_D3Q19_Pack(10,dvcSendList_xY,0,sendCount_xY,sendbuf_xY,dist,N);
|
||||
req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag);
|
||||
req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag);
|
||||
//...Pack the XY edge (7)................................
|
||||
ScaLBL_D3Q19_Pack(7,dvcSendList_XY,0,sendCount_XY,sendbuf_XY,dist,N);
|
||||
req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag);
|
||||
req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag);
|
||||
//...Pack the xz edge (12)................................
|
||||
ScaLBL_D3Q19_Pack(12,dvcSendList_xz,0,sendCount_xz,sendbuf_xz,dist,N);
|
||||
req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag);
|
||||
req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag);
|
||||
|
||||
//...Pack the xZ edge (14)................................
|
||||
ScaLBL_D3Q19_Pack(14,dvcSendList_xZ,0,sendCount_xZ,sendbuf_xZ,dist,N);
|
||||
req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag);
|
||||
req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag);
|
||||
//...Pack the Xz edge (13)................................
|
||||
ScaLBL_D3Q19_Pack(13,dvcSendList_Xz,0,sendCount_Xz,sendbuf_Xz,dist,N);
|
||||
req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag);
|
||||
req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag);
|
||||
|
||||
//...Pack the XZ edge (11)................................
|
||||
ScaLBL_D3Q19_Pack(11,dvcSendList_XZ,0,sendCount_XZ,sendbuf_XZ,dist,N);
|
||||
req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag);
|
||||
req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag);
|
||||
//...Pack the yz edge (16)................................
|
||||
ScaLBL_D3Q19_Pack(16,dvcSendList_yz,0,sendCount_yz,sendbuf_yz,dist,N);
|
||||
req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag);
|
||||
req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag);
|
||||
//...Pack the yZ edge (18)................................
|
||||
ScaLBL_D3Q19_Pack(18,dvcSendList_yZ,0,sendCount_yZ,sendbuf_yZ,dist,N);
|
||||
req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag);
|
||||
req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag);
|
||||
//...Pack the Yz edge (17)................................
|
||||
ScaLBL_D3Q19_Pack(17,dvcSendList_Yz,0,sendCount_Yz,sendbuf_Yz,dist,N);
|
||||
req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag);
|
||||
req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag);
|
||||
//...Pack the YZ edge (15)................................
|
||||
ScaLBL_D3Q19_Pack(15,dvcSendList_YZ,0,sendCount_YZ,sendbuf_YZ,dist,N);
|
||||
req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag);
|
||||
req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag);
|
||||
|
||||
//...................................................................................
|
||||
|
||||
ScaLBL_DeviceBarrier();
|
||||
start( req_D3Q19AA );
|
||||
|
||||
}
|
||||
|
||||
void ScaLBL_Communicator::RecvD3Q19AA(double *dist){
|
||||
@@ -1514,8 +1531,7 @@ void ScaLBL_Communicator::RecvD3Q19AA(double *dist){
|
||||
// NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2
|
||||
//...................................................................................
|
||||
// Wait for completion of D3Q19 communication
|
||||
MPI_COMM_SCALBL.waitAll(18,req1);
|
||||
MPI_COMM_SCALBL.waitAll(18,req2);
|
||||
wait( req_D3Q19AA );
|
||||
ScaLBL_DeviceBarrier();
|
||||
|
||||
//...................................................................................
|
||||
@@ -1685,43 +1701,43 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){
|
||||
Lock=true;
|
||||
}
|
||||
// assign the base tag for the two-distribution (Aq, Bq) D3Q7 communication; a per-face offset is added to each message
|
||||
sendtag = recvtag = 14;
|
||||
sendtag = recvtag = 148;
|
||||
ScaLBL_DeviceBarrier();
|
||||
// Pack the distributions
|
||||
//...Packing for x face(2,8,10,12,14)................................
|
||||
ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,Aq,N);
|
||||
ScaLBL_D3Q19_Pack(2,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,Bq,N);
|
||||
|
||||
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 2*sendCount_x,rank_x,sendtag);
|
||||
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 2*recvCount_X,rank_X,recvtag);
|
||||
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 2*sendCount_x, rank_x,sendtag+0);
|
||||
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 2*recvCount_X, rank_X,recvtag+0);
|
||||
|
||||
//...Packing for X face(1,7,9,11,13)................................
|
||||
ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,Aq,N);
|
||||
ScaLBL_D3Q19_Pack(1,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,Bq,N);
|
||||
|
||||
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 2*sendCount_X,rank_X,sendtag);
|
||||
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 2*recvCount_x,rank_x,recvtag);
|
||||
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 2*sendCount_X, rank_X,sendtag+1);
|
||||
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 2*recvCount_x, rank_x,recvtag+1);
|
||||
|
||||
//...Packing for y face(4,8,9,16,18).................................
|
||||
ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,Aq,N);
|
||||
ScaLBL_D3Q19_Pack(4,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,Bq,N);
|
||||
|
||||
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 2*sendCount_y,rank_y,sendtag);
|
||||
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 2*recvCount_Y,rank_Y,recvtag);
|
||||
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 2*sendCount_y, rank_y,sendtag+2);
|
||||
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 2*recvCount_Y, rank_Y,recvtag+2);
|
||||
|
||||
//...Packing for Y face(3,7,10,15,17).................................
|
||||
ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,Aq,N);
|
||||
ScaLBL_D3Q19_Pack(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,Bq,N);
|
||||
|
||||
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 2*sendCount_Y,rank_Y,sendtag);
|
||||
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 2*recvCount_y,rank_y,recvtag);
|
||||
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 2*sendCount_Y, rank_Y,sendtag+3);
|
||||
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 2*recvCount_y, rank_y,recvtag+3);
|
||||
|
||||
//...Packing for z face(6,12,13,16,17)................................
|
||||
ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,Aq,N);
|
||||
ScaLBL_D3Q19_Pack(6,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,Bq,N);
|
||||
|
||||
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 2*sendCount_z,rank_z,sendtag);
|
||||
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 2*recvCount_Z,rank_Z,recvtag);
|
||||
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 2*sendCount_z, rank_z,sendtag+4);
|
||||
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 2*recvCount_Z, rank_Z,recvtag+4);
|
||||
|
||||
//...Packing for Z face(5,11,14,15,18)................................
|
||||
ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,Aq,N);
|
||||
@@ -1729,8 +1745,8 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){
|
||||
|
||||
//...................................................................................
|
||||
// Send all the distributions
|
||||
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 2*sendCount_Z,rank_Z,sendtag);
|
||||
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 2*recvCount_z,rank_z,recvtag);
|
||||
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 2*sendCount_Z, rank_Z,sendtag+5);
|
||||
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 2*recvCount_z, rank_z,recvtag+5);
|
||||
|
||||
}
|
||||
|
||||
@@ -1801,39 +1817,39 @@ void ScaLBL_Communicator::SendD3Q7AA(double *Aq, int Component){
|
||||
else{
|
||||
Lock=true;
|
||||
}
|
||||
// assign tag of 19 to D3Q19 communication
|
||||
sendtag = recvtag = 7;
|
||||
// assign base tag of 154 to D3Q7 communication (the face index 0-5 is added to each message)
|
||||
sendtag = recvtag = 154;
|
||||
ScaLBL_DeviceBarrier();
|
||||
// Pack the distributions
|
||||
//...Packing for x face(2,8,10,12,14)................................
|
||||
ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,&Aq[Component*7*N],N);
|
||||
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x,rank_x,sendtag);
|
||||
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag);
|
||||
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x, rank_x,sendtag+0);
|
||||
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X, rank_X,recvtag+0);
|
||||
|
||||
//...Packing for X face(1,7,9,11,13)................................
|
||||
ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,&Aq[Component*7*N],N);
|
||||
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X,rank_X,sendtag);
|
||||
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag);
|
||||
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X, rank_X,sendtag+1);
|
||||
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x, rank_x,recvtag+1);
|
||||
|
||||
//...Packing for y face(4,8,9,16,18).................................
|
||||
ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,&Aq[Component*7*N],N);
|
||||
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y,rank_y,sendtag);
|
||||
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag);
|
||||
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y, rank_y,sendtag+2);
|
||||
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y, rank_Y,recvtag+2);
|
||||
|
||||
//...Packing for Y face(3,7,10,15,17).................................
|
||||
ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,&Aq[Component*7*N],N);
|
||||
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag);
|
||||
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag);
|
||||
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y, rank_Y,sendtag+3);
|
||||
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y, rank_y,recvtag+3);
|
||||
|
||||
//...Packing for z face(6,12,13,16,17)................................
|
||||
ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,&Aq[Component*7*N],N);
|
||||
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z,rank_z,sendtag);
|
||||
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag);
|
||||
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z, rank_z,sendtag+4);
|
||||
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z, rank_Z,recvtag+4);
|
||||
|
||||
//...Packing for Z face(5,11,14,15,18)................................
|
||||
ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,&Aq[Component*7*N],N);
|
||||
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag);
|
||||
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag);
|
||||
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z, rank_Z,sendtag+5);
|
||||
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z, rank_z,recvtag+5);
|
||||
}
|
||||
|
||||
|
||||
@@ -1896,7 +1912,7 @@ void ScaLBL_Communicator::TriSendD3Q7AA(double *Aq, double *Bq, double *Cq){
|
||||
Lock=true;
|
||||
}
|
||||
// assign the base tag for the three-distribution (Aq, Bq, Cq) D3Q7 communication; a per-face offset is added to each message
|
||||
sendtag = recvtag = 15;
|
||||
sendtag = recvtag = 162;
|
||||
ScaLBL_DeviceBarrier();
|
||||
// Pack the distributions
|
||||
//...Packing for x face(2,8,10,12,14)................................
|
||||
@@ -1926,18 +1942,18 @@ void ScaLBL_Communicator::TriSendD3Q7AA(double *Aq, double *Bq, double *Cq){
|
||||
|
||||
//...................................................................................
|
||||
// Send all the distributions
|
||||
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 3*sendCount_x,rank_x,sendtag);
|
||||
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 3*recvCount_X,rank_X,recvtag);
|
||||
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 3*sendCount_X,rank_X,sendtag);
|
||||
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 3*recvCount_x,rank_x,recvtag);
|
||||
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 3*sendCount_y,rank_y,sendtag);
|
||||
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 3*recvCount_Y,rank_Y,recvtag);
|
||||
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 3*sendCount_Y,rank_Y,sendtag);
|
||||
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 3*recvCount_y,rank_y,recvtag);
|
||||
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 3*sendCount_z,rank_z,sendtag);
|
||||
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 3*recvCount_Z,rank_Z,recvtag);
|
||||
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 3*sendCount_Z,rank_Z,sendtag);
|
||||
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 3*recvCount_z,rank_z,recvtag);
|
||||
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 3*sendCount_x, rank_x,sendtag+0);
|
||||
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 3*recvCount_X, rank_X,recvtag+0);
|
||||
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 3*sendCount_X, rank_X,sendtag+1);
|
||||
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 3*recvCount_x, rank_x,recvtag+1);
|
||||
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 3*sendCount_y, rank_y,sendtag+2);
|
||||
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 3*recvCount_Y, rank_Y,recvtag+2);
|
||||
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 3*sendCount_Y, rank_Y,sendtag+3);
|
||||
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 3*recvCount_y, rank_y,recvtag+3);
|
||||
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 3*sendCount_z, rank_z,sendtag+4);
|
||||
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 3*recvCount_Z, rank_Z,recvtag+4);
|
||||
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 3*sendCount_Z, rank_Z,sendtag+5);
|
||||
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 3*recvCount_z, rank_z,recvtag+5);
|
||||
|
||||
}
|
||||
|
||||
@@ -2018,7 +2034,7 @@ void ScaLBL_Communicator::SendHalo(double *data){
|
||||
}
|
||||
ScaLBL_DeviceBarrier();
|
||||
//...................................................................................
|
||||
sendtag = recvtag = 1;
|
||||
sendtag = recvtag = 168;
|
||||
//...................................................................................
|
||||
ScaLBL_Scalar_Pack(dvcSendList_x, sendCount_x,sendbuf_x, data, N);
|
||||
ScaLBL_Scalar_Pack(dvcSendList_y, sendCount_y,sendbuf_y, data, N);
|
||||
@@ -2042,42 +2058,42 @@ void ScaLBL_Communicator::SendHalo(double *data){
|
||||
// Send / Recv all the phase indicator field values
|
||||
//...................................................................................
|
||||
|
||||
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x,rank_x,sendtag);
|
||||
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag);
|
||||
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X,rank_X,sendtag);
|
||||
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag);
|
||||
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y,rank_y,sendtag);
|
||||
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag);
|
||||
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag);
|
||||
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag);
|
||||
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z,rank_z,sendtag);
|
||||
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag);
|
||||
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag);
|
||||
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag);
|
||||
req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag);
|
||||
req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag);
|
||||
req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag);
|
||||
req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag);
|
||||
req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag);
|
||||
req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag);
|
||||
req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag);
|
||||
req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag);
|
||||
req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag);
|
||||
req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag);
|
||||
req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag);
|
||||
req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag);
|
||||
req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag);
|
||||
req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag);
|
||||
req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag);
|
||||
req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag);
|
||||
req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag);
|
||||
req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag);
|
||||
req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag);
|
||||
req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag);
|
||||
req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag);
|
||||
req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag);
|
||||
req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag);
|
||||
req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag);
|
||||
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x, rank_x,sendtag+0);
|
||||
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X, rank_X,recvtag+0);
|
||||
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X, rank_X,sendtag+1);
|
||||
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x, rank_x,recvtag+1);
|
||||
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y, rank_y,sendtag+2);
|
||||
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y, rank_Y,recvtag+2);
|
||||
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y, rank_Y,sendtag+3);
|
||||
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y, rank_y,recvtag+3);
|
||||
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z, rank_z,sendtag+4);
|
||||
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z, rank_Z,recvtag+4);
|
||||
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z, rank_Z,sendtag+5);
|
||||
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z, rank_z,recvtag+5);
|
||||
req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy, rank_xy,sendtag+6);
|
||||
req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY, rank_XY,recvtag+6);
|
||||
req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY, rank_XY,sendtag+7);
|
||||
req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy, rank_xy,recvtag+7);
|
||||
req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy, rank_Xy,sendtag+8);
|
||||
req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY, rank_xY,recvtag+8);
|
||||
req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY, rank_xY,sendtag+9);
|
||||
req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy, rank_Xy,recvtag+9);
|
||||
req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz, rank_xz,sendtag+10);
|
||||
req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ, rank_XZ,recvtag+10);
|
||||
req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ, rank_XZ,sendtag+11);
|
||||
req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz, rank_xz,recvtag+11);
|
||||
req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz, rank_Xz,sendtag+12);
|
||||
req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ, rank_xZ,recvtag+12);
|
||||
req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ, rank_xZ,sendtag+13);
|
||||
req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz, rank_Xz,recvtag+13);
|
||||
req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz, rank_yz,sendtag+14);
|
||||
req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ, rank_YZ,recvtag+14);
|
||||
req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ, rank_YZ,sendtag+15);
|
||||
req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz, rank_yz,recvtag+15);
|
||||
req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz, rank_Yz,sendtag+16);
|
||||
req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ, rank_yZ,recvtag+16);
|
||||
req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ, rank_yZ,sendtag+17);
|
||||
req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz, rank_Yz,recvtag+17);
|
||||
//...................................................................................
|
||||
}
|
||||
void ScaLBL_Communicator::RecvHalo(double *data){
|
||||
|
||||
@@ -217,6 +217,25 @@ extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int
|
||||
*/
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz);
|
||||
|
||||
// MEMBRANE MODEL
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef, double *dist, double *Den, int memLinks, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
|
||||
double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
|
||||
int memLinks, int Nx, int Ny, int Nz, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
|
||||
const int Cqx, const int Cqy, int const Cqz,
|
||||
int *Map, double *Distance, double *Psi, double Threshold,
|
||||
double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
|
||||
int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
|
||||
const int N, const int Nx, const int Ny, const int Nz);
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q,
|
||||
int *d3q7_recvlist, int *d3q7_linkList, int start, int nlinks, int count,
|
||||
double *recvbuf, double *dist, int N, double *coef);
|
||||
|
||||
// GREYSCALE MODEL (Single-component)
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_GreyIMRT_Init(double *Dist, int Np, double Den);
|
||||
@@ -262,6 +281,10 @@ extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor_CP(int *d_neighborList, int *M
|
||||
//extern "C" void ScaLBL_Update_GreyscalePotential(int *Map, double *Phi, double *Psi, double *Poro, double *Perm, double alpha, double W,
|
||||
// int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_Compact( double *d_dist, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_Compact( int *d_neighborList, double *d_dist, int Np);
|
||||
|
||||
// ION TRANSPORT MODEL
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np);
|
||||
@@ -278,7 +301,8 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDi
|
||||
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np);
|
||||
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np);
|
||||
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, double IonValence, int ion_component, int start, int finish, int Np);
|
||||
|
||||
|
||||
// LBM Poisson solver
|
||||
|
||||
@@ -349,7 +373,22 @@ extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *d
|
||||
* @param finish - lattice node to finish loop
|
||||
* @param Np - size of local sub-domain (derived from Domain structure)
|
||||
*/
|
||||
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np);
|
||||
extern "C" void ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np);
|
||||
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
|
||||
double *dist, double *Den_charge,
|
||||
double *Psi, double *ElectricField,
|
||||
double tau, double epsilon_LB, bool UseSlippingVelBC,
|
||||
int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
|
||||
double *Den_charge, double *Psi,
|
||||
double *ElectricField, double *Error, double tau,
|
||||
double epsilon_LB, bool UseSlippingVelBC,
|
||||
int start, int finish, int Np);
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np);
|
||||
|
||||
// LBM Stokes Model (adapted from MRT model)
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB,
|
||||
@@ -702,6 +741,14 @@ public:
|
||||
|
||||
double GetPerformance(int *NeighborList, double *fq, int Np);
|
||||
int MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, signed char *id, int Np, int width);
|
||||
/**
|
||||
* \brief Create membrane data structure
|
||||
* - cut lattice links based on distance map
|
||||
* @param Distance - signed distance to membrane
|
||||
* @param neighborList - data structure that retains lattice links
|
||||
* @param Np - number of lattice sites
|
||||
* @param width - halo width for the model
|
||||
*/
|
||||
void Barrier(){
|
||||
ScaLBL_DeviceBarrier();
|
||||
MPI_COMM_SCALBL.barrier();
|
||||
@@ -782,7 +829,6 @@ private:
|
||||
int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ;
|
||||
int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ;
|
||||
//......................................................................................
|
||||
|
||||
int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z;
|
||||
int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ;
|
||||
int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ;
|
||||
@@ -799,6 +845,12 @@ private:
|
||||
int *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, *dvcRecvDist_Y, *dvcRecvDist_Z;
|
||||
int *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ;
|
||||
int *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, *dvcRecvDist_YZ, *dvcRecvDist_XZ;
|
||||
// MPI requests for persistent communications
|
||||
std::vector<std::shared_ptr<MPI_Request>> req_D3Q19AA;
|
||||
std::vector<std::shared_ptr<MPI_Request>> req_BiD3Q19AA;
|
||||
std::vector<std::shared_ptr<MPI_Request>> req_TriD3Q19AA;
|
||||
void start( std::vector<std::shared_ptr<MPI_Request>>& requests );
|
||||
void wait( std::vector<std::shared_ptr<MPI_Request>>& requests );
|
||||
//......................................................................................
|
||||
int *bb_dist;
|
||||
int *bb_interactions;
|
||||
|
||||
@@ -69,7 +69,7 @@ void Utilities::startup(int argc, char **argv, bool multiple) {
|
||||
"thread support, thread support will be disabled"
|
||||
<< std::endl;
|
||||
}
|
||||
StackTrace::globalCallStackInitialize(MPI_COMM_WORLD);
|
||||
//StackTrace::globalCallStackInitialize(MPI_COMM_WORLD);
|
||||
} else {
|
||||
MPI_Init(&argc, &argv);
|
||||
}
|
||||
@@ -86,7 +86,7 @@ void Utilities::shutdown() {
|
||||
int rank = 0;
|
||||
#ifdef USE_MPI
|
||||
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
|
||||
StackTrace::globalCallStackFinalize();
|
||||
//StackTrace::globalCallStackFinalize();
|
||||
MPI_Barrier(MPI_COMM_WORLD);
|
||||
MPI_Finalize();
|
||||
#endif
|
||||
|
||||
@@ -173,8 +173,7 @@
|
||||
_Pragma( "GCC diagnostic ignored \"-Wunused-local-typedefs\"" ) \
|
||||
_Pragma( "GCC diagnostic ignored \"-Woverloaded-virtual\"" ) \
|
||||
_Pragma( "GCC diagnostic ignored \"-Wunused-parameter\"" ) \
|
||||
_Pragma( "GCC diagnostic ignored \"-Warray-bounds\"" ) \
|
||||
_Pragma( "GCC diagnostic ignored \"-Wterminate\"" )
|
||||
_Pragma( "GCC diagnostic ignored \"-Warray-bounds\"" )
|
||||
#define ENABLE_WARNINGS _Pragma( "GCC diagnostic pop" )
|
||||
#else
|
||||
#define DISABLE_WARNINGS
|
||||
|
||||
@@ -48,6 +48,7 @@ extern "C" void ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AA_Init(double *f_even, double *f_odd, int Np) {
|
||||
int n;
|
||||
for (n = 0; n < Np; n++) {
|
||||
@@ -1883,7 +1884,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_MRT(int *neighborList, double *dist,
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_Compact(char *ID, double *dist, int Np) {
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_Compact(double *dist, int Np) {
|
||||
|
||||
for (int n = 0; n < Np; n++) {
|
||||
|
||||
@@ -1941,7 +1942,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_Compact(char *ID, double *dist, int Np) {
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_Compact(char *ID, int *neighborList,
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_Compact(int *neighborList,
|
||||
double *dist, int Np) {
|
||||
int nread;
|
||||
|
||||
|
||||
221
cpu/Ion.cpp
221
cpu/Ion.cpp
@@ -1,4 +1,161 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
/**
 * \brief Assign the pair of mass-transfer coefficients for each interior membrane link.
 *
 * Each link stores two distribution indices in membrane[2*link] and
 * membrane[2*link+1]. The site of each is obtained as (index % Np) and passed
 * through Map to look up the potential in Psi. When the potential difference
 * (first site minus second site) exceeds Threshold, the threshold coefficients
 * are used; otherwise the regular ones are.
 *
 * @param membrane  link list, two distribution indices per link
 * @param Map       site index -> index used to read Psi
 * @param Distance  not read by this routine
 * @param Psi       electric potential
 * @param coef      output, two coefficients per link (coef[2*link], coef[2*link+1])
 * @param Threshold potential difference above which the threshold coefficients apply
 * @param MassFractionIn           regular coefficient written to coef[2*link]
 * @param MassFractionOut          regular coefficient written to coef[2*link+1]
 * @param ThresholdMassFractionIn  above-threshold coefficient written to coef[2*link]
 * @param ThresholdMassFractionOut above-threshold coefficient written to coef[2*link+1]
 * @param memLinks  number of links to process
 * @param Nx,Ny,Nz  not read by this routine
 * @param Np        number of sites (modulus used to recover the site from a distribution index)
 */
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
        double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
        int memLinks, int Nx, int Ny, int Nz, int Np) {

    for (int link = 0; link < memLinks; ++link) {
        // Recover the two sites joined by this link.
        const int siteQ = membrane[2 * link] % Np;
        const int siteP = membrane[2 * link + 1] % Np;

        // Membrane potential for this link.
        const double potential = Psi[Map[siteQ]] - Psi[Map[siteP]];
        const bool aboveThreshold = potential > Threshold;

        // Save the mass transfer coefficients.
        coef[2 * link] = aboveThreshold ? ThresholdMassFractionIn : MassFractionIn;
        coef[2 * link + 1] = aboveThreshold ? ThresholdMassFractionOut : MassFractionOut;
    }
}
|
||||
|
||||
/**
 * \brief Assign mass-transfer coefficients for membrane links that cross the halo.
 *
 * Processes links [nlinks, count) of d3q7_linkList. For each link the receive
 * slot is d3q7_linkList[link]; the local site is d3q7_recvlist[start + slot],
 * mapped through Map to an index nqm into Distance and Psi. The partner site is
 * found by decomposing nqm into (i, j, k) on an Nx x Ny grid and stepping back
 * by (Cqx, Cqy, Cqz).
 *
 * Coefficient selection (potential = Psi[local] - Psi[partner]):
 *  - default:                      aq = MassFractionIn,          ap = MassFractionOut
 *  - Distance[local] > 0 and potential < -Threshold:
 *                                  aq = MassFractionOut,         ap = MassFractionIn
 *  - Distance[local] > 0 otherwise: aq = ThresholdMassFractionOut, ap = ThresholdMassFractionIn
 *  - Distance[local] <= 0 and potential > Threshold:
 *                                  aq = ThresholdMassFractionIn, ap = ThresholdMassFractionOut
 *
 * A negative receive-list entry is treated as "no local site": Map is not
 * indexed and the default coefficients are stored. ScaLBL_D3Q7_Membrane_Unpack
 * skips such entries, so it does not consume those values.
 *
 * @param Cqx,Cqy,Cqz   lattice direction of the link
 * @param Map           site index -> index into Distance / Psi
 * @param Distance      distance values; the sign at the local site selects the branch
 * @param Psi           electric potential
 * @param Threshold     potential threshold
 * @param d3q7_recvlist receive list of site indices
 * @param d3q7_linkList receive slot for each link
 * @param coef          output; pair (aq, ap) stored at 2*(link-nlinks), 2*(link-nlinks)+1
 * @param start         offset into d3q7_recvlist
 * @param nlinks        first link handled here
 * @param count         one past the last link handled here
 * @param N             not read by this routine
 * @param Nx,Ny         grid extents used for index decomposition
 * @param Nz            not read by this routine
 */
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
        const int Cqx, const int Cqy, int const Cqz,
        int *Map, double *Distance, double *Psi, double Threshold,
        double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
        int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
        const int N, const int Nx, const int Ny, const int Nz) {

    for (int link = nlinks; link < count; link++) {
        // Receive slot for this link (deals with reordering of links).
        // NOTE(review): the original author questioned whether `start` should
        // also offset this lookup -- confirm against the caller.
        const int idx = d3q7_linkList[link];
        // Local site index
        const int n = d3q7_recvlist[start + idx];

        double aq = MassFractionIn;
        double ap = MassFractionOut;

        if (!(n < 0)) {
            // Index of the local site in the layout used by Distance / Psi
            const int nqm = Map[n];
            const double distanceLocal = Distance[nqm];
            const double psiLocal = Psi[nqm];

            // Decompose into 3-D indices, then step back along the link direction
            int k = nqm / (Nx * Ny);
            int j = (nqm - Nx * Ny * k) / Nx;
            int i = nqm - Nx * Ny * k - Nx * j;
            i -= Cqx;
            j -= Cqy;
            k -= Cqz;
            const int npm = k * Nx * Ny + j * Nx + i;
            const double psiNonlocal = Psi[npm];

            const double membranePotential = psiLocal - psiNonlocal;

            if (distanceLocal > 0.0) {
                /* local site has positive distance: coefficients are swapped */
                if (membranePotential < Threshold * (-1.0)) {
                    ap = MassFractionIn;
                    aq = MassFractionOut;
                } else {
                    ap = ThresholdMassFractionIn;
                    aq = ThresholdMassFractionOut;
                }
            } else if (membranePotential > Threshold) {
                aq = ThresholdMassFractionIn;
                ap = ThresholdMassFractionOut;
            }
        }

        // Store the mass transfer coefficients for this link
        coef[2 * (link - nlinks)] = aq;
        coef[2 * (link - nlinks) + 1] = ap;
    }
}
|
||||
|
||||
|
||||
/**
 * \brief Unpack received distribution values for direction q, applying the
 *        membrane rule to the membrane links.
 *
 * Links [0, nlinks) are regular: the received value is copied into
 * dist[q*N + site]. Links [nlinks, count) are membrane links: the stored value
 * becomes (1 - aq) * current + ap * received, with (aq, ap) read from
 * coef[2*(link-nlinks)] and coef[2*(link-nlinks)+1].
 * Entries of the receive list that are negative are skipped.
 *
 * @param q             distribution direction being written
 * @param d3q7_recvlist receive list of site indices
 * @param d3q7_linkList receive slot for each link
 * @param start         offset into d3q7_recvlist and recvbuf
 * @param nlinks        number of regular links
 * @param count         total number of links (regular + membrane)
 * @param recvbuf       receive buffer
 * @param dist          distributions, direction q stored at offset q*N
 * @param N             stride between directions in dist
 * @param coef          coefficient pairs for the membrane links
 */
extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q,
        int *d3q7_recvlist, int *d3q7_linkList, int start, int nlinks, int count,
        double *recvbuf, double *dist, int N, double *coef) {

    /* Pass 1: regular links are copied straight from the buffer */
    for (int link = 0; link < nlinks; ++link) {
        const int slot = start + d3q7_linkList[link];
        const int site = d3q7_recvlist[slot];
        if (site < 0)
            continue;
        dist[q * N + site] = recvbuf[slot];
    }

    /* Pass 2: membrane links blend the local and the received value */
    for (int link = nlinks; link < count; ++link) {
        const int slot = start + d3q7_linkList[link];
        const int site = d3q7_recvlist[slot];
        if (site < 0)
            continue;
        const int pairBase = 2 * (link - nlinks);
        const double aq = coef[pairBase];
        const double ap = coef[pairBase + 1];
        const double localValue = dist[q * N + site];
        const double received = recvbuf[slot];
        dist[q * N + site] = (1 - aq) * localValue + ap * received;
    }
}
|
||||
|
||||
/**
 * \brief Apply the membrane exchange rule to each interior link and update the
 *        concentration accordingly.
 *
 * For a link with distribution indices (iq, ip) and coefficients (aq, ap):
 *   fq' = (1 - aq) * fq + ap * fp
 *   fp' = (1 - ap) * fp + aq * fq
 * The change in each distribution is added to Den at the matching site
 * (index % Np), and the new values are written back to dist.
 *
 * @param membrane  link list, two distribution indices per link
 * @param coef      coefficient pairs (aq at 2*link, ap at 2*link+1)
 * @param dist      distributions, updated in place
 * @param Den       concentration per site, updated in place
 * @param memLinks  number of links
 * @param Np        number of sites (modulus used to recover the site index)
 */
extern "C" void ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef,
        double *dist, double *Den, int memLinks, int Np) {

    for (int link = 0; link < memLinks; ++link) {
        const double aq = coef[2 * link];
        const double ap = coef[2 * link + 1];

        const int distQ = membrane[2 * link];
        const int distP = membrane[2 * link + 1];
        const int siteQ = distQ % Np;
        const int siteP = distP % Np;

        // Current values on both sides of the link
        const double fq = dist[distQ];
        const double fp = dist[distP];

        // Values after exchange through the membrane
        const double fqNew = (1 - aq) * fq + ap * fp;
        const double fpNew = (1 - ap) * fp + aq * fq;

        // Read both concentrations before writing either one
        double concQ = Den[siteQ];
        double concP = Den[siteP];
        concQ += fqNew - fq;
        concP += fpNew - fp;
        Den[siteQ] = concQ;
        Den[siteP] = concP;

        dist[distQ] = fqNew;
        dist[distP] = fpNew;
    }
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList,
|
||||
double *dist, double *Den,
|
||||
@@ -99,12 +256,12 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist,
|
||||
double Ex, Ey, Ez; //electrical field
|
||||
double flux_diffusive_x, flux_diffusive_y, flux_diffusive_z;
|
||||
double f0, f1, f2, f3, f4, f5, f6;
|
||||
double X,Y,Z,factor_x, factor_y, factor_z;
|
||||
int nr1, nr2, nr3, nr4, nr5, nr6;
|
||||
|
||||
for (n = start; n < finish; n++) {
|
||||
|
||||
//Load data
|
||||
Ci = Den[n];
|
||||
Ex = ElectricField[n + 0 * Np];
|
||||
Ey = ElectricField[n + 1 * Np];
|
||||
Ez = ElectricField[n + 2 * Np];
|
||||
@@ -137,6 +294,7 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist,
|
||||
f6 = dist[nr6];
|
||||
|
||||
// compute diffusive flux
|
||||
Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
|
||||
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
|
||||
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
|
||||
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
|
||||
@@ -149,33 +307,50 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist,
|
||||
FluxElectrical[n + 0 * Np] = uEPx * Ci;
|
||||
FluxElectrical[n + 1 * Np] = uEPy * Ci;
|
||||
FluxElectrical[n + 2 * Np] = uEPz * Ci;
|
||||
|
||||
Den[n] = Ci;
|
||||
|
||||
/* use logistic function to prevent negative distributions*/
|
||||
X = 4.0 * (ux + uEPx);
|
||||
Y = 4.0 * (uy + uEPy);
|
||||
Z = 4.0 * (uz + uEPz);
|
||||
factor_x = X / sqrt(1 + X*X);
|
||||
factor_y = Y / sqrt(1 + Y*Y);
|
||||
factor_z = Z / sqrt(1 + Z*Z);
|
||||
|
||||
// q=0
|
||||
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
|
||||
|
||||
// q = 1
|
||||
dist[nr2] =
|
||||
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
|
||||
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
|
||||
//f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
|
||||
|
||||
|
||||
// q=2
|
||||
dist[nr1] =
|
||||
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
|
||||
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
|
||||
//f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
|
||||
|
||||
// q = 3
|
||||
dist[nr4] =
|
||||
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
|
||||
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y );
|
||||
//f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
|
||||
|
||||
// q = 4
|
||||
dist[nr3] =
|
||||
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
|
||||
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
|
||||
//f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
|
||||
|
||||
// q = 5
|
||||
dist[nr6] =
|
||||
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
|
||||
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
|
||||
//f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
|
||||
|
||||
// q = 6
|
||||
dist[nr5] =
|
||||
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
|
||||
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -190,11 +365,12 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion(
|
||||
double Ex, Ey, Ez; //electrical field
|
||||
double flux_diffusive_x, flux_diffusive_y, flux_diffusive_z;
|
||||
double f0, f1, f2, f3, f4, f5, f6;
|
||||
double X,Y,Z, factor_x, factor_y, factor_z;
|
||||
|
||||
for (n = start; n < finish; n++) {
|
||||
|
||||
//Load data
|
||||
Ci = Den[n];
|
||||
//Ci = Den[n];
|
||||
Ex = ElectricField[n + 0 * Np];
|
||||
Ey = ElectricField[n + 1 * Np];
|
||||
Ez = ElectricField[n + 2 * Np];
|
||||
@@ -214,6 +390,7 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion(
|
||||
f6 = dist[5 * Np + n];
|
||||
|
||||
// compute diffusive flux
|
||||
Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
|
||||
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
|
||||
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
|
||||
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
|
||||
@@ -226,33 +403,49 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion(
|
||||
FluxElectrical[n + 0 * Np] = uEPx * Ci;
|
||||
FluxElectrical[n + 1 * Np] = uEPy * Ci;
|
||||
FluxElectrical[n + 2 * Np] = uEPz * Ci;
|
||||
|
||||
Den[n] = Ci;
|
||||
|
||||
/* use logistic function to prevent negative distributions*/
|
||||
X = 4.0 * (ux + uEPx);
|
||||
Y = 4.0 * (uy + uEPy);
|
||||
Z = 4.0 * (uz + uEPz);
|
||||
factor_x = X / sqrt(1 + X*X);
|
||||
factor_y = Y / sqrt(1 + Y*Y);
|
||||
factor_z = Z / sqrt(1 + Z*Z);
|
||||
|
||||
// q=0
|
||||
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
|
||||
|
||||
// q = 1
|
||||
dist[1 * Np + n] =
|
||||
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
|
||||
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
|
||||
//f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
|
||||
|
||||
// q=2
|
||||
dist[2 * Np + n] =
|
||||
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
|
||||
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
|
||||
//f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
|
||||
|
||||
// q = 3
|
||||
dist[3 * Np + n] =
|
||||
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
|
||||
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y);
|
||||
//f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
|
||||
|
||||
// q = 4
|
||||
dist[4 * Np + n] =
|
||||
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
|
||||
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
|
||||
//f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
|
||||
|
||||
// q = 5
|
||||
dist[5 * Np + n] =
|
||||
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
|
||||
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
|
||||
//f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
|
||||
|
||||
// q = 6
|
||||
dist[6 * Np + n] =
|
||||
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
|
||||
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
|
||||
//f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
42
cpu/MembraneHelper.cpp
Normal file
42
cpu/MembraneHelper.cpp
Normal file
@@ -0,0 +1,42 @@
|
||||
|
||||
/**
 * \brief Unpack received distribution values for direction q through a link list.
 *
 * For each of the linkCount links, the receive slot is links[start + link];
 * the destination site is list[start + slot] and the value is
 * recvbuf[start + slot]. Negative site entries are skipped.
 *
 * @param q         distribution direction being written
 * @param list      receive list of site indices
 * @param links     receive slot for each link
 * @param start     offset applied to links, list and recvbuf
 * @param linkCount number of links to unpack
 * @param recvbuf   receive buffer
 * @param dist      distributions, direction q stored at offset q*N
 * @param N         stride between directions in dist
 */
extern "C" void Membrane_D3Q19_Unpack(int q, int *list, int *links, int start, int linkCount,
        double *recvbuf, double *dist, int N) {

    for (int link = 0; link < linkCount; ++link) {
        const int slot = start + links[start + link];
        // Site index for this slot
        const int site = list[slot];
        if (site < 0)
            continue;
        // Store the received value at its destination
        dist[q * N + site] = recvbuf[slot];
    }
}
|
||||
|
||||
/**
 * \brief Unpack received distribution values for direction q, scaling each by a
 *        per-slot coefficient.
 *
 * Processes links [offset, linkCount). For each link the receive slot is
 * links[start + link] (the same lookup Membrane_D3Q19_Unpack performs); the
 * destination site is list[start + slot] and the stored value is
 * coef[start + slot] * recvbuf[start + slot]. Negative site entries are skipped.
 *
 * The slot was previously read from `list` rather than `links`, which left the
 * `links` argument unused and treated a site index as a slot number.
 *
 * @param q         distribution direction being written
 * @param list      receive list of site indices
 * @param links     receive slot for each link
 * @param coef      scaling coefficient per receive slot
 * @param start     offset applied to links, list, coef and recvbuf
 * @param offset    first link to process
 * @param linkCount one past the last link to process
 * @param recvbuf   receive buffer
 * @param dist      distributions, direction q stored at offset q*N
 * @param N         stride between directions in dist
 */
extern "C" void Membrane_D3Q19_Transport(int q, int *list, int *links, double *coef, int start, int offset,
        int linkCount, double *recvbuf, double *dist, int N) {

    for (int link = offset; link < linkCount; link++) {
        // Receive slot for this link
        const int idx = links[start + link];
        // Site index for this slot
        const int n = list[start + idx];
        if (n < 0)
            continue;
        const double alpha = coef[start + idx];
        // Store the scaled value at its destination
        dist[q * N + n] = alpha * recvbuf[start + idx];
    }
}
|
||||
560
cpu/Poisson.cpp
560
cpu/Poisson.cpp
@@ -1,3 +1,4 @@
|
||||
#include <math.h>
|
||||
|
||||
extern "C" void
|
||||
ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList, int *Map,
|
||||
@@ -150,25 +151,25 @@ extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map,
|
||||
ElectricField[n + 2 * Np] = Ez;
|
||||
|
||||
// q = 0
|
||||
dist[n] = f0 * (1.0 - rlx) + 0.25 * (rlx * psi + rho_e);
|
||||
dist[n] = f0 * (1.0 - rlx) + 0.25 * (rlx * psi) - rho_e;
|
||||
|
||||
// q = 1
|
||||
dist[nr2] = f1 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
|
||||
dist[nr2] = f1 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
|
||||
|
||||
// q = 2
|
||||
dist[nr1] = f2 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
|
||||
dist[nr1] = f2 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
|
||||
|
||||
// q = 3
|
||||
dist[nr4] = f3 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
|
||||
dist[nr4] = f3 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
|
||||
|
||||
// q = 4
|
||||
dist[nr3] = f4 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
|
||||
dist[nr3] = f4 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
|
||||
|
||||
// q = 5
|
||||
dist[nr6] = f5 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
|
||||
dist[nr6] = f5 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
|
||||
|
||||
// q = 6
|
||||
dist[nr5] = f6 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
|
||||
dist[nr5] = f6 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
|
||||
//........................................................................
|
||||
}
|
||||
}
|
||||
@@ -213,25 +214,25 @@ extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist,
|
||||
ElectricField[n + 2 * Np] = Ez;
|
||||
|
||||
// q = 0
|
||||
dist[n] = f0 * (1.0 - rlx) + 0.25 * (rlx * psi + rho_e);
|
||||
dist[n] = f0 * (1.0 - rlx) + 0.25 * (rlx * psi) - rho_e;
|
||||
|
||||
// q = 1
|
||||
dist[1 * Np + n] = f1 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
|
||||
dist[1 * Np + n] = f1 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
|
||||
|
||||
// q = 2
|
||||
dist[2 * Np + n] = f2 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
|
||||
dist[2 * Np + n] = f2 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
|
||||
|
||||
// q = 3
|
||||
dist[3 * Np + n] = f3 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
|
||||
dist[3 * Np + n] = f3 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
|
||||
|
||||
// q = 4
|
||||
dist[4 * Np + n] = f4 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
|
||||
dist[4 * Np + n] = f4 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
|
||||
|
||||
// q = 5
|
||||
dist[5 * Np + n] = f5 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
|
||||
dist[5 * Np + n] = f5 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
|
||||
|
||||
// q = 6
|
||||
dist[6 * Np + n] = f6 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
|
||||
dist[6 * Np + n] = f6 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
|
||||
//........................................................................
|
||||
}
|
||||
}
|
||||
@@ -444,34 +445,503 @@ extern "C" void ScaLBL_D3Q7_PoissonResidualError(
|
||||
// }
|
||||
//}
|
||||
|
||||
//extern "C" void ScaLBL_D3Q7_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np){
|
||||
// int n;
|
||||
// // distributions
|
||||
// double f1,f2,f3,f4,f5,f6;
|
||||
// double Ex,Ey,Ez;
|
||||
// double rlx=1.0/tau;
|
||||
//
|
||||
// for (n=0; n<Np; n++){
|
||||
// //........................................................................
|
||||
// // Registers to store the distributions
|
||||
// //........................................................................
|
||||
// f1 = dist[Np+n];
|
||||
// f2 = dist[2*Np+n];
|
||||
// f3 = dist[3*Np+n];
|
||||
// f4 = dist[4*Np+n];
|
||||
// f5 = dist[5*Np+n];
|
||||
// f6 = dist[6*Np+n];
|
||||
// //.................Compute the Electric Field...................................
|
||||
// //Ex = (f1-f2)*rlx*4.5;//NOTE the unit of electric field here is V/lu
|
||||
// //Ey = (f3-f4)*rlx*4.5;
|
||||
// //Ez = (f5-f6)*rlx*4.5;
|
||||
// Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu
|
||||
// Ey = (f3-f4)*rlx*4.0;
|
||||
// Ez = (f5-f6)*rlx*4.0;
|
||||
// //..................Write the Electric Field.....................................
|
||||
// ElectricField[0*Np+n] = Ex;
|
||||
// ElectricField[1*Np+n] = Ey;
|
||||
// ElectricField[2*Np+n] = Ez;
|
||||
// //........................................................................
|
||||
// }
|
||||
//}
|
||||
/**
 * \brief Compute the electric field at every site from the D3Q19 distributions.
 *
 * For each site n in [0, Np) the 18 moving distributions are read from
 * dist[q*Np + n]. Each odd/even pair is stored swapped (f1 at 2*Np, f2 at
 * 1*Np, and so on). The signed sums along x, y and z are scaled by
 * (1/tau) * 3.0 and written to ElectricField[0*Np+n], [1*Np+n], [2*Np+n].
 *
 * @param dist          distributions, 19 directions with stride Np
 * @param ElectricField output, three components with stride Np
 * @param tau           relaxation time; the result scales with 1/tau
 * @param Np            number of sites
 */
extern "C" void ScaLBL_D3Q19_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np) {
    const double rlx = 1.0 / tau;

    for (int n = 0; n < Np; n++) {
        // Load the distributions (pairs are stored swapped)
        const double f1 = dist[2 * Np + n];
        const double f2 = dist[1 * Np + n];
        const double f3 = dist[4 * Np + n];
        const double f4 = dist[3 * Np + n];
        const double f5 = dist[6 * Np + n];
        const double f6 = dist[5 * Np + n];
        const double f7 = dist[8 * Np + n];
        const double f8 = dist[7 * Np + n];
        const double f9 = dist[10 * Np + n];
        const double f10 = dist[9 * Np + n];
        const double f11 = dist[12 * Np + n];
        const double f12 = dist[11 * Np + n];
        const double f13 = dist[14 * Np + n];
        const double f14 = dist[13 * Np + n];
        const double f15 = dist[16 * Np + n];
        const double f16 = dist[15 * Np + n];
        const double f17 = dist[18 * Np + n];
        const double f18 = dist[17 * Np + n];

        // Electric field; NOTE the unit of electric field here is V/lu
        const double Ex = (f1 - f2 + f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14) * rlx * 3.0;
        const double Ey = (f3 - f4 + f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18) * rlx * 3.0;
        const double Ez = (f5 - f6 + f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18) * rlx * 3.0;

        ElectricField[0 * Np + n] = Ex;
        ElectricField[1 * Np + n] = Ey;
        ElectricField[2 * Np + n] = Ez;
    }
}
|
||||
|
||||
/**
 * \brief Compute the electric potential from the D3Q19 distributions, reading
 *        the moving distributions through the neighbor list.
 *
 * For each site n in [start, finish):
 *  - f0 is read from dist[n];
 *  - f_q (q = 1..18) is read from dist[neighborList[n + (q-1)*Np]];
 *  - psi is the sum of all 19 values;
 *  - Psi[Map[n]] = psi - 0.5 * rho_e, where rho_e = Den_charge[n] / epsilon_LB,
 *    or 0 when UseSlippingVelBC is set (Den_charge is not read in that case).
 *
 * The sum is accumulated in the order f0, f2, f1, f4, f3, ..., f18, f17.
 *
 * @param neighborList     18 neighbor index tables with stride Np
 * @param Map              site index -> index into Psi
 * @param dist             distributions
 * @param Den_charge       charge density per site
 * @param Psi              output potential
 * @param epsilon_LB       divisor applied to the charge density
 * @param UseSlippingVelBC when true the charge density term is taken as zero
 * @param start            first site to process
 * @param finish           one past the last site to process
 * @param Np               number of sites (stride of neighborList)
 */
extern "C" void
ScaLBL_D3Q19_AAodd_Poisson_ElectricPotential(int *neighborList, int *Map,
                                             double *dist, double *Den_charge, double *Psi,
                                             double epsilon_LB, bool UseSlippingVelBC,
                                             int start, int finish, int Np) {
    for (int n = start; n < finish; n++) {
        const double rho_e = UseSlippingVelBC ? 0.0 : Den_charge[n] / epsilon_LB;

        // q = 0
        double psi = dist[n];

        // Moving distributions, taken pairwise: within each (odd, even) pair
        // the even member is accumulated first.
        for (int q = 1; q < 19; q += 2) {
            const double fOdd = dist[neighborList[n + (q - 1) * Np]];
            const double fEven = dist[neighborList[n + q * Np]];
            psi += fEven;
            psi += fOdd;
        }

        Psi[Map[n]] = psi - 0.5 * rho_e;
    }
}
|
||||
|
||||
/**
 * \brief Compute the electric potential from the D3Q19 distributions stored
 *        in place (no neighbor lookup).
 *
 * For each site n in [start, finish), psi is the sum of dist[q*Np + n] for
 * q = 0..18, accumulated in increasing q. The result written is
 * Psi[Map[n]] = psi - 0.5 * rho_e, where rho_e = Den_charge[n] / epsilon_LB,
 * or 0 when UseSlippingVelBC is set (Den_charge is not read in that case).
 *
 * @param Map              site index -> index into Psi
 * @param dist             distributions, 19 directions with stride Np
 * @param Den_charge       charge density per site
 * @param Psi              output potential
 * @param epsilon_LB       divisor applied to the charge density
 * @param UseSlippingVelBC when true the charge density term is taken as zero
 * @param start            first site to process
 * @param finish           one past the last site to process
 * @param Np               number of sites (stride between directions)
 */
extern "C" void ScaLBL_D3Q19_AAeven_Poisson_ElectricPotential(
    int *Map, double *dist, double *Den_charge, double *Psi, double epsilon_LB, bool UseSlippingVelBC, int start, int finish, int Np) {

    for (int n = start; n < finish; n++) {
        const double rho_e = UseSlippingVelBC ? 0.0 : Den_charge[n] / epsilon_LB;

        // q = 0, then the 18 moving distributions in storage order
        double psi = dist[n];
        for (int q = 1; q < 19; q++)
            psi += dist[q * Np + n];

        Psi[Map[n]] = psi - 0.5 * rho_e;
    }
}
|
||||
|
||||
/**
 * \brief D3Q19 Poisson update that reads and writes the moving distributions
 *        through the neighbor list.
 *
 * For each site n in [start, finish):
 *  1. f0 = dist[n]; f_q = dist[nr_q] with nr_q = neighborList[n + (q-1)*Np], q = 1..18.
 *  2. sum_q = f1 + f2 + ... + f18 (accumulated in that order).
 *  3. psi = 2 * (f0*(1 - rlx) + rlx*(sum_q + 0.125*rho_e)), rlx = 1/tau,
 *     rho_e = Den_charge[n] / epsilon_LB, or 0 when UseSlippingVelBC is set.
 *  4. Psi[Map[n]] = psi.
 *  5. The electric field is formed from signed differences of the f_q and
 *     stored at ElectricField[n + c*Np], c = 0, 1, 2.
 *  6. dist[n] = W0*psi; dist[nr_q] = W1*psi for q = 1..6 and W2*psi for
 *     q = 7..18, with W0 = 1/2, W1 = 1/24, W2 = 1/48.
 *
 * All loads happen before any store. Every slot in a weight group receives
 * the same value.
 *
 * @param neighborList     18 neighbor index tables with stride Np
 * @param Map              site index -> index into Psi
 * @param dist             distributions, updated in place
 * @param Den_charge       charge density per site
 * @param Psi              output potential
 * @param ElectricField    output field, three components with stride Np
 * @param tau              relaxation time
 * @param epsilon_LB       divisor applied to the charge density
 * @param UseSlippingVelBC when true the charge density term is taken as zero
 * @param start            first site to process
 * @param finish           one past the last site to process
 * @param Np               number of sites (stride of neighborList / ElectricField)
 */
extern "C" void ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
                                           double *dist, double *Den_charge,
                                           double *Psi, double *ElectricField,
                                           double tau, double epsilon_LB, bool UseSlippingVelBC,
                                           int start, int finish, int Np) {
    const double rlx = 1.0 / tau;

    const double W0 = 0.5;
    const double W1 = 1.0 / 24.0;
    const double W2 = 1.0 / 48.0;

    for (int n = start; n < finish; n++) {
        // When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid
        // is considered electroneutral and the net space charge density is zero.
        const double rho_e = UseSlippingVelBC ? 0.0 : Den_charge[n] / epsilon_LB;

        // Gather: slot 0 is the site itself, slots 1..18 come from the neighbor list
        int nr[19];
        double f[19];
        nr[0] = n;
        f[0] = dist[n];
        for (int q = 1; q < 19; q++) {
            nr[q] = neighborList[n + (q - 1) * Np];
            f[q] = dist[nr[q]];
        }

        // Sum of the moving distributions, f1 + f2 + ... + f18
        double sum_q = f[1];
        for (int q = 2; q < 19; q++)
            sum_q += f[q];

        const double psi = 2.0 * (f[0] * (1.0 - rlx) + rlx * (sum_q + 0.125 * rho_e));

        Psi[Map[n]] = psi;

        // NOTE the unit of electric field here is V/lu
        const double Ex = (f[1] - f[2] + 0.5 * (f[7] - f[8] + f[9] - f[10] + f[11] - f[12] + f[13] - f[14])) * 4.0;
        const double Ey = (f[3] - f[4] + 0.5 * (f[7] - f[8] - f[9] + f[10] + f[15] - f[16] + f[17] - f[18])) * 4.0;
        const double Ez = (f[5] - f[6] + 0.5 * (f[11] - f[12] - f[13] + f[14] + f[15] - f[16] - f[17] + f[18])) * 4.0;
        ElectricField[n + 0 * Np] = Ex;
        ElectricField[n + 1 * Np] = Ey;
        ElectricField[n + 2 * Np] = Ez;

        // Scatter: rest distribution first, then the W1 group, then the W2 group
        dist[n] = W0 * psi;
        for (int q = 1; q <= 6; q++)
            dist[nr[q]] = W1 * psi;
        for (int q = 7; q <= 18; q++)
            dist[nr[q]] = W2 * psi;
    }
}
|
||||
|
||||
/**
 * AA-even sweep of the D3Q19 lattice-Boltzmann Poisson solver on sites [start, finish).
 *
 * For every site n the routine
 *   - loads the 19 distributions from dist (distribution slot q of site n is dist[q*Np + n];
 *     each opposite-direction pair is read swapped: f1 from slot 2, f2 from slot 1, ...),
 *   - writes the electric field to ElectricField[n], ElectricField[n + Np], ElectricField[n + 2*Np],
 *   - writes the residual 8*(sum_q - f0) + rho_e to Error[n],
 *   - writes the relaxed potential psi to Psi[Map[n]],
 *   - overwrites all 19 slots of the site with W_q * psi (W0 = 1/2, W1 = 1/24, W2 = 1/48).
 *
 * @param Map               site index -> index into Psi
 * @param dist              distributions, 19*Np values, updated in place
 * @param Den_charge        per-site charge density (divided by epsilon_LB to form rho_e)
 * @param Psi               output potential
 * @param ElectricField     output field, three components of Np values each
 * @param Error             output per-site residual
 * @param tau               relaxation time; the relaxation rate is 1/tau
 * @param epsilon_LB        divisor applied to Den_charge
 * @param UseSlippingVelBC  when true the charge source term is forced to zero
 * @param start,finish      half-open range of sites to process
 * @param Np                stride between consecutive distribution slots
 */
extern "C" void ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
                                            double *Den_charge, double *Psi,
                                            double *ElectricField, double *Error, double tau,
                                            double epsilon_LB, bool UseSlippingVelBC,
                                            int start, int finish, int Np) {
    const double rlx = 1.0 / tau;
    const double W0 = 0.5;
    const double W1 = 1.0 / 24.0;
    const double W2 = 1.0 / 48.0;

    for (int site = start; site < finish; ++site) {
        // With the Helmholtz-Smoluchowski slipping-velocity BC the bulk fluid is treated
        // as electroneutral, so the net space charge density is zero.
        double rho_e;
        if (UseSlippingVelBC) {
            rho_e = 0.0;
        } else {
            rho_e = Den_charge[site] / epsilon_LB;
        }

        // Gather the distributions; odd/even neighbours are stored in swapped slots.
        double f[19];
        f[0] = dist[site];
        for (int q = 1; q < 19; q += 2) {
            f[q] = dist[(q + 1) * Np + site];
            f[q + 1] = dist[q * Np + site];
        }

        // NOTE the unit of electric field here is V/lu
        const double Ex = (f[1] - f[2] + 0.5*(f[7] - f[8] + f[9] - f[10] + f[11] - f[12] + f[13] - f[14]))*4.0;
        const double Ey = (f[3] - f[4] + 0.5*(f[7] - f[8] - f[9] + f[10] + f[15] - f[16] + f[17] - f[18]))*4.0;
        const double Ez = (f[5] - f[6] + 0.5*(f[11] - f[12] - f[13] + f[14] + f[15] - f[16] - f[17] + f[18]))*4.0;
        ElectricField[site + 0 * Np] = Ex;
        ElectricField[site + 1 * Np] = Ey;
        ElectricField[site + 2 * Np] = Ez;

        // Sum of the moving distributions, accumulated in ascending q order.
        double sum_q = f[1];
        for (int q = 2; q < 19; ++q) {
            sum_q += f[q];
        }

        Error[site] = 8.0*(sum_q - f[0]) + rho_e;

        const double psi = 2.0*(f[0]*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));
        Psi[Map[site]] = psi;

        // Reset every slot of this site to its weighted share of psi.
        dist[site] = W0*psi;
        int q = 1;
        for (; q <= 6; ++q) {
            dist[q * Np + site] = W1*psi;
        }
        for (; q <= 18; ++q) {
            dist[q * Np + site] = W2*psi;
        }
    }
}
|
||||
|
||||
/**
 * Initialise the D3Q19 Poisson distributions from the potential field.
 *
 * For every site n in [start, finish) slot q of the site, dist[q*Np + n], is set to
 * W_q * Psi[Map[n]] with W0 = 1/2 (q = 0), W1 = 1/24 (q = 1..6) and W2 = 1/48 (q = 7..18).
 *
 * @param Map           site index -> index into Psi
 * @param dist          distributions, 19*Np values, written
 * @param Psi           potential field, read
 * @param start,finish  half-open range of sites to initialise
 * @param Np            stride between consecutive distribution slots
 */
extern "C" void ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
                                          int start, int finish, int Np) {
    const double W0 = 0.5;
    const double W1 = 1.0 / 24.0;
    const double W2 = 1.0 / 48.0;

    for (int site = start; site < finish; ++site) {
        // Bound by reference so the potential is re-read for each slot.
        const double &potential = Psi[Map[site]];

        dist[0 * Np + site] = W0 * potential;
        int q = 1;
        for (; q <= 6; ++q) {
            dist[q * Np + site] = W1 * potential;
        }
        for (; q <= 18; ++q) {
            dist[q * Np + site] = W2 * potential;
        }
    }
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#include <stdio.h>
|
||||
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
#define NTHREADS 512
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
|
||||
int n;
|
||||
|
||||
@@ -290,7 +290,7 @@ __global__ void dvc_ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *distev
|
||||
//__launch_bounds__(512,4)
|
||||
|
||||
__global__ void
|
||||
dvc_ScaLBL_AAodd_Compact(char * ID, int *d_neighborList, double *dist, int Np) {
|
||||
dvc_ScaLBL_AAodd_Compact(int *d_neighborList, double *dist, int Np) {
|
||||
|
||||
int n;
|
||||
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
|
||||
@@ -1321,7 +1321,7 @@ dvc_ScaLBL_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_se
|
||||
|
||||
//__launch_bounds__(512,4)
|
||||
|
||||
__global__ void dvc_ScaLBL_AAeven_Compact(char * ID, double *dist, int Np) {
|
||||
__global__ void dvc_ScaLBL_AAeven_Compact( double *dist, int Np) {
|
||||
|
||||
int n;
|
||||
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
|
||||
@@ -2390,18 +2390,18 @@ extern "C" void ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *disteven, d
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_Compact(char * ID, double *d_dist, int Np) {
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_Compact( double *d_dist, int Np) {
|
||||
cudaFuncSetCacheConfig(dvc_ScaLBL_AAeven_Compact, cudaFuncCachePreferL1);
|
||||
dvc_ScaLBL_AAeven_Compact<<<NBLOCKS,NTHREADS>>>(ID, d_dist, Np);
|
||||
dvc_ScaLBL_AAeven_Compact<<<NBLOCKS,NTHREADS>>>(d_dist, Np);
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (cudaSuccess != err){
|
||||
printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_Compact(char * ID, int *d_neighborList, double *d_dist, int Np) {
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_Compact( int *d_neighborList, double *d_dist, int Np) {
|
||||
cudaFuncSetCacheConfig(dvc_ScaLBL_AAodd_Compact, cudaFuncCachePreferL1);
|
||||
dvc_ScaLBL_AAodd_Compact<<<NBLOCKS,NTHREADS>>>(ID,d_neighborList, d_dist,Np);
|
||||
dvc_ScaLBL_AAodd_Compact<<<NBLOCKS,NTHREADS>>>(d_neighborList, d_dist,Np);
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (cudaSuccess != err){
|
||||
printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err));
|
||||
|
||||
140
cuda/D3Q7BC.cu
140
cuda/D3Q7BC.cu
@@ -6,6 +6,16 @@
|
||||
#define NTHREADS 256
|
||||
|
||||
|
||||
// Reads the pending CUDA runtime error and, when it is not cudaSuccess, prints it
// together with the label KERNEL. Expands to a single statement.
#define CHECK_ERROR(KERNEL) \
    do { \
        const cudaError_t launchStatus = cudaGetLastError(); \
        if ( launchStatus != cudaSuccess ) \
            printf("error in %s (kernel): %s \n",KERNEL,cudaGetErrorString(launchStatus)); \
    } while(0)
|
||||
|
||||
|
||||
__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
|
||||
{
|
||||
|
||||
@@ -740,28 +750,19 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z(int *d_neighbor
|
||||
// Host launcher: runs dvc_ScaLBL_Solid_Dirichlet_D3Q7 on count / 512 + 1 blocks of 512
// threads, forwarding all arguments unchanged, then reports any launch error via CHECK_ERROR.
// A single check suffices: cudaGetLastError() resets the error state once it is read.
extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_Solid_Dirichlet_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
    CHECK_ERROR("ScaLBL_Solid_Dirichlet_D3Q7");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_Solid_Neumann_D3Q7 on count / 512 + 1 blocks of 512
// threads, forwarding all arguments unchanged, then reports any launch error via CHECK_ERROR.
// A single check suffices: cudaGetLastError() resets the error state once it is read.
extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_Solid_Neumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
    CHECK_ERROR("ScaLBL_Solid_Neumann_D3Q7");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7 on count / 512 + 1 blocks of
// 512 threads, forwarding all arguments unchanged, then reports any launch error via CHECK_ERROR.
// A single check suffices: cudaGetLastError() resets the error state once it is read.
extern "C" void ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist, double *BoundaryValue,int *BoundaryLabel, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BoundaryLabel, BounceBackDist_list, BounceBackSolid_list, count);
    CHECK_ERROR("ScaLBL_Solid_DirichletAndNeumann_D3Q7");
}
|
||||
|
||||
extern "C" void ScaLBL_Solid_SlippingVelocityBC_D3Q19(double *dist, double *zeta_potential, double *ElectricField, double *SolidGrad,
|
||||
@@ -775,211 +776,142 @@ extern "C" void ScaLBL_Solid_SlippingVelocityBC_D3Q19(double *dist, double *zeta
|
||||
BounceBackDist_list, BounceBackSolid_list, FluidBoundary_list,
|
||||
lattice_weight, lattice_cx, lattice_cy, lattice_cz,
|
||||
count, Np);
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (cudaSuccess != err){
|
||||
printf("CUDA error in ScaLBL_Solid_SlippingVelocityBC_D3Q19 (kernel): %s \n",cudaGetErrorString(err));
|
||||
}
|
||||
CHECK_ERROR("ScaLBL_Solid_SlippingVelocityBC_D3Q19");
|
||||
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z on count / 512 + 1 blocks
// of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<<GRID,512>>>(list, dist, Vin, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z on count / 512 + 1 blocks
// of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<<GRID,512>>>(list, dist, Vout, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z on count / 512 + 1 blocks
// of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Vin, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z on count / 512 + 1 blocks
// of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Vout, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_Poisson_D3Q7_BC_z on count / 512 + 1 blocks of 512
// threads, forwarding all arguments unchanged, then reports any launch error via CHECK_ERROR.
// A single check suffices: cudaGetLastError() resets the error state once it is read.
extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_Poisson_D3Q7_BC_z<<<GRID,512>>>(list, Map, Psi, Vin, count);
    CHECK_ERROR("ScaLBL_Poisson_D3Q7_BC_z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_Poisson_D3Q7_BC_Z on count / 512 + 1 blocks of 512
// threads, forwarding all arguments unchanged, then reports any launch error via CHECK_ERROR.
// A single check suffices: cudaGetLastError() resets the error state once it is read.
extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_Poisson_D3Q7_BC_Z<<<GRID,512>>>(list, Map, Psi, Vout, count);
    CHECK_ERROR("ScaLBL_Poisson_D3Q7_BC_Z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z on count / 512 + 1 blocks
// of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<<GRID,512>>>(list, dist, Cin, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z on count / 512 + 1 blocks
// of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<<GRID,512>>>(list, dist, Cout, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z on count / 512 + 1 blocks
// of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Cin, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z on count / 512 + 1 blocks
// of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Cout, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z");
}
|
||||
//------------Diff-----------------
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z on count / 512 + 1 blocks
// of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z on count / 512 + 1 blocks
// of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z on count / 512 + 1 blocks
// of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z on count / 512 + 1 blocks
// of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z");
}
|
||||
//----------DiffAdvc-------------
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z on count / 512 + 1 blocks
// of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z on count / 512 + 1 blocks
// of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z on count / 512 + 1 blocks
// of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z on count / 512 + 1 blocks
// of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z");
}
|
||||
//----------DiffAdvcElec-------------
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z on count / 512 + 1
// blocks of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z on count / 512 + 1
// blocks of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z on count / 512 + 1
// blocks of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z");
}
|
||||
|
||||
// Host launcher: runs dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z on count / 512 + 1
// blocks of 512 threads, forwarding all arguments unchanged, then reports any launch error via
// CHECK_ERROR. A single check suffices: cudaGetLastError() resets the error state once read.
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np){
    const int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z");
}
|
||||
//-------------------------------
|
||||
|
||||
561
cuda/Ion.cu
561
cuda/Ion.cu
@@ -5,6 +5,224 @@
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
|
||||
|
||||
/**
 * Unpack distribution q from a receive buffer into dist.
 *
 * For each of the linkCount entries, links[start + k] gives a (reordered) slot index;
 * list[start + slot] is the destination site and recvbuf[start + slot] the value to store
 * at dist[q*N + site]. Entries whose site is negative are skipped.
 *
 * @param q          distribution slot to write
 * @param list       destination site per buffer slot (negative = ignore)
 * @param links      slot index per link
 * @param start      offset applied to links, list and recvbuf
 * @param linkCount  number of links to process
 * @param recvbuf    received values
 * @param dist       distributions, written at q*N + site
 * @param N          stride between consecutive distribution slots
 */
extern "C" void Membrane_D3Q19_Unpack(int q, int *list, int *links, int start, int linkCount,
                                      double *recvbuf, double *dist, int N) {
    for (int k = 0; k < linkCount; ++k) {
        // Resolve the reordered link to its position in the receive list/buffer.
        const int slot = start + links[start + k];
        const int site = list[slot];
        if (site < 0) {
            continue;  // no destination for this entry
        }
        dist[q * N + site] = recvbuf[slot];
    }
}
|
||||
|
||||
/**
 * Unpack distribution q from a receive buffer into dist, scaling each value by its
 * per-slot transport coefficient.
 *
 * For link = offset .. linkCount-1, links[start + link] gives a (reordered) slot index;
 * list[start + slot] is the destination site, and the value stored at dist[q*N + site] is
 * coef[start + slot] * recvbuf[start + slot]. Entries whose site is negative are skipped.
 *
 * The slot index is taken from links, matching Membrane_D3Q19_Unpack. It was previously
 * read from list, which left the links argument unused and used site indices as slots.
 *
 * @param q          distribution slot to write
 * @param list       destination site per buffer slot (negative = ignore)
 * @param links      slot index per link
 * @param coef       multiplicative coefficient per buffer slot
 * @param start      offset applied to links, list, coef and recvbuf
 * @param offset     first link to process
 * @param linkCount  one past the last link to process
 * @param recvbuf    received values
 * @param dist       distributions, written at q*N + site
 * @param N          stride between consecutive distribution slots
 */
extern "C" void Membrane_D3Q19_Transport(int q, int *list, int *links, double *coef, int start, int offset,
                                         int linkCount, double *recvbuf, double *dist, int N){
    for (int link = offset; link < linkCount; link++){
        // Resolve the reordered link to its position in the receive list/buffer.
        const int idx = links[start + link];
        // n is the destination site recorded for this slot.
        const int n = list[start + idx];
        const double alpha = coef[start + idx];
        // Store the scaled distribution at its destination.
        if (!(n < 0))
            dist[q * N + n] = alpha * recvbuf[start + idx];
    }
}
|
||||
|
||||
/**
 * Assign the pair of mass-transfer coefficients for every interior membrane link.
 *
 * Link l connects the entries membrane[2*l] and membrane[2*l+1]; both are reduced
 * modulo Np and mapped through Map to index Psi. When Psi(first) - Psi(second) exceeds
 * Threshold the link gets (ThresholdMassFractionIn, ThresholdMassFractionOut), otherwise
 * (MassFractionIn, MassFractionOut). The pair is written to coef[2*l], coef[2*l+1].
 *
 * Each thread handles memLinks/NBLOCKS/NTHREADS + 1 links. Distance, Nx, Ny and Nz are
 * accepted but not read by this kernel.
 */
__global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
        double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
        int memLinks, int Nx, int Ny, int Nz, int Np){

    const int passes = memLinks/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < passes; ++pass){
        //........Get 1-D index for this thread....................
        const int link = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (link >= memLinks) continue;

        // Sites on the two sides of the link, in the strided layout used by Psi.
        const int siteIn  = Map[membrane[2*link] % Np];
        const int siteOut = Map[membrane[2*link+1] % Np];

        /* membrane potential for this link */
        const double membranePotential = Psi[siteIn] - Psi[siteOut];
        const bool aboveThreshold = membranePotential > Threshold;

        /* Save the mass transfer coefficients */
        coef[2*link]   = aboveThreshold ? ThresholdMassFractionIn  : MassFractionIn;
        coef[2*link+1] = aboveThreshold ? ThresholdMassFractionOut : MassFractionOut;
    }
}
|
||||
|
||||
/**
 * Assign the pair of mass-transfer coefficients for the membrane links of a halo
 * receive list (links nlinks .. count-1 of d3q7_linkList).
 *
 * For each such link the receive-list entry d3q7_recvlist[start + d3q7_linkList[link]] is
 * mapped through Map to a strided index, which supplies the local distance and potential.
 * The neighbour is found by decomposing that index into (i,j,k) with extents Nx, Ny and
 * stepping by (-Cqx, -Cqy, -Cqz). With dPsi = Psi(local) - Psi(neighbour):
 *   - local distance > 0:  dPsi < -Threshold -> (aq, ap) = (MassFractionOut, MassFractionIn),
 *                          otherwise -> (ThresholdMassFractionOut, ThresholdMassFractionIn);
 *   - local distance <= 0: dPsi > Threshold -> (ThresholdMassFractionIn, ThresholdMassFractionOut),
 *                          otherwise -> (MassFractionIn, MassFractionOut).
 * The result goes to coef[2*(link-nlinks)] (aq) and coef[2*(link-nlinks)+1] (ap).
 *
 * N and Nz are accepted but not read; the neighbour's distance is loaded but not used.
 */
__global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
        const int Cqx, const int Cqy, int const Cqz,
        int *Map, double *Distance, double *Psi, double Threshold,
        double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
        int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
        const int N, const int Nx, const int Ny, const int Nz) {

    const int passes = (count-nlinks)/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < passes; ++pass){
        //........Get 1-D index for this thread (membrane links start at nlinks)........
        const int link = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + nlinks;
        if (link >= count) continue;

        // Index into the recv list (accounts for the reordering of links).
        const int slot = d3q7_linkList[link]; // THINK start NEEDS TO BE HERE
        // Distribution index, then its position in the strided layout.
        const int site = d3q7_recvlist[start+slot];
        const int localIdx = Map[site];
        const double distanceLocal = Distance[localIdx];
        const double psiLocal = Psi[localIdx];

        // Recover the 3-D indices and step along the streaming direction.
        int k = localIdx/(Nx*Ny);
        int j = (localIdx-Nx*Ny*k)/Nx;
        int i = localIdx-Nx*Ny*k-Nx*j;
        i -= Cqx;
        j -= Cqy;
        k -= Cqz;
        const int neighborIdx = k*Nx*Ny + j*Nx + i;
        const double distanceNonlocal = Distance[neighborIdx];
        (void) distanceNonlocal; // loaded as in the original code, not otherwise used
        const double psiNonlocal = Psi[neighborIdx];

        const double membranePotential = psiLocal - psiNonlocal;

        // Default coefficients, overridden by the membrane rules below.
        double aq = MassFractionIn;
        double ap = MassFractionOut;
        if (distanceLocal > 0.0){
            /* link is inside membrane */
            const bool belowNegThreshold = membranePotential < Threshold*(-1.0);
            ap = belowNegThreshold ? MassFractionIn  : ThresholdMassFractionIn;
            aq = belowNegThreshold ? MassFractionOut : ThresholdMassFractionOut;
        }
        else if (membranePotential > Threshold){
            aq = ThresholdMassFractionIn;
            ap = ThresholdMassFractionOut;
        }

        // Store the coefficients for this link, indexed relative to the first membrane link.
        const int out = 2*(link-nlinks);
        coef[out] = aq;
        coef[out+1] = ap;
    }
}
|
||||
|
||||
/**
 * Unpack received D3Q7 distributions from the receive buffer into `dist`.
 *
 * Links in [0, nlinks) are regular links: the received value simply
 * overwrites dist[q*N + n].
 * Links in [nlinks, count) are membrane links: the value already stored in
 * dist is blended with the received value using the coefficient pair stored
 * for that link,
 *     f_new = (1 - aq) * f_old + ap * f_recv,
 * where aq = coef[2*(link-nlinks)] and ap = coef[2*(link-nlinks)+1].
 * Entries whose recv-list value is negative are skipped.
 *
 * @param q              distribution component being unpacked (selects the q*N slab of dist)
 * @param d3q7_recvlist  recv list; d3q7_recvlist[start+idx] is the destination index n
 * @param d3q7_linkList  maps a link number to its position idx in the recv list
 * @param start          offset applied to both d3q7_recvlist and recvbuf
 * @param nlinks         number of regular links (membrane links follow them)
 * @param count          total number of links (regular + membrane)
 * @param recvbuf        receive buffer holding the incoming distributions
 * @param dist           distribution array that is updated in place
 * @param N              stride between consecutive components in dist
 * @param coef           per-membrane-link coefficient pairs (aq, ap)
 */
__global__ void dvc_ScaLBL_D3Q7_Membrane_Unpack(int q,
        int *d3q7_recvlist, int *d3q7_linkList, int start, int nlinks, int count,
        double *recvbuf, double *dist, int N, double *coef) {

    // Each thread walks linksPerThread links; the count is derived from the
    // compile-time NBLOCKS / NTHREADS constants.
    const int linksPerThread = count / NBLOCKS / NTHREADS + 1;

    for (int pass = 0; pass < linksPerThread; pass++) {
        //........Get 1-D index for this thread....................
        const int link = linksPerThread * blockIdx.x * blockDim.x + pass * blockDim.x + threadIdx.x;

        const bool isRegular = (link < nlinks);
        if (!isRegular && !(link < count)) {
            continue; // neither a regular nor a membrane link
        }

        // position in the recv list (deals with reordering of links)
        const int slot = d3q7_linkList[link];
        // destination distribution index
        const int site = d3q7_recvlist[start + slot];
        if (site < 0) {
            continue; // negative entries are not unpacked
        }

        const double incoming = recvbuf[start + slot];

        if (isRegular) {
            /* regular link: plain copy from the receive buffer */
            dist[q * N + site] = incoming;
        } else {
            /* membrane link: blend stored and received values */
            const int pair = 2 * (link - nlinks);
            const double aq = coef[pair];
            const double ap = coef[pair + 1];
            const double stored = dist[q * N + site];
            dist[q * N + site] = (1 - aq) * stored + ap * incoming;
        }
    }
}
|
||||
|
||||
/**
 * Apply the membrane transport rule to every membrane link.
 *
 * For link L the two distribution indices are iq = membrane[2L] and
 * ip = membrane[2L+1], and the coefficients are aq = coef[2L] and
 * ap = coef[2L+1]. The pair of distributions is replaced by
 *     fq' = (1 - aq) * fq + ap * fp
 *     fp' = (1 - ap) * fp + aq * fq
 * and the change of each distribution is added to Den at index
 * (iq % Np) and (ip % Np) respectively.
 *
 * @param membrane  pairs of distribution indices, two ints per link
 * @param coef      pairs of coefficients (aq, ap), two doubles per link
 * @param dist      distribution array, updated in place
 * @param Den       array updated in place with the distribution changes
 * @param memLinks  number of membrane links
 * @param Np        modulus used to turn a distribution index into a Den index
 *
 * NOTE(review): Den is updated with a plain read-modify-write. If two links
 * handled by different threads can map to the same Den index these updates
 * could race -- confirm against how the membrane link list is built.
 */
__global__ void dvc_ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef,
        double *dist, double *Den, int memLinks, int Np){

    // Number of links each thread processes (from compile-time NBLOCKS / NTHREADS).
    const int linksPerThread = memLinks / NBLOCKS / NTHREADS + 1;

    for (int pass = 0; pass < linksPerThread; pass++) {
        //........Get 1-D index for this thread....................
        const int link = linksPerThread * blockIdx.x * blockDim.x + pass * blockDim.x + threadIdx.x;
        if (!(link < memLinks)) {
            continue;
        }

        // coefficient pair for this link
        const double aq = coef[2 * link];
        const double ap = coef[2 * link + 1];

        // the two distribution slots joined by this link
        const int iq = membrane[2 * link];
        const int ip = membrane[2 * link + 1];

        // matching indices into Den
        const int nq = iq % Np;
        const int np = ip % Np;

        // read both sides before anything is written
        const double fq = dist[iq];
        const double fp = dist[ip];

        const double fqq = (1 - aq) * fq + ap * fp;
        const double fpp = (1 - ap) * fp + aq * fq;

        double Cq = Den[nq];
        double Cp = Den[np];
        Cq += fqq - fq;
        Cp += fpp - fp;

        Den[nq] = Cq;
        Den[np] = Cp;
        dist[iq] = fqq;
        dist[ip] = fpp;
    }
}
|
||||
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
|
||||
int n,nread;
|
||||
double fq,Ci;
|
||||
@@ -106,6 +324,7 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, doub
|
||||
double Ex,Ey,Ez;//electrical field
|
||||
double flux_diffusive_x,flux_diffusive_y,flux_diffusive_z;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
double X,Y,Z,factor_x,factor_y,factor_z;
|
||||
int nr1,nr2,nr3,nr4,nr5,nr6;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
@@ -114,80 +333,96 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, doub
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
|
||||
//Load data
|
||||
Ci=Den[n];
|
||||
Ex=ElectricField[n+0*Np];
|
||||
Ey=ElectricField[n+1*Np];
|
||||
Ez=ElectricField[n+2*Np];
|
||||
ux=Velocity[n+0*Np];
|
||||
uy=Velocity[n+1*Np];
|
||||
uz=Velocity[n+2*Np];
|
||||
uEPx=zi*Di/Vt*Ex;
|
||||
uEPy=zi*Di/Vt*Ey;
|
||||
uEPz=zi*Di/Vt*Ez;
|
||||
//Load data
|
||||
Ex = ElectricField[n + 0 * Np];
|
||||
Ey = ElectricField[n + 1 * Np];
|
||||
Ez = ElectricField[n + 2 * Np];
|
||||
ux = Velocity[n + 0 * Np];
|
||||
uy = Velocity[n + 1 * Np];
|
||||
uz = Velocity[n + 2 * Np];
|
||||
uEPx = zi * Di / Vt * Ex;
|
||||
uEPy = zi * Di / Vt * Ey;
|
||||
uEPz = zi * Di / Vt * Ez;
|
||||
|
||||
// q=0
|
||||
f0 = dist[n];
|
||||
// q=1
|
||||
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
|
||||
f1 = dist[nr1]; // reading the f1 data into register fq
|
||||
// q=2
|
||||
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
|
||||
f2 = dist[nr2]; // reading the f2 data into register fq
|
||||
// q=3
|
||||
nr3 = neighborList[n+2*Np]; // neighbor 4
|
||||
f3 = dist[nr3];
|
||||
// q=4
|
||||
nr4 = neighborList[n+3*Np]; // neighbor 3
|
||||
f4 = dist[nr4];
|
||||
// q=5
|
||||
nr5 = neighborList[n+4*Np];
|
||||
f5 = dist[nr5];
|
||||
// q=6
|
||||
nr6 = neighborList[n+5*Np];
|
||||
f6 = dist[nr6];
|
||||
|
||||
// compute diffusive flux
|
||||
flux_diffusive_x = (1.0-0.5*rlx)*((f1-f2)-ux*Ci);
|
||||
flux_diffusive_y = (1.0-0.5*rlx)*((f3-f4)-uy*Ci);
|
||||
flux_diffusive_z = (1.0-0.5*rlx)*((f5-f6)-uz*Ci);
|
||||
FluxDiffusive[n+0*Np] = flux_diffusive_x;
|
||||
FluxDiffusive[n+1*Np] = flux_diffusive_y;
|
||||
FluxDiffusive[n+2*Np] = flux_diffusive_z;
|
||||
FluxAdvective[n+0*Np] = ux*Ci;
|
||||
FluxAdvective[n+1*Np] = uy*Ci;
|
||||
FluxAdvective[n+2*Np] = uz*Ci;
|
||||
FluxElectrical[n+0*Np] = uEPx*Ci;
|
||||
FluxElectrical[n+1*Np] = uEPy*Ci;
|
||||
FluxElectrical[n+2*Np] = uEPz*Ci;
|
||||
// q=0
|
||||
f0 = dist[n];
|
||||
// q=1
|
||||
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
|
||||
f1 = dist[nr1]; // reading the f1 data into register fq
|
||||
// q=2
|
||||
nr2 = neighborList[n + Np]; // neighbor 1 ( < 10Np => even part of dist)
|
||||
f2 = dist[nr2]; // reading the f2 data into register fq
|
||||
// q=3
|
||||
nr3 = neighborList[n + 2 * Np]; // neighbor 4
|
||||
f3 = dist[nr3];
|
||||
// q=4
|
||||
nr4 = neighborList[n + 3 * Np]; // neighbor 3
|
||||
f4 = dist[nr4];
|
||||
// q=5
|
||||
nr5 = neighborList[n + 4 * Np];
|
||||
f5 = dist[nr5];
|
||||
// q=6
|
||||
nr6 = neighborList[n + 5 * Np];
|
||||
f6 = dist[nr6];
|
||||
|
||||
// q=0
|
||||
dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci;
|
||||
//dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
// compute diffusive flux
|
||||
Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
|
||||
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
|
||||
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
|
||||
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
|
||||
FluxDiffusive[n + 0 * Np] = flux_diffusive_x;
|
||||
FluxDiffusive[n + 1 * Np] = flux_diffusive_y;
|
||||
FluxDiffusive[n + 2 * Np] = flux_diffusive_z;
|
||||
FluxAdvective[n + 0 * Np] = ux * Ci;
|
||||
FluxAdvective[n + 1 * Np] = uy * Ci;
|
||||
FluxAdvective[n + 2 * Np] = uz * Ci;
|
||||
FluxElectrical[n + 0 * Np] = uEPx * Ci;
|
||||
FluxElectrical[n + 1 * Np] = uEPy * Ci;
|
||||
FluxElectrical[n + 2 * Np] = uEPz * Ci;
|
||||
|
||||
Den[n] = Ci;
|
||||
|
||||
// q = 1
|
||||
dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));
|
||||
//dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
/* use logistic function to prevent negative distributions*/
|
||||
X = 4.0 * (ux + uEPx);
|
||||
Y = 4.0 * (uy + uEPy);
|
||||
Z = 4.0 * (uz + uEPz);
|
||||
factor_x = X / sqrt(1 + X*X);
|
||||
factor_y = Y / sqrt(1 + Y*Y);
|
||||
factor_z = Z / sqrt(1 + Z*Z);
|
||||
|
||||
// q=2
|
||||
dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));
|
||||
//dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
// q=0
|
||||
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
|
||||
|
||||
// q = 3
|
||||
dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));
|
||||
//dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
// q = 1
|
||||
dist[nr2] =
|
||||
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
|
||||
//f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
|
||||
|
||||
// q = 4
|
||||
dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));
|
||||
//dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
|
||||
// q = 5
|
||||
dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));
|
||||
//dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
// q=2
|
||||
dist[nr1] =
|
||||
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
|
||||
//f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
|
||||
|
||||
// q = 3
|
||||
dist[nr4] =
|
||||
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y );
|
||||
//f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
|
||||
|
||||
// q = 4
|
||||
dist[nr3] =
|
||||
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
|
||||
//f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
|
||||
|
||||
// q = 5
|
||||
dist[nr6] =
|
||||
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
|
||||
//f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
|
||||
|
||||
// q = 6
|
||||
dist[nr5] =
|
||||
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
|
||||
|
||||
// q = 6
|
||||
dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));
|
||||
//dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -201,6 +436,7 @@ __global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *F
|
||||
double Ex,Ey,Ez;//electrical field
|
||||
double flux_diffusive_x,flux_diffusive_y,flux_diffusive_z;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
double X,Y,Z,factor_x,factor_y,factor_z;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
@@ -208,67 +444,83 @@ __global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *F
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
|
||||
//Load data
|
||||
Ci=Den[n];
|
||||
Ex=ElectricField[n+0*Np];
|
||||
Ey=ElectricField[n+1*Np];
|
||||
Ez=ElectricField[n+2*Np];
|
||||
ux=Velocity[n+0*Np];
|
||||
uy=Velocity[n+1*Np];
|
||||
uz=Velocity[n+2*Np];
|
||||
uEPx=zi*Di/Vt*Ex;
|
||||
uEPy=zi*Di/Vt*Ey;
|
||||
uEPz=zi*Di/Vt*Ez;
|
||||
//Load data
|
||||
//Ci = Den[n];
|
||||
Ex = ElectricField[n + 0 * Np];
|
||||
Ey = ElectricField[n + 1 * Np];
|
||||
Ez = ElectricField[n + 2 * Np];
|
||||
ux = Velocity[n + 0 * Np];
|
||||
uy = Velocity[n + 1 * Np];
|
||||
uz = Velocity[n + 2 * Np];
|
||||
uEPx = zi * Di / Vt * Ex;
|
||||
uEPy = zi * Di / Vt * Ey;
|
||||
uEPz = zi * Di / Vt * Ez;
|
||||
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f5 = dist[6*Np+n];
|
||||
f6 = dist[5*Np+n];
|
||||
|
||||
// compute diffusive flux
|
||||
flux_diffusive_x = (1.0-0.5*rlx)*((f1-f2)-ux*Ci);
|
||||
flux_diffusive_y = (1.0-0.5*rlx)*((f3-f4)-uy*Ci);
|
||||
flux_diffusive_z = (1.0-0.5*rlx)*((f5-f6)-uz*Ci);
|
||||
FluxDiffusive[n+0*Np] = flux_diffusive_x;
|
||||
FluxDiffusive[n+1*Np] = flux_diffusive_y;
|
||||
FluxDiffusive[n+2*Np] = flux_diffusive_z;
|
||||
FluxAdvective[n+0*Np] = ux*Ci;
|
||||
FluxAdvective[n+1*Np] = uy*Ci;
|
||||
FluxAdvective[n+2*Np] = uz*Ci;
|
||||
FluxElectrical[n+0*Np] = uEPx*Ci;
|
||||
FluxElectrical[n+1*Np] = uEPy*Ci;
|
||||
FluxElectrical[n+2*Np] = uEPz*Ci;
|
||||
f0 = dist[n];
|
||||
f1 = dist[2 * Np + n];
|
||||
f2 = dist[1 * Np + n];
|
||||
f3 = dist[4 * Np + n];
|
||||
f4 = dist[3 * Np + n];
|
||||
f5 = dist[6 * Np + n];
|
||||
f6 = dist[5 * Np + n];
|
||||
|
||||
// q=0
|
||||
dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci;
|
||||
//dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
// compute diffusive flux
|
||||
Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
|
||||
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
|
||||
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
|
||||
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
|
||||
FluxDiffusive[n + 0 * Np] = flux_diffusive_x;
|
||||
FluxDiffusive[n + 1 * Np] = flux_diffusive_y;
|
||||
FluxDiffusive[n + 2 * Np] = flux_diffusive_z;
|
||||
FluxAdvective[n + 0 * Np] = ux * Ci;
|
||||
FluxAdvective[n + 1 * Np] = uy * Ci;
|
||||
FluxAdvective[n + 2 * Np] = uz * Ci;
|
||||
FluxElectrical[n + 0 * Np] = uEPx * Ci;
|
||||
FluxElectrical[n + 1 * Np] = uEPy * Ci;
|
||||
FluxElectrical[n + 2 * Np] = uEPz * Ci;
|
||||
|
||||
Den[n] = Ci;
|
||||
|
||||
/* use logistic function to prevent negative distributions*/
|
||||
X = 4.0 * (ux + uEPx);
|
||||
Y = 4.0 * (uy + uEPy);
|
||||
Z = 4.0 * (uz + uEPz);
|
||||
factor_x = X / sqrt(1 + X*X);
|
||||
factor_y = Y / sqrt(1 + Y*Y);
|
||||
factor_z = Z / sqrt(1 + Z*Z);
|
||||
|
||||
// q = 1
|
||||
dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));
|
||||
//dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
// q=0
|
||||
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
|
||||
|
||||
// q=2
|
||||
dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));
|
||||
//dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
// q = 1
|
||||
dist[1 * Np + n] =
|
||||
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
|
||||
//f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
|
||||
|
||||
// q = 3
|
||||
dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));
|
||||
//dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
// q=2
|
||||
dist[2 * Np + n] =
|
||||
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
|
||||
//f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
|
||||
|
||||
// q = 4
|
||||
dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));
|
||||
//dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
// q = 3
|
||||
dist[3 * Np + n] =
|
||||
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y);
|
||||
//f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
|
||||
|
||||
// q = 5
|
||||
dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));
|
||||
//dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
// q = 4
|
||||
dist[4 * Np + n] =
|
||||
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
|
||||
//f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
|
||||
|
||||
// q = 6
|
||||
dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));
|
||||
//dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
|
||||
// q = 5
|
||||
dist[5 * Np + n] =
|
||||
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
|
||||
//f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
|
||||
|
||||
// q = 6
|
||||
dist[6 * Np + n] =
|
||||
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
|
||||
//f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -314,7 +566,7 @@ __global__ void dvc_ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, in
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
|
||||
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, double IonValence, int ion_component, int start, int finish, int Np){
|
||||
|
||||
int n;
|
||||
double Ci;//ion concentration of species i
|
||||
@@ -327,10 +579,17 @@ __global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDe
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
|
||||
Ci = Den[n+ion_component*Np];
|
||||
CD = ChargeDensity[n];
|
||||
if (ion_component == 0) CD=0.0;
|
||||
CD_tmp = F*IonValence*Ci;
|
||||
ChargeDensity[n] = CD*(ion_component>0) + CD_tmp;
|
||||
ChargeDensity[n] = CD + CD_tmp;
|
||||
|
||||
// Ci = Den[n+ion_component*Np];
|
||||
// CD = ChargeDensity[n];
|
||||
// CD_tmp = F*IonValence*Ci;
|
||||
// ChargeDensity[n] = CD*(ion_component>0) + CD_tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -408,7 +667,7 @@ extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np)
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
|
||||
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, double IonValence, int ion_component, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<<NBLOCKS,NTHREADS >>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np);
|
||||
@@ -419,3 +678,61 @@ extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
|
||||
double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
|
||||
int memLinks, int Nx, int Ny, int Nz, int Np){
|
||||
|
||||
dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef<<<NBLOCKS,NTHREADS >>>(membrane, Map, Distance, Psi, coef,
|
||||
Threshold, MassFractionIn, MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut,
|
||||
memLinks, Nx, Ny, Nz, Np);
|
||||
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (cudaSuccess != err){
|
||||
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef: %s \n",cudaGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
|
||||
const int Cqx, const int Cqy, int const Cqz,
|
||||
int *Map, double *Distance, double *Psi, double Threshold,
|
||||
double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
|
||||
int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
|
||||
const int N, const int Nx, const int Ny, const int Nz) {
|
||||
|
||||
dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo<<<NBLOCKS,NTHREADS >>>(
|
||||
Cqx, Cqy, Cqz, Map, Distance, Psi, Threshold,
|
||||
MassFractionIn, MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut,
|
||||
d3q7_recvlist, d3q7_linkList, coef, start, nlinks, count, N, Nx, Ny, Nz);
|
||||
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (cudaSuccess != err){
|
||||
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo: %s \n",cudaGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q,
|
||||
int *d3q7_recvlist, int *d3q7_linkList, int start, int nlinks, int count,
|
||||
double *recvbuf, double *dist, int N, double *coef) {
|
||||
|
||||
dvc_ScaLBL_D3Q7_Membrane_Unpack<<<NBLOCKS,NTHREADS >>>(q, d3q7_recvlist, d3q7_linkList, start, nlinks, count,
|
||||
recvbuf, dist, N, coef) ;
|
||||
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (cudaSuccess != err){
|
||||
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_Unpack: %s \n",cudaGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef,
|
||||
double *dist, double *Den, int memLinks, int Np){
|
||||
|
||||
dvc_ScaLBL_D3Q7_Membrane_IonTransport<<<NBLOCKS,NTHREADS >>>(membrane, coef, dist, Den, memLinks, Np);
|
||||
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (cudaSuccess != err){
|
||||
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_IonTransport: %s \n",cudaGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -4,8 +4,8 @@
|
||||
//*************************************************************************
|
||||
#include <cuda.h>
|
||||
|
||||
#define NBLOCKS 560
|
||||
#define NTHREADS 128
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 512
|
||||
|
||||
__global__ void INITIALIZE(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
|
||||
{
|
||||
|
||||
407
cuda/Poisson.cu
407
cuda/Poisson.cu
@@ -271,6 +271,413 @@ __global__ void dvc_ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Ps
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Compute the electric potential from the D3Q19 distributions (even step
 * layout) and store it in Psi.
 *
 * For every n in [start, finish) the 19 distributions dist[k*Np + n],
 * k = 0..18, are summed and the result, minus half of the scaled charge
 * density, is written to Psi[Map[n]]:
 *     Psi[Map[n]] = sum_k f_k - 0.5 * rho_e
 * where rho_e = Den_charge[n] / epsilon_LB, or 0 when UseSlippingVelBC is set.
 *
 * The range is distributed over the launched threads with a grid-stride
 * loop, so each n is handled by exactly one thread for any launch
 * configuration (every n is independent: only index n is read and only
 * Psi[Map[n]] is written).
 *
 * @param Map              maps n to the index used for Psi
 * @param dist             D3Q19 distributions, component k stored at k*Np + n
 * @param Den_charge       charge density per n
 * @param Psi              output electric potential
 * @param epsilon_LB       divisor applied to the charge density
 * @param UseSlippingVelBC when true the charge density term is taken as zero
 * @param start            first n to process
 * @param finish           one past the last n to process
 * @param Np               stride between distribution components
 */
__global__ void dvc_ScaLBL_D3Q19_AAeven_Poisson_ElectricPotential(
    int *Map, double *dist, double *Den_charge, double *Psi, double epsilon_LB, bool UseSlippingVelBC, int start, int finish, int Np) {

    const int stride = blockDim.x * gridDim.x;

    for (int n = start + blockIdx.x * blockDim.x + threadIdx.x; n < finish; n += stride) {
        // local charge density (zero when the slipping-velocity BC is used)
        const double rho_e = UseSlippingVelBC ? 0.0 : Den_charge[n] / epsilon_LB;

        //........................................................................
        // Load the 19 distributions; odd/even components are stored swapped
        // pairwise (f1 lives in slab 2, f2 in slab 1, and so on).
        const double f0 = dist[n];
        const double f1 = dist[2 * Np + n];
        const double f2 = dist[1 * Np + n];
        const double f3 = dist[4 * Np + n];
        const double f4 = dist[3 * Np + n];
        const double f5 = dist[6 * Np + n];
        const double f6 = dist[5 * Np + n];
        const double f7 = dist[8 * Np + n];
        const double f8 = dist[7 * Np + n];
        const double f9 = dist[10 * Np + n];
        const double f10 = dist[9 * Np + n];
        const double f11 = dist[12 * Np + n];
        const double f12 = dist[11 * Np + n];
        const double f13 = dist[14 * Np + n];
        const double f14 = dist[13 * Np + n];
        const double f15 = dist[16 * Np + n];
        const double f16 = dist[15 * Np + n];
        const double f17 = dist[18 * Np + n];
        const double f18 = dist[17 * Np + n];

        // Summation order kept fixed so the floating-point result is reproducible.
        const double psi = f0 + f2 + f1 + f4 + f3 + f6 + f5 + f8 + f7 + f10 + f9 + f12 +
                           f11 + f14 + f13 + f16 + f15 + f18 + f17;

        const int idx = Map[n];
        Psi[idx] = psi - 0.5 * rho_e;
    }
}
|
||||
|
||||
/**
 * D3Q19 Poisson update for the odd step: distributions are gathered through
 * neighborList, the potential and electric field are evaluated, and every
 * distribution is overwritten with weight * psi.
 *
 * For each n in [start, finish):
 *   - f0 is read from dist[n]; f1..f18 are read from
 *     dist[neighborList[n + (k-1)*Np]];
 *   - psi = 2 * ( f0*(1 - rlx) + rlx*(sum_{k>=1} f_k + 0.125*rho_e) ),
 *     with rlx = 1/tau, is stored in Psi[Map[n]];
 *   - the electric field components are stored in ElectricField[n + c*Np];
 *   - dist[n] = W0*psi, and each pair (k, k+1) is written to the swapped
 *     neighbour slot with W1*psi (k = 1..6) or W2*psi (k = 7..18).
 *
 * @param neighborList     gather indices, 18 slabs of length Np
 * @param Map              maps n to the index used for Psi
 * @param dist             D3Q19 distributions, updated in place
 * @param Den_charge       charge density per n
 * @param Psi              output electric potential
 * @param ElectricField    output field, three slabs of length Np
 * @param tau              relaxation time (rlx = 1/tau)
 * @param epsilon_LB       divisor applied to the charge density
 * @param UseSlippingVelBC when true the charge density term is taken as zero
 * @param start            first n to process
 * @param finish           one past the last n to process
 * @param Np               slab stride for dist, neighborList and ElectricField
 */
__global__ void dvc_ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
                                               double *dist, double *Den_charge,
                                               double *Psi, double *ElectricField,
                                               double tau, double epsilon_LB, bool UseSlippingVelBC,
                                               int start, int finish, int Np) {
    const double rlx = 1.0 / tau;

    // lattice weights applied to psi when the distributions are rewritten
    const double W0 = 0.5;
    const double W1 = 1.0 / 24.0;
    const double W2 = 1.0 / 48.0;

    // Number of sites each thread processes (from compile-time NBLOCKS / NTHREADS).
    const int S = Np / NBLOCKS / NTHREADS + 1;
    for (int s = 0; s < S; s++) {
        //........Get 1-D index for this thread....................
        const int n = S * blockIdx.x * blockDim.x + s * blockDim.x + threadIdx.x + start;
        if (!(n < finish)) {
            continue;
        }

        // When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk
        // fluid is considered as electroneutral and thus the net space charge
        // density is zero.
        const double rho_e = UseSlippingVelBC ? 0.0 : Den_charge[n] / epsilon_LB;

        // ---- gather: all reads happen before any write to dist ----
        const double f0 = dist[n];

        const int nr1 = neighborList[n];
        const double f1 = dist[nr1];
        const int nr2 = neighborList[n + Np];
        const double f2 = dist[nr2];
        const int nr3 = neighborList[n + 2 * Np];
        const double f3 = dist[nr3];
        const int nr4 = neighborList[n + 3 * Np];
        const double f4 = dist[nr4];
        const int nr5 = neighborList[n + 4 * Np];
        const double f5 = dist[nr5];
        const int nr6 = neighborList[n + 5 * Np];
        const double f6 = dist[nr6];
        const int nr7 = neighborList[n + 6 * Np];
        const double f7 = dist[nr7];
        const int nr8 = neighborList[n + 7 * Np];
        const double f8 = dist[nr8];
        const int nr9 = neighborList[n + 8 * Np];
        const double f9 = dist[nr9];
        const int nr10 = neighborList[n + 9 * Np];
        const double f10 = dist[nr10];
        const int nr11 = neighborList[n + 10 * Np];
        const double f11 = dist[nr11];
        const int nr12 = neighborList[n + 11 * Np];
        const double f12 = dist[nr12];
        const int nr13 = neighborList[n + 12 * Np];
        const double f13 = dist[nr13];
        const int nr14 = neighborList[n + 13 * Np];
        const double f14 = dist[nr14];
        const int nr15 = neighborList[n + 14 * Np];
        const double f15 = dist[nr15];
        const int nr16 = neighborList[n + 15 * Np];
        const double f16 = dist[nr16];
        const int nr17 = neighborList[n + 16 * Np];
        const double f17 = dist[nr17];
        const int nr18 = neighborList[n + 17 * Np];
        const double f18 = dist[nr18];

        // Summation order kept fixed so the floating-point result is reproducible.
        const double sum_q = f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15+f16+f17+f18;

        const double psi = 2.0*(f0*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));

        const int idx = Map[n];
        Psi[idx] = psi;

        // NOTE the unit of electric field here is V/lu
        const double Ex = (f1 - f2 + 0.5*(f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14))*4.0;
        const double Ey = (f3 - f4 + 0.5*(f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18))*4.0;
        const double Ez = (f5 - f6 + 0.5*(f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18))*4.0;
        ElectricField[n + 0 * Np] = Ex;
        ElectricField[n + 1 * Np] = Ey;
        ElectricField[n + 2 * Np] = Ez;

        // ---- scatter: every distribution is reset to weight * psi ----
        // Each q is written to the slot its partner was read from
        // (q=1 -> nr2, q=2 -> nr1, ...); the store order is kept as-is.

        // q = 0
        dist[n] = W0*psi;

        // q = 1..6
        dist[nr2] = W1*psi;
        dist[nr1] = W1*psi;
        dist[nr4] = W1*psi;
        dist[nr3] = W1*psi;
        dist[nr6] = W1*psi;
        dist[nr5] = W1*psi;
        //........................................................................

        // q = 7..18
        dist[nr8] = W2*psi;
        dist[nr7] = W2*psi;
        dist[nr10] = W2*psi;
        dist[nr9] = W2*psi;
        dist[nr12] = W2*psi;
        dist[nr11] = W2*psi;
        dist[nr14] = W2*psi;
        dist[nr13] = W2*psi;
        dist[nr16] = W2*psi;
        dist[nr15] = W2*psi;
        dist[nr18] = W2*psi;
        dist[nr17] = W2*psi;
    }
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
|
||||
double *Den_charge, double *Psi,
|
||||
double *ElectricField, double *Error, double tau,
|
||||
double epsilon_LB, bool UseSlippingVelBC,
|
||||
int start, int finish, int Np) {
|
||||
int n;
|
||||
double psi; //electric potential
|
||||
double Ex, Ey, Ez; //electric field
|
||||
double rho_e; //local charge density
|
||||
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
|
||||
f16, f17, f18;
|
||||
double error,sum_q;
|
||||
double rlx = 1.0 / tau;
|
||||
int idx;
|
||||
double W0 = 0.5;
|
||||
double W1 = 1.0/24.0;
|
||||
double W2 = 1.0/48.0;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
|
||||
if (n<finish) {
|
||||
//Load data
|
||||
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
|
||||
//and thus the net space charge density is zero.
|
||||
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
|
||||
|
||||
f0 = dist[n];
|
||||
f1 = dist[2 * Np + n];
|
||||
f2 = dist[1 * Np + n];
|
||||
f3 = dist[4 * Np + n];
|
||||
f4 = dist[3 * Np + n];
|
||||
f5 = dist[6 * Np + n];
|
||||
f6 = dist[5 * Np + n];
|
||||
|
||||
f7 = dist[8 * Np + n];
|
||||
f8 = dist[7 * Np + n];
|
||||
f9 = dist[10 * Np + n];
|
||||
f10 = dist[9 * Np + n];
|
||||
f11 = dist[12 * Np + n];
|
||||
f12 = dist[11 * Np + n];
|
||||
f13 = dist[14 * Np + n];
|
||||
f14 = dist[13 * Np + n];
|
||||
f15 = dist[16 * Np + n];
|
||||
f16 = dist[15 * Np + n];
|
||||
f17 = dist[18 * Np + n];
|
||||
f18 = dist[17 * Np + n];
|
||||
|
||||
Ex = (f1 - f2 + 0.5*(f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14))*4.0; //NOTE the unit of electric field here is V/lu
|
||||
Ey = (f3 - f4 + 0.5*(f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18))*4.0;
|
||||
Ez = (f5 - f6 + 0.5*(f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18))*4.0;
|
||||
ElectricField[n + 0 * Np] = Ex;
|
||||
ElectricField[n + 1 * Np] = Ey;
|
||||
ElectricField[n + 2 * Np] = Ez;
|
||||
|
||||
sum_q = f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15+f16+f17+f18;
|
||||
error = 8.0*(sum_q - f0) + rho_e;
|
||||
|
||||
psi = 2.0*(f0*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));
|
||||
|
||||
idx = Map[n];
|
||||
Psi[idx] = psi;
|
||||
|
||||
// q = 0
|
||||
dist[n] = W0*psi;//
|
||||
|
||||
// q = 1
|
||||
dist[1 * Np + n] = W1*psi;//f1 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
||||
|
||||
// q = 2
|
||||
dist[2 * Np + n] = W1*psi;//f2 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
||||
|
||||
// q = 3
|
||||
dist[3 * Np + n] = W1*psi;//f3 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
||||
|
||||
// q = 4
|
||||
dist[4 * Np + n] = W1*psi;//f4 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
||||
|
||||
// q = 5
|
||||
dist[5 * Np + n] = W1*psi;//f5 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
||||
|
||||
// q = 6
|
||||
dist[6 * Np + n] = W1*psi;//f6 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
|
||||
|
||||
dist[7 * Np + n] = W2*psi;//f7 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
||||
dist[8 * Np + n] = W2*psi;//f8* (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
||||
dist[9 * Np + n] = W2*psi;//f9 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
||||
dist[10 * Np + n] = W2*psi;//f10 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
||||
dist[11 * Np + n] = W2*psi;//f11 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
||||
dist[12 * Np + n] = W2*psi;//f12 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
||||
dist[13 * Np + n] = W2*psi;//f13 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
||||
dist[14 * Np + n] = W2*psi;//f14 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
||||
dist[15 * Np + n] = W2*psi;//f15 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
||||
dist[16 * Np + n] = W2*psi;//f16 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
||||
dist[17 * Np + n] = W2*psi;//f17 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
||||
dist[18 * Np + n] = W2*psi;//f18 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
|
||||
|
||||
//........................................................................
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Device kernel: seed the 19 Poisson distributions of every site in
 * [start, finish) from the current electric potential.
 *
 * For site n the potential psi = Psi[Map[n]] is distributed as
 *   dist[0*Np + n]        = W0 * psi
 *   dist[q*Np + n], q=1..6  = W1 * psi
 *   dist[q*Np + n], q=7..18 = W2 * psi
 * With W0 = 1/2, W1 = 1/24, W2 = 1/48 the weights sum to one
 * (1/2 + 6/24 + 12/48), so the 19 distributions add up to psi.
 *
 * @param Map    maps the site index n to an index into Psi
 * @param dist   distribution array, written at q*Np + n for q = 0..18
 * @param Psi    electric potential, read at Map[n]
 * @param start  first site index handled (inclusive)
 * @param finish last site index handled (exclusive)
 * @param Np     stride between consecutive q-planes of dist
 *
 * NOTE(review): psi is read once per site; this assumes dist and Psi do not
 * overlap, which the original per-store re-read did not require -- confirm.
 */
__global__ void dvc_ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
                                              int start, int finish, int Np) {
    const double W0 = 0.5;        // weight for q = 0
    const double W1 = 1.0 / 24.0; // weight for q = 1..6
    const double W2 = 1.0 / 48.0; // weight for q = 7..18

    // Every thread visits S sites so the fixed NBLOCKS x NTHREADS launch covers Np sites.
    const int S = Np / NBLOCKS / NTHREADS + 1;
    for (int s = 0; s < S; s++) {
        //........Get 1-D index for this thread....................
        const int n = S * blockIdx.x * blockDim.x + s * blockDim.x + threadIdx.x + start;
        if (n < finish) {
            // One global-memory read of the potential instead of nineteen.
            const double psi = Psi[Map[n]];

            dist[n] = W0 * psi;
            for (int q = 1; q <= 6; q++) {
                dist[q * Np + n] = W1 * psi;
            }
            for (int q = 7; q <= 18; q++) {
                dist[q * Np + n] = W2 * psi;
            }
        }
    }
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
|
||||
double *dist, double *Den_charge,
|
||||
double *Psi, double *ElectricField,
|
||||
double tau, double epsilon_LB, bool UseSlippingVelBC,
|
||||
int start, int finish, int Np) {
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q19_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList, Map,
|
||||
dist, Den_charge, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, start, finish, Np);
|
||||
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (cudaSuccess != err){
|
||||
printf("CUDA error in dvc_ScaLBL_D3Q19_AAodd_Poisson: %s \n",cudaGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
|
||||
double *Den_charge, double *Psi,
|
||||
double *ElectricField, double *Error, double tau,
|
||||
double epsilon_LB, bool UseSlippingVelBC,
|
||||
int start, int finish, int Np) {
|
||||
|
||||
dvc_ScaLBL_D3Q19_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>( Map, dist, Den_charge, Psi,
|
||||
ElectricField, Error, tau, epsilon_LB, UseSlippingVelBC, start, finish, Np);
|
||||
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (cudaSuccess != err){
|
||||
printf("CUDA error in dvc_ScaLBL_D3Q19_AAeven_Poisson: %s \n",cudaGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
|
||||
int start, int finish, int Np){
|
||||
//cudaProfilerStart();
|
||||
|
||||
dvc_ScaLBL_D3Q19_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map, dist, Psi, start, finish, Np);
|
||||
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (cudaSuccess != err){
|
||||
printf("CUDA error in ScaLBL_D3Q19_Poisson_Init: %s \n",cudaGetErrorString(err));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
|
||||
18
example/Bubble/CreateBubble.py
Normal file
18
example/Bubble/CreateBubble.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""Write a 40x40x40 8-bit label image containing one spherical bubble.

Every voxel is labelled 1, except voxels whose centre-to-centre distance from
(cx, cy, cz) is strictly below the radius, which are labelled 2. The image is
written as raw bytes to ``bubble_40x40x40.raw`` in the current directory.
"""
import numpy as np
import matplotlib.pylab as plt  # NOTE(review): unused here; kept so the import set is unchanged

# Image edge length, sphere centre (voxel indices) and sphere radius (voxels)
N = 40
cx = 20
cy = 20
cz = 20
radius = 12.5

D = np.ones((N, N, N), dtype="uint8")

# Open index grids broadcast to the full (N, N, N) volume, replacing the
# original triple Python loop. The y term now uses cy (it used cx before,
# which only worked because cx == cy).
i, j, k = np.ogrid[0:N, 0:N, 0:N]
dist = np.sqrt((i - cx) * (i - cx) + (j - cy) * (j - cy) + (k - cz) * (k - cz))
D[dist < radius] = 2

D.tofile("bubble_40x40x40.raw")
|
||||
|
||||
77
example/Bubble/CreateCell.py
Normal file
77
example/Bubble/CreateCell.py
Normal file
@@ -0,0 +1,77 @@
|
||||
"""Write the label image and initial ion concentrations for a spherical cell.

Outputs (raw binary, current directory):
  * ``cell_40x40x40.raw``: uint8 labels, 1 outside the sphere and 2 inside
  * ``cell_concentration_{K,Na,Cl,HCO3}_40x40x40.raw``: float64 concentrations
  * ``cell_concentration_{cation,anion}_40x40x40.raw``: a spatially uniform
    counter-ion field whose value is the magnitude of the domain-averaged net
    charge of the four species above (only one of the two is non-zero)
"""
import numpy as np
import matplotlib.pylab as plt  # NOTE(review): unused here; kept so the import set is unchanged

# Image edge length, sphere centre (voxel indices) and sphere radius (voxels)
N = 40
cx = 20
cy = 20
cz = 20
radius = 15.5

# Open index grids broadcast to the full (N, N, N) volume, replacing the
# original triple Python loops. The y term now uses cy (it used cx before,
# which only worked because cx == cy).
i, j, k = np.ogrid[0:N, 0:N, 0:N]
dist = np.sqrt((i - cx) * (i - cx) + (j - cy) * (j - cy) + (k - cz) * (k - cz))
inside = dist < radius

D = np.ones((N, N, N), dtype="uint8")
D[inside] = 2
D.tofile("cell_40x40x40.raw")

# Concentrations: np.where(inside, value inside the cell, value outside the cell)
C1 = np.where(inside, 145.0e-6, 4.0e-6)    # K
C2 = np.where(inside, 12.0e-6, 150.0e-6)   # Na
C3 = np.where(inside, 4.0e-6, 116.0e-6)    # Cl
C4 = np.where(inside, 12.0e-6, 29.0e-6)    # HCO3 (12 mmol / L inside)
# Ca (2.4e-6 outside, 0.10e-6 i.e. 100 nmol / L inside) is intentionally not generated.

# Domain-averaged net charge of the four species (K and Na positive, Cl and HCO3 negative).
# np.sum may differ from the original element-by-element accumulation in the last
# few bits of the result.
TotalCharge = np.sum(C1 + C2 - C3 - C4) / (N * N * N)

print("Total charge " + str(TotalCharge))

# Uniform counter-ion field: C5 is written to the "cation" file, C6 to the "anion" file.
C5 = np.zeros((N, N, N), dtype="double")
C6 = np.zeros((N, N, N), dtype="double")
if TotalCharge < 0:
    # need more cation
    C5[...] = abs(TotalCharge)
else:
    # need more anion
    C6[...] = abs(TotalCharge)

C1.tofile("cell_concentration_K_40x40x40.raw")
C2.tofile("cell_concentration_Na_40x40x40.raw")
C3.tofile("cell_concentration_Cl_40x40x40.raw")
C4.tofile("cell_concentration_HCO3_40x40x40.raw")
C5.tofile("cell_concentration_cation_40x40x40.raw")
C6.tofile("cell_concentration_anion_40x40x40.raw")
|
||||
|
||||
75
example/Bubble/cell.db
Normal file
75
example/Bubble/cell.db
Normal file
@@ -0,0 +1,75 @@
|
||||
MultiphysController {
    timestepMax = 60
    num_iter_Ion_List = 2
    tolerance = 1.0e-9
    visualization_interval = 100 // Frequency to write visualization data
    analysis_interval = 50       // Frequency to perform analysis (was defined twice with the same value)
}
|
||||
Stokes {
|
||||
tau = 1.0
|
||||
F = 0, 0, 0
|
||||
ElectricField = 0, 0, 0 //body electric field; user-input unit: [V/m]
|
||||
nu_phys = 0.889e-6 //fluid kinematic viscosity; user-input unit: [m^2/sec]
|
||||
}
|
||||
Ions {
    // One (file, type) pair per species, in the same order as IonValenceList:
    // K, Na, Cl, HCO3, compensating cation, compensating anion.
    // The last two files were previously listed as anion, cation, which paired the
    // anion file with valence +1 and the cation file with valence -1.
    IonConcentrationFile = "cell_concentration_K_40x40x40.raw", "double", "cell_concentration_Na_40x40x40.raw", "double", "cell_concentration_Cl_40x40x40.raw", "double", "cell_concentration_HCO3_40x40x40.raw", "double", "cell_concentration_cation_40x40x40.raw", "double", "cell_concentration_anion_40x40x40.raw", "double"
    temperature = 293.15 //unit [K]
    number_ion_species = 6 //number of ions
    tauList = 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
    IonDiffusivityList = 1.0e-9, 1.0e-9, 1.0e-9, 1.0e-9, 1.0e-9, 1.0e-9 //user-input unit: [m^2/sec]
    IonValenceList = 1, 1, -1, -1, 1, -1 //valence charge of ions; dimensionless; positive/negative integer
    IonConcentrationList = 1.0e-6, 1.0e-6, 1.0e-6, 1.0e-6, 1.0e-6, 1.0e-6 //user-input unit: [mol/m^3]
    BC_Solid = 0 //solid boundary condition; 0=non-flux BC; 1=surface ion concentration
    //SolidLabels = 0 //solid labels for assigning solid boundary condition; ONLY for BC_Solid=1
    //SolidValues = 1.0e-5 // user-input surface ion concentration unit: [mol/m^2]; ONLY for BC_Solid=1
    FluidVelDummy = 0.0, 0.0, 1.0e-2 // dummy fluid velocity for debugging
}
|
||||
Poisson {
|
||||
epsilonR = 78.5 //fluid dielectric constant [dimensionless]
|
||||
BC_Inlet = 0 // ->1: fixed electric potential; ->2: sine/cosine periodic electric potential
|
||||
BC_Outlet = 0 // ->1: fixed electric potential; ->2: sine/cosine periodic electric potential
|
||||
//--------------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------------
|
||||
BC_Solid = 2 //solid boundary condition; 1=surface potential; 2=surface charge density
|
||||
SolidLabels = 0 //solid labels for assigning solid boundary condition
|
||||
SolidValues = 0 //if surface potential, unit=[V]; if surface charge density, unit=[C/m^2]
|
||||
WriteLog = true //write convergence log for LB-Poisson solver
|
||||
// ------------------------------- Testing Utilities ----------------------------------------
|
||||
// ONLY for code debugging; the following settings test sine/cosine voltage BCs; disabled by default
|
||||
TestPeriodic = false
|
||||
TestPeriodicTime = 1.0 //unit:[sec]
|
||||
TestPeriodicTimeConv = 0.01 //unit:[sec]
|
||||
TestPeriodicSaveInterval = 0.2 //unit:[sec]
|
||||
//------------------------------ advanced setting ------------------------------------
|
||||
timestepMax = 100000 //max timestep for obtaining steady-state electrical potential
|
||||
analysis_interval = 200 //timestep checking steady-state convergence
|
||||
tolerance = 1.0e-6 //stopping criterion for steady-state solution
|
||||
}
|
||||
Domain {
|
||||
Filename = "cell_40x40x40.raw"
|
||||
nproc = 1, 1, 1 // Number of processors (Npx,Npy,Npz)
|
||||
n = 40, 40, 40 // Size of local domain (Nx,Ny,Nz)
|
||||
N = 40, 40, 40 // size of the input image
|
||||
voxel_length = 1.0 //resolution; user-input unit: [um]
|
||||
BC = 0 // Boundary condition type
|
||||
ReadType = "8bit"
|
||||
ReadValues = 0, 1, 2
|
||||
WriteValues = 0, 1, 2
|
||||
}
|
||||
Analysis {
|
||||
analysis_interval = 100
|
||||
subphase_analysis_interval = 50 // Frequency to perform analysis
|
||||
restart_interval = 5000 // Frequency to write restart data
|
||||
restart_file = "Restart" // Filename to use for restart file (will append rank)
|
||||
N_threads = 4 // Number of threads to use
|
||||
load_balance = "independent" // Load balance method to use: "none", "default", "independent"
|
||||
}
|
||||
Visualization {
|
||||
save_electric_potential = true
|
||||
save_concentration = true
|
||||
save_velocity = true
|
||||
}
|
||||
Membrane {
|
||||
MembraneLabels = 2
|
||||
}
|
||||
41
example/SingleCell/NaCl-cell.py
Normal file
41
example/SingleCell/NaCl-cell.py
Normal file
@@ -0,0 +1,41 @@
|
||||
"""Write the label image and initial Na/Cl concentrations for a spherical cell.

Outputs (raw binary, current directory):
  * ``cell_64x64x64.raw``: uint8 labels, 1 outside the sphere and 2 inside
  * ``cell_concentration_Na_64x64x64.raw``: float64 Na concentration
  * ``cell_concentration_Cl_64x64x64.raw``: float64 Cl concentration
"""
import numpy as np
import matplotlib.pylab as plt  # NOTE(review): unused here; kept so the import set is unchanged

# Image size, sphere centre (voxel coordinates) and sphere radius (voxels)
Nx = 64
Ny = 64
Nz = 64
cx = Nx/2
cy = Ny/2
cz = Nz/2
radius = 12

# Open index grids broadcast to the full (Nx, Ny, Nz) volume, replacing the
# original triple Python loops. The y term now uses cy (it used cx before,
# which only worked because Nx == Ny).
i, j, k = np.ogrid[0:Nx, 0:Ny, 0:Nz]
dist = np.sqrt((i - cx) * (i - cx) + (j - cy) * (j - cy) + (k - cz) * (k - cz))
inside = dist < radius

D = np.ones((Nx, Ny, Nz), dtype="uint8")
D[inside] = 2
D.tofile("cell_64x64x64.raw")

# Concentrations: np.where(inside, value inside the cell, value outside the cell)
C1 = np.where(inside, 110.0e-6, 125.0e-6)  # Na
C2 = np.where(inside, 110.0e-6, 125.0e-6)  # Cl

C1.tofile("cell_concentration_Na_64x64x64.raw")
C2.tofile("cell_concentration_Cl_64x64x64.raw")
|
||||
|
||||
74
example/SingleCell/NaCl.db
Normal file
74
example/SingleCell/NaCl.db
Normal file
@@ -0,0 +1,74 @@
|
||||
MultiphysController {
|
||||
timestepMax = 2000
|
||||
num_iter_Ion_List = 2
|
||||
analysis_interval = 40
|
||||
tolerance = 1.0e-9
|
||||
visualization_interval = 40 // Frequency to write visualization data
|
||||
}
|
||||
Stokes {
|
||||
tau = 1.0
|
||||
F = 0, 0, 0
|
||||
ElectricField = 0, 0, 0 //body electric field; user-input unit: [V/m]
|
||||
nu_phys = 0.889e-6 //fluid kinematic viscosity; user-input unit: [m^2/sec]
|
||||
}
|
||||
Ions {
|
||||
IonConcentrationFile = "cell_concentration_Na_64x64x64.raw", "double", "cell_concentration_Cl_64x64x64.raw", "double"
|
||||
temperature = 293.15 //unit [K]
|
||||
number_ion_species = 2 //number of ions
|
||||
tauList = 1.0, 1.0
|
||||
IonDiffusivityList = 1.0e-9, 1.0e-9 //user-input unit: [m^2/sec]
|
||||
IonValenceList = 1, -1 //valence charge of ions; dimensionless; positive/negative integer
|
||||
IonConcentrationList = 1.0e-6, 1.0e-6 //user-input unit: [mol/m^3]
|
||||
BC_Solid = 0 //solid boundary condition; 0=non-flux BC; 1=surface ion concentration
|
||||
//SolidLabels = 0 //solid labels for assigning solid boundary condition; ONLY for BC_Solid=1
|
||||
//SolidValues = 1.0e-5 // user-input surface ion concentration unit: [mol/m^2]; ONLY for BC_Solid=1
|
||||
FluidVelDummy = 0.0, 0.0, 0.0 // dummy fluid velocity for debugging
|
||||
}
|
||||
Poisson {
|
||||
epsilonR = 78.5 //fluid dielectric constant [dimensionless]
|
||||
BC_Inlet = 0 // ->1: fixed electric potential; ->2: sine/cosine periodic electric potential
|
||||
BC_Outlet = 0 // ->1: fixed electric potential; ->2: sine/cosine periodic electric potential
|
||||
//--------------------------------------------------------------------------
|
||||
//--------------------------------------------------------------------------
|
||||
BC_Solid = 2 //solid boundary condition; 1=surface potential; 2=surface charge density
|
||||
SolidLabels = 0 //solid labels for assigning solid boundary condition
|
||||
SolidValues = 0 //if surface potential, unit=[V]; if surface charge density, unit=[C/m^2]
|
||||
WriteLog = true //write convergence log for LB-Poisson solver
|
||||
// ------------------------------- Testing Utilities ----------------------------------------
|
||||
// ONLY for code debugging; the following settings test sine/cosine voltage BCs; disabled by default
|
||||
TestPeriodic = false
|
||||
TestPeriodicTime = 1.0 //unit:[sec]
|
||||
TestPeriodicTimeConv = 0.01 //unit:[sec]
|
||||
TestPeriodicSaveInterval = 0.2 //unit:[sec]
|
||||
//------------------------------ advanced setting ------------------------------------
|
||||
timestepMax = 4000 //max timestep for obtaining steady-state electrical potential
|
||||
analysis_interval = 25 //timestep checking steady-state convergence
|
||||
tolerance = 1.0e-10 //stopping criterion for steady-state solution
|
||||
}
|
||||
Domain {
|
||||
Filename = "cell_64x64x64.raw"
|
||||
nproc = 1, 1, 1 // Number of processors (Npx,Npy,Npz)
|
||||
n = 64, 64, 64 // Size of local domain (Nx,Ny,Nz)
|
||||
N = 64, 64, 64 // size of the input image
|
||||
voxel_length = 0.1 //resolution; user-input unit: [um]
|
||||
BC = 0 // Boundary condition type
|
||||
ReadType = "8bit"
|
||||
ReadValues = 0, 1, 2
|
||||
WriteValues = 0, 1, 2
|
||||
}
|
||||
Analysis {
|
||||
analysis_interval = 100
|
||||
subphase_analysis_interval = 50 // Frequency to perform analysis
|
||||
restart_interval = 5000 // Frequency to write restart data
|
||||
restart_file = "Restart" // Filename to use for restart file (will append rank)
|
||||
N_threads = 4 // Number of threads to use
|
||||
load_balance = "independent" // Load balance method to use: "none", "default", "independent"
|
||||
}
|
||||
Visualization {
|
||||
save_electric_potential = true
|
||||
save_concentration = true
|
||||
save_velocity = true
|
||||
}
|
||||
Membrane {
|
||||
MembraneLabels = 2
|
||||
}
|
||||
36
example/systems/crusher/A2-8GPU/Color.sh
Normal file
36
example/systems/crusher/A2-8GPU/Color.sh
Normal file
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
#SBATCH --account=CSC380
#SBATCH --job-name=Color-dense
#SBATCH --output=%x-%j.out
#SBATCH --time=0:10:00
#SBATCH --partition=batch
#SBATCH --nodes=1
#SBATCH --exclusive

# Batch job: run lbpm_color_simulator on input.db with 8 MPI ranks on one node,
# one GPU per rank.

# --- module environment ---
module load PrgEnv-amd
module load rocm/4.5.0
module load cray-mpich
module load cray-hdf5-parallel
#module load craype-accel-amd-gfx908

# --- GPU-aware MPI ---
# These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
export MPICH_GPU_SUPPORT_ENABLED=1

#export MPL_MBX_SIZE=1024000000

export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}

# Location of the LBPM executables
export LBPM_BIN=/ccs/proj/csc380/mcclurej/crusher/LBPM/bin

echo "Running Color LBM"

# Explicit core list: one core per rank, in rank order
MYCPUBIND="--cpu-bind=verbose,map_cpu:57,33,25,1,9,17,41,49"

srun --verbose --nodes=1 --ntasks=8 \
     --cpus-per-gpu=8 --gpus-per-task=1 --gpu-bind=closest \
     ${MYCPUBIND} \
     "${LBPM_BIN}/lbpm_color_simulator" input.db
#srun --verbose -N1 -n2 --mem-per-gpu=8g --cpus-per-gpu=1 --gpus-per-node=2 --gpu-bind=closest $LBPM_BIN/lbpm_permeability_simulator input.db

exit
|
||||
36
example/systems/crusher/A2-8GPU/MRT.sh
Normal file
36
example/systems/crusher/A2-8GPU/MRT.sh
Normal file
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
#SBATCH --account=CSC380
#SBATCH --job-name=MRT-a2
#SBATCH --output=%x-%j.out
#SBATCH --time=0:10:00
#SBATCH --partition=batch
#SBATCH --nodes=1
#SBATCH --exclusive

# Batch job: run lbpm_permeability_simulator on input.db with 8 MPI ranks on
# one node, one GPU per rank.

# --- module environment ---
module load PrgEnv-amd
module load rocm/4.5.0
module load cray-mpich
module load cray-hdf5-parallel
#module load craype-accel-amd-gfx908

# --- GPU-aware MPI ---
# These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
export MPICH_GPU_SUPPORT_ENABLED=1

#export MPL_MBX_SIZE=1024000000

export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}

# Location of the LBPM executables
export LBPM_BIN=/ccs/proj/csc380/mcclurej/crusher/LBPM/bin

# The banner previously said "Running Color LBM", copied from Color.sh, although
# this job launches the permeability (MRT) simulator.
echo "Running MRT LBM"

# Explicit core list: one core per rank, in rank order
MYCPUBIND="--cpu-bind=verbose,map_cpu:57,33,25,1,9,17,41,49"

srun --verbose --nodes=1 --ntasks=8 \
     --cpus-per-gpu=8 --gpus-per-task=1 --gpu-bind=closest \
     ${MYCPUBIND} \
     "${LBPM_BIN}/lbpm_permeability_simulator" input.db
#srun --verbose -N1 -n2 --mem-per-gpu=8g --cpus-per-gpu=1 --gpus-per-node=2 --gpu-bind=closest $LBPM_BIN/lbpm_permeability_simulator input.db

exit
|
||||
69
example/systems/crusher/A2-8GPU/input.db
Normal file
69
example/systems/crusher/A2-8GPU/input.db
Normal file
@@ -0,0 +1,69 @@
|
||||
MRT {
|
||||
timestepMax = 10000
|
||||
analysis_interval = 20000
|
||||
tau = 0.7
|
||||
F = 0, 0, 5.0e-5
|
||||
Restart = false
|
||||
din = 1.0
|
||||
dout = 1.0
|
||||
flux = 0.0
|
||||
}
|
||||
|
||||
Color {
|
||||
tauA = 0.7;
|
||||
tauB = 0.7;
|
||||
rhoA = 1.0;
|
||||
rhoB = 1.0;
|
||||
alpha = 1e-2;
|
||||
beta = 0.95;
|
||||
F = 0, 0, 1.0e-5
|
||||
Restart = false
|
||||
flux = 0.0 // voxels per timestep
|
||||
timestepMax = 10000
|
||||
// rescale_force_after_timestep = 100000
|
||||
ComponentLabels = 0, -1, -2
|
||||
ComponentAffinity = -1.0, -1.0, -0.9
|
||||
// protocol = "image sequence"
|
||||
// capillary_number = 1e-5
|
||||
}
|
||||
|
||||
Domain {
|
||||
Filename = "a2_2048x2048x8192.raw"
|
||||
nproc = 2, 2, 2 // Number of processors (Npx,Npy,Npz)
|
||||
offset = 0, 0, 0
|
||||
n = 382, 382, 382 // Size of local domain (Nx,Ny,Nz)
|
||||
N = 2048, 2048, 1024 // size of the input image
|
||||
|
||||
voxel_length = 1.0 // Voxel edge length (image resolution)
|
||||
BC = 0 // Boundary condition type
|
||||
//Sw = 0.2
|
||||
ReadType = "8bit"
|
||||
ReadValues = 0, 1, 2, -1, -2
|
||||
WriteValues = 0, 1, 2, -1, -2
|
||||
ComponentLabels = 0, -1, -2
|
||||
InletLayers = 0, 0, 5
|
||||
OutletLayers = 0, 0, 5
|
||||
}
|
||||
|
||||
Analysis {
|
||||
visualization_interval = 1000000
|
||||
//morph_interval = 100000
|
||||
//morph_delta = -0.08
|
||||
analysis_interval = 20000 // Frequency to perform analysis
|
||||
min_steady_timesteps = 15000000
|
||||
max_steady_timesteps = 15000000
|
||||
restart_interval = 500000 // Frequency to write restart data
|
||||
|
||||
restart_file = "Restart" // Filename to use for restart file (will append rank)
|
||||
N_threads = 0 // Number of threads to use
|
||||
load_balance = "default" // Load balance method to use: "none", "default", "independent"
|
||||
}
|
||||
|
||||
Visualization {
|
||||
save_8bit_raw = true
|
||||
write_silo = true
|
||||
|
||||
}
|
||||
|
||||
FlowAdaptor {
|
||||
}
|
||||
36
example/systems/crusher/Dense-8GPU/MPI-multinode.sh
Normal file
36
example/systems/crusher/Dense-8GPU/MPI-multinode.sh
Normal file
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
#SBATCH --account=CSC380
#SBATCH --job-name=MPI-multinode
#SBATCH --output=%x-%j.out
#SBATCH --time=6:00:00
#SBATCH --partition=batch
#SBATCH --nodes=8
#SBATCH --exclusive

# Batch job: run TestCommD3Q19 on multinode.db with 8 MPI ranks spread over
# 8 nodes (one rank and one GPU per node).

# --- module environment ---
module load PrgEnv-amd
module load rocm/4.5.0
module load cray-mpich
module load cray-hdf5-parallel
#module load craype-accel-amd-gfx908

# --- GPU-aware MPI ---
# These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
export MPICH_GPU_SUPPORT_ENABLED=1

#export MPL_MBX_SIZE=1024000000

export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}

# Location of the LBPM test executables
export LBPM_BIN=/ccs/proj/csc380/mcclurej/crusher/LBPM/tests

# The banner previously said "Running Color LBM", copied from Color.sh, although
# this job launches the D3Q19 communication test.
echo "Running TestCommD3Q19 (multi-node)"

# One rank per node, so a single core is listed
MYCPUBIND="--cpu-bind=verbose,map_cpu:57"

srun --verbose --nodes=8 --ntasks=8 \
     --cpus-per-gpu=8 --gpus-per-task=1 --gpu-bind=closest \
     ${MYCPUBIND} \
     "${LBPM_BIN}/TestCommD3Q19" multinode.db
#srun --verbose -N1 -n2 --mem-per-gpu=8g --cpus-per-gpu=1 --gpus-per-node=2 --gpu-bind=closest $LBPM_BIN/lbpm_permeability_simulator input.db

exit
|
||||
36
example/systems/crusher/Dense-8GPU/MPI-singlenode.sh
Normal file
36
example/systems/crusher/Dense-8GPU/MPI-singlenode.sh
Normal file
@@ -0,0 +1,36 @@
|
||||
#!/bin/bash
#SBATCH --account=CSC380
#SBATCH --job-name=MPI-singlenode
#SBATCH --output=%x-%j.out
#SBATCH --time=0:10:00
#SBATCH --partition=batch
#SBATCH --nodes=1
#SBATCH --exclusive

# Batch job: run TestCommD3Q19 on multinode.db with 8 MPI ranks on a single
# node, one GPU per rank.

# --- module environment ---
module load PrgEnv-amd
module load rocm/4.5.0
module load cray-mpich
module load cray-hdf5-parallel
#module load craype-accel-amd-gfx908

# --- GPU-aware MPI ---
# These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
export MPICH_GPU_SUPPORT_ENABLED=1

#export MPL_MBX_SIZE=1024000000

export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}

# Location of the LBPM test executables
export LBPM_BIN=/ccs/proj/csc380/mcclurej/crusher/LBPM/tests

# The banner previously said "Running Color LBM", copied from Color.sh, although
# this job launches the D3Q19 communication test.
echo "Running TestCommD3Q19 (single node)"

# Explicit core list: one core per rank, in rank order
MYCPUBIND="--cpu-bind=verbose,map_cpu:57,33,25,1,9,17,41,49"

srun --verbose --nodes=1 --ntasks=8 \
     --cpus-per-gpu=8 --gpus-per-task=1 --gpu-bind=closest \
     ${MYCPUBIND} \
     "${LBPM_BIN}/TestCommD3Q19" multinode.db
#srun --verbose -N1 -n2 --mem-per-gpu=8g --cpus-per-gpu=1 --gpus-per-node=2 --gpu-bind=closest $LBPM_BIN/lbpm_permeability_simulator input.db

exit
|
||||
9
example/systems/crusher/Dense-8GPU/RandomSystem.py
Normal file
9
example/systems/crusher/Dense-8GPU/RandomSystem.py
Normal file
@@ -0,0 +1,9 @@
|
||||
"""Write a random dense two-label test image.

Produces ``dense_1024x1024x1024.raw``: an N x N x N array of uint8 labels drawn
from {1, 2} (``high`` is exclusive), written as raw bytes to the current directory.
"""
import numpy as np

N = 1024
shape = (N, N, N)

# Draw directly as uint8 so no wider intermediate array is created
data = np.random.randint(low=1, high=3, size=shape, dtype=np.uint8)

data.tofile("dense_1024x1024x1024.raw")
|
||||
|
||||
|
||||
69
example/systems/crusher/Dense-8GPU/multinode.db
Normal file
69
example/systems/crusher/Dense-8GPU/multinode.db
Normal file
@@ -0,0 +1,69 @@
|
||||
MRT {
|
||||
timestepMax = 100
|
||||
analysis_interval = 20000
|
||||
tau = 0.7
|
||||
F = 0, 0, 5.0e-5
|
||||
Restart = false
|
||||
din = 1.0
|
||||
dout = 1.0
|
||||
flux = 0.0
|
||||
}
|
||||
|
||||
Color {
|
||||
tauA = 0.7;
|
||||
tauB = 0.7;
|
||||
rhoA = 1.0;
|
||||
rhoB = 1.0;
|
||||
alpha = 1e-2;
|
||||
beta = 0.95;
|
||||
F = 0, 0, 0.0
|
||||
Restart = false
|
||||
flux = 0.0 // voxels per timestep
|
||||
timestepMax = 10
|
||||
// rescale_force_after_timestep = 100000
|
||||
ComponentLabels = 0, -1, -2
|
||||
ComponentAffinity = -1.0, -1.0, -0.9
|
||||
// protocol = "image sequence"
|
||||
// capillary_number = 1e-5
|
||||
}
|
||||
|
||||
Domain {
|
||||
Filename = "dense_1024x1024x1024.raw"
|
||||
nproc = 2, 2, 2 // Number of processors (Npx,Npy,Npz)
|
||||
offset = 0, 0, 0
|
||||
n = 222, 222, 222 // Size of local domain (Nx,Ny,Nz)
|
||||
N = 1024, 1024, 1024 // size of the input image
|
||||
|
||||
voxel_length = 1.0 // Voxel edge length (image resolution)
|
||||
BC = 0 // Boundary condition type
|
||||
//Sw = 0.2
|
||||
ReadType = "8bit"
|
||||
ReadValues = 0, 1, 2, -1, -2
|
||||
WriteValues = 0, 1, 2, -1, -2
|
||||
ComponentLabels = 0, -1, -2
|
||||
InletLayers = 0, 0, 5
|
||||
OutletLayers = 0, 0, 5
|
||||
}
|
||||
|
||||
Analysis {
|
||||
visualization_interval = 1000000
|
||||
//morph_interval = 100000
|
||||
//morph_delta = -0.08
|
||||
analysis_interval = 20000 // Frequency to perform analysis
|
||||
min_steady_timesteps = 15000000
|
||||
max_steady_timesteps = 15000000
|
||||
restart_interval = 500000 // Frequency to write restart data
|
||||
|
||||
restart_file = "Restart" // Filename to use for restart file (will append rank)
|
||||
N_threads = 0 // Number of threads to use
|
||||
load_balance = "default" // Load balance method to use: "none", "default", "independent"
|
||||
}
|
||||
|
||||
Visualization {
|
||||
save_8bit_raw = true
|
||||
write_silo = true
|
||||
|
||||
}
|
||||
|
||||
FlowAdaptor {
|
||||
}
|
||||
14
example/systems/spock/GenerateSphereTest.sh
Normal file
14
example/systems/spock/GenerateSphereTest.sh
Normal file
@@ -0,0 +1,14 @@
|
||||
#!/bin/bash
#SBATCH --account=CSC380
#SBATCH --job-name=sphere_test
#SBATCH --output=%x-%j.out
#SBATCH --time=00:05:00
#SBATCH --partition=caar
#SBATCH --nodes=1

# Batch job: run GenerateSphereTest on input.db as a single task.

module load rocm/4.2.0

# Location of the LBPM test executables
export LBPM_DIR=/ccs/proj/csc380/mcclurej/spock/install/lbpm/tests

srun --ntasks=1 --ntasks-per-node=1 "${LBPM_DIR}/GenerateSphereTest" input.db
|
||||
|
||||
|
||||
17
example/systems/spock/Spock.sh
Normal file
17
example/systems/spock/Spock.sh
Normal file
@@ -0,0 +1,17 @@
|
||||
#!/bin/bash
#SBATCH --account=CSC380
#SBATCH --job-name=sphere_test
#SBATCH --output=%x-%j.out
#SBATCH --time=00:05:00
#SBATCH --partition=caar
#SBATCH --nodes=1

# Batch job: run TestCommD3Q19 on spheres322.db as a single task with one GPU.
# NOTE(review): the database added next to this script is named sphere322.db
# (no "s"); confirm which input name is intended.

module load rocm/4.2.0

# Location of the LBPM test executables
export LBPM_DIR=/ccs/proj/csc380/mcclurej/spock/install/lbpm/tests
export MPICH_SMP_SINGLE_COPY_MODE=CMA

#srun -n1 --ntasks-per-node=1 --accel-bind=g --gpus-per-task=1 $LBPM_DIR/lbpm_color_simulator spheres322.db

srun --ntasks=1 --ntasks-per-node=1 --accel-bind=g --gpus-per-task=1 \
     "${LBPM_DIR}/TestCommD3Q19" spheres322.db
|
||||
|
||||
32
example/systems/spock/Test.sh
Normal file
32
example/systems/spock/Test.sh
Normal file
@@ -0,0 +1,32 @@
|
||||
#!/bin/bash
#SBATCH --account=CSC380
#SBATCH --job-name=sphere_test
#SBATCH --output=%x-%j.out
#SBATCH --error=%x-%j.err
#SBATCH --time=00:05:00
#SBATCH --partition=caar
#SBATCH --nodes=1

# Batch job: run TestCommD3Q19 on test.db as a single task with one GPU and
# verbose MPICH debug output enabled.

# --- module environment ---
module load craype-accel-amd-gfx908
module load PrgEnv-cray
#module load rocm
module load rocm/4.2.0

# Location of the LBPM test executables
export LBPM_DIR=/ccs/proj/csc380/mcclurej/spock/install/lbpm/tests

# --- MPICH GPU / shared-memory settings ---
#export MPICH_RDMA_ENABLED_CUDA=1
#export MPICH_ENV_DISPLAY=1
export MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_GPU_NO_ASYNC_MEMCPY=0
export MPICH_SMP_SINGLE_COPY_MODE=CMA
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0

# --- MPICH debug logging ---
#export MPICH_DBG_FILENAME="./mpich-dbg.log"
export MPICH_DBG_CLASS=ALL
export MPICH_DBG_LEVEL=VERBOSE
export MPICH_DBG=yes
#export PMI_DEBUG=1

#srun -n1 --ntasks-per-node=1 --accel-bind=g --gpus-per-task=1 $LBPM_DIR/lbpm_color_simulator spheres322.db

srun --ntasks=1 --ntasks-per-node=1 --accel-bind=g --gpus-per-task=1 \
     --verbose --export=ALL \
     "${LBPM_DIR}/TestCommD3Q19" test.db
|
||||
|
||||
54
example/systems/spock/input.db
Normal file
54
example/systems/spock/input.db
Normal file
@@ -0,0 +1,54 @@
|
||||
MRT {
|
||||
tau = 1.0 // relaxation time
|
||||
F = 0, 0, 1e-4 // external body force applied to system
|
||||
timestepMax = 1000 // max number of timesteps
|
||||
din = 1.0
|
||||
dout = 1.0
|
||||
Restart = false
|
||||
flux = 0.0
|
||||
}
|
||||
|
||||
Color {
|
||||
tauA = 0.7; // relaxation time for fluid A
|
||||
tauB = 0.7; // relaxation time for fluid B
|
||||
rhoA = 1.0; // mass density for fluid A
|
||||
rhoB = 1.0; // mass density for fluid B
|
||||
alpha = 1e-3; // controls interfacial tension between fluids
|
||||
beta = 0.95; // controls interface width
|
||||
F = 0, 0, 1.0e-5 // external body force applied to the system
|
||||
Restart = false // restart from checkpoint file?
|
||||
din = 1.0 // density at inlet (if external BC is applied)
|
||||
dout = 1.0 // density at outlet (if external BC is applied )
|
||||
timestepMax = 10 // maximum number of timesteps to simulate
|
||||
flux = 0.0 // volumetric flux in voxels per timestep (if flux BC is applied)
|
||||
ComponentLabels = 0 // comma separated list of solid mineral labels
|
||||
ComponentAffinity = -1.0 // comma separated list of phase indicator field values to assign for each mineral label
|
||||
}
|
||||
|
||||
Domain {
|
||||
nproc = 1, 1, 1 // Number of processors (Npx,Npy,Npz)
|
||||
n = 318, 320, 320 // Size of local domain (Nx,Ny,Nz)
|
||||
N = 320, 320, 320
|
||||
nspheres = 1896 // Number of spheres (only needed if using a sphere packing)
|
||||
L = 1, 1, 1 // Length of domain (x,y,z)
|
||||
BC = 0 // Boundary condition type
|
||||
// BC = 0 for periodic BC
|
||||
// BC = 1 for pressure BC (applied in z direction)
|
||||
// BC = 4 for flux BC (applied in z direction)
|
||||
ReadType = "8bit"
|
||||
ReadValues = 0, 1, 2 // list of labels within the binary file (read)
|
||||
WriteValues = 0, 1, 2 // list of labels within the output files (write)
|
||||
}
|
||||
|
||||
Analysis {
|
||||
analysis_interval = 1000 // Frequency to perform analysis
|
||||
restart_interval = 50000 // Frequency to write restart data
|
||||
visualization_interval = 50000 // Frequency to write visualization data
|
||||
restart_file = "Restart" // Filename to use for restart file (will append rank)
|
||||
N_threads = 4 // Number of threads to use
|
||||
load_balance = "independent" // Load balance method to use: "none", "default", "independent"
|
||||
}
|
||||
|
||||
Visualization {
|
||||
|
||||
}
|
||||
0
example/systems/spock/sphere322.db
Normal file
0
example/systems/spock/sphere322.db
Normal file
54
example/systems/spock/test.db
Normal file
54
example/systems/spock/test.db
Normal file
@@ -0,0 +1,54 @@
|
||||
MRT {
|
||||
tau = 1.0 // relaxation time
|
||||
F = 0, 0, 1e-4 // external body force applied to system
|
||||
timestepMax = 1000 // max number of timesteps
|
||||
din = 1.0
|
||||
dout = 1.0
|
||||
Restart = false
|
||||
flux = 0.0
|
||||
}
|
||||
|
||||
Color {
|
||||
tauA = 0.7; // relaxation time for fluid A
|
||||
tauB = 0.7; // relaxation time for fluid B
|
||||
rhoA = 1.0; // mass density for fluid A
|
||||
rhoB = 1.0; // mass density for fluid B
|
||||
alpha = 1e-3; // controls interfacial tension between fluids
|
||||
beta = 0.95; // controls interface width
|
||||
F = 0, 0, 1.0e-5 // external body force applied to the system
|
||||
Restart = false // restart from checkpoint file?
|
||||
din = 1.0 // density at inlet (if external BC is applied)
|
||||
dout = 1.0 // density at outlet (if external BC is applied )
|
||||
timestepMax = 10 // maximum number of timesteps to simulate
|
||||
flux = 0.0 // volumetric flux in voxels per timestep (if flux BC is applied)
|
||||
ComponentLabels = 0 // comma separated list of solid mineral labels
|
||||
ComponentAffinity = -1.0 // comma separated list of phase indicator field value to assign for each mineral label
|
||||
}
|
||||
|
||||
Domain {
|
||||
nproc = 1, 1, 1 // Number of processors (Npx,Npy,Npz)
|
||||
n = 240, 240, 240 // Size of local domain (Nx,Ny,Nz)
|
||||
N = 320, 320, 320
|
||||
nspheres = 1896 // Number of spheres (only needed if using a sphere packing)
|
||||
L = 1, 1, 1 // Length of domain (x,y,z)
|
||||
BC = 0 // Boundary condition type
|
||||
// BC = 0 for periodic BC
|
||||
// BC = 1 for pressure BC (applied in z direction)
|
||||
// BC = 4 for flux BC (applied in z direction)
|
||||
ReadType = "8bit"
|
||||
ReadValues = 0, 1, 2 // list of labels within the binary file (read)
|
||||
WriteValues = 0, 1, 2 // list of labels within the output files (write)
|
||||
}
|
||||
|
||||
Analysis {
|
||||
analysis_interval = 1000 // Frequency to perform analysis
|
||||
restart_interval = 50000 // Frequency to write restart data
|
||||
visualization_interval = 50000 // Frequency to write visualization data
|
||||
restart_file = "Restart" // Filename to use for restart file (will append rank)
|
||||
N_threads = 4 // Number of threads to use
|
||||
load_balance = "independent" // Load balance method to use: "none", "default", "independent"
|
||||
}
|
||||
|
||||
Visualization {
|
||||
|
||||
}
|
||||
@@ -18,9 +18,11 @@
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
#define NTHREADS 512
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
|
||||
|
||||
__global__ void
|
||||
__launch_bounds__(512,1) dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
|
||||
int n;
|
||||
// conserved moments
|
||||
double rho,ux,uy,uz,uu;
|
||||
@@ -138,7 +140,8 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish,
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
|
||||
__global__ void
|
||||
__launch_bounds__(512,1) dvc_ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
|
||||
int n;
|
||||
// conserved moments
|
||||
double rho,ux,uy,uz,uu;
|
||||
@@ -1,10 +0,0 @@
|
||||
SET( HIP_SEPERABLE_COMPILATION ON )
|
||||
FILE( GLOB HIP_SOURCES "*.cu" )
|
||||
SET_SOURCE_FILES_PROPERTIES( ${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 )
|
||||
HIP_ADD_LIBRARY( lbpm-hip ${HIP_SOURCES} SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} )
|
||||
#TARGET_LINK_LIBRARIES( lbpm-hip /opt/rocm-3.3.0/lib/libhip_hcc.so )
|
||||
#TARGET_LINK_LIBRARIES( lbpm-wia lbpm-hip )
|
||||
#ADD_DEPENDENCIES( lbpm-hip copy-include )
|
||||
|
||||
|
||||
|
||||
@@ -21,6 +21,21 @@
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
|
||||
|
||||
|
||||
__device__ __constant__ double mrt_V1=0.05263157894736842;
|
||||
__device__ __constant__ double mrt_V2=0.012531328320802;
|
||||
__device__ __constant__ double mrt_V3=0.04761904761904762;
|
||||
__device__ __constant__ double mrt_V4=0.004594820384294068;
|
||||
__device__ __constant__ double mrt_V5=0.01587301587301587;
|
||||
__device__ __constant__ double mrt_V6=0.0555555555555555555555555;
|
||||
__device__ __constant__ double mrt_V7=0.02777777777777778;
|
||||
__device__ __constant__ double mrt_V8=0.08333333333333333;
|
||||
__device__ __constant__ double mrt_V9=0.003341687552213868;
|
||||
__device__ __constant__ double mrt_V10=0.003968253968253968;
|
||||
__device__ __constant__ double mrt_V11=0.01388888888888889;
|
||||
__device__ __constant__ double mrt_V12=0.04166666666666666;
|
||||
|
||||
__global__ void dvc_ScaLBL_Color_Init(char *ID, double *Den, double *Phi, double das, double dbs, int Nx, int Ny, int Nz)
|
||||
{
|
||||
//int i,j,k;
|
||||
@@ -541,7 +556,7 @@ __global__ void dvc_ColorCollide( char *ID, double *disteven, double *distodd,
|
||||
}
|
||||
|
||||
__global__ void
|
||||
__launch_bounds__(512,2)
|
||||
__launch_bounds__(256,1)
|
||||
dvc_ScaLBL_D3Q19_ColorCollide( char *ID, double *disteven, double *distodd, double *phi, double *ColorGrad,
|
||||
double *Velocity, int Nx, int Ny, int Nz, double rlx_setA, double rlx_setB,
|
||||
double alpha, double beta, double Fx, double Fy, double Fz)
|
||||
@@ -1257,7 +1272,8 @@ __global__ void dvc_ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny
|
||||
|
||||
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi,
|
||||
__global__ void
|
||||
__launch_bounds__(256,1) dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi,
|
||||
double *Velocity, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
|
||||
double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){
|
||||
int ijk,nn,n;
|
||||
@@ -1273,19 +1289,6 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
|
||||
double ux,uy,uz;
|
||||
double phi,tau,rho0,rlx_setA,rlx_setB;
|
||||
|
||||
const double mrt_V1=0.05263157894736842;
|
||||
const double mrt_V2=0.012531328320802;
|
||||
const double mrt_V3=0.04761904761904762;
|
||||
const double mrt_V4=0.004594820384294068;
|
||||
const double mrt_V5=0.01587301587301587;
|
||||
const double mrt_V6=0.0555555555555555555555555;
|
||||
const double mrt_V7=0.02777777777777778;
|
||||
const double mrt_V8=0.08333333333333333;
|
||||
const double mrt_V9=0.003341687552213868;
|
||||
const double mrt_V10=0.003968253968253968;
|
||||
const double mrt_V11=0.01388888888888889;
|
||||
const double mrt_V12=0.04166666666666666;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
@@ -1295,9 +1298,10 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
|
||||
// read the component number densities
|
||||
nA = Den[n];
|
||||
nB = Den[Np + n];
|
||||
nAB = 1.0/(nA+nB);
|
||||
|
||||
// compute phase indicator field
|
||||
phi=(nA-nB)/(nA+nB);
|
||||
phi=(nA-nB)*nAB;
|
||||
|
||||
// local density
|
||||
rho0=rhoA + 0.5*(1.0-phi)*(rhoB-rhoA);
|
||||
@@ -1372,11 +1376,11 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
|
||||
|
||||
//...........Normalize the Color Gradient.................................
|
||||
C = sqrt(nx*nx+ny*ny+nz*nz);
|
||||
double ColorMag = C;
|
||||
if (C==0.0) ColorMag=1.0;
|
||||
nx = nx/ColorMag;
|
||||
ny = ny/ColorMag;
|
||||
nz = nz/ColorMag;
|
||||
double iColorMag = 1.0/C;
|
||||
if (C==0.0) iColorMag=1.0;
|
||||
nx = nx*iColorMag;
|
||||
ny = ny*iColorMag;
|
||||
nz = nz*iColorMag;
|
||||
|
||||
// q=0
|
||||
fq = dist[n];
|
||||
@@ -1651,19 +1655,20 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
|
||||
//........................................................................
|
||||
//..............carry out relaxation process..............................
|
||||
//..........Toelke, Freudiger et al. 2006................................
|
||||
double irho0 = 1.0/rho0;
|
||||
if (C == 0.0) nx = ny = nz = 0.0;
|
||||
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) -19*alpha*C - m1);
|
||||
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0)- m2);
|
||||
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)*irho0 - 11*rho) -19*alpha*C - m1);
|
||||
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)*irho0)- m2);
|
||||
m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4);
|
||||
m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6);
|
||||
m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8);
|
||||
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
|
||||
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)*irho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
|
||||
m10 = m10 + rlx_setA*( - m10);
|
||||
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
|
||||
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)*irho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
|
||||
m12 = m12 + rlx_setA*( - m12);
|
||||
m13 = m13 + rlx_setA*( (jx*jy/rho0) + 0.5*alpha*C*nx*ny - m13);
|
||||
m14 = m14 + rlx_setA*( (jy*jz/rho0) + 0.5*alpha*C*ny*nz - m14);
|
||||
m15 = m15 + rlx_setA*( (jx*jz/rho0) + 0.5*alpha*C*nx*nz - m15);
|
||||
m13 = m13 + rlx_setA*( (jx*jy*irho0) + 0.5*alpha*C*nx*ny - m13);
|
||||
m14 = m14 + rlx_setA*( (jy*jz*irho0) + 0.5*alpha*C*ny*nz - m14);
|
||||
m15 = m15 + rlx_setA*( (jx*jz*irho0) + 0.5*alpha*C*nx*nz - m15);
|
||||
m16 = m16 + rlx_setB*( - m16);
|
||||
m17 = m17 + rlx_setB*( - m17);
|
||||
m18 = m18 + rlx_setB*( - m18);
|
||||
@@ -1776,9 +1781,9 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
|
||||
//........................................................................
|
||||
|
||||
// write the velocity
|
||||
ux = jx / rho0;
|
||||
uy = jy / rho0;
|
||||
uz = jz / rho0;
|
||||
ux = jx*irho0;
|
||||
uy = jy*irho0;
|
||||
uz = jz*irho0;
|
||||
Velocity[n] = ux;
|
||||
Velocity[Np+n] = uy;
|
||||
Velocity[2*Np+n] = uz;
|
||||
@@ -1786,7 +1791,6 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
|
||||
// Instantiate mass transport distributions
|
||||
// Stationary value - distribution 0
|
||||
|
||||
nAB = 1.0/(nA+nB);
|
||||
Aq[n] = 0.3333333333333333*nA;
|
||||
Bq[n] = 0.3333333333333333*nB;
|
||||
|
||||
@@ -1839,8 +1843,8 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den,
|
||||
__global__ void
|
||||
__launch_bounds__(256,1) dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den,
|
||||
double *Phi, double *Velocity, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
|
||||
double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){
|
||||
|
||||
@@ -1861,19 +1865,6 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double
|
||||
double ux,uy,uz;
|
||||
double phi,tau,rho0,rlx_setA,rlx_setB;
|
||||
|
||||
const double mrt_V1=0.05263157894736842;
|
||||
const double mrt_V2=0.012531328320802;
|
||||
const double mrt_V3=0.04761904761904762;
|
||||
const double mrt_V4=0.004594820384294068;
|
||||
const double mrt_V5=0.01587301587301587;
|
||||
const double mrt_V6=0.0555555555555555555555555;
|
||||
const double mrt_V7=0.02777777777777778;
|
||||
const double mrt_V8=0.08333333333333333;
|
||||
const double mrt_V9=0.003341687552213868;
|
||||
const double mrt_V10=0.003968253968253968;
|
||||
const double mrt_V11=0.01388888888888889;
|
||||
const double mrt_V12=0.04166666666666666;
|
||||
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
for (int s=0; s<S; s++){
|
||||
//........Get 1-D index for this thread....................
|
||||
@@ -1882,9 +1873,10 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double
|
||||
// read the component number densities
|
||||
nA = Den[n];
|
||||
nB = Den[Np + n];
|
||||
nAB = 1.0/(nA+nB);
|
||||
|
||||
// compute phase indicator field
|
||||
phi=(nA-nB)/(nA+nB);
|
||||
phi=(nA-nB)*nAB;
|
||||
|
||||
// local density
|
||||
rho0=rhoA + 0.5*(1.0-phi)*(rhoB-rhoA);
|
||||
@@ -1959,11 +1951,11 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double
|
||||
|
||||
//...........Normalize the Color Gradient.................................
|
||||
C = sqrt(nx*nx+ny*ny+nz*nz);
|
||||
double ColorMag = C;
|
||||
if (C==0.0) ColorMag=1.0;
|
||||
nx = nx/ColorMag;
|
||||
ny = ny/ColorMag;
|
||||
nz = nz/ColorMag;
|
||||
double iColorMag = 1.0/C;
|
||||
if (C==0.0) iColorMag=1.0;
|
||||
nx = nx*iColorMag;
|
||||
ny = ny*iColorMag;
|
||||
nz = nz*iColorMag;
|
||||
|
||||
// q=0
|
||||
fq = dist[n];
|
||||
@@ -2290,18 +2282,19 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double
|
||||
//..............carry out relaxation process..............................
|
||||
//..........Toelke, Freudiger et al. 2006................................
|
||||
if (C == 0.0) nx = ny = nz = 0.0;
|
||||
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) -19*alpha*C - m1);
|
||||
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0)- m2);
|
||||
double irho0=1.0/rho0;
|
||||
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)*irho0 - 11*rho) -19*alpha*C - m1);
|
||||
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)*irho0)- m2);
|
||||
m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4);
|
||||
m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6);
|
||||
m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8);
|
||||
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
|
||||
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)*irho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
|
||||
m10 = m10 + rlx_setA*( - m10);
|
||||
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
|
||||
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)*irho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
|
||||
m12 = m12 + rlx_setA*( - m12);
|
||||
m13 = m13 + rlx_setA*( (jx*jy/rho0) + 0.5*alpha*C*nx*ny - m13);
|
||||
m14 = m14 + rlx_setA*( (jy*jz/rho0) + 0.5*alpha*C*ny*nz - m14);
|
||||
m15 = m15 + rlx_setA*( (jx*jz/rho0) + 0.5*alpha*C*nx*nz - m15);
|
||||
m13 = m13 + rlx_setA*( (jx*jy*irho0) + 0.5*alpha*C*nx*ny - m13);
|
||||
m14 = m14 + rlx_setA*( (jy*jz*irho0) + 0.5*alpha*C*ny*nz - m14);
|
||||
m15 = m15 + rlx_setA*( (jx*jz*irho0) + 0.5*alpha*C*nx*nz - m15);
|
||||
m16 = m16 + rlx_setB*( - m16);
|
||||
m17 = m17 + rlx_setB*( - m17);
|
||||
m18 = m18 + rlx_setB*( - m18);
|
||||
@@ -2426,16 +2419,15 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double
|
||||
dist[nread] = fq;
|
||||
|
||||
// write the velocity
|
||||
ux = jx / rho0;
|
||||
uy = jy / rho0;
|
||||
uz = jz / rho0;
|
||||
ux = jx*irho0;
|
||||
uy = jy*irho0;
|
||||
uz = jz*irho0;
|
||||
Velocity[n] = ux;
|
||||
Velocity[Np+n] = uy;
|
||||
Velocity[2*Np+n] = uz;
|
||||
|
||||
// Instantiate mass transport distributions
|
||||
// Stationary value - distribution 0
|
||||
nAB = 1.0/(nA+nB);
|
||||
Aq[n] = 0.3333333333333333*nA;
|
||||
Bq[n] = 0.3333333333333333*nB;
|
||||
|
||||
@@ -3677,7 +3669,8 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_ColorMass(int *neighborList, double *Aq,
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq,
|
||||
__global__ void
|
||||
__launch_bounds__(256,1) dvc_ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq,
|
||||
double *Den, double *Phi, int start, int finish, int Np){
|
||||
int idx,n,nread;
|
||||
double fq,nA,nB;
|
||||
@@ -3747,7 +3740,8 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, d
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, double *Den, double *Phi,
|
||||
__global__ void
|
||||
__launch_bounds__(256,1) dvc_ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, double *Den, double *Phi,
|
||||
int start, int finish, int Np){
|
||||
int idx,n;
|
||||
double fq,nA,nB;
|
||||
@@ -19,7 +19,7 @@
|
||||
#include "hip/hip_cooperative_groups.h"
|
||||
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
#define NTHREADS 512
|
||||
|
||||
/*
|
||||
1. constants that are known at compile time should be defined using preprocessor macros (e.g. #define) or via C/C++ const variables at global/file scope.
|
||||
@@ -321,10 +321,10 @@ __global__ void dvc_ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *distev
|
||||
}
|
||||
}
|
||||
|
||||
//__launch_bounds__(512,4)
|
||||
//__launch_bounds__(512,1)
|
||||
|
||||
__global__ void
|
||||
dvc_ScaLBL_AAodd_Compact(char * ID, int *d_neighborList, double *dist, int Np) {
|
||||
dvc_ScaLBL_AAodd_Compact( int *d_neighborList, double *dist, int Np) {
|
||||
|
||||
int n;
|
||||
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
|
||||
@@ -463,7 +463,8 @@ dvc_ScaLBL_AAodd_Compact(char * ID, int *d_neighborList, double *dist, int Np) {
|
||||
}
|
||||
|
||||
|
||||
__global__ void
|
||||
__global__ void
|
||||
__launch_bounds__(512,1)
|
||||
dvc_ScaLBL_AAodd_MRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz) {
|
||||
|
||||
int n;
|
||||
@@ -932,7 +933,8 @@ dvc_ScaLBL_AAodd_MRT(int *neighborList, double *dist, int start, int finish, int
|
||||
|
||||
|
||||
//__launch_bounds__(512,1)
|
||||
__global__ void
|
||||
__global__ void
|
||||
__launch_bounds__(512,1)
|
||||
dvc_ScaLBL_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz) {
|
||||
|
||||
int n;
|
||||
@@ -1353,9 +1355,9 @@ dvc_ScaLBL_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_se
|
||||
}
|
||||
}
|
||||
|
||||
//__launch_bounds__(512,4)
|
||||
//__launch_bounds__(512,1)
|
||||
|
||||
__global__ void dvc_ScaLBL_AAeven_Compact(char * ID, double *dist, int Np) {
|
||||
__global__ void dvc_ScaLBL_AAeven_Compact( double *dist, int Np) {
|
||||
|
||||
int n;
|
||||
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
|
||||
@@ -2374,12 +2376,12 @@ __global__ void dvc_ScaLBL_D3Q19_Init_Simple(char *ID, double *f_even, double *f
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_Pack(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q19_Pack <<<GRID,512 >>>(q, list, start, count, sendbuf, dist, N);
|
||||
dvc_ScaLBL_D3Q19_Pack <<<NBLOCKS,NTHREADS >>>(q, list, start, count, sendbuf, dist, N);
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count, double *recvbuf, double *dist, int N){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q19_Unpack <<<GRID,512 >>>(q, list, start, count, recvbuf, dist, N);
|
||||
dvc_ScaLBL_D3Q19_Unpack <<<NBLOCKS,NTHREADS >>>(q, list, start, count, recvbuf, dist, N);
|
||||
}
|
||||
//*************************************************************************
|
||||
|
||||
@@ -2423,19 +2425,17 @@ extern "C" void ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *disteven, d
|
||||
printf("CUDA error in ScaLBL_D3Q19_Swap: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_Compact(char * ID, double *d_dist, int Np) {
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_Compact( double *d_dist, int Np) {
|
||||
hipFuncSetCacheConfig( (void*) dvc_ScaLBL_AAeven_Compact, hipFuncCachePreferL1);
|
||||
dvc_ScaLBL_AAeven_Compact<<<NBLOCKS,NTHREADS>>>(ID, d_dist, Np);
|
||||
dvc_ScaLBL_AAeven_Compact<<<NBLOCKS,NTHREADS>>>( d_dist, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_Compact(char * ID, int *d_neighborList, double *d_dist, int Np) {
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_Compact( int *d_neighborList, double *d_dist, int Np) {
|
||||
hipFuncSetCacheConfig( (void*) dvc_ScaLBL_AAodd_Compact, hipFuncCachePreferL1);
|
||||
dvc_ScaLBL_AAodd_Compact<<<NBLOCKS,NTHREADS>>>(ID,d_neighborList, d_dist,Np);
|
||||
dvc_ScaLBL_AAodd_Compact<<<NBLOCKS,NTHREADS>>>(d_neighborList, d_dist,Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",hipGetErrorString(err));
|
||||
567
hip/D3Q7BC.cu
567
hip/D3Q7BC.cu
@@ -1,567 +0,0 @@
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 560
|
||||
#define NTHREADS 128
|
||||
|
||||
// Kernel: Dirichlet-type update of bounce-back distributions at solid sites (D3Q7).
// Each thread handles one list entry idx = blockIdx.x*blockDim.x + threadIdx.x;
// threads with idx >= count do nothing. There is no loop over entries, so the
// launch must provide at least `count` threads for every entry to be processed.
//   dist                 - distribution array, updated in place at index iq
//   BoundaryValue        - boundary values, read at index ib
//   BounceBackDist_list  - per-entry index iq into dist
//   BounceBackSolid_list - per-entry index ib into BoundaryValue
//   count                - number of list entries
// Update rule: dist[iq] = -dist[iq] + 0.25*BoundaryValue[ib].
// NOTE(review): the inline comment calls 0.25 the "speed of sound"; it is presumably
// the squared lattice speed of sound (c_s^2) for D3Q7 -- confirm.
__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
|
||||
{
|
||||
|
||||
int idx;
|
||||
int iq,ib;
|
||||
double value_b,value_q;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
iq = BounceBackDist_list[idx];
|
||||
ib = BounceBackSolid_list[idx];
|
||||
value_b = BoundaryValue[ib];//get boundary value from a solid site
|
||||
value_q = dist[iq];
|
||||
dist[iq] = -1.0*value_q + value_b*0.25;//NOTE 0.25 is the speed of sound for D3Q7 lattice
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel: Neumann-type update of bounce-back distributions at solid sites (D3Q7).
// Each thread handles one list entry idx = blockIdx.x*blockDim.x + threadIdx.x;
// threads with idx >= count do nothing.
//   dist                 - distribution array, updated in place at index iq
//   BoundaryValue        - boundary values, read at index ib
//   BounceBackDist_list  - per-entry index iq into dist
//   BounceBackSolid_list - per-entry index ib into BoundaryValue
//   count                - number of list entries
// Update rule: dist[iq] = dist[iq] + BoundaryValue[ib].
__global__ void dvc_ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
|
||||
{
|
||||
|
||||
int idx;
|
||||
int iq,ib;
|
||||
double value_b,value_q;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
iq = BounceBackDist_list[idx];
|
||||
ib = BounceBackSolid_list[idx];
|
||||
value_b = BoundaryValue[ib];//get boundary value from a solid site
|
||||
value_q = dist[iq];
|
||||
dist[iq] = value_q + value_b;
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel: mixed Dirichlet/Neumann update of bounce-back distributions (D3Q7).
// Each thread handles one list entry idx = blockIdx.x*blockDim.x + threadIdx.x;
// threads with idx >= count do nothing.
//   dist                 - distribution array, updated in place at index iq
//   BoundaryValue        - boundary values, read at index ib
//   BoundaryLabel        - per-site label selecting the rule, read at index ib
//   BounceBackDist_list  - per-entry index iq into dist
//   BounceBackSolid_list - per-entry index ib into BoundaryValue / BoundaryLabel
//   count                - number of list entries
// Label 1: dist[iq] = -dist[iq] + 0.25*BoundaryValue[ib]   (Dirichlet branch)
// Label 2: dist[iq] =  dist[iq] + BoundaryValue[ib]        (Neumann branch)
// Any other label leaves dist[iq] unchanged.
__global__ void dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist, double *BoundaryValue,int *BoundaryLabel, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
|
||||
{
|
||||
|
||||
int idx;
|
||||
int iq,ib;
|
||||
// value_b_label holds the integer label converted to double; it is compared to 1 and 2 below
double value_b,value_b_label,value_q;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
iq = BounceBackDist_list[idx];
|
||||
ib = BounceBackSolid_list[idx];
|
||||
value_b = BoundaryValue[ib];//get boundary value from a solid site
|
||||
value_b_label = BoundaryLabel[ib];//get boundary label (i.e. type of BC) from a solid site
|
||||
value_q = dist[iq];
|
||||
if (value_b_label==1){//Dirichlet BC
|
||||
dist[iq] = -1.0*value_q + value_b*0.25;//NOTE 0.25 is the speed of sound for D3Q7 lattice
|
||||
}
|
||||
if (value_b_label==2){//Neumann BC
|
||||
dist[iq] = value_q + value_b;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel: potential boundary condition (value Vin) for the D3Q7 Poisson
// distributions, AA-even variant.
// Each thread handles one entry of `list` (idx < count); n = list[idx] is the site index.
//   list  - site indices to which the condition is applied
//   dist  - distributions, component q of site n stored at dist[q*Np + n]
//   Vin   - prescribed value for the sum of the seven distributions
//   count - number of entries in list
//   Np    - stride between distribution components
// Six components are read from slots 0,1,2,3,4,5 and the remaining one is written
// to slot 6 so that the seven values sum to Vin.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np)
|
||||
{
|
||||
int idx,n;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
// note the pairwise-swapped slots: f1 is read from slot 2, f2 from slot 1, etc.
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f6 = dist[5*Np+n];
|
||||
//...................................................
|
||||
f5 = Vin - (f0+f1+f2+f3+f4+f6);
|
||||
dist[6*Np+n] = f5;
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel: potential boundary condition (value Vout) for the D3Q7 Poisson
// distributions, AA-even variant; counterpart of the lowercase-z kernel.
// Each thread handles one entry of `list` (idx < count); n = list[idx] is the site index.
//   list  - site indices to which the condition is applied
//   dist  - distributions, component q of site n stored at dist[q*Np + n]
//   Vout  - prescribed value for the sum of the seven distributions
//   count - number of entries in list
//   Np    - stride between distribution components
// Six components are read from slots 0,1,2,3,4,6 and the remaining one is written
// to slot 5 so that the seven values sum to Vout.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np)
|
||||
{
|
||||
int idx,n;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f5 = dist[6*Np+n];
|
||||
//...................................................
|
||||
f6 = Vout - (f0+f1+f2+f3+f4+f5);
|
||||
dist[5*Np+n] = f6;
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel: potential boundary condition (value Vin) for the D3Q7 Poisson
// distributions, AA-odd variant. Unlike the AA-even kernel, the locations of
// the non-rest distributions are looked up through d_neighborList.
// Each thread handles one entry of `list` (idx < count); n = list[idx] is the site index.
//   d_neighborList - index table; entry [n + k*Np] gives the dist index used for lookup k
//   list           - site indices to which the condition is applied
//   dist           - distribution array (read and written)
//   Vin            - prescribed value for the sum of the seven distributions
//   count          - number of entries in list
//   Np             - stride between blocks of d_neighborList
// Five values are read via table blocks 0,1,2,3,5; the unknown f5 is written to the
// index taken from block 4 so that the seven values sum to Vin.
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np)
|
||||
{
|
||||
int idx, n;
|
||||
int nread,nr5;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
|
||||
nread = d_neighborList[n];
|
||||
f1 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+2*Np];
|
||||
f3 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+Np];
|
||||
f2 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+3*Np];
|
||||
f4 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+5*Np];
|
||||
f6 = dist[nread];
|
||||
|
||||
// Unknown distributions
|
||||
nr5 = d_neighborList[n+4*Np];
|
||||
f5 = Vin - (f0+f1+f2+f3+f4+f6);
|
||||
dist[nr5] = f5;
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel: potential boundary condition (value Vout) for the D3Q7 Poisson
// distributions, AA-odd variant; counterpart of the lowercase-z kernel.
// Each thread handles one entry of `list` (idx < count); n = list[idx] is the site index.
//   d_neighborList - index table; entry [n + k*Np] gives the dist index used for lookup k
//   list           - site indices to which the condition is applied
//   dist           - distribution array (read and written)
//   Vout           - prescribed value for the sum of the seven distributions
//   count          - number of entries in list
//   Np             - stride between blocks of d_neighborList
// Five values are read via table blocks 0,1,2,3,4; the unknown f6 is written to the
// index taken from block 5 so that the seven values sum to Vout.
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np)
|
||||
{
|
||||
int idx, n;
|
||||
int nread,nr6;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
|
||||
nread = d_neighborList[n];
|
||||
f1 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+2*Np];
|
||||
f3 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+4*Np];
|
||||
f5 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+Np];
|
||||
f2 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+3*Np];
|
||||
f4 = dist[nread];
|
||||
|
||||
// unknown distributions
|
||||
nr6 = d_neighborList[n+5*Np];
|
||||
f6 = Vout - (f0+f1+f2+f3+f4+f5);
|
||||
dist[nr6] = f6;
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel: write the boundary value Vin directly into the field Psi.
// Each thread handles one entry of `list` (idx < count): the site index
// n = list[idx] is translated through Map and Psi[Map[n]] is set to Vin.
//   list  - site indices to which the value is applied
//   Map   - translation from site index n to the index used in Psi
//   Psi   - field array that receives the value
//   Vin   - value to assign
//   count - number of entries in list
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count)
|
||||
{
|
||||
int idx,n,nm;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
nm = Map[n];
|
||||
Psi[nm] = Vin;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Kernel: write the boundary value Vout directly into the field Psi.
// Each thread handles one entry of `list` (idx < count): the site index
// n = list[idx] is translated through Map and Psi[Map[n]] is set to Vout.
//   list  - site indices to which the value is applied
//   Map   - translation from site index n to the index used in Psi
//   Psi   - field array that receives the value
//   Vout  - value to assign
//   count - number of entries in list
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count)
|
||||
{
|
||||
int idx,n,nm;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
nm = Map[n];
|
||||
Psi[nm] = Vout;
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel: concentration boundary condition (value Cin) for the D3Q7 ion
// distributions, AA-even variant.
// Each thread handles one entry of `list` (idx < count); n = list[idx] is the site index.
//   list  - site indices to which the condition is applied
//   dist  - distributions, component q of site n stored at dist[q*Np + n]
//   Cin   - prescribed value for the sum of the seven distributions
//   count - number of entries in list
//   Np    - stride between distribution components
// Six components are read from slots 0,1,2,3,4,5 and the remaining one is written
// to slot 6 so that the seven values sum to Cin.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np)
|
||||
{
|
||||
int idx,n;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f6 = dist[5*Np+n];
|
||||
//...................................................
|
||||
f5 = Cin - (f0+f1+f2+f3+f4+f6);
|
||||
dist[6*Np+n] = f5;
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel: concentration boundary condition (value Cout) for the D3Q7 ion
// distributions, AA-even variant; counterpart of the lowercase-z kernel.
// Each thread handles one entry of `list` (idx < count); n = list[idx] is the site index.
//   list  - site indices to which the condition is applied
//   dist  - distributions, component q of site n stored at dist[q*Np + n]
//   Cout  - prescribed value for the sum of the seven distributions
//   count - number of entries in list
//   Np    - stride between distribution components
// Six components are read from slots 0,1,2,3,4,6 and the remaining one is written
// to slot 5 so that the seven values sum to Cout.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np)
|
||||
{
|
||||
int idx,n;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f5 = dist[6*Np+n];
|
||||
//...................................................
|
||||
f6 = Cout - (f0+f1+f2+f3+f4+f5);
|
||||
dist[5*Np+n] = f6;
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel: concentration boundary condition (value Cin) for the D3Q7 ion
// distributions, AA-odd variant. The locations of the non-rest distributions
// are looked up through d_neighborList.
// Each thread handles one entry of `list` (idx < count); n = list[idx] is the site index.
//   d_neighborList - index table; entry [n + k*Np] gives the dist index used for lookup k
//   list           - site indices to which the condition is applied
//   dist           - distribution array (read and written)
//   Cin            - prescribed value for the sum of the seven distributions
//   count          - number of entries in list
//   Np             - stride between blocks of d_neighborList
// Five values are read via table blocks 0,1,2,3,5; the unknown f5 is written to the
// index taken from block 4 so that the seven values sum to Cin.
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np)
|
||||
{
|
||||
int idx, n;
|
||||
int nread,nr5;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
|
||||
nread = d_neighborList[n];
|
||||
f1 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+2*Np];
|
||||
f3 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+Np];
|
||||
f2 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+3*Np];
|
||||
f4 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+5*Np];
|
||||
f6 = dist[nread];
|
||||
|
||||
// Unknown distributions
|
||||
nr5 = d_neighborList[n+4*Np];
|
||||
f5 = Cin - (f0+f1+f2+f3+f4+f6);
|
||||
dist[nr5] = f5;
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel: concentration boundary condition (value Cout) for the D3Q7 ion
// distributions, AA-odd variant; counterpart of the lowercase-z kernel.
// Each thread handles one entry of `list` (idx < count); n = list[idx] is the site index.
//   d_neighborList - index table; entry [n + k*Np] gives the dist index used for lookup k
//   list           - site indices to which the condition is applied
//   dist           - distribution array (read and written)
//   Cout           - prescribed value for the sum of the seven distributions
//   count          - number of entries in list
//   Np             - stride between blocks of d_neighborList
// Five values are read via table blocks 0,1,2,3,4; the unknown f6 is written to the
// index taken from block 5 so that the seven values sum to Cout.
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np)
|
||||
{
|
||||
int idx, n;
|
||||
int nread,nr6;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
|
||||
nread = d_neighborList[n];
|
||||
f1 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+2*Np];
|
||||
f3 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+4*Np];
|
||||
f5 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+Np];
|
||||
f2 = dist[nread];
|
||||
|
||||
nread = d_neighborList[n+3*Np];
|
||||
f4 = dist[nread];
|
||||
|
||||
// unknown distributions
|
||||
nr6 = d_neighborList[n+5*Np];
|
||||
f6 = Cout - (f0+f1+f2+f3+f4+f5);
|
||||
dist[nr6] = f6;
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel: flux boundary condition for the D3Q7 ion distributions, AA-even variant.
// Each thread handles one entry of `list` (idx < count); n = list[idx] is the site index.
//   list      - site indices to which the condition is applied
//   dist      - distributions, component q of site n stored at dist[q*Np + n]
//   FluxIn    - prescribed inward flux
//   tau       - relaxation time entering the closed-form expression below
//   VelocityZ - per-site velocity value, read at index n
//   count     - number of entries in list
//   Np        - stride between distribution components
// Six components are read from slots 0,1,2,3,4,5; the unknown f5 is computed from
// FluxIn, tau, uz, f6 and the partial sum, then written to slot 6.
// NOTE(review): the expression divides by (1 - 0.5/tau + 0.5*uz/tau); nothing here
// guards against that denominator being zero -- confirm callers keep it non-zero.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
|
||||
{
|
||||
//NOTE: FluxIn is the inward flux
|
||||
int idx,n;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
double fsum_partial;
|
||||
double uz;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f6 = dist[5*Np+n];
|
||||
// sum of the six known distributions (everything except f5)
fsum_partial = f0+f1+f2+f3+f4+f6;
|
||||
uz = VelocityZ[n];
|
||||
//...................................................
|
||||
f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau);
|
||||
dist[6*Np+n] = f5;
|
||||
}
|
||||
}
|
||||
|
||||
// Kernel: flux boundary condition for the D3Q7 ion distributions, AA-even variant;
// counterpart of the lowercase-z kernel with the signs of the uz terms reversed.
// Each thread handles one entry of `list` (idx < count); n = list[idx] is the site index.
//   list      - site indices to which the condition is applied
//   dist      - distributions, component q of site n stored at dist[q*Np + n]
//   FluxIn    - prescribed inward flux
//   tau       - relaxation time entering the closed-form expression below
//   VelocityZ - per-site velocity value, read at index n
//   count     - number of entries in list
//   Np        - stride between distribution components
// Six components are read from slots 0,1,2,3,4,6; the unknown f6 is computed from
// FluxIn, tau, uz, f5 and the partial sum, then written to slot 5.
// NOTE(review): the expression divides by (1 - 0.5/tau - 0.5*uz/tau); nothing here
// guards against that denominator being zero -- confirm callers keep it non-zero.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
|
||||
{
|
||||
//NOTE: FluxIn is the inward flux
|
||||
int idx,n;
|
||||
double f0,f1,f2,f3,f4,f5,f6;
|
||||
double fsum_partial;
|
||||
double uz;
|
||||
idx = blockIdx.x*blockDim.x + threadIdx.x;
|
||||
if (idx < count){
|
||||
n = list[idx];
|
||||
f0 = dist[n];
|
||||
f1 = dist[2*Np+n];
|
||||
f2 = dist[1*Np+n];
|
||||
f3 = dist[4*Np+n];
|
||||
f4 = dist[3*Np+n];
|
||||
f5 = dist[6*Np+n];
|
||||
// sum of the six known distributions (everything except f6)
fsum_partial = f0+f1+f2+f3+f4+f5;
|
||||
uz = VelocityZ[n];
|
||||
//...................................................
|
||||
f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau);
|
||||
dist[5*Np+n] = f6;
|
||||
}
|
||||
}
|
||||
|
||||
// For each of the `count` sites in `list`, gathers six distributions through
// d_neighborList, reads the z velocity, and writes the remaining distribution
// to the location given by d_neighborList[site+4*Np] using the flux relation below.
// NOTE: FluxIn is the inward flux.
//   d_neighborList : index table into dist, addressed as [site + k*Np]
//   list           : site indices to process
//   dist           : distribution array, modified in place
//   FluxIn         : prescribed inward flux
//   tau            : relaxation parameter used in the flux relation
//   VelocityZ      : per-site z velocity
//   count          : number of entries of list to process
//   Np             : stride between sections of d_neighborList
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f6 = dist[d_neighborList[site+5*Np]];
    const double known_sum = f0+f1+f2+f3+f4+f6;
    const double uz = VelocityZ[site];

    // unknown distribution
    const double f5 = (FluxIn+(1.0-0.5/tau)*f6-0.5*uz*known_sum/tau)/(1.0-0.5/tau+0.5*uz/tau);
    const int dst = d_neighborList[site+4*Np];
    dist[dst] = f5;
}
|
||||
|
||||
// For each of the `count` sites in `list`, gathers six distributions through
// d_neighborList, reads the z velocity, and writes the remaining distribution
// to the location given by d_neighborList[site+5*Np] using the flux relation below.
// NOTE: FluxIn is the inward flux.
//   d_neighborList : index table into dist, addressed as [site + k*Np]
//   list           : site indices to process
//   dist           : distribution array, modified in place
//   FluxIn         : prescribed inward flux
//   tau            : relaxation parameter used in the flux relation
//   VelocityZ      : per-site z velocity
//   count          : number of entries of list to process
//   Np             : stride between sections of d_neighborList
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f5 = dist[d_neighborList[site+4*Np]];
    const double known_sum = f0+f1+f2+f3+f4+f5;
    const double uz = VelocityZ[site];

    // unknown distribution
    const double f6 = (FluxIn+(1.0-0.5/tau)*f5+0.5*uz*known_sum/tau)/(1.0-0.5/tau-0.5*uz/tau);
    const int dst = d_neighborList[site+5*Np];
    dist[dst] = f6;
}
|
||||
//*************************************************************************
|
||||
|
||||
extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_Solid_Dirichlet_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_Solid_Dirichlet_D3Q7 (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_Solid_Neumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_Solid_Neumann_D3Q7 (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist, double *BoundaryValue,int *BoundaryLabel, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BoundaryLabel, BounceBackDist_list, BounceBackSolid_list, count);
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (cudaSuccess != err){
|
||||
printf("hip error in ScaLBL_Solid_DirichletAndNeumann_D3Q7 (kernel): %s \n",cudaGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<<GRID,512>>>(list, dist, Vin, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<<GRID,512>>>(list, dist, Vout, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Vin, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Vout, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_Poisson_D3Q7_BC_z<<<GRID,512>>>(list, Map, Psi, Vin, count);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_Poisson_D3Q7_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_Poisson_D3Q7_BC_Z<<<GRID,512>>>(list, Map, Psi, Vout, count);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_Poisson_D3Q7_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<<GRID,512>>>(list, dist, Cin, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<<GRID,512>>>(list, dist, Cout, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Cin, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Cout, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
|
||||
int GRID = count / 512 + 1;
|
||||
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
917
hip/D3Q7BC.hip
Normal file
917
hip/D3Q7BC.hip
Normal file
@@ -0,0 +1,917 @@
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
|
||||
|
||||
// Queries hipGetLastError() and, if it is not hipSuccess, prints the error
// string together with KERNEL (used as a %s argument, so it must be a C string).
// The error is only reported; execution continues afterwards.
// Wrapped in do { } while(0) so the macro behaves as a single statement.
#define CHECK_ERROR(KERNEL)                                         \
    do {                                                            \
        const hipError_t err = hipGetLastError();                   \
        if ( err != hipSuccess ) {                                  \
            printf("error in %s (kernel): %s \n",KERNEL,            \
                   hipGetErrorString(err));                         \
        }                                                           \
    } while(0)
|
||||
|
||||
|
||||
// For each of the `count` list entries, replaces dist[iq] with
// -dist[iq] + 0.25*BoundaryValue[ib], where iq and ib come from the two
// index lists.
//   dist                 : distribution array, modified in place
//   BoundaryValue        : boundary values, indexed by BounceBackSolid_list entries
//   BounceBackDist_list  : indices into dist to update
//   BounceBackSolid_list : indices into BoundaryValue
//   count                : number of list entries to process
__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int link  = BounceBackDist_list[tid];
    const int solid = BounceBackSolid_list[tid];
    const double wall_value = BoundaryValue[solid]; // boundary value taken from a solid site
    const double incoming   = dist[link];

    // NOTE (original author): 0.25 is the speed of sound for D3Q7 lattice
    dist[link] = -1.0*incoming + wall_value*0.25;
}
|
||||
|
||||
// For each of the `count` list entries, adds BoundaryValue[ib] to dist[iq],
// where iq and ib come from the two index lists.
//   dist                 : distribution array, modified in place
//   BoundaryValue        : boundary values, indexed by BounceBackSolid_list entries
//   BounceBackDist_list  : indices into dist to update
//   BounceBackSolid_list : indices into BoundaryValue
//   count                : number of list entries to process
__global__ void dvc_ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int link  = BounceBackDist_list[tid];
    const int solid = BounceBackSolid_list[tid];
    const double wall_value = BoundaryValue[solid]; // boundary value taken from a solid site
    const double incoming   = dist[link];

    dist[link] = incoming + wall_value;
}
|
||||
|
||||
// For each of the `count` list entries, updates dist[iq] according to the label
// stored for the associated solid site:
//   label 1 (Dirichlet): dist[iq] = -dist[iq] + 0.25*BoundaryValue[ib]
//   label 2 (Neumann)  : dist[iq] =  dist[iq] + BoundaryValue[ib]
// Any other label leaves dist[iq] untouched.
//   dist                 : distribution array, modified in place
//   BoundaryValue        : boundary values, indexed by BounceBackSolid_list entries
//   BoundaryLabel        : boundary-condition type per solid site (same indexing)
//   BounceBackDist_list  : indices into dist to update
//   BounceBackSolid_list : indices into BoundaryValue / BoundaryLabel
//   count                : number of list entries to process
__global__ void dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist, double *BoundaryValue,int *BoundaryLabel, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int link  = BounceBackDist_list[tid];
    const int solid = BounceBackSolid_list[tid];
    const double wall_value = BoundaryValue[solid]; // boundary value taken from a solid site
    const int    bc_type    = BoundaryLabel[solid]; // boundary label (i.e. type of BC) from a solid site
    const double incoming   = dist[link];

    if (bc_type == 1){
        // Dirichlet BC; NOTE (original author): 0.25 is the speed of sound for D3Q7 lattice
        dist[link] = -1.0*incoming + wall_value*0.25;
    }
    else if (bc_type == 2){
        // Neumann BC
        dist[link] = incoming + wall_value;
    }
}
|
||||
|
||||
// For each of the `count` list entries, computes a slipping velocity from the
// zeta potential and the tangential part of the electric field, then subtracts
// 2*w*rho0*cs2_inv*(c . u_slip) from dist[iq].
//   dist               : distribution array, modified in place
//   zeta_potential     : indexed by BounceBackSolid_list entries
//   ElectricField      : three components stored at offsets 0, Np, 2*Np
//   SolidGrad          : three components stored at offsets 0, Np, 2*Np
//   epsilon_LB, tau, rho0, den_scale, h, time_conv : scalars of the velocity formula
//   BounceBackDist_list  : indices into dist to update
//   BounceBackSolid_list : indices into zeta_potential
//   FluidBoundary_list   : indices into ElectricField / SolidGrad
//   lattice_weight, lattice_cx/cy/cz : per-entry weight and lattice velocity
//   count              : number of list entries to process
//   Np                 : stride between field components
__global__ void dvc_ScaLBL_Solid_SlippingVelocityBC_D3Q19(double *dist, double *zeta_potential, double *ElectricField, double *SolidGrad,
                                                          double epsilon_LB, double tau, double rho0,double den_scale, double h, double time_conv,
                                                          int *BounceBackDist_list, int *BounceBackSolid_list, int *FluidBoundary_list,
                                                          double *lattice_weight, float *lattice_cx, float *lattice_cy, float *lattice_cz,
                                                          int count, int Np)
{
    const double cs2_inv = 3.0;              // inverse of cs^2 for D3Q19
    const double nu_LB = (tau-0.5)/cs2_inv;

    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int link  = BounceBackDist_list[tid];
    const int solid = BounceBackSolid_list[tid];
    const int fluid = FluidBoundary_list[tid];
    const double zeta     = zeta_potential[solid]; // zeta potential from a solid site
    const double incoming = dist[link];

    // electric field and solid-gradient vector at the fluid boundary node
    const double Ex = ElectricField[fluid+0*Np];
    const double Ey = ElectricField[fluid+1*Np];
    const double Ez = ElectricField[fluid+2*Np];
    const double nsx = SolidGrad[fluid+0*Np];
    const double nsy = SolidGrad[fluid+1*Np];
    const double nsz = SolidGrad[fluid+2*Np];

    // component of the electric field along the solid-gradient direction
    const double E_mag_normal = Ex*nsx+Ey*nsy+Ez*nsz;

    // tangential electric field
    const double Etx = Ex - E_mag_normal*nsx;
    const double Ety = Ey - E_mag_normal*nsy;
    const double Etz = Ez - E_mag_normal*nsz;

    // slipping velocity at the fluid boundary node
    const double ubx = -epsilon_LB*zeta*Etx/(nu_LB*rho0)*time_conv*time_conv/(h*h*1.0e-12)/den_scale;
    const double uby = -epsilon_LB*zeta*Ety/(nu_LB*rho0)*time_conv*time_conv/(h*h*1.0e-12)/den_scale;
    const double ubz = -epsilon_LB*zeta*Etz/(nu_LB*rho0)*time_conv*time_conv/(h*h*1.0e-12)/den_scale;

    // bounce-back distribution
    const double LB_weight = lattice_weight[tid];
    const float cx = lattice_cx[tid];
    const float cy = lattice_cy[tid];
    const float cz = lattice_cz[tid];
    dist[link] = incoming - 2.0*LB_weight*rho0*cs2_inv*(cx*ubx+cy*uby+cz*ubz);
}
|
||||
|
||||
// For each of the `count` sites in `list`, reads six distributions directly
// from dist (offsets 0..5 times Np) and sets dist[6*Np+site] so that the seven
// values sum to Vin.
//   list  : site indices to process
//   dist  : distribution array, modified in place
//   Vin   : target sum of the seven distributions
//   count : number of entries of list to process
//   Np    : stride between distribution components in dist
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f6 = dist[5*Np+site];

    // unknown distribution
    dist[6*Np+site] = Vin - (f0+f1+f2+f3+f4+f6);
}
|
||||
|
||||
// For each of the `count` sites in `list`, reads six distributions directly
// from dist (offsets 0..4 and 6 times Np) and sets dist[5*Np+site] so that the
// seven values sum to Vout.
//   list  : site indices to process
//   dist  : distribution array, modified in place
//   Vout  : target sum of the seven distributions
//   count : number of entries of list to process
//   Np    : stride between distribution components in dist
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f5 = dist[6*Np+site];

    // unknown distribution
    dist[5*Np+site] = Vout - (f0+f1+f2+f3+f4+f5);
}
|
||||
|
||||
// For each of the `count` sites in `list`, gathers six distributions through
// d_neighborList and overwrites the location given by d_neighborList[site+4*Np]
// so that the seven values sum to Vin.
//   d_neighborList : index table into dist, addressed as [site + k*Np]
//   list           : site indices to process
//   dist           : distribution array, modified in place
//   Vin            : target sum of the seven distributions
//   count          : number of entries of list to process
//   Np             : stride between sections of d_neighborList
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f6 = dist[d_neighborList[site+5*Np]];

    // unknown distribution
    const int dst = d_neighborList[site+4*Np];
    dist[dst] = Vin - (f0+f1+f2+f3+f4+f6);
}
|
||||
|
||||
// For each of the `count` sites in `list`, gathers six distributions through
// d_neighborList and overwrites the location given by d_neighborList[site+5*Np]
// so that the seven values sum to Vout.
//   d_neighborList : index table into dist, addressed as [site + k*Np]
//   list           : site indices to process
//   dist           : distribution array, modified in place
//   Vout           : target sum of the seven distributions
//   count          : number of entries of list to process
//   Np             : stride between sections of d_neighborList
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f5 = dist[d_neighborList[site+4*Np]];

    // unknown distribution
    const int dst = d_neighborList[site+5*Np];
    dist[dst] = Vout - (f0+f1+f2+f3+f4+f5);
}
|
||||
|
||||
// For each of the `count` sites in `list`, writes Vin into Psi at the index
// obtained by passing the site through Map.
//   list  : site indices to process
//   Map   : translates a site index into an index of Psi
//   Psi   : output array, modified in place
//   Vin   : value to store
//   count : number of entries of list to process
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];
    const int target = Map[site];
    Psi[target] = Vin;
}
|
||||
|
||||
|
||||
// For each of the `count` sites in `list`, writes Vout into Psi at the index
// obtained by passing the site through Map.
//   list  : site indices to process
//   Map   : translates a site index into an index of Psi
//   Psi   : output array, modified in place
//   Vout  : value to store
//   count : number of entries of list to process
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];
    const int target = Map[site];
    Psi[target] = Vout;
}
|
||||
|
||||
// For each of the `count` sites in `list`, reads six distributions directly
// from dist (offsets 0..5 times Np) and sets dist[6*Np+site] so that the seven
// values sum to Cin.
//   list  : site indices to process
//   dist  : distribution array, modified in place
//   Cin   : target sum of the seven distributions
//   count : number of entries of list to process
//   Np    : stride between distribution components in dist
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f6 = dist[5*Np+site];

    // unknown distribution
    dist[6*Np+site] = Cin - (f0+f1+f2+f3+f4+f6);
}
|
||||
|
||||
// For each of the `count` sites in `list`, reads six distributions directly
// from dist (offsets 0..4 and 6 times Np) and sets dist[5*Np+site] so that the
// seven values sum to Cout.
//   list  : site indices to process
//   dist  : distribution array, modified in place
//   Cout  : target sum of the seven distributions
//   count : number of entries of list to process
//   Np    : stride between distribution components in dist
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f5 = dist[6*Np+site];

    // unknown distribution
    dist[5*Np+site] = Cout - (f0+f1+f2+f3+f4+f5);
}
|
||||
|
||||
// For each of the `count` sites in `list`, gathers six distributions through
// d_neighborList and overwrites the location given by d_neighborList[site+4*Np]
// so that the seven values sum to Cin.
//   d_neighborList : index table into dist, addressed as [site + k*Np]
//   list           : site indices to process
//   dist           : distribution array, modified in place
//   Cin            : target sum of the seven distributions
//   count          : number of entries of list to process
//   Np             : stride between sections of d_neighborList
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f6 = dist[d_neighborList[site+5*Np]];

    // unknown distribution
    const int dst = d_neighborList[site+4*Np];
    dist[dst] = Cin - (f0+f1+f2+f3+f4+f6);
}
|
||||
|
||||
// For each of the `count` sites in `list`, gathers six distributions through
// d_neighborList and overwrites the location given by d_neighborList[site+5*Np]
// so that the seven values sum to Cout.
//   d_neighborList : index table into dist, addressed as [site + k*Np]
//   list           : site indices to process
//   dist           : distribution array, modified in place
//   Cout           : target sum of the seven distributions
//   count          : number of entries of list to process
//   Np             : stride between sections of d_neighborList
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f5 = dist[d_neighborList[site+4*Np]];

    // unknown distribution
    const int dst = d_neighborList[site+5*Np];
    dist[dst] = Cout - (f0+f1+f2+f3+f4+f5);
}
|
||||
|
||||
// For each of the `count` sites in `list`, reads six distributions directly
// from dist (offsets 0..5 times Np) and the z velocity, then writes the
// remaining distribution at dist[6*Np+site] from the flux relation below.
// NOTE: FluxIn is the inward flux.
//   list      : site indices to process
//   dist      : distribution array, modified in place
//   FluxIn    : prescribed inward flux
//   tau       : relaxation parameter used in the flux relation
//   VelocityZ : per-site z velocity
//   count     : number of entries of list to process
//   Np        : stride between distribution components in dist
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f6 = dist[5*Np+site];
    const double known_sum = f0+f1+f2+f3+f4+f6;
    const double uz = VelocityZ[site];

    // unknown distribution
    dist[6*Np+site] = (FluxIn+(1.0-0.5/tau)*(f6+uz*known_sum))/(1.0-0.5/tau)/(1.0-uz);
}
|
||||
|
||||
// For each of the `count` sites in `list`, reads six distributions directly
// from dist (offsets 0..4 and 6 times Np) and the z velocity, then writes the
// remaining distribution at dist[5*Np+site] from the flux relation below.
// NOTE: FluxIn is the inward flux.
//   list      : site indices to process
//   dist      : distribution array, modified in place
//   FluxIn    : prescribed inward flux
//   tau       : relaxation parameter used in the flux relation
//   VelocityZ : per-site z velocity
//   count     : number of entries of list to process
//   Np        : stride between distribution components in dist
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f5 = dist[6*Np+site];
    const double known_sum = f0+f1+f2+f3+f4+f5;
    const double uz = VelocityZ[site];

    // unknown distribution
    dist[5*Np+site] = (FluxIn+(1.0-0.5/tau)*(f5-uz*known_sum))/(1.0-0.5/tau)/(1.0+uz);
}
|
||||
|
||||
// For each of the `count` sites in `list`, gathers six distributions through
// d_neighborList, reads the z velocity, and writes the remaining distribution
// to the location given by d_neighborList[site+4*Np] using the flux relation below.
// NOTE: FluxIn is the inward flux.
//   d_neighborList : index table into dist, addressed as [site + k*Np]
//   list           : site indices to process
//   dist           : distribution array, modified in place
//   FluxIn         : prescribed inward flux
//   tau            : relaxation parameter used in the flux relation
//   VelocityZ      : per-site z velocity
//   count          : number of entries of list to process
//   Np             : stride between sections of d_neighborList
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f6 = dist[d_neighborList[site+5*Np]];
    const double known_sum = f0+f1+f2+f3+f4+f6;
    const double uz = VelocityZ[site];

    // unknown distribution
    const double f5 = (FluxIn+(1.0-0.5/tau)*(f6+uz*known_sum))/(1.0-0.5/tau)/(1.0-uz);
    const int dst = d_neighborList[site+4*Np];
    dist[dst] = f5;
}
|
||||
|
||||
// For each of the `count` sites in `list`, gathers six distributions through
// d_neighborList, reads the z velocity, and writes the remaining distribution
// to the location given by d_neighborList[site+5*Np] using the flux relation below.
// NOTE: FluxIn is the inward flux.
//   d_neighborList : index table into dist, addressed as [site + k*Np]
//   list           : site indices to process
//   dist           : distribution array, modified in place
//   FluxIn         : prescribed inward flux
//   tau            : relaxation parameter used in the flux relation
//   VelocityZ      : per-site z velocity
//   count          : number of entries of list to process
//   Np             : stride between sections of d_neighborList
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f5 = dist[d_neighborList[site+4*Np]];
    const double known_sum = f0+f1+f2+f3+f4+f5;
    const double uz = VelocityZ[site];

    // unknown distribution
    const double f6 = (FluxIn+(1.0-0.5/tau)*(f5-uz*known_sum))/(1.0-0.5/tau)/(1.0+uz);
    const int dst = d_neighborList[site+5*Np];
    dist[dst] = f6;
}
|
||||
|
||||
// For each of the `count` sites in `list`, reads six distributions directly
// from dist (offsets 0..5 times Np) and the z velocity, then writes the
// remaining distribution at dist[6*Np+site] from the flux relation below.
// NOTE: FluxIn is the inward flux.
//   list      : site indices to process
//   dist      : distribution array, modified in place
//   FluxIn    : prescribed inward flux
//   tau       : relaxation parameter used in the flux relation
//   VelocityZ : per-site z velocity
//   count     : number of entries of list to process
//   Np        : stride between distribution components in dist
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f6 = dist[5*Np+site];
    const double known_sum = f0+f1+f2+f3+f4+f6;
    const double uz = VelocityZ[site];

    // unknown distribution
    dist[6*Np+site] = (FluxIn+(1.0-0.5/tau)*f6-0.5*uz*known_sum/tau)/(1.0-0.5/tau+0.5*uz/tau);
}
|
||||
|
||||
// For each of the `count` sites in `list`, reads six distributions directly
// from dist (offsets 0..4 and 6 times Np) and the z velocity, then writes the
// remaining distribution at dist[5*Np+site] from the flux relation below.
// NOTE: FluxIn is the inward flux.
//   list      : site indices to process
//   dist      : distribution array, modified in place
//   FluxIn    : prescribed inward flux
//   tau       : relaxation parameter used in the flux relation
//   VelocityZ : per-site z velocity
//   count     : number of entries of list to process
//   Np        : stride between distribution components in dist
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f5 = dist[6*Np+site];
    const double known_sum = f0+f1+f2+f3+f4+f5;
    const double uz = VelocityZ[site];

    // unknown distribution
    dist[5*Np+site] = (FluxIn+(1.0-0.5/tau)*f5+0.5*uz*known_sum/tau)/(1.0-0.5/tau-0.5*uz/tau);
}
|
||||
|
||||
// For each of the `count` sites in `list`, gathers six distributions through
// d_neighborList, reads the z velocity, and writes the remaining distribution
// to the location given by d_neighborList[site+4*Np] using the flux relation below.
// NOTE: FluxIn is the inward flux.
//   d_neighborList : index table into dist, addressed as [site + k*Np]
//   list           : site indices to process
//   dist           : distribution array, modified in place
//   FluxIn         : prescribed inward flux
//   tau            : relaxation parameter used in the flux relation
//   VelocityZ      : per-site z velocity
//   count          : number of entries of list to process
//   Np             : stride between sections of d_neighborList
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f6 = dist[d_neighborList[site+5*Np]];
    const double known_sum = f0+f1+f2+f3+f4+f6;
    const double uz = VelocityZ[site];

    // unknown distribution
    const double f5 = (FluxIn+(1.0-0.5/tau)*f6-0.5*uz*known_sum/tau)/(1.0-0.5/tau+0.5*uz/tau);
    const int dst = d_neighborList[site+4*Np];
    dist[dst] = f5;
}
|
||||
|
||||
// For each of the `count` sites in `list`, gathers six distributions through
// d_neighborList, reads the z velocity, and writes the remaining distribution
// to the location given by d_neighborList[site+5*Np] using the flux relation below.
// NOTE: FluxIn is the inward flux.
//   d_neighborList : index table into dist, addressed as [site + k*Np]
//   list           : site indices to process
//   dist           : distribution array, modified in place
//   FluxIn         : prescribed inward flux
//   tau            : relaxation parameter used in the flux relation
//   VelocityZ      : per-site z velocity
//   count          : number of entries of list to process
//   Np             : stride between sections of d_neighborList
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f5 = dist[d_neighborList[site+4*Np]];
    const double known_sum = f0+f1+f2+f3+f4+f5;
    const double uz = VelocityZ[site];

    // unknown distribution
    const double f6 = (FluxIn+(1.0-0.5/tau)*f5+0.5*uz*known_sum/tau)/(1.0-0.5/tau-0.5*uz/tau);
    const int dst = d_neighborList[site+5*Np];
    dist[dst] = f6;
}
|
||||
|
||||
// For each of the `count` sites in `list`, reads six distributions directly
// from dist (offsets 0..5 times Np), the z velocity and the z electric field,
// then writes the remaining distribution at dist[6*Np+site] from the flux
// relation below, which includes the extra velocity term zi*Di/Vt*Ez.
// NOTE: FluxIn is the inward flux.
//   list            : site indices to process
//   dist            : distribution array, modified in place
//   FluxIn          : prescribed inward flux
//   tau             : relaxation parameter used in the flux relation
//   VelocityZ       : per-site z velocity
//   ElectricField_Z : per-site z electric field
//   Di, zi, Vt      : scalars combined as zi*Di/Vt to scale the electric field
//   count           : number of entries of list to process
//   Np              : stride between distribution components in dist
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
                                                                   double Di, double zi, double Vt, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f6 = dist[5*Np+site];
    const double known_sum = f0+f1+f2+f3+f4+f6;

    const double uz = VelocityZ[site];
    const double Ez = ElectricField_Z[site]; // electrical field
    const double uEPz = zi*Di/Vt*Ez;         // electrochemical induced velocity

    // unknown distribution
    dist[6*Np+site] = (FluxIn+(1.0-0.5/tau)*f6-(0.5*uz/tau+uEPz)*known_sum)/(1.0-0.5/tau+0.5*uz/tau+uEPz);
}
|
||||
|
||||
// For each of the `count` sites in `list`, reads six distributions directly
// from dist (offsets 0..4 and 6 times Np), the z velocity and the z electric
// field, then writes the remaining distribution at dist[5*Np+site] from the
// flux relation below, which includes the extra velocity term zi*Di/Vt*Ez.
// NOTE: FluxIn is the inward flux.
//   list            : site indices to process
//   dist            : distribution array, modified in place
//   FluxIn          : prescribed inward flux
//   tau             : relaxation parameter used in the flux relation
//   VelocityZ       : per-site z velocity
//   ElectricField_Z : per-site z electric field
//   Di, zi, Vt      : scalars combined as zi*Di/Vt to scale the electric field
//   count           : number of entries of list to process
//   Np              : stride between distribution components in dist
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
                                                                   double Di, double zi, double Vt, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // threads beyond the list have nothing to do

    const int site = list[tid];

    // known distributions
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f5 = dist[6*Np+site];
    const double known_sum = f0+f1+f2+f3+f4+f5;

    const double uz = VelocityZ[site];
    const double Ez = ElectricField_Z[site]; // electrical field
    const double uEPz = zi*Di/Vt*Ez;         // electrochemical induced velocity

    // unknown distribution
    dist[5*Np+site] = (FluxIn+(1.0-0.5/tau)*f5+(0.5*uz/tau+uEPz)*known_sum)/(1.0-0.5/tau-0.5*uz/tau-uEPz);
}
|
||||
|
||||
/**
 * Ion flux boundary kernel (odd step, "z" face) with diffusive, advective and
 * electric-drift contributions.
 *
 * Each thread with global index < count handles one site n = list[idx]. Apart
 * from dist[n], every distribution is addressed indirectly through
 * d_neighborList[n + k*Np]: k = 0,1,2,3,5 are read, and the newly computed value
 * is written to the address stored at k = 4.
 *
 * NOTE: FluxIn is the inward flux.
 */
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];

    // known distributions (indirect addressing via the neighbor list)
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f6 = dist[d_neighborList[site+5*Np]];
    const double fsum_partial = f0+f1+f2+f3+f4+f6;

    const double uz = VelocityZ[site];
    const double Ez = ElectricField_Z[site];   // electrical field
    const double uEPz = zi*Di/Vt*Ez;           // electrochemical induced velocity

    const double f5 = (FluxIn+(1.0-0.5/tau)*f6-(0.5*uz/tau+uEPz)*fsum_partial)/(1.0-0.5/tau+0.5*uz/tau+uEPz);

    // unknown distribution
    const int target = d_neighborList[site+4*Np];
    dist[target] = f5;
}
|
||||
|
||||
/**
 * Ion flux boundary kernel (odd step, "Z" face) with diffusive, advective and
 * electric-drift contributions.
 *
 * Each thread with global index < count handles one site n = list[idx]. Apart
 * from dist[n], every distribution is addressed indirectly through
 * d_neighborList[n + k*Np]: k = 0,1,2,3,4 are read, and the newly computed value
 * is written to the address stored at k = 5.
 *
 * NOTE: FluxIn is the inward flux.
 */
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];

    // known distributions (indirect addressing via the neighbor list)
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f5 = dist[d_neighborList[site+4*Np]];
    const double fsum_partial = f0+f1+f2+f3+f4+f5;

    const double uz = VelocityZ[site];
    const double Ez = ElectricField_Z[site];   // electrical field
    const double uEPz = zi*Di/Vt*Ez;           // electrochemical induced velocity

    const double f6 = (FluxIn+(1.0-0.5/tau)*f5+(0.5*uz/tau+uEPz)*fsum_partial)/(1.0-0.5/tau-0.5*uz/tau-uEPz);

    // unknown distribution
    const int target = d_neighborList[site+5*Np];
    dist[target] = f6;
}
|
||||
//*************************************************************************
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_Solid_Dirichlet_D3Q7.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_Solid_Dirichlet_D3Q7<<<numBlocks, threadsPerBlock>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
    CHECK_ERROR("ScaLBL_Solid_Dirichlet_D3Q7");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_Solid_Neumann_D3Q7.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_Solid_Neumann_D3Q7<<<numBlocks, threadsPerBlock>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
    CHECK_ERROR("ScaLBL_Solid_Neumann_D3Q7");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist, double *BoundaryValue,int *BoundaryLabel, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7<<<numBlocks, threadsPerBlock>>>(dist, BoundaryValue, BoundaryLabel, BounceBackDist_list, BounceBackSolid_list, count);
    CHECK_ERROR("ScaLBL_Solid_DirichletAndNeumann_D3Q7");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_Solid_SlippingVelocityBC_D3Q19.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged (same order), then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_Solid_SlippingVelocityBC_D3Q19(double *dist, double *zeta_potential, double *ElectricField, double *SolidGrad,
        double epsilon_LB, double tau, double rho0,double den_scale, double h, double time_conv,
        int *BounceBackDist_list, int *BounceBackSolid_list, int *FluidBoundary_list,
        double *lattice_weight, float *lattice_cx, float *lattice_cy, float *lattice_cz,
        int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_Solid_SlippingVelocityBC_D3Q19<<<numBlocks, threadsPerBlock>>>(
            dist, zeta_potential, ElectricField, SolidGrad,
            epsilon_LB, tau, rho0, den_scale, h, time_conv,
            BounceBackDist_list, BounceBackSolid_list, FluidBoundary_list,
            lattice_weight, lattice_cx, lattice_cy, lattice_cz,
            count, Np);
    CHECK_ERROR("ScaLBL_Solid_SlippingVelocityBC_D3Q19");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<<numBlocks, threadsPerBlock>>>(list, dist, Vin, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<<numBlocks, threadsPerBlock>>>(list, dist, Vout, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<<numBlocks, threadsPerBlock>>>(d_neighborList, list, dist, Vin, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<<numBlocks, threadsPerBlock>>>(d_neighborList, list, dist, Vout, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_Poisson_D3Q7_BC_z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_Poisson_D3Q7_BC_z<<<numBlocks, threadsPerBlock>>>(list, Map, Psi, Vin, count);
    CHECK_ERROR("ScaLBL_Poisson_D3Q7_BC_z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_Poisson_D3Q7_BC_Z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_Poisson_D3Q7_BC_Z<<<numBlocks, threadsPerBlock>>>(list, Map, Psi, Vout, count);
    CHECK_ERROR("ScaLBL_Poisson_D3Q7_BC_Z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<<numBlocks, threadsPerBlock>>>(list, dist, Cin, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<<numBlocks, threadsPerBlock>>>(list, dist, Cout, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<<numBlocks, threadsPerBlock>>>(d_neighborList, list, dist, Cin, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<<numBlocks, threadsPerBlock>>>(d_neighborList, list, dist, Cout, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z");
}
|
||||
//------------Diff-----------------
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z<<<numBlocks, threadsPerBlock>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z<<<numBlocks, threadsPerBlock>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z<<<numBlocks, threadsPerBlock>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z<<<numBlocks, threadsPerBlock>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z");
}
|
||||
//----------DiffAdvc-------------
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z<<<numBlocks, threadsPerBlock>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z<<<numBlocks, threadsPerBlock>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z<<<numBlocks, threadsPerBlock>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z<<<numBlocks, threadsPerBlock>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z");
}
|
||||
//----------DiffAdvcElec-------------
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z<<<numBlocks, threadsPerBlock>>>(list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z<<<numBlocks, threadsPerBlock>>>(list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z<<<numBlocks, threadsPerBlock>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z");
}
|
||||
|
||||
/**
 * Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z.
 * Uses 512 threads per block and count/512 + 1 blocks, forwards all arguments
 * unchanged, then invokes CHECK_ERROR with this function's name.
 */
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np){
    const int threadsPerBlock = 512;
    const int numBlocks = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z<<<numBlocks, threadsPerBlock>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z");
}
|
||||
//-------------------------------
|
||||
@@ -2726,10 +2726,10 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined(int *Map, double *
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi,
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField_alt(int *neighborList, int *Map, double *hq, double *Den, double *Phi,
|
||||
double rhoA, double rhoB, int start, int finish, int Np){
|
||||
|
||||
int idx,nread;
|
||||
int n,idx,nread;
|
||||
double fq,phi;
|
||||
|
||||
// for (int n=start; n<finish; n++){
|
||||
@@ -2787,7 +2787,7 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList,
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq, double *Den, double *Phi,
|
||||
double rhoA, double rhoB, int start, int finish, int Np){
|
||||
int idx;
|
||||
int n,idx;
|
||||
double fq,phi;
|
||||
// for (int n=start; n<finish; n++){
|
||||
int S = Np/NBLOCKS/NTHREADS + 1;
|
||||
@@ -2833,7 +2833,6 @@ __global__ void dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double
|
||||
idx = Map[n];
|
||||
Phi[idx] = phi;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double *Pressure,
|
||||
@@ -3396,7 +3395,7 @@ extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, doubl
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel,
|
||||
double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np)
|
||||
{
|
||||
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField, hipFuncCachePreferL1);
|
||||
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField, hipFuncCachePreferL1);
|
||||
dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField<<<NBLOCKS,NTHREADS >>>(neighborList, Map, hq, Den, Phi, ColorGrad, Vel,
|
||||
rhoA, rhoB, tauM, W, start, finish, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
@@ -3406,9 +3405,9 @@ extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel,
|
||||
double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np){
|
||||
|
||||
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField, hipFuncCachePreferL1);
|
||||
double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np)
|
||||
{
|
||||
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField, hipFuncCachePreferL1);
|
||||
dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField<<<NBLOCKS,NTHREADS >>>( Map, hq, Den, Phi, ColorGrad, Vel, rhoA, rhoB, tauM, W, start, finish, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
@@ -3419,7 +3418,7 @@ extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( int *Map, double *hq, dou
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_ComputePhaseField(int *Map, double *hq, double *Den, double *Phi, double rhoA, double rhoB, int start, int finish, int Np){
|
||||
|
||||
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_ComputePhaseField, hipFuncCachePreferL1);
|
||||
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_ComputePhaseField, hipFuncCachePreferL1);
|
||||
dvc_ScaLBL_D3Q7_ComputePhaseField<<<NBLOCKS,NTHREADS >>>( Map, hq, Den, Phi, rhoA, rhoB, start, finish, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
@@ -3432,7 +3431,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, dou
|
||||
double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz,
|
||||
int strideY, int strideZ, int start, int finish, int Np){
|
||||
|
||||
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel, hipFuncCachePreferL1);
|
||||
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel, hipFuncCachePreferL1);
|
||||
dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel<<<NBLOCKS,NTHREADS >>>(neighborList, Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad,
|
||||
rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
@@ -3445,7 +3444,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double
|
||||
double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz,
|
||||
int strideY, int strideZ, int start, int finish, int Np){
|
||||
|
||||
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel, hipFuncCachePreferL1);
|
||||
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel, hipFuncCachePreferL1);
|
||||
dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel<<<NBLOCKS,NTHREADS >>>(Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad,
|
||||
rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
@@ -3458,7 +3457,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad,
|
||||
double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz,
|
||||
int strideY, int strideZ, int start, int finish, int Np){
|
||||
cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined, cudaFuncCachePreferL1);
|
||||
//hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined, hipFuncCachePreferL1);
|
||||
dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined<<<NBLOCKS,NTHREADS >>>(Map, dist, Den, hq, Phi, mu_phi, Vel, Pressure, ColorGrad,
|
||||
rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
@@ -3471,7 +3470,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined(int *neighborList, int
|
||||
double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz,
|
||||
int strideY, int strideZ, int start, int finish, int Np){
|
||||
|
||||
cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined, cudaFuncCachePreferL1);
|
||||
//hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined, hipFuncCachePreferL1);
|
||||
dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined<<<NBLOCKS,NTHREADS >>>(neighborList, Map, dist, hq, Den, Phi, mu_phi, Vel, Pressure, ColorGrad,
|
||||
rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
@@ -3482,7 +3481,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined(int *neighborList, int
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi,
|
||||
double rhoA, double rhoB, int start, int finish, int Np){
|
||||
cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField, cudaFuncCachePreferL1);
|
||||
//hipFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField, hipFuncCachePreferL1);
|
||||
dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField<<<NBLOCKS,NTHREADS >>>( neighborList, Map, hq, Den, Phi, rhoA, rhoB, start, finish, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
@@ -3492,7 +3491,7 @@ extern "C" void ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq, double *Den, double *Phi, double rhoA, double rhoB, int start, int finish, int Np){
|
||||
|
||||
cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField, cudaFuncCachePreferL1);
|
||||
//hipFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField, hipFuncCachePreferL1);
|
||||
dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField<<<NBLOCKS,NTHREADS >>>( Map, hq, Den, Phi, rhoA, rhoB, start, finish, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
@@ -3503,7 +3502,7 @@ extern "C" void ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq,
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double *Pressure,
|
||||
double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){
|
||||
|
||||
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK, hipFuncCachePreferL1);
|
||||
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK, hipFuncCachePreferL1);
|
||||
dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK<<<NBLOCKS,NTHREADS >>>(neighborList, dist, Vel, Pressure,
|
||||
tau, rho0, Fx, Fy, Fz, start, finish, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
@@ -3513,9 +3512,9 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborLis
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure,
|
||||
double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){
|
||||
|
||||
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK, hipFuncCachePreferL1);
|
||||
double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np)
|
||||
{
|
||||
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK, hipFuncCachePreferL1);
|
||||
dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK<<<NBLOCKS,NTHREADS >>>(dist, Vel, Pressure,
|
||||
tau, rho0, Fx, Fy, Fz, start, finish, Np);
|
||||
hipError_t err = hipGetLastError();
|
||||
@@ -1,5 +1,6 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
@@ -1609,7 +1610,9 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor_CP(int *neighborList, int
|
||||
Fcpy = ny;
|
||||
Fcpz = nz;
|
||||
double Fcp_mag=sqrt(Fcpx*Fcpx+Fcpy*Fcpy+Fcpz*Fcpz);
|
||||
if (Fcp_mag==0.0); Fcpx=Fcpy=Fcpz=0.0;
|
||||
if (Fcp_mag==0.0) {
|
||||
Fcpx=Fcpy=Fcpz=0.0;
|
||||
}
|
||||
//NOTE for open node (porosity=1.0),Fcp=0.0
|
||||
Fcpx *= alpha*W*(1.0-porosity)/sqrt(perm);
|
||||
Fcpy *= alpha*W*(1.0-porosity)/sqrt(perm);
|
||||
@@ -2396,7 +2399,9 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor_CP(int *Map, double *dis
|
||||
Fcpy = ny;
|
||||
Fcpz = nz;
|
||||
double Fcp_mag=sqrt(Fcpx*Fcpx+Fcpy*Fcpy+Fcpz*Fcpz);
|
||||
if (Fcp_mag==0.0); Fcpx=Fcpy=Fcpz=0.0;
|
||||
if (Fcp_mag==0.0) {
|
||||
Fcpx=Fcpy=Fcpz=0.0;
|
||||
}
|
||||
//NOTE for open node (porosity=1.0),Fcp=0.0
|
||||
Fcpx *= alpha*W*(1.0-porosity)/sqrt(perm);
|
||||
Fcpy *= alpha*W*(1.0-porosity)/sqrt(perm);
|
||||
422
hip/Ion.cu
422
hip/Ion.cu
@@ -1,422 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
|
||||
|
||||
/**
 * Sums the seven D3Q7 distributions at each site (odd step) and stores the
 * total in Den.
 *
 * Every thread performs Np/NBLOCKS/NTHREADS + 1 passes; in each pass it derives
 * a site index n (offset by start) and skips it when n >= finish. The q=0 value
 * is read from dist[n]; the q=1..6 values are read from the addresses stored in
 * neighborList[n + k*Np], k = 0..5.
 */
__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<passes; pass++){
        //........Get 1-D index for this thread....................
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        // q=0
        double conc = dist[site];

        // q=1..6, accumulated in the same order as the neighbor-list slots
        for (int k=0; k<6; k++){
            const int addr = neighborList[site + k*Np];
            conc += dist[addr];
        }

        Den[site] = conc;
    }
}
|
||||
|
||||
/**
 * Sums the seven D3Q7 distributions at each site (even step) and stores the
 * total in Den.
 *
 * Every thread performs Np/NBLOCKS/NTHREADS + 1 passes; in each pass it derives
 * a site index n (offset by start) and skips it when n >= finish. Distributions
 * are read directly from dist[n] and dist[k*Np+n] in the slot order
 * k = 2,1,4,3,6,5 (q = 1..6).
 */
__global__ void dvc_ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<passes; pass++){
        //........Get 1-D index for this thread....................
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        // Left-to-right sum over q = 0,1,...,6
        const double conc = dist[site]
                          + dist[2*Np+site]
                          + dist[1*Np+site]
                          + dist[4*Np+site]
                          + dist[3*Np+site]
                          + dist[6*Np+site]
                          + dist[5*Np+site];

        Den[site] = conc;
    }
}
|
||||
|
||||
/**
 * D3Q7 ion transport update for the odd step.
 *
 * Every thread performs Np/NBLOCKS/NTHREADS + 1 passes; in each pass it derives
 * a site index n (offset by start) and skips it when n >= finish. For each site:
 *   - loads the concentration Den[n] and the three components of Velocity and
 *     ElectricField (component c stored at n + c*Np);
 *   - reads f0 from dist[n] and f1..f6 through neighborList[n + k*Np], k = 0..5;
 *   - writes the diffusive, advective and electrical flux components;
 *   - relaxes each distribution with rate rlx toward a linear-in-velocity target
 *     (weight 0.25 for q=0, 0.125 otherwise) and writes f1..f6 back with the
 *     neighbor addresses of each opposite pair swapped (nr1<->nr2, nr3<->nr4,
 *     nr5<->nr6).
 */
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
        double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<passes; pass++){
        //........Get 1-D index for this thread....................
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        // Load data
        const double Ci = Den[site];
        const double Ex = ElectricField[site+0*Np];   // electrical field
        const double Ey = ElectricField[site+1*Np];
        const double Ez = ElectricField[site+2*Np];
        const double ux = Velocity[site+0*Np];
        const double uy = Velocity[site+1*Np];
        const double uz = Velocity[site+2*Np];

        // electrochemical induced velocity
        const double uEPx = zi*Di/Vt*Ex;
        const double uEPy = zi*Di/Vt*Ey;
        const double uEPz = zi*Di/Vt*Ez;

        // Addresses of q=1..6 taken from the neighbor list
        const int nr1 = neighborList[site];
        const int nr2 = neighborList[site+Np];
        const int nr3 = neighborList[site+2*Np];
        const int nr4 = neighborList[site+3*Np];
        const int nr5 = neighborList[site+4*Np];
        const int nr6 = neighborList[site+5*Np];

        // Distributions
        const double f0 = dist[site];
        const double f1 = dist[nr1];
        const double f2 = dist[nr2];
        const double f3 = dist[nr3];
        const double f4 = dist[nr4];
        const double f5 = dist[nr5];
        const double f6 = dist[nr6];

        // compute diffusive flux
        FluxDiffusive[site+0*Np] = (1.0-0.5*rlx)*((f1-f2)-ux*Ci);
        FluxDiffusive[site+1*Np] = (1.0-0.5*rlx)*((f3-f4)-uy*Ci);
        FluxDiffusive[site+2*Np] = (1.0-0.5*rlx)*((f5-f6)-uz*Ci);

        // advective and electrical fluxes
        FluxAdvective[site+0*Np] = ux*Ci;
        FluxAdvective[site+1*Np] = uy*Ci;
        FluxAdvective[site+2*Np] = uz*Ci;
        FluxElectrical[site+0*Np] = uEPx*Ci;
        FluxElectrical[site+1*Np] = uEPy*Ci;
        FluxElectrical[site+2*Np] = uEPz*Ci;

        // Relaxation; only terms linear in (u + uEP) are kept in the target.
        // q = 0
        dist[site] = f0*(1.0-rlx)+rlx*0.25*Ci;
        // q = 1 (written to the q=2 address)
        dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));
        // q = 2 (written to the q=1 address)
        dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));
        // q = 3 (written to the q=4 address)
        dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));
        // q = 4 (written to the q=3 address)
        dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));
        // q = 5 (written to the q=6 address)
        dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));
        // q = 6 (written to the q=5 address)
        dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));
    }
}
|
||||
|
||||
/**
 * D3Q7 ion collision kernel for the "AAeven" step.
 *
 * For every site n in [start, finish) the kernel reads the stored concentration
 * Den[n] and the seven populations (direction pairs are read from swapped slots:
 * f1 from slot 2, f2 from slot 1, ...), writes the diffusive, advective and
 * electrical fluxes, and relaxes each population toward an equilibrium that is
 * linear in the total drift velocity (fluid velocity + field-induced velocity).
 *
 * dist            populations, addressed as q*Np + n (read and written)
 * Den             concentration per site (only read here)
 * FluxDiffusive, FluxAdvective, FluxElectrical
 *                 outputs, three components each, addressed as n + c*Np
 * Velocity, ElectricField
 *                 inputs, three components each, addressed as n + c*Np
 * Di, zi, Vt      combined as zi*Di/Vt to turn the electric field into a velocity
 * rlx             relaxation rate used in f*(1-rlx) + rlx*feq
 * start, finish   half-open range of site indices to process
 * Np              stride between components / populations
 */
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
        double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
    // each thread walks this many sites, one per pass
    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;
    const double retain    = 1.0-rlx;      // weight of the old population
    const double fluxScale = 1.0-0.5*rlx;  // prefactor of the diffusive flux
    const double mobility  = zi*Di/Vt;     // field -> induced velocity

    for (int pass = 0; pass < sitesPerThread; ++pass){
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // ---- load everything before any store ----
        const double Ci = Den[n];
        const double uEPx = mobility*ElectricField[n+0*Np];
        const double uEPy = mobility*ElectricField[n+1*Np];
        const double uEPz = mobility*ElectricField[n+2*Np];
        const double ux = Velocity[n+0*Np];
        const double uy = Velocity[n+1*Np];
        const double uz = Velocity[n+2*Np];

        const double f0 = dist[n];
        const double f1 = dist[2*Np+n];
        const double f2 = dist[1*Np+n];
        const double f3 = dist[4*Np+n];
        const double f4 = dist[3*Np+n];
        const double f5 = dist[6*Np+n];
        const double f6 = dist[5*Np+n];

        // ---- fluxes ----
        FluxDiffusive[n+0*Np] = fluxScale*((f1-f2)-ux*Ci);
        FluxDiffusive[n+1*Np] = fluxScale*((f3-f4)-uy*Ci);
        FluxDiffusive[n+2*Np] = fluxScale*((f5-f6)-uz*Ci);
        FluxAdvective[n+0*Np] = ux*Ci;
        FluxAdvective[n+1*Np] = uy*Ci;
        FluxAdvective[n+2*Np] = uz*Ci;
        FluxElectrical[n+0*Np] = uEPx*Ci;
        FluxElectrical[n+1*Np] = uEPy*Ci;
        FluxElectrical[n+2*Np] = uEPz*Ci;

        // ---- relaxation; only the first-order velocity term is used ----
        const double axisWeight = rlx*0.125*Ci;
        dist[n]      = f0*retain + rlx*0.25*Ci;               // q = 0
        dist[1*Np+n] = f1*retain + axisWeight*(1.0+4.0*(ux+uEPx)); // q = 1
        dist[2*Np+n] = f2*retain + axisWeight*(1.0-4.0*(ux+uEPx)); // q = 2
        dist[3*Np+n] = f3*retain + axisWeight*(1.0+4.0*(uy+uEPy)); // q = 3
        dist[4*Np+n] = f4*retain + axisWeight*(1.0-4.0*(uy+uEPy)); // q = 4
        dist[5*Np+n] = f5*retain + axisWeight*(1.0+4.0*(uz+uEPz)); // q = 5
        dist[6*Np+n] = f6*retain + axisWeight*(1.0-4.0*(uz+uEPz)); // q = 6
    }
}
|
||||
|
||||
/**
 * Fill the D3Q7 populations and the concentration with a uniform value.
 *
 * For every site n < Np: population 0 gets 0.25*DenInit, populations 1..6 get
 * 0.125*DenInit each, and Den[n] is set to DenInit.
 *
 * dist      populations, addressed as q*Np + n (written)
 * Den       concentration per site (written)
 * DenInit   uniform initial concentration
 * Np        number of sites and stride between populations
 */
__global__ void dvc_ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < sitesPerThread; ++pass){
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (n >= Np) continue;

        dist[0*Np+n] = 0.25*DenInit;
        for (int q = 1; q <= 6; ++q){
            dist[q*Np+n] = 0.125*DenInit;
        }
        Den[n] = DenInit;
    }
}
|
||||
|
||||
/**
 * Initialise the D3Q7 populations from a concentration field that is already
 * stored in Den.
 *
 * For every site n < Np: population 0 gets 0.25*Den[n] and populations 1..6 get
 * 0.125*Den[n] each. Den itself is left untouched.
 *
 * dist   populations, addressed as q*Np + n (written)
 * Den    concentration per site (read)
 * Np     number of sites and stride between populations
 */
__global__ void dvc_ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < sitesPerThread; ++pass){
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (n >= Np) continue;

        const double siteDensity = Den[n];
        dist[0*Np+n] = 0.25*siteDensity;
        for (int q = 1; q <= 6; ++q){
            dist[q*Np+n] = 0.125*siteDensity;
        }
    }
}
|
||||
|
||||
/**
 * Accumulate the charge-density contribution of one ion species.
 *
 * For every site n in [start, finish):
 *     ChargeDensity[n] = (ion_component > 0 ? ChargeDensity[n] : 0) + F*IonValence*Den[n + ion_component*Np]
 *
 * The first component (ion_component <= 0) starts a fresh sum. The previous
 * implementation reset the accumulator by multiplying the stored value with
 * (ion_component > 0); that leaves NaN/Inf in place (0*NaN == NaN), so an
 * uninitialised ChargeDensity buffer could poison the result. The stored value
 * is now simply not used for the first component.
 *
 * Den             concentrations, species stored back to back with stride Np (read)
 * ChargeDensity   running sum per site (read for ion_component > 0, written)
 * IonValence      valence of this species
 * ion_component   index of the species inside Den
 * start, finish   half-open range of site indices to process
 * Np              stride between species in Den
 */
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){

    const double F = 96485.0;//Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant

    const int S = Np/NBLOCKS/NTHREADS + 1;
    for (int s=0; s<S; s++){
        //........Get 1-D index for this thread....................
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n<finish) {
            const double Ci = Den[n+ion_component*Np]; // concentration of this species
            // previous partial sum, or an exact zero when this species opens the sum
            const double CD = (ion_component > 0) ? ChargeDensity[n] : 0.0;
            const double CD_tmp = F*IonValence*Ci;
            ChargeDensity[n] = CD + CD_tmp;
        }
    }
}
|
||||
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAodd_IonConcentration<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_IonConcentration: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAeven_IonConcentration<<<NBLOCKS,NTHREADS >>>(dist,Den,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_IonConcentration: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
|
||||
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAodd_Ion<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,FluxDiffusive,FluxAdvective,FluxElectrical,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Ion: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
|
||||
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAeven_Ion<<<NBLOCKS,NTHREADS >>>(dist,Den,FluxDiffusive,FluxAdvective,FluxElectrical,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Ion: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Ion_Init<<<NBLOCKS,NTHREADS >>>(dist,Den,DenInit,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_Ion_Init: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Ion_Init_FromFile<<<NBLOCKS,NTHREADS >>>(dist,Den,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_Ion_Init_FromFile: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<<NBLOCKS,NTHREADS >>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_Ion_ChargeDensity: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
731
hip/Ion.hip
Normal file
731
hip/Ion.hip
Normal file
@@ -0,0 +1,731 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
|
||||
/**
 * Unpack one distribution from a receive buffer into dist.
 *
 * For each of the linkCount links the (possibly reordered) buffer position is
 * looked up in links, the destination site is looked up in list, and the
 * buffered value is copied to dist[q*N + site]. Entries whose destination site
 * is negative are skipped.
 *
 * q           distribution index; selects the q*N slab of dist
 * list        destination site per buffer position, offset by start
 * links       buffer position per link, offset by start
 * start       offset applied to list, links and recvbuf
 * linkCount   number of links to process
 * recvbuf     received values, offset by start
 * dist        destination array (written)
 * N           stride between distributions in dist
 */
extern "C" void Membrane_D3Q19_Unpack(int q, int *list, int *links, int start, int linkCount,
        double *recvbuf, double *dist, int N) {
    const int *siteOf   = list + start;
    const int *slotOf   = links + start;
    const double *value = recvbuf + start;
    double *target      = dist + q * N;

    for (int k = 0; k < linkCount; ++k){
        const int slot = slotOf[k];
        const int site = siteOf[slot]; // index as seen by the sending side
        if (site < 0) continue;        // negative entries mark links to ignore
        target[site] = value[slot];
    }
}
|
||||
|
||||
/**
 * Unpack one distribution from a receive buffer into dist, scaling every value
 * by a per-entry coefficient.
 *
 * For each link in [offset, linkCount) a buffer position is looked up, then the
 * destination site and coefficient for that position, and
 * coef*recvbuf is stored to dist[q*N + site]. Entries whose destination site is
 * negative are skipped.
 *
 * NOTE(review): the buffer position is read from `list`, and the `links`
 * argument is never used. The sibling Membrane_D3Q19_Unpack reads it from
 * `links`; this may be a copy-paste slip - confirm against the callers before
 * changing it.
 *
 * q           distribution index; selects the q*N slab of dist
 * list        used both for the buffer position and the destination site
 * links       unused
 * coef        scaling coefficient per buffer position, offset by start
 * start       offset applied to list, coef and recvbuf
 * offset      first link to process
 * linkCount   one past the last link to process
 * recvbuf     received values, offset by start
 * dist        destination array (written)
 * N           stride between distributions in dist
 */
extern "C" void Membrane_D3Q19_Transport(int q, int *list, int *links, double *coef, int start, int offset,
        int linkCount, double *recvbuf, double *dist, int N){
    const int *table     = list + start;
    const double *weight = coef + start;
    const double *value  = recvbuf + start;
    double *target       = dist + q * N;

    for (int k = offset; k < linkCount; ++k){
        const int slot = table[k];
        const int site = table[slot]; // index as seen by the sending side
        const double alpha = weight[slot];
        if (site < 0) continue;       // negative entries mark links to ignore
        target[site] = alpha*value[slot];
    }
}
|
||||
|
||||
/**
 * Choose the pair of mass-transfer coefficients for every interior membrane link.
 *
 * Each link stores two entries in membrane (2*link and 2*link+1). Both are
 * reduced modulo Np to a site index, mapped through Map, and the difference of
 * Psi at the two mapped locations is compared with Threshold. When the
 * difference is greater than Threshold the "threshold" coefficient pair is
 * stored, otherwise the regular pair.
 *
 * membrane   two entries per link (read)
 * Map        site index -> index into Psi (read)
 * Distance   not used by this kernel
 * Psi        potential (read)
 * coef       output, two coefficients per link: [2*link] and [2*link+1]
 * Threshold  potential difference above which the threshold pair applies
 * MassFractionIn / MassFractionOut                     regular pair
 * ThresholdMassFractionIn / ThresholdMassFractionOut   threshold pair
 * memLinks   number of links
 * Nx, Ny, Nz not used by this kernel
 * Np         modulus that converts a membrane entry into a site index
 */
__global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
        double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
        int memLinks, int Nx, int Ny, int Nz, int Np){

    const int linksPerThread = memLinks/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < linksPerThread; ++pass){
        const int link = linksPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (link >= memLinks) continue;

        // the two ends of the link, as site indices
        const int siteA = membrane[2*link] % Np;
        const int siteB = membrane[2*link+1] % Np;

        // potential difference across the link
        const double potentialDrop = Psi[Map[siteA]] - Psi[Map[siteB]];
        const bool overThreshold = potentialDrop > Threshold;

        coef[2*link]   = overThreshold ? ThresholdMassFractionIn  : MassFractionIn;
        coef[2*link+1] = overThreshold ? ThresholdMassFractionOut : MassFractionOut;
    }
}
|
||||
|
||||
/**
 * Choose the pair of mass-transfer coefficients for the membrane links in
 * [nlinks, count) of a receive list.
 *
 * For each such link the receive-list position is taken from d3q7_linkList, the
 * site from d3q7_recvlist, and the site is mapped through Map to a location in
 * an Nx*Ny*Nz array. The neighbouring location is obtained by stepping
 * (-Cqx, -Cqy, -Cqz) from it. Distance at the first location and the difference
 * of Psi between the two locations select which coefficients are stored in
 * coef[2*(link-nlinks)] and coef[2*(link-nlinks)+1].
 *
 * NOTE(review): d3q7_linkList is indexed with `link` only, while the unpack
 * kernel indexes it with `start+link`; the original author also left a remark
 * questioning whether `start` belongs here. Confirm against the list layout.
 * NOTE(review): the site read from d3q7_recvlist is used without a check for
 * negative values - verify that membrane links never carry one.
 *
 * Cqx, Cqy, Cqz   direction of the distribution handled by this call
 * Map             site -> location in the Nx*Ny*Nz arrays (read)
 * Distance, Psi   fields addressed by location (read)
 * Threshold       potential-difference threshold
 * MassFractionIn/Out, ThresholdMassFractionIn/Out   candidate coefficients
 * d3q7_recvlist   site per receive-list position, offset by start (read)
 * d3q7_linkList   receive-list position per link (read)
 * coef            output, two entries per membrane link
 * start           offset into d3q7_recvlist
 * nlinks, count   membrane links are those with nlinks <= link < count
 * N, Nz           not used by this kernel
 * Nx, Ny          array extents used to split a location into (i, j, k)
 */
__global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
        const int Cqx, const int Cqy, int const Cqz,
        int *Map, double *Distance, double *Psi, double Threshold,
        double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
        int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
        const int N, const int Nx, const int Ny, const int Nz) {

    const int linksPerThread = (count-nlinks)/NBLOCKS/NTHREADS + 1;
    const int planeSize = Nx*Ny;

    for (int pass = 0; pass < linksPerThread; ++pass){
        const int link = linksPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + nlinks;
        if (link >= count) continue;

        // receive-list position, then site, then location in the full arrays
        const int slot = d3q7_linkList[link];
        const int site = d3q7_recvlist[start+slot];
        const int here = Map[site];

        const double distanceHere = Distance[here];
        const double psiHere = Psi[here];

        // split the location into 3-D indices and step against the direction
        const int k0 = here/planeSize;
        const int inPlane = here - planeSize*k0;
        const int j0 = inPlane/Nx;
        const int i0 = inPlane - Nx*j0;
        const int there = (k0 - Cqz)*Nx*Ny + (j0 - Cqy)*Nx + (i0 - Cqx);

        const double membranePotential = psiHere - Psi[there];

        // start from the regular pair, then apply the overrides
        double aq = MassFractionIn;
        double ap = MassFractionOut;
        if (distanceHere > 0.0){
            // positive Distance at the local end
            if (membranePotential < Threshold*(-1.0)){
                aq = MassFractionOut;
                ap = MassFractionIn;
            }
            else {
                aq = ThresholdMassFractionOut;
                ap = ThresholdMassFractionIn;
            }
        }
        else if (membranePotential > Threshold){
            aq = ThresholdMassFractionIn;
            ap = ThresholdMassFractionOut;
        }

        // coefficients are stored relative to the first membrane link
        const int out = 2*(link-nlinks);
        coef[out]   = aq;
        coef[out+1] = ap;
    }
}
|
||||
|
||||
/**
 * Unpack the regular (non-membrane) links of one distribution from a receive
 * buffer into dist.
 *
 * Threads are spread over `count` links, but only links below nlinks are
 * processed: the receive-list position comes from d3q7_linkList, the
 * destination site from d3q7_recvlist, and the buffered value is copied to
 * dist[q*N + site] unless the site is negative. Links in [nlinks, count) are
 * left untouched by this kernel.
 *
 * q               distribution index; selects the q*N slab of dist
 * d3q7_recvlist   destination site per receive-list position, offset by start
 * d3q7_linkList   receive-list position per link, offset by start
 * start           offset applied to both lists and to recvbuf
 * nlinks          number of regular links
 * count           total number of links (sets the per-thread workload only)
 * recvbuf         received values, offset by start
 * dist            destination array (written)
 * N               stride between distributions in dist
 * coef            not used by this kernel
 */
__global__ void dvc_ScaLBL_D3Q7_Membrane_Unpack(int q,
        int *d3q7_recvlist, int *d3q7_linkList, int start, int nlinks, int count,
        double *recvbuf, double *dist, int N, double *coef) {

    const int linksPerThread = count/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < linksPerThread; ++pass){
        const int link = linksPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (link >= nlinks) continue;

        // account for the reordering of links, then find the destination site
        const int slot = d3q7_linkList[start+link];
        const int site = d3q7_recvlist[start+slot];
        if (site >= 0){
            dist[q * N + site] = recvbuf[start + slot];
        }
    }
}
|
||||
|
||||
/**
 * Exchange mass across every membrane link using the stored coefficients.
 *
 * Each link names two population slots in dist (membrane[2*link] and
 * membrane[2*link+1]). With coefficients aq = coef[2*link], ap = coef[2*link+1]
 * and old values fq, fp the new values are
 *     fq' = (1-aq)*fq + ap*fp
 *     fp' = (1-ap)*fp + aq*fq
 * The change of each population is also added to Den at the owning site
 * (slot index modulo Np).
 *
 * NOTE(review): Den is updated with a plain read-modify-write. If two links
 * can share a site, concurrent threads would race on Den - confirm whether
 * that can happen for the link lists passed in.
 *
 * membrane   two population slots per link (read)
 * coef       two coefficients per link (read)
 * dist       populations (read and written)
 * Den        concentration per site (read and written)
 * memLinks   number of links
 * Np         modulus that converts a population slot into a site index
 */
__global__ void dvc_ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef,
        double *dist, double *Den, int memLinks, int Np){

    const int linksPerThread = memLinks/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < linksPerThread; ++pass){
        const int link = linksPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (link >= memLinks) continue;

        const double aq = coef[2*link];
        const double ap = coef[2*link+1];
        const int slotQ = membrane[2*link];
        const int slotP = membrane[2*link+1];
        const int siteQ = slotQ%Np;
        const int siteP = slotP%Np;

        const double fq = dist[slotQ];
        const double fp = dist[slotP];
        const double fqNew = (1-aq)*fq+ap*fp;
        const double fpNew = (1-ap)*fp+aq*fq;

        // read both concentrations before writing either one
        double Cq = Den[siteQ];
        double Cp = Den[siteP];
        Cq += fqNew - fq;
        Cp += fpNew - fp;
        Den[siteQ] = Cq;
        Den[siteP] = Cp;

        dist[slotQ] = fqNew;
        dist[slotP] = fpNew;
    }
}
|
||||
|
||||
/**
 * Compute the ion concentration at every site in [start, finish) as the sum of
 * its seven populations ("AAodd" addressing).
 *
 * Population 0 is read from dist[n]; the other six are read from the slots
 * given by neighborList[n + q*Np] for q = 0..5, added in that order.
 *
 * neighborList    six slot indices per site, stored with stride Np (read)
 * dist            populations (read)
 * Den             concentration per site (written)
 * start, finish   half-open range of site indices to process
 * Np              stride of neighborList
 */
__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){

    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < sitesPerThread; ++pass){
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        double total = dist[n];                    // q = 0
        for (int dir = 0; dir < 6; ++dir){         // q = 1..6
            const int slot = neighborList[n + dir*Np];
            total += dist[slot];
        }
        Den[n] = total;
    }
}
|
||||
|
||||
/**
 * Compute the ion concentration at every site in [start, finish) as the sum of
 * its seven populations ("AAeven" addressing).
 *
 * Population 0 is read from dist[n]; the remaining populations are read from
 * slots 2, 1, 4, 3, 6, 5 (each opposite pair swapped) and added in that order.
 *
 * dist            populations, addressed as slot*Np + n (read)
 * Den             concentration per site (written)
 * start, finish   half-open range of site indices to process
 * Np              stride between populations
 */
__global__ void dvc_ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){

    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < sitesPerThread; ++pass){
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        double total = dist[n];                     // q = 0
        for (int axis = 0; axis < 3; ++axis){
            total += dist[(2*axis+2)*Np + n];       // q = 1, 3, 5
            total += dist[(2*axis+1)*Np + n];       // q = 2, 4, 6
        }
        Den[n] = total;
    }
}
|
||||
|
||||
/**
 * D3Q7 ion collision kernel for the "AAodd" step.
 *
 * For every site n in [start, finish) the kernel gathers the seven populations
 * (population 0 from dist[n], the others through neighborList), sums them into
 * the concentration, writes the concentration and the diffusive, advective and
 * electrical fluxes, and relaxes each population toward its equilibrium. Each
 * relaxed population is written to the slot its opposite partner was read
 * from (f1 -> nr2, f2 -> nr1, ...).
 *
 * The directional part of the equilibrium uses X/sqrt(1+X*X) with
 * X = 4*(u + uEP), which stays strictly between -1 and 1, so the equilibrium
 * weights (1 +/- factor) cannot become negative.
 *
 * neighborList    six slot indices per site, stored with stride Np (read)
 * dist            populations (read and written)
 * Den             concentration per site (written)
 * FluxDiffusive, FluxAdvective, FluxElectrical
 *                 outputs, three components each, addressed as n + c*Np
 * Velocity, ElectricField
 *                 inputs, three components each, addressed as n + c*Np
 * Di, zi, Vt      combined as zi*Di/Vt to turn the electric field into a velocity
 * rlx             relaxation rate used in f*(1-rlx) + rlx*feq
 * start, finish   half-open range of site indices to process
 * Np              stride between components
 */
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
        double Di, int zi, double rlx, double Vt, int start, int finish, int Np){

    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;
    const double retain    = 1.0 - rlx;        // weight of the old population
    const double fluxScale = 1.0 - 0.5 * rlx;  // prefactor of the diffusive flux
    const double mobility  = zi * Di / Vt;     // field -> induced velocity

    for (int pass = 0; pass < sitesPerThread; ++pass){
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // ---- load everything before any store ----
        const double uEPx = mobility * ElectricField[n + 0 * Np];
        const double uEPy = mobility * ElectricField[n + 1 * Np];
        const double uEPz = mobility * ElectricField[n + 2 * Np];
        const double ux = Velocity[n + 0 * Np];
        const double uy = Velocity[n + 1 * Np];
        const double uz = Velocity[n + 2 * Np];

        const double f0 = dist[n];
        const int nr1 = neighborList[n];
        const int nr2 = neighborList[n + Np];
        const int nr3 = neighborList[n + 2 * Np];
        const int nr4 = neighborList[n + 3 * Np];
        const int nr5 = neighborList[n + 4 * Np];
        const int nr6 = neighborList[n + 5 * Np];
        const double f1 = dist[nr1];
        const double f2 = dist[nr2];
        const double f3 = dist[nr3];
        const double f4 = dist[nr4];
        const double f5 = dist[nr5];
        const double f6 = dist[nr6];

        // ---- concentration and fluxes ----
        const double Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
        FluxDiffusive[n + 0 * Np] = fluxScale * ((f1 - f2) - ux * Ci);
        FluxDiffusive[n + 1 * Np] = fluxScale * ((f3 - f4) - uy * Ci);
        FluxDiffusive[n + 2 * Np] = fluxScale * ((f5 - f6) - uz * Ci);
        FluxAdvective[n + 0 * Np] = ux * Ci;
        FluxAdvective[n + 1 * Np] = uy * Ci;
        FluxAdvective[n + 2 * Np] = uz * Ci;
        FluxElectrical[n + 0 * Np] = uEPx * Ci;
        FluxElectrical[n + 1 * Np] = uEPy * Ci;
        FluxElectrical[n + 2 * Np] = uEPz * Ci;
        Den[n] = Ci;

        // ---- bounded directional factors ----
        const double X = 4.0 * (ux + uEPx);
        const double Y = 4.0 * (uy + uEPy);
        const double Z = 4.0 * (uz + uEPz);
        const double factor_x = X / sqrt(1 + X*X);
        const double factor_y = Y / sqrt(1 + Y*Y);
        const double factor_z = Z / sqrt(1 + Z*Z);

        // ---- relaxation, written to the partner's slot ----
        const double axisWeight = rlx * 0.125 * Ci;
        dist[n]   = f0 * retain + rlx * 0.25 * Ci;             // q = 0
        dist[nr2] = f1 * retain + axisWeight * (1.0 + factor_x); // q = 1
        dist[nr1] = f2 * retain + axisWeight * (1.0 - factor_x); // q = 2
        dist[nr4] = f3 * retain + axisWeight * (1.0 + factor_y); // q = 3
        dist[nr3] = f4 * retain + axisWeight * (1.0 - factor_y); // q = 4
        dist[nr6] = f5 * retain + axisWeight * (1.0 + factor_z); // q = 5
        dist[nr5] = f6 * retain + axisWeight * (1.0 - factor_z); // q = 6
    }
}
|
||||
|
||||
/**
 * D3Q7 ion collision kernel for the "AAeven" step.
 *
 * For every site n in [start, finish) the kernel reads the seven populations
 * (direction pairs are read from swapped slots: f1 from slot 2, f2 from slot 1,
 * ...), sums them into the concentration, writes the concentration and the
 * diffusive, advective and electrical fluxes, and relaxes each population
 * toward its equilibrium, storing population q in slot q.
 *
 * The directional part of the equilibrium uses X/sqrt(1+X*X) with
 * X = 4*(u + uEP), which stays strictly between -1 and 1, so the equilibrium
 * weights (1 +/- factor) cannot become negative.
 *
 * dist            populations, addressed as slot*Np + n (read and written)
 * Den             concentration per site (written)
 * FluxDiffusive, FluxAdvective, FluxElectrical
 *                 outputs, three components each, addressed as n + c*Np
 * Velocity, ElectricField
 *                 inputs, three components each, addressed as n + c*Np
 * Di, zi, Vt      combined as zi*Di/Vt to turn the electric field into a velocity
 * rlx             relaxation rate used in f*(1-rlx) + rlx*feq
 * start, finish   half-open range of site indices to process
 * Np              stride between components / populations
 */
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
        double Di, int zi, double rlx, double Vt, int start, int finish, int Np){

    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;
    const double retain    = 1.0 - rlx;        // weight of the old population
    const double fluxScale = 1.0 - 0.5 * rlx;  // prefactor of the diffusive flux
    const double mobility  = zi * Di / Vt;     // field -> induced velocity

    for (int pass = 0; pass < sitesPerThread; ++pass){
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // ---- load everything before any store ----
        const double uEPx = mobility * ElectricField[n + 0 * Np];
        const double uEPy = mobility * ElectricField[n + 1 * Np];
        const double uEPz = mobility * ElectricField[n + 2 * Np];
        const double ux = Velocity[n + 0 * Np];
        const double uy = Velocity[n + 1 * Np];
        const double uz = Velocity[n + 2 * Np];

        const double f0 = dist[n];
        const double f1 = dist[2 * Np + n];
        const double f2 = dist[1 * Np + n];
        const double f3 = dist[4 * Np + n];
        const double f4 = dist[3 * Np + n];
        const double f5 = dist[6 * Np + n];
        const double f6 = dist[5 * Np + n];

        // ---- concentration and fluxes ----
        const double Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
        FluxDiffusive[n + 0 * Np] = fluxScale * ((f1 - f2) - ux * Ci);
        FluxDiffusive[n + 1 * Np] = fluxScale * ((f3 - f4) - uy * Ci);
        FluxDiffusive[n + 2 * Np] = fluxScale * ((f5 - f6) - uz * Ci);
        FluxAdvective[n + 0 * Np] = ux * Ci;
        FluxAdvective[n + 1 * Np] = uy * Ci;
        FluxAdvective[n + 2 * Np] = uz * Ci;
        FluxElectrical[n + 0 * Np] = uEPx * Ci;
        FluxElectrical[n + 1 * Np] = uEPy * Ci;
        FluxElectrical[n + 2 * Np] = uEPz * Ci;
        Den[n] = Ci;

        // ---- bounded directional factors ----
        const double X = 4.0 * (ux + uEPx);
        const double Y = 4.0 * (uy + uEPy);
        const double Z = 4.0 * (uz + uEPz);
        const double factor_x = X / sqrt(1 + X*X);
        const double factor_y = Y / sqrt(1 + Y*Y);
        const double factor_z = Z / sqrt(1 + Z*Z);

        // ---- relaxation, population q stored in slot q ----
        const double axisWeight = rlx * 0.125 * Ci;
        dist[n]          = f0 * retain + rlx * 0.25 * Ci;             // q = 0
        dist[1 * Np + n] = f1 * retain + axisWeight * (1.0 + factor_x); // q = 1
        dist[2 * Np + n] = f2 * retain + axisWeight * (1.0 - factor_x); // q = 2
        dist[3 * Np + n] = f3 * retain + axisWeight * (1.0 + factor_y); // q = 3
        dist[4 * Np + n] = f4 * retain + axisWeight * (1.0 - factor_y); // q = 4
        dist[5 * Np + n] = f5 * retain + axisWeight * (1.0 + factor_z); // q = 5
        dist[6 * Np + n] = f6 * retain + axisWeight * (1.0 - factor_z); // q = 6
    }
}
|
||||
|
||||
/**
 * Fill the D3Q7 populations and the concentration with a uniform value.
 *
 * For every site n < Np: population 0 gets 0.25*DenInit, populations 1..6 get
 * 0.125*DenInit each, and Den[n] is set to DenInit.
 *
 * dist      populations, addressed as q*Np + n (written)
 * Den       concentration per site (written)
 * DenInit   uniform initial concentration
 * Np        number of sites and stride between populations
 */
__global__ void dvc_ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < sitesPerThread; ++pass){
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (n >= Np) continue;

        dist[0*Np+n] = 0.25*DenInit;
        for (int q = 1; q <= 6; ++q){
            dist[q*Np+n] = 0.125*DenInit;
        }
        Den[n] = DenInit;
    }
}
|
||||
|
||||
/**
 * Initialise the D3Q7 populations from a concentration field that is already
 * stored in Den.
 *
 * For every site n < Np: population 0 gets 0.25*Den[n] and populations 1..6 get
 * 0.125*Den[n] each. Den itself is left untouched.
 *
 * dist   populations, addressed as q*Np + n (written)
 * Den    concentration per site (read)
 * Np     number of sites and stride between populations
 */
__global__ void dvc_ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < sitesPerThread; ++pass){
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (n >= Np) continue;

        const double siteDensity = Den[n];
        dist[0*Np+n] = 0.25*siteDensity;
        for (int q = 1; q <= 6; ++q){
            dist[q*Np+n] = 0.125*siteDensity;
        }
    }
}
|
||||
|
||||
/**
 * Accumulate the charge-density contribution of one ion species.
 *
 * For every site n in [start, finish):
 *     ChargeDensity[n] = previous + F*IonValence*Den[n + ion_component*Np]
 * where `previous` is the stored ChargeDensity[n], except for
 * ion_component == 0, which starts the sum from zero.
 *
 * Den             concentrations, species stored back to back with stride Np (read)
 * ChargeDensity   running sum per site (written)
 * IonValence      valence of this species
 * ion_component   index of the species inside Den
 * start, finish   half-open range of site indices to process
 * Np              stride between species in Den
 */
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, double IonValence, int ion_component, int start, int finish, int Np){

    const double faraday = 96485.0; // Faraday's constant [C/mol]; F = e*Na with Na the Avogadro constant
    const bool firstSpecies = (ion_component == 0);

    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < sitesPerThread; ++pass){
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        const double concentration = Den[n+ion_component*Np];
        // the first species opens the sum, later ones add to what is stored
        const double previous = firstSpecies ? 0.0 : ChargeDensity[n];
        const double contribution = faraday*IonValence*concentration;
        ChargeDensity[n] = previous + contribution;
    }
}
|
||||
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAodd_IonConcentration<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_IonConcentration: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAeven_IonConcentration<<<NBLOCKS,NTHREADS >>>(dist,Den,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_IonConcentration: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
|
||||
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAodd_Ion<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,FluxDiffusive,FluxAdvective,FluxElectrical,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Ion: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
|
||||
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAeven_Ion<<<NBLOCKS,NTHREADS >>>(dist,Den,FluxDiffusive,FluxAdvective,FluxElectrical,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Ion: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Ion_Init<<<NBLOCKS,NTHREADS >>>(dist,Den,DenInit,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_Ion_Init: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Ion_Init_FromFile<<<NBLOCKS,NTHREADS >>>(dist,Den,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_Ion_Init_FromFile: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, double IonValence, int ion_component, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<<NBLOCKS,NTHREADS >>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_Ion_ChargeDensity: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
|
||||
double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
|
||||
int memLinks, int Nx, int Ny, int Nz, int Np){
|
||||
|
||||
dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef<<<NBLOCKS,NTHREADS >>>(membrane, Map, Distance, Psi, coef,
|
||||
Threshold, MassFractionIn, MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut,
|
||||
memLinks, Nx, Ny, Nz, Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
|
||||
const int Cqx, const int Cqy, int const Cqz,
|
||||
int *Map, double *Distance, double *Psi, double Threshold,
|
||||
double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
|
||||
int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
|
||||
const int N, const int Nx, const int Ny, const int Nz) {
|
||||
|
||||
dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo<<<NBLOCKS,NTHREADS >>>(
|
||||
Cqx, Cqy, Cqz, Map, Distance, Psi, Threshold,
|
||||
MassFractionIn, MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut,
|
||||
d3q7_recvlist, d3q7_linkList, coef, start, nlinks, count, N, Nx, Ny, Nz);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q,
|
||||
int *d3q7_recvlist, int *d3q7_linkList, int start, int nlinks, int count,
|
||||
double *recvbuf, double *dist, int N, double *coef) {
|
||||
|
||||
int GRID = count / 1024 + 1;
|
||||
|
||||
dvc_ScaLBL_D3Q7_Membrane_Unpack<<<GRID,1024 >>>(q, d3q7_recvlist, d3q7_linkList, start, nlinks, count,
|
||||
recvbuf, dist, N, coef) ;
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_Unpack: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef,
|
||||
double *dist, double *Den, int memLinks, int Np){
|
||||
|
||||
dvc_ScaLBL_D3Q7_Membrane_IonTransport<<<NBLOCKS,NTHREADS >>>(membrane, coef, dist, Den, memLinks, Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_IonTransport: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
@@ -19,8 +19,8 @@
|
||||
//*************************************************************************
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 560
|
||||
#define NTHREADS 128
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
|
||||
__global__ void INITIALIZE(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
|
||||
{
|
||||
@@ -122,8 +122,7 @@ __global__ void Compute_VELOCITY(char *ID, double *disteven, double *distodd, do
|
||||
|
||||
//*************************************************************************
|
||||
__global__ void
|
||||
__launch_bounds__(512,2)
|
||||
D3Q19_MRT(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz,
|
||||
__launch_bounds__(256,4) D3Q19_MRT(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz,
|
||||
double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz)
|
||||
{
|
||||
|
||||
@@ -65,13 +65,11 @@ __global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gr
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz)
|
||||
{
|
||||
hipProfilerStart();
|
||||
dvc_ScaLBL_D3Q19_MixedGradient<<<NBLOCKS,NTHREADS >>>(Map, Phi, Gradient, start, finish, Np, Nx, Ny, Nz);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q19_MixedGradient: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
hipProfilerStop();
|
||||
}
|
||||
|
||||
332
hip/Poisson.cu
332
hip/Poisson.cu
@@ -1,332 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
|
||||
// Device kernel: electric potential as the sum of the seven D3Q7 distributions,
// with q = 1..6 addressed through neighborList.
//
// For every site n in [start, finish): dist[n] (q = 0) plus dist[] at the
// indices stored in neighborList[n + k*Np], k = 0..5, added in that order.
// The sum is stored in Psi[Map[n]].
//
// Each thread visits up to S = Np/NBLOCKS/NTHREADS + 1 sites.
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
    const int S = Np/NBLOCKS/NTHREADS + 1;

    for (int s = 0; s < S; s++) {
        // 1-D site index handled by this thread on pass s
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // q = 0 lives at the site itself
        double potential = dist[n];

        // q = 1..6: look up the read location, then accumulate
        for (int q = 1; q < 7; q++) {
            const int src = neighborList[n + (q-1)*Np];
            potential += dist[src];
        }

        const int cell = Map[n];
        Psi[cell] = potential;
    }
}
|
||||
|
||||
// Device kernel: electric potential as the sum of the seven D3Q7 distributions,
// read directly from dist.
//
// For every site n in [start, finish) the values dist[n], dist[2*Np+n],
// dist[1*Np+n], dist[4*Np+n], dist[3*Np+n], dist[6*Np+n], dist[5*Np+n] are
// added in exactly that order and the sum is stored in Psi[Map[n]].
//
// Each thread visits up to S = Np/NBLOCKS/NTHREADS + 1 sites.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
    const int S = Np/NBLOCKS/NTHREADS + 1;

    for (int s = 0; s < S; s++) {
        // 1-D site index handled by this thread on pass s
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // q = 0
        double potential = dist[n];

        // Direction pairs (1,2), (3,4), (5,6): within each pair the
        // higher-numbered slot is read first.
        for (int pair = 0; pair < 3; pair++) {
            potential += dist[(2*pair+2)*Np+n];
            potential += dist[(2*pair+1)*Np+n];
        }

        const int cell = Map[n];
        Psi[cell] = potential;
    }
}
|
||||
|
||||
// Device kernel: D3Q7 Poisson update for sites in [start, finish), with
// distributions q = 1..6 addressed through neighborList.
//
// Per site n the kernel
//   1. sets rho_e to 0 when UseSlippingVelBC is true, else Den_charge[n]/epsilon_LB;
//   2. reads psi = Psi[Map[n]] and the seven distributions f0..f6;
//   3. stores (f1-f2, f3-f4, f5-f6)*rlx*4.0 in ElectricField at n, n+Np, n+2*Np;
//   4. overwrites the distributions with f*(1-rlx) + w*(rlx*psi + rho_e), where
//      w = 0.25 for q = 0 and 0.125 otherwise; within each direction pair the
//      result is written to the partner's location (f1 -> nr2, f2 -> nr1, ...).
//
// Each thread visits up to S = Np/NBLOCKS/NTHREADS + 1 sites.
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
    const double rlx = 1.0/tau;
    const int S = Np/NBLOCKS/NTHREADS + 1;

    for (int s = 0; s < S; s++) {
        // 1-D site index handled by this thread on pass s
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // With the Helmholtz-Smoluchowski slipping velocity BC the bulk fluid is
        // treated as electroneutral, so the net space charge density is zero.
        const double rho_e = UseSlippingVelBC ? 0.0 : Den_charge[n] / epsilon_LB;
        const int cell = Map[n];
        const double psi = Psi[cell];

        // Gather all seven distributions before any of them is overwritten.
        const double f0 = dist[n];
        const int nr1 = neighborList[n];
        const double f1 = dist[nr1];
        const int nr2 = neighborList[n+Np];
        const double f2 = dist[nr2];
        const int nr3 = neighborList[n+2*Np];
        const double f3 = dist[nr3];
        const int nr4 = neighborList[n+3*Np];
        const double f4 = dist[nr4];
        const int nr5 = neighborList[n+4*Np];
        const double f5 = dist[nr5];
        const int nr6 = neighborList[n+5*Np];
        const double f6 = dist[nr6];

        // Electric field (original notes: unit is V/lu; the factor 4.0 is
        // attributed to the D3Q7 lattice speed of sound).
        const double Ex = (f1-f2)*rlx*4.0;
        const double Ey = (f3-f4)*rlx*4.0;
        const double Ez = (f5-f6)*rlx*4.0;
        ElectricField[n+0*Np] = Ex;
        ElectricField[n+1*Np] = Ey;
        ElectricField[n+2*Np] = Ez;

        // Relax and store; q = 0 stays in place, the others swap within pairs.
        dist[n]   = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e);
        dist[nr2] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[nr1] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[nr4] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[nr3] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[nr6] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[nr5] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
    }
}
|
||||
|
||||
// Device kernel: D3Q7 Poisson update for sites in [start, finish), with the
// distributions read directly from dist.
//
// Per site n the kernel
//   1. sets rho_e to 0 when UseSlippingVelBC is true, else Den_charge[n]/epsilon_LB;
//   2. reads psi = Psi[Map[n]], f0 = dist[n], and f1..f6 from the slots
//      2,1,4,3,6,5 (times Np, plus n);
//   3. stores (f1-f2, f3-f4, f5-f6)*rlx*4.0 in ElectricField at n, n+Np, n+2*Np;
//   4. writes f_q*(1-rlx) + w*(rlx*psi + rho_e) to slot q, where w = 0.25 for
//      q = 0 and 0.125 otherwise.
//
// Each thread visits up to S = Np/NBLOCKS/NTHREADS + 1 sites.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
    const double rlx = 1.0/tau;
    const int S = Np/NBLOCKS/NTHREADS + 1;

    for (int s = 0; s < S; s++) {
        // 1-D site index handled by this thread on pass s
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // With the Helmholtz-Smoluchowski slipping velocity BC the bulk fluid is
        // treated as electroneutral, so the net space charge density is zero.
        const double rho_e = UseSlippingVelBC ? 0.0 : Den_charge[n] / epsilon_LB;
        const int cell = Map[n];
        const double psi = Psi[cell];

        // Gather all seven distributions before any of them is overwritten;
        // f_q is read from the slot of its partner direction.
        const double f0 = dist[n];
        const double f1 = dist[2*Np+n];
        const double f2 = dist[1*Np+n];
        const double f3 = dist[4*Np+n];
        const double f4 = dist[3*Np+n];
        const double f5 = dist[6*Np+n];
        const double f6 = dist[5*Np+n];

        // Electric field (original notes: unit is V/lu; the factor 4.0 is
        // attributed to the D3Q7 lattice speed of sound).
        const double Ex = (f1-f2)*rlx*4.0;
        const double Ey = (f3-f4)*rlx*4.0;
        const double Ez = (f5-f6)*rlx*4.0;
        ElectricField[n+0*Np] = Ex;
        ElectricField[n+1*Np] = Ey;
        ElectricField[n+2*Np] = Ez;

        // Relax and store each distribution in its own slot q.
        dist[n]      = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e);
        dist[1*Np+n] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[2*Np+n] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[3*Np+n] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[4*Np+n] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[5*Np+n] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[6*Np+n] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
    }
}
|
||||
|
||||
// Device kernel: initializes the seven D3Q7 distributions from the potential.
//
// For every site n in [start, finish): dist[0*Np+n] = 0.25*Psi[Map[n]] and
// dist[q*Np+n] = 0.125*Psi[Map[n]] for q = 1..6.
//
// Each thread visits up to S = Np/NBLOCKS/NTHREADS + 1 sites.
__global__ void dvc_ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
    const int S = Np/NBLOCKS/NTHREADS + 1;

    for (int s = 0; s < S; s++) {
        // 1-D site index handled by this thread on pass s
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        const int ijk = Map[n];
        dist[0*Np+n] = 0.25*Psi[ijk];
        for (int q = 1; q < 7; q++) {
            dist[q*Np+n] = 0.125*Psi[ijk];
        }
    }
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Psi,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,UseSlippingVelBC,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>(Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,UseSlippingVelBC,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
||||
|
||||
//cudaProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_Poisson_Init: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//cudaProfilerStop();
|
||||
}
|
||||
705
hip/Poisson.hip
Normal file
705
hip/Poisson.hip
Normal file
@@ -0,0 +1,705 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include "hip/hip_runtime.h"
|
||||
|
||||
#define NBLOCKS 1024
|
||||
#define NTHREADS 256
|
||||
|
||||
// Device kernel: electric potential as the sum of the seven D3Q7 distributions,
// with q = 1..6 addressed through neighborList.
//
// For every site n in [start, finish): dist[n] (q = 0) plus dist[] at the
// indices stored in neighborList[n + k*Np], k = 0..5, added in that order.
// The sum is stored in Psi[Map[n]].
//
// Each thread visits up to S = Np/NBLOCKS/NTHREADS + 1 sites.
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
    const int S = Np/NBLOCKS/NTHREADS + 1;

    for (int s = 0; s < S; s++) {
        // 1-D site index handled by this thread on pass s
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // q = 0 lives at the site itself
        double potential = dist[n];

        // q = 1..6: look up the read location, then accumulate
        for (int q = 1; q < 7; q++) {
            const int src = neighborList[n + (q-1)*Np];
            potential += dist[src];
        }

        const int cell = Map[n];
        Psi[cell] = potential;
    }
}
|
||||
|
||||
// Device kernel: electric potential as the sum of the seven D3Q7 distributions,
// read directly from dist.
//
// For every site n in [start, finish) the values dist[n], dist[2*Np+n],
// dist[1*Np+n], dist[4*Np+n], dist[3*Np+n], dist[6*Np+n], dist[5*Np+n] are
// added in exactly that order and the sum is stored in Psi[Map[n]].
//
// Each thread visits up to S = Np/NBLOCKS/NTHREADS + 1 sites.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
    const int S = Np/NBLOCKS/NTHREADS + 1;

    for (int s = 0; s < S; s++) {
        // 1-D site index handled by this thread on pass s
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // q = 0
        double potential = dist[n];

        // Direction pairs (1,2), (3,4), (5,6): within each pair the
        // higher-numbered slot is read first.
        for (int pair = 0; pair < 3; pair++) {
            potential += dist[(2*pair+2)*Np+n];
            potential += dist[(2*pair+1)*Np+n];
        }

        const int cell = Map[n];
        Psi[cell] = potential;
    }
}
|
||||
|
||||
// Device kernel: D3Q7 Poisson update for sites in [start, finish), with
// distributions q = 1..6 addressed through neighborList.
//
// Per site n the kernel
//   1. sets rho_e = Den_charge[n]/epsilon_LB;
//   2. reads psi = Psi[Map[n]] and the seven distributions f0..f6;
//   3. stores (f1-f2, f3-f4, f5-f6)*rlx*4.0 in ElectricField at n, n+Np, n+2*Np;
//   4. overwrites the distributions with f*(1-rlx) + w*(rlx*psi + rho_e), where
//      w = 0.25 for q = 0 and 0.125 otherwise; within each direction pair the
//      result is written to the partner's location (f1 -> nr2, f2 -> nr1, ...).
//
// NOTE: UseSlippingVelBC is accepted but not read here; the branch that zeroed
// rho_e for the Helmholtz-Smoluchowski slipping velocity BC is disabled.
//
// Each thread visits up to S = Np/NBLOCKS/NTHREADS + 1 sites.
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
    const double rlx = 1.0/tau;
    const int S = Np/NBLOCKS/NTHREADS + 1;

    for (int s = 0; s < S; s++) {
        // 1-D site index handled by this thread on pass s
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // Disabled alternative:
        //   rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
        const double rho_e = Den_charge[n] / epsilon_LB;
        const int cell = Map[n];
        const double psi = Psi[cell];

        // Gather all seven distributions before any of them is overwritten.
        const double f0 = dist[n];
        const int nr1 = neighborList[n];
        const double f1 = dist[nr1];
        const int nr2 = neighborList[n+Np];
        const double f2 = dist[nr2];
        const int nr3 = neighborList[n+2*Np];
        const double f3 = dist[nr3];
        const int nr4 = neighborList[n+3*Np];
        const double f4 = dist[nr4];
        const int nr5 = neighborList[n+4*Np];
        const double f5 = dist[nr5];
        const int nr6 = neighborList[n+5*Np];
        const double f6 = dist[nr6];

        // Electric field (original notes: unit is V/lu; the factor 4.0 is
        // attributed to the D3Q7 lattice speed of sound).
        const double Ex = (f1-f2)*rlx*4.0;
        const double Ey = (f3-f4)*rlx*4.0;
        const double Ez = (f5-f6)*rlx*4.0;
        ElectricField[n+0*Np] = Ex;
        ElectricField[n+1*Np] = Ey;
        ElectricField[n+2*Np] = Ez;

        // Relax and store; q = 0 stays in place, the others swap within pairs.
        dist[n]   = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e);
        dist[nr2] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[nr1] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[nr4] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[nr3] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[nr6] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[nr5] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
    }
}
|
||||
|
||||
// Device kernel: D3Q7 Poisson update for sites in [start, finish), with the
// distributions read directly from dist.
//
// Per site n the kernel
//   1. sets rho_e = Den_charge[n]/epsilon_LB;
//   2. reads psi = Psi[Map[n]], f0 = dist[n], and f1..f6 from the slots
//      2,1,4,3,6,5 (times Np, plus n);
//   3. stores (f1-f2, f3-f4, f5-f6)*rlx*4.0 in ElectricField at n, n+Np, n+2*Np;
//   4. writes f_q*(1-rlx) + w*(rlx*psi + rho_e) to slot q, where w = 0.25 for
//      q = 0 and 0.125 otherwise.
//
// NOTE: UseSlippingVelBC is accepted but not read here; the branch that zeroed
// rho_e for the Helmholtz-Smoluchowski slipping velocity BC is disabled.
//
// Each thread visits up to S = Np/NBLOCKS/NTHREADS + 1 sites.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
    const double rlx = 1.0/tau;
    const int S = Np/NBLOCKS/NTHREADS + 1;

    for (int s = 0; s < S; s++) {
        // 1-D site index handled by this thread on pass s
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        // Disabled alternative:
        //   rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
        const double rho_e = Den_charge[n] / epsilon_LB;
        const int cell = Map[n];
        const double psi = Psi[cell];

        // Gather all seven distributions before any of them is overwritten;
        // f_q is read from the slot of its partner direction.
        const double f0 = dist[n];
        const double f1 = dist[2*Np+n];
        const double f2 = dist[1*Np+n];
        const double f3 = dist[4*Np+n];
        const double f4 = dist[3*Np+n];
        const double f5 = dist[6*Np+n];
        const double f6 = dist[5*Np+n];

        // Electric field (original notes: unit is V/lu; the factor 4.0 is
        // attributed to the D3Q7 lattice speed of sound).
        const double Ex = (f1-f2)*rlx*4.0;
        const double Ey = (f3-f4)*rlx*4.0;
        const double Ez = (f5-f6)*rlx*4.0;
        ElectricField[n+0*Np] = Ex;
        ElectricField[n+1*Np] = Ey;
        ElectricField[n+2*Np] = Ez;

        // Relax and store each distribution in its own slot q.
        dist[n]      = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e);
        dist[1*Np+n] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[2*Np+n] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[3*Np+n] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[4*Np+n] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[5*Np+n] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
        dist[6*Np+n] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
    }
}
|
||||
|
||||
// Device kernel: initializes the seven D3Q7 distributions from the potential.
//
// For every site n in [start, finish): dist[0*Np+n] = 0.25*Psi[Map[n]] and
// dist[q*Np+n] = 0.125*Psi[Map[n]] for q = 1..6.
//
// Each thread visits up to S = Np/NBLOCKS/NTHREADS + 1 sites.
__global__ void dvc_ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
    const int S = Np/NBLOCKS/NTHREADS + 1;

    for (int s = 0; s < S; s++) {
        // 1-D site index handled by this thread on pass s
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue;

        const int ijk = Map[n];
        dist[0*Np+n] = 0.25*Psi[ijk];
        for (int q = 1; q < 7; q++) {
            dist[q*Np+n] = 0.125*Psi[ijk];
        }
    }
}
|
||||
|
||||
|
||||
// Device kernel: D3Q19 Poisson update for sites in [start, finish), with
// distributions q = 1..18 addressed through neighborList.
//
// Per site n the kernel
//   1. sets rho_e to 0 when UseSlippingVelBC is true, else Den_charge[n]/epsilon_LB;
//   2. reads f0 = dist[n] and f1..f18 at the indices neighborList[n + k*Np],
//      k = 0..17;
//   3. computes psi = 2*(f0*(1-rlx) + rlx*(sum_q + 0.125*rho_e)), where sum_q
//      is f1 + ... + f18, and stores it in Psi[Map[n]];
//   4. stores Ex, Ey, Ez (differences of opposing distributions, times 4.0)
//      in ElectricField at n, n+Np, n+2*Np;
//   5. overwrites every distribution with weight*psi, using W0 = 1/2 for
//      q = 0, W1 = 1/24 for q = 1..6 and W2 = 1/48 for q = 7..18; within each
//      direction pair the value is written to the partner's location.
//
// The residual 8*(sum_q - f0) + rho_e that this kernel used to compute was
// never read or stored (this kernel has no Error output), so it was removed.
//
// Each thread visits up to S = Np/NBLOCKS/NTHREADS + 1 sites.
__global__ void dvc_ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
                                               double *dist, double *Den_charge,
                                               double *Psi, double *ElectricField,
                                               double tau, double epsilon_LB, bool UseSlippingVelBC,
                                               int start, int finish, int Np) {
    int n;
    double psi;        //electric potential
    double Ex, Ey, Ez; //electric field
    double rho_e;      //local charge density
    double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
        f16, f17, f18;
    int nr1, nr2, nr3, nr4, nr5, nr6, nr7, nr8, nr9, nr10, nr11, nr12, nr13,
        nr14, nr15, nr16, nr17, nr18;
    double sum_q;
    double rlx = 1.0 / tau;
    int idx;

    // Weights applied to psi when the distributions are rewritten
    double W0 = 0.5;
    double W1 = 1.0/24.0;
    double W2 = 1.0/48.0;

    int S = Np/NBLOCKS/NTHREADS + 1;
    for (int s=0; s<S; s++){
        //........Get 1-D index for this thread....................
        n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n<finish) {
            //Load data
            //When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
            //and thus the net space charge density is zero.
            rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;

            // Gather all 19 distributions before any of them is overwritten.
            // q=0
            f0 = dist[n];
            // q=1
            nr1 = neighborList[n];
            f1 = dist[nr1];
            // q=2
            nr2 = neighborList[n + Np];
            f2 = dist[nr2];
            // q=3
            nr3 = neighborList[n + 2 * Np];
            f3 = dist[nr3];
            // q=4
            nr4 = neighborList[n + 3 * Np];
            f4 = dist[nr4];
            // q=5
            nr5 = neighborList[n + 4 * Np];
            f5 = dist[nr5];
            // q=6
            nr6 = neighborList[n + 5 * Np];
            f6 = dist[nr6];
            // q=7
            nr7 = neighborList[n + 6 * Np];
            f7 = dist[nr7];
            // q=8
            nr8 = neighborList[n + 7 * Np];
            f8 = dist[nr8];
            // q=9
            nr9 = neighborList[n + 8 * Np];
            f9 = dist[nr9];
            // q=10
            nr10 = neighborList[n + 9 * Np];
            f10 = dist[nr10];
            // q=11
            nr11 = neighborList[n + 10 * Np];
            f11 = dist[nr11];
            // q=12
            nr12 = neighborList[n + 11 * Np];
            f12 = dist[nr12];
            // q=13
            nr13 = neighborList[n + 12 * Np];
            f13 = dist[nr13];
            // q=14
            nr14 = neighborList[n + 13 * Np];
            f14 = dist[nr14];
            // q=15
            nr15 = neighborList[n + 14 * Np];
            f15 = dist[nr15];
            // q=16
            nr16 = neighborList[n + 15 * Np];
            f16 = dist[nr16];
            // q=17
            nr17 = neighborList[n + 16 * Np];
            f17 = dist[nr17];
            // q=18
            nr18 = neighborList[n + 17 * Np];
            f18 = dist[nr18];

            sum_q = f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15+f16+f17+f18;

            psi = 2.0*(f0*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));

            idx = Map[n];
            Psi[idx] = psi;

            Ex = (f1 - f2 + 0.5*(f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14))*4.0; //NOTE the unit of electric field here is V/lu
            Ey = (f3 - f4 + 0.5*(f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18))*4.0;
            Ez = (f5 - f6 + 0.5*(f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18))*4.0;
            ElectricField[n + 0 * Np] = Ex;
            ElectricField[n + 1 * Np] = Ey;
            ElectricField[n + 2 * Np] = Ez;

            // q = 0
            dist[n] = W0*psi;

            // q = 1..6 (written to the partner direction's location)
            dist[nr2] = W1*psi;
            dist[nr1] = W1*psi;
            dist[nr4] = W1*psi;
            dist[nr3] = W1*psi;
            dist[nr6] = W1*psi;
            dist[nr5] = W1*psi;
            //........................................................................

            // q = 7..18 (written to the partner direction's location)
            dist[nr8] = W2*psi;
            dist[nr7] = W2*psi;
            dist[nr10] = W2*psi;
            dist[nr9] = W2*psi;
            dist[nr12] = W2*psi;
            dist[nr11] = W2*psi;
            dist[nr14] = W2*psi;
            dist[nr13] = W2*psi;
            dist[nr16] = W2*psi;
            dist[nr15] = W2*psi;
            dist[nr18] = W2*psi;
            dist[nr17] = W2*psi;
        }
    }
}
|
||||
|
||||
/**
 * D3Q19 Poisson kernel for the "even" half of the AA update.
 *
 * For every site n in [start, finish) handled by this thread the kernel
 *   - reads the 19 distributions (for q >= 1 each value is read from the slot
 *     of its paired index: 1<->2, 3<->4, ..., 17<->18),
 *   - writes the three electric-field components to ElectricField
 *     (component c is stored at ElectricField[n + c * Np]),
 *   - computes the potential psi and stores it at Psi[Map[n]],
 *   - overwrites all 19 distributions with W_q * psi.
 *
 * @param Map              maps the site index n to an index into Psi
 * @param dist             distributions, value for direction q stored at dist[q * Np + n]
 * @param Den_charge       charge density per site (divided by epsilon_LB here)
 * @param Psi              output electric potential
 * @param ElectricField    output electric field (3 * Np entries)
 * @param Error            not referenced by this kernel
 * @param tau              relaxation time; rlx = 1 / tau
 * @param epsilon_LB       permittivity used to scale the charge density
 * @param UseSlippingVelBC when true the charge density is treated as zero
 * @param start,finish     half-open range of site indices to process
 * @param Np               number of sites (stride between directions)
 */
__global__ void dvc_ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
                                                double *Den_charge, double *Psi,
                                                double *ElectricField, double *Error, double tau,
                                                double epsilon_LB, bool UseSlippingVelBC,
                                                int start, int finish, int Np) {
    // Weights applied to psi when the distributions are rebuilt
    const double W0 = 0.5;        // q = 0
    const double W1 = 1.0 / 24.0; // q = 1..6
    const double W2 = 1.0 / 48.0; // q = 7..18
    const double rlx = 1.0 / tau;

    // Number of sites each thread walks through
    const int S = Np / NBLOCKS / NTHREADS + 1;
    for (int s = 0; s < S; s++) {
        //........Get 1-D index for this thread....................
        const int n = S * blockIdx.x * blockDim.x + s * blockDim.x + threadIdx.x + start;
        if (n >= finish)
            continue;

        // When the Helmholtz-Smoluchowski slipping-velocity BC is used, the bulk
        // fluid is considered electroneutral, so the net space charge density is zero.
        const double rho_e = UseSlippingVelBC ? 0.0 : Den_charge[n] / epsilon_LB;

        // Load the distributions (paired slots are swapped on the even step)
        const double f0 = dist[n];
        const double f1 = dist[2 * Np + n];
        const double f2 = dist[1 * Np + n];
        const double f3 = dist[4 * Np + n];
        const double f4 = dist[3 * Np + n];
        const double f5 = dist[6 * Np + n];
        const double f6 = dist[5 * Np + n];
        const double f7 = dist[8 * Np + n];
        const double f8 = dist[7 * Np + n];
        const double f9 = dist[10 * Np + n];
        const double f10 = dist[9 * Np + n];
        const double f11 = dist[12 * Np + n];
        const double f12 = dist[11 * Np + n];
        const double f13 = dist[14 * Np + n];
        const double f14 = dist[13 * Np + n];
        const double f15 = dist[16 * Np + n];
        const double f16 = dist[15 * Np + n];
        const double f17 = dist[18 * Np + n];
        const double f18 = dist[17 * Np + n];

        // Electric field from the first moments (NOTE: unit here is V/lu)
        const double Ex = (f1 - f2 + 0.5*(f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14))*4.0;
        const double Ey = (f3 - f4 + 0.5*(f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18))*4.0;
        const double Ez = (f5 - f6 + 0.5*(f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18))*4.0;
        ElectricField[n + 0 * Np] = Ex;
        ElectricField[n + 1 * Np] = Ey;
        ElectricField[n + 2 * Np] = Ez;

        const double sum_q = f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15+f16+f17+f18;

        // NOTE(review): this residual is computed but never stored; the Error
        // argument is left untouched by this kernel -- confirm whether it
        // should be written to Error[n].
        const double error = 8.0*(sum_q - f0) + rho_e;

        const double psi = 2.0*(f0*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));

        // Publish the potential at the location given by the map
        Psi[Map[n]] = psi;

        // Rebuild every distribution as weight * psi
        dist[n] = W0*psi; // q = 0
        for (int q = 1; q <= 6; q++) {
            dist[q * Np + n] = W1*psi;
        }
        for (int q = 7; q <= 18; q++) {
            dist[q * Np + n] = W2*psi;
        }
        //........................................................................
    }
}
|
||||
|
||||
/**
 * Initialise the D3Q19 Poisson distributions from an existing potential.
 *
 * For each site n in [start, finish) handled by this thread, every
 * distribution q is set to W_q * Psi[Map[n]], with W_0 = 1/2,
 * W_1..6 = 1/24 and W_7..18 = 1/48.
 *
 * @param Map          maps the site index n to an index into Psi
 * @param dist         distributions, direction q stored at dist[q * Np + n]
 * @param Psi          electric potential read through Map
 * @param start,finish half-open range of site indices to process
 * @param Np           number of sites (stride between directions)
 */
__global__ void dvc_ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
                                              int start, int finish, int Np) {
    const double W0 = 0.5;
    const double W1 = 1.0 / 24.0;
    const double W2 = 1.0 / 48.0;

    // Number of sites each thread walks through
    const int S = Np / NBLOCKS / NTHREADS + 1;
    for (int s = 0; s < S; s++) {
        //........Get 1-D index for this thread....................
        const int n = S * blockIdx.x * blockDim.x + s * blockDim.x + threadIdx.x + start;
        if (n >= finish)
            continue;

        const int ijk = Map[n];
        dist[0 * Np + n] = W0 * Psi[ijk];
        for (int q = 1; q <= 6; q++) {
            dist[q * Np + n] = W1 * Psi[ijk];
        }
        for (int q = 7; q <= 18; q++) {
            dist[q * Np + n] = W2 * Psi[ijk];
        }
    }
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
|
||||
double *dist, double *Den_charge,
|
||||
double *Psi, double *ElectricField,
|
||||
double tau, double epsilon_LB, bool UseSlippingVelBC,
|
||||
int start, int finish, int Np) {
|
||||
|
||||
hipFuncSetCacheConfig( (void*) dvc_ScaLBL_D3Q19_AAodd_Poisson, hipFuncCachePreferL1);
|
||||
|
||||
dvc_ScaLBL_D3Q19_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList, Map,
|
||||
dist, Den_charge, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, start, finish, Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("Hip error in dvc_ScaLBL_D3Q19_AAodd_Poisson: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
|
||||
double *Den_charge, double *Psi,
|
||||
double *ElectricField, double *Error, double tau,
|
||||
double epsilon_LB, bool UseSlippingVelBC,
|
||||
int start, int finish, int Np) {
|
||||
|
||||
hipFuncSetCacheConfig( (void*) dvc_ScaLBL_D3Q19_AAeven_Poisson, hipFuncCachePreferL1);
|
||||
|
||||
|
||||
dvc_ScaLBL_D3Q19_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>( Map, dist, Den_charge, Psi,
|
||||
ElectricField, Error, tau, epsilon_LB, UseSlippingVelBC, start, finish, Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("Hip error in dvc_ScaLBL_D3Q19_AAeven_Poisson: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
|
||||
int start, int finish, int Np){
|
||||
//hipProfilerStart();
|
||||
|
||||
dvc_ScaLBL_D3Q19_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map, dist, Psi, start, finish, Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("Hip error in ScaLBL_D3Q19_Poisson_Init: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
||||
|
||||
//hipProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Psi,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//hipProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
||||
|
||||
//hipProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//hipProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
|
||||
|
||||
//hipProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,UseSlippingVelBC,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//hipProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
|
||||
|
||||
//hipProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>(Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,UseSlippingVelBC,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//hipProfilerStop();
|
||||
}
|
||||
|
||||
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
|
||||
|
||||
//hipProfilerStart();
|
||||
dvc_ScaLBL_D3Q7_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
|
||||
|
||||
hipError_t err = hipGetLastError();
|
||||
if (hipSuccess != err){
|
||||
printf("hip error in ScaLBL_D3Q7_Poisson_Init: %s \n",hipGetErrorString(err));
|
||||
}
|
||||
//hipProfilerStop();
|
||||
}
|
||||
538
models/BGKModel.cpp
Normal file
538
models/BGKModel.cpp
Normal file
@@ -0,0 +1,538 @@
|
||||
/*
|
||||
Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University
|
||||
Copyright Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
/*
|
||||
 * Single-relaxation time (BGK) LBM Model
|
||||
*/
|
||||
#include "models/BGKModel.h"
|
||||
#include "analysis/distance.h"
|
||||
#include "common/ReadMicroCT.h"
|
||||
/**
 * Construct a BGK model bound to one MPI rank.
 *
 * All scalar state is given a defined value here so that no member is read
 * uninitialised if Run() is reached without ReadParams(filename) having been
 * called. ANALYSIS_INTERVAL and tolerance start with the same defaults that
 * ReadParams(filename) assigns.
 *
 * @param RANK rank of this process
 * @param NP   total number of processes
 * @param COMM communicator used by the model
 */
ScaLBL_BGKModel::ScaLBL_BGKModel(int RANK, int NP, const Utilities::MPI &COMM)
    : rank(RANK), nprocs(NP), Restart(0), pBC(false), timestep(0),
      timestepMax(0), ANALYSIS_INTERVAL(1000), tau(0), Fx(0), Fy(0), Fz(0),
      flux(0), din(0), dout(0), tolerance(1.0e-8), mu(0), Nx(0), Ny(0), Nz(0),
      N(0), Np(0), nprocx(0), nprocy(0), nprocz(0), BoundaryCondition(0),
      Lx(0), Ly(0), Lz(0), comm(COMM) {}
|
||||
// Nothing is released explicitly; members clean up through their own destructors.
// NOTE(review): the device buffers obtained in Create() via
// ScaLBL_AllocateDeviceMemory are not freed here -- confirm whether that is intended.
ScaLBL_BGKModel::~ScaLBL_BGKModel() = default;
|
||||
|
||||
/**
 * Load the model parameters from an input database file.
 *
 * Opens the "Domain", "BGK" and "Visualization" sections, assigns default
 * values, and then overrides each default with the value from the "BGK"
 * section when the corresponding key is present. The boundary condition is
 * taken from "BoundaryCondition" in the "BGK" section if present, otherwise
 * from "BC" in the "Domain" section. Finally the viscosity is derived from
 * tau as mu = (tau - 0.5) / 3.
 *
 * @param filename path of the input database
 */
void ScaLBL_BGKModel::ReadParams(string filename) {
    // read the input database
    db = std::make_shared<Database>(filename);
    domain_db = db->getDatabase("Domain");
    mrt_db = db->getDatabase("BGK"); // member keeps its historical name
    vis_db = db->getDatabase("Visualization");

    // Defaults used when a key is absent
    tau = 1.0;
    timestepMax = 100000;
    ANALYSIS_INTERVAL = 1000;
    tolerance = 1.0e-8;
    Fx = Fy = 0.0;
    Fz = 1.0e-5;
    dout = 1.0;
    din = 1.0;

    // BGK model parameters
    if (mrt_db->keyExists("timestepMax")) {
        timestepMax = mrt_db->getScalar<int>("timestepMax");
    }
    if (mrt_db->keyExists("analysis_interval")) {
        ANALYSIS_INTERVAL = mrt_db->getScalar<int>("analysis_interval");
    }
    if (mrt_db->keyExists("tolerance")) {
        tolerance = mrt_db->getScalar<double>("tolerance");
    }
    if (mrt_db->keyExists("tau")) {
        tau = mrt_db->getScalar<double>("tau");
    }
    if (mrt_db->keyExists("F")) {
        // Fetch the vector once and make sure all three components exist
        const auto force = mrt_db->getVector<double>("F");
        ASSERT(force.size() >= 3);
        Fx = force[0];
        Fy = force[1];
        Fz = force[2];
    }
    if (mrt_db->keyExists("Restart")) {
        Restart = mrt_db->getScalar<bool>("Restart");
    }
    if (mrt_db->keyExists("din")) {
        din = mrt_db->getScalar<double>("din");
    }
    if (mrt_db->keyExists("dout")) {
        dout = mrt_db->getScalar<double>("dout");
    }
    if (mrt_db->keyExists("flux")) {
        flux = mrt_db->getScalar<double>("flux");
    }

    // Read domain parameters
    if (mrt_db->keyExists("BoundaryCondition")) {
        // Read the key that was just tested for (previously "BC" was read here,
        // which need not exist in this section)
        BoundaryCondition = mrt_db->getScalar<int>("BoundaryCondition");
    } else if (domain_db->keyExists("BC")) {
        BoundaryCondition = domain_db->getScalar<int>("BC");
    }

    mu = (tau - 0.5) / 3.0;
}
|
||||
void ScaLBL_BGKModel::SetDomain() {
|
||||
Dm = std::shared_ptr<Domain>(
|
||||
new Domain(domain_db, comm)); // full domain for analysis
|
||||
Mask = std::shared_ptr<Domain>(
|
||||
new Domain(domain_db, comm)); // mask domain removes immobile phases
|
||||
|
||||
// domain parameters
|
||||
Nx = Dm->Nx;
|
||||
Ny = Dm->Ny;
|
||||
Nz = Dm->Nz;
|
||||
Lx = Dm->Lx;
|
||||
Ly = Dm->Ly;
|
||||
Lz = Dm->Lz;
|
||||
|
||||
N = Nx * Ny * Nz;
|
||||
Distance.resize(Nx, Ny, Nz);
|
||||
Velocity_x.resize(Nx, Ny, Nz);
|
||||
Velocity_y.resize(Nx, Ny, Nz);
|
||||
Velocity_z.resize(Nx, Ny, Nz);
|
||||
|
||||
for (int i = 0; i < Nx * Ny * Nz; i++)
|
||||
Dm->id[i] = 1; // initialize this way
|
||||
//Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
|
||||
comm.barrier();
|
||||
Dm->CommInit();
|
||||
comm.barrier();
|
||||
|
||||
rank = Dm->rank();
|
||||
nprocx = Dm->nprocx();
|
||||
nprocy = Dm->nprocy();
|
||||
nprocz = Dm->nprocz();
|
||||
}
|
||||
|
||||
void ScaLBL_BGKModel::ReadInput() {
|
||||
|
||||
sprintf(LocalRankString, "%05d", Dm->rank());
|
||||
sprintf(LocalRankFilename, "%s%s", "ID.", LocalRankString);
|
||||
sprintf(LocalRestartFile, "%s%s", "Restart.", LocalRankString);
|
||||
|
||||
if (domain_db->keyExists("Filename")) {
|
||||
auto Filename = domain_db->getScalar<std::string>("Filename");
|
||||
Mask->Decomp(Filename);
|
||||
} else if (domain_db->keyExists("GridFile")) {
|
||||
// Read the local domain data
|
||||
auto input_id = readMicroCT(*domain_db, comm);
|
||||
// Fill the halo (assuming GCW of 1)
|
||||
array<int, 3> size0 = {(int)input_id.size(0), (int)input_id.size(1),
|
||||
(int)input_id.size(2)};
|
||||
ArraySize size1 = {(size_t)Mask->Nx, (size_t)Mask->Ny,
|
||||
(size_t)Mask->Nz};
|
||||
ASSERT((int)size1[0] == size0[0] + 2 && (int)size1[1] == size0[1] + 2 &&
|
||||
(int)size1[2] == size0[2] + 2);
|
||||
fillHalo<signed char> fill(comm, Mask->rank_info, size0, {1, 1, 1}, 0,
|
||||
1);
|
||||
Array<signed char> id_view;
|
||||
id_view.viewRaw(size1, Mask->id.data());
|
||||
fill.copy(input_id, id_view);
|
||||
fill.fill(id_view);
|
||||
} else {
|
||||
Mask->ReadIDs();
|
||||
}
|
||||
|
||||
// Generate the signed distance map
|
||||
// Initialize the domain and communication
|
||||
Array<char> id_solid(Nx, Ny, Nz);
|
||||
// Solve for the position of the solid phase
|
||||
for (int k = 0; k < Nz; k++) {
|
||||
for (int j = 0; j < Ny; j++) {
|
||||
for (int i = 0; i < Nx; i++) {
|
||||
int n = k * Nx * Ny + j * Nx + i;
|
||||
// Initialize the solid phase
|
||||
if (Mask->id[n] > 0)
|
||||
id_solid(i, j, k) = 1;
|
||||
else
|
||||
id_solid(i, j, k) = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Initialize the signed distance function
|
||||
for (int k = 0; k < Nz; k++) {
|
||||
for (int j = 0; j < Ny; j++) {
|
||||
for (int i = 0; i < Nx; i++) {
|
||||
// Initialize distance to +/- 1
|
||||
Distance(i, j, k) = 2.0 * double(id_solid(i, j, k)) - 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
// MeanFilter(Averages->SDs);
|
||||
if (rank == 0)
|
||||
printf("Initialized solid phase -- Converting to Signed Distance "
|
||||
"function \n");
|
||||
CalcDist(Distance, id_solid, *Dm);
|
||||
if (rank == 0)
|
||||
cout << "Domain set." << endl;
|
||||
}
|
||||
|
||||
void ScaLBL_BGKModel::Create() {
|
||||
/*
|
||||
* This function creates the variables needed to run a LBM
|
||||
*/
|
||||
int rank = Mask->rank();
|
||||
//.........................................................
|
||||
// Initialize communication structures in averaging domain
|
||||
for (int i = 0; i < Nx * Ny * Nz; i++)
|
||||
Dm->id[i] = Mask->id[i];
|
||||
Mask->CommInit();
|
||||
Np = Mask->PoreCount();
|
||||
//...........................................................................
|
||||
if (rank == 0)
|
||||
printf("Create ScaLBL_Communicator \n");
|
||||
// Create a communicator for the device (will use optimized layout)
|
||||
// ScaLBL_Communicator ScaLBL_Comm(Mask); // original
|
||||
ScaLBL_Comm =
|
||||
std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));
|
||||
|
||||
int Npad = (Np / 16 + 2) * 16;
|
||||
if (rank == 0)
|
||||
printf("Set up memory efficient layout \n");
|
||||
Map.resize(Nx, Ny, Nz);
|
||||
Map.fill(-2);
|
||||
auto neighborList = new int[18 * Npad];
|
||||
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map, neighborList,
|
||||
Mask->id.data(), Np, 1);
|
||||
comm.barrier();
|
||||
|
||||
//...........................................................................
|
||||
// MAIN VARIABLES ALLOCATED HERE
|
||||
//...........................................................................
|
||||
// LBM variables
|
||||
if (rank == 0)
|
||||
printf("Allocating distributions \n");
|
||||
//......................device distributions.................................
|
||||
int dist_mem_size = Np * sizeof(double);
|
||||
int neighborSize = 18 * (Np * sizeof(int));
|
||||
//...........................................................................
|
||||
ScaLBL_AllocateDeviceMemory((void **)&NeighborList, neighborSize);
|
||||
ScaLBL_AllocateDeviceMemory((void **)&fq, 19 * dist_mem_size);
|
||||
ScaLBL_AllocateDeviceMemory((void **)&Pressure, sizeof(double) * Np);
|
||||
ScaLBL_AllocateDeviceMemory((void **)&Velocity, 3 * sizeof(double) * Np);
|
||||
//...........................................................................
|
||||
// Update GPU data structures
|
||||
if (rank == 0)
|
||||
printf("Setting up device map and neighbor list \n");
|
||||
// copy the neighbor list
|
||||
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
|
||||
comm.barrier();
|
||||
double MLUPS = ScaLBL_Comm->GetPerformance(NeighborList, fq, Np);
|
||||
printf(" MLPUS=%f from rank %i\n", MLUPS, rank);
|
||||
}
|
||||
|
||||
void ScaLBL_BGKModel::Initialize() {
|
||||
/*
|
||||
* This function initializes model
|
||||
*/
|
||||
if (rank == 0)
|
||||
printf("Initializing distributions \n");
|
||||
ScaLBL_D3Q19_Init(fq, Np);
|
||||
}
|
||||
|
||||
void ScaLBL_BGKModel::Run() {
|
||||
double rlx = 1.0 / tau;
|
||||
|
||||
Minkowski Morphology(Mask);
|
||||
|
||||
if (rank == 0) {
|
||||
bool WriteHeader = false;
|
||||
FILE *log_file = fopen("Permeability.csv", "r");
|
||||
if (log_file != NULL)
|
||||
fclose(log_file);
|
||||
else
|
||||
WriteHeader = true;
|
||||
|
||||
if (WriteHeader) {
|
||||
log_file = fopen("Permeability.csv", "a+");
|
||||
fprintf(log_file, "time Fx Fy Fz mu Vs As Js Xs vx vy vz k\n");
|
||||
fclose(log_file);
|
||||
}
|
||||
}
|
||||
|
||||
//.......create and start timer............
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
if (rank == 0)
|
||||
printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax);
|
||||
if (rank == 0)
|
||||
printf("********************************************************\n");
|
||||
timestep = 0;
|
||||
double error = 1.0;
|
||||
double flow_rate_previous = 0.0;
|
||||
auto t1 = std::chrono::system_clock::now();
|
||||
while (timestep < timestepMax && error > tolerance) {
|
||||
//************************************************************************/
|
||||
/* timestep++;
|
||||
ScaLBL_Comm.SendD3Q19AA(dist); //READ FROM NORMAL
|
||||
ScaLBL_D3Q19_AAodd_BGK(NeighborList, dist, ScaLBL_Comm.first_interior, ScaLBL_Comm.last_interior, Np, rlx, Fx, Fy, Fz);
|
||||
ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
|
||||
ScaLBL_D3Q19_AAodd_BGK(NeighborList, dist, 0, ScaLBL_Comm.next, Np, rlx, Fx, Fy, Fz);
|
||||
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
|
||||
|
||||
timestep++;
|
||||
ScaLBL_Comm.SendD3Q19AA(dist); //READ FORM NORMAL
|
||||
ScaLBL_D3Q19_AAeven_BGK(dist, ScaLBL_Comm.first_interior, ScaLBL_Comm.last_interior, Np, rlx, Fx, Fy, Fz);
|
||||
ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
|
||||
ScaLBL_D3Q19_AAeven_BGK(dist, 0, ScaLBL_Comm.next, Np, rlx, Fx, Fy, Fz);
|
||||
ScaLBL_DeviceBarrier(); MPI_Barrie
|
||||
*/
|
||||
timestep++;
|
||||
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
|
||||
ScaLBL_D3Q19_AAodd_BGK(NeighborList, fq, ScaLBL_Comm->FirstInterior(),
|
||||
ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz);
|
||||
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
|
||||
// Set boundary conditions
|
||||
if (BoundaryCondition == 3) {
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
|
||||
} else if (BoundaryCondition == 4) {
|
||||
din =
|
||||
ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
|
||||
} else if (BoundaryCondition == 5) {
|
||||
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
|
||||
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
|
||||
}
|
||||
ScaLBL_D3Q19_AAodd_BGK(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(),
|
||||
Np, rlx, Fx, Fy, Fz);
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
timestep++;
|
||||
ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL
|
||||
ScaLBL_D3Q19_AAeven_BGK(fq, ScaLBL_Comm->FirstInterior(),
|
||||
ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz);
|
||||
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
|
||||
// Set boundary conditions
|
||||
if (BoundaryCondition == 3) {
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
|
||||
} else if (BoundaryCondition == 4) {
|
||||
din =
|
||||
ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
|
||||
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
|
||||
} else if (BoundaryCondition == 5) {
|
||||
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
|
||||
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
|
||||
}
|
||||
ScaLBL_D3Q19_AAeven_BGK(fq, 0, ScaLBL_Comm->LastExterior(), Np,
|
||||
rlx, Fx, Fy, Fz);
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
//************************************************************************/
|
||||
|
||||
if (timestep % ANALYSIS_INTERVAL == 0) {
|
||||
ScaLBL_D3Q19_Momentum(fq, Velocity, Np);
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
ScaLBL_Comm->RegularLayout(Map, &Velocity[0], Velocity_x);
|
||||
ScaLBL_Comm->RegularLayout(Map, &Velocity[Np], Velocity_y);
|
||||
ScaLBL_Comm->RegularLayout(Map, &Velocity[2 * Np], Velocity_z);
|
||||
|
||||
double count_loc = 0;
|
||||
double count;
|
||||
double vax, vay, vaz;
|
||||
double vax_loc, vay_loc, vaz_loc;
|
||||
vax_loc = vay_loc = vaz_loc = 0.f;
|
||||
for (int k = 1; k < Nz - 1; k++) {
|
||||
for (int j = 1; j < Ny - 1; j++) {
|
||||
for (int i = 1; i < Nx - 1; i++) {
|
||||
if (Distance(i, j, k) > 0) {
|
||||
vax_loc += Velocity_x(i, j, k);
|
||||
vay_loc += Velocity_y(i, j, k);
|
||||
vaz_loc += Velocity_z(i, j, k);
|
||||
count_loc += 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
vax = Dm->Comm.sumReduce(vax_loc);
|
||||
vay = Dm->Comm.sumReduce(vay_loc);
|
||||
vaz = Dm->Comm.sumReduce(vaz_loc);
|
||||
count = Dm->Comm.sumReduce(count_loc);
|
||||
|
||||
vax /= count;
|
||||
vay /= count;
|
||||
vaz /= count;
|
||||
|
||||
double force_mag = sqrt(Fx * Fx + Fy * Fy + Fz * Fz);
|
||||
double dir_x = Fx / force_mag;
|
||||
double dir_y = Fy / force_mag;
|
||||
double dir_z = Fz / force_mag;
|
||||
if (force_mag == 0.0) {
|
||||
// default to z direction
|
||||
dir_x = 0.0;
|
||||
dir_y = 0.0;
|
||||
dir_z = 1.0;
|
||||
force_mag = 1.0;
|
||||
}
|
||||
double flow_rate = (vax * dir_x + vay * dir_y + vaz * dir_z);
|
||||
|
||||
error = fabs(flow_rate - flow_rate_previous) / fabs(flow_rate);
|
||||
flow_rate_previous = flow_rate;
|
||||
|
||||
//if (rank==0) printf("Computing Minkowski functionals \n");
|
||||
Morphology.ComputeScalar(Distance, 0.f);
|
||||
//Morphology.PrintAll();
|
||||
double mu = (tau - 0.5) / 3.f;
|
||||
double Vs = Morphology.V();
|
||||
double As = Morphology.A();
|
||||
double Hs = Morphology.H();
|
||||
double Xs = Morphology.X();
|
||||
Vs = Dm->Comm.sumReduce(Vs);
|
||||
As = Dm->Comm.sumReduce(As);
|
||||
Hs = Dm->Comm.sumReduce(Hs);
|
||||
Xs = Dm->Comm.sumReduce(Xs);
|
||||
|
||||
double h = Dm->voxel_length;
|
||||
double absperm =
|
||||
h * h * mu * Mask->Porosity() * flow_rate / force_mag;
|
||||
if (rank == 0) {
|
||||
printf(" %f\n", absperm);
|
||||
FILE *log_file = fopen("Permeability.csv", "a");
|
||||
fprintf(log_file,
|
||||
"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g "
|
||||
"%.8g %.8g\n",
|
||||
timestep, Fx, Fy, Fz, mu, h * h * h * Vs, h * h * As,
|
||||
h * Hs, Xs, vax, vay, vaz, absperm);
|
||||
fclose(log_file);
|
||||
}
|
||||
}
|
||||
}
|
||||
//************************************************************************/
|
||||
if (rank == 0)
|
||||
printf("---------------------------------------------------------------"
|
||||
"----\n");
|
||||
// Compute the walltime per timestep
|
||||
auto t2 = std::chrono::system_clock::now();
|
||||
double cputime = std::chrono::duration<double>(t2 - t1).count() / timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np) / cputime / 1000000;
|
||||
|
||||
if (rank == 0)
|
||||
printf("********************************************************\n");
|
||||
if (rank == 0)
|
||||
printf("CPU time = %f \n", cputime);
|
||||
if (rank == 0)
|
||||
printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
|
||||
MLUPS *= nprocs;
|
||||
if (rank == 0)
|
||||
printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
|
||||
if (rank == 0)
|
||||
printf("********************************************************\n");
|
||||
}
|
||||
|
||||
void ScaLBL_BGKModel::VelocityField() {
|
||||
|
||||
auto format = vis_db->getWithDefault<string>("format", "silo");
|
||||
|
||||
/* memcpy(Morphology.SDn.data(), Distance.data(), Nx*Ny*Nz*sizeof(double));
|
||||
Morphology.Initialize();
|
||||
Morphology.UpdateMeshValues();
|
||||
Morphology.ComputeLocal();
|
||||
Morphology.Reduce();
|
||||
|
||||
double count_loc=0;
|
||||
double count;
|
||||
double vax,vay,vaz;
|
||||
double vax_loc,vay_loc,vaz_loc;
|
||||
vax_loc = vay_loc = vaz_loc = 0.f;
|
||||
for (int n=0; n<ScaLBL_Comm->LastExterior(); n++){
|
||||
vax_loc += VELOCITY[n];
|
||||
vay_loc += VELOCITY[Np+n];
|
||||
vaz_loc += VELOCITY[2*Np+n];
|
||||
count_loc+=1.0;
|
||||
}
|
||||
|
||||
for (int n=ScaLBL_Comm->FirstInterior(); n<ScaLBL_Comm->LastInterior(); n++){
|
||||
vax_loc += VELOCITY[n];
|
||||
vay_loc += VELOCITY[Np+n];
|
||||
vaz_loc += VELOCITY[2*Np+n];
|
||||
count_loc+=1.0;
|
||||
}
|
||||
MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
|
||||
MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
|
||||
MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
|
||||
MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
|
||||
|
||||
vax /= count;
|
||||
vay /= count;
|
||||
vaz /= count;
|
||||
|
||||
double mu = (tau-0.5)/3.f;
|
||||
if (rank==0) printf("Fx Fy Fz mu Vs As Js Xs vx vy vz\n");
|
||||
if (rank==0) printf("%.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",Fx, Fy, Fz, mu,
|
||||
Morphology.V(),Morphology.A(),Morphology.J(),Morphology.X(),vax,vay,vaz);
|
||||
*/
|
||||
vis_db = db->getDatabase("Visualization");
|
||||
if (vis_db->getWithDefault<bool>("write_silo", false)) {
|
||||
|
||||
std::vector<IO::MeshDataStruct> visData;
|
||||
fillHalo<double> fillData(Dm->Comm, Dm->rank_info,
|
||||
{Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2},
|
||||
{1, 1, 1}, 0, 1);
|
||||
|
||||
auto VxVar = std::make_shared<IO::Variable>();
|
||||
auto VyVar = std::make_shared<IO::Variable>();
|
||||
auto VzVar = std::make_shared<IO::Variable>();
|
||||
auto SignDistVar = std::make_shared<IO::Variable>();
|
||||
|
||||
IO::initialize("", format, "false");
|
||||
// Create the MeshDataStruct
|
||||
visData.resize(1);
|
||||
visData[0].meshName = "domain";
|
||||
visData[0].mesh = std::make_shared<IO::DomainMesh>(
|
||||
Dm->rank_info, Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2, Dm->Lx, Dm->Ly,
|
||||
Dm->Lz);
|
||||
SignDistVar->name = "SignDist";
|
||||
SignDistVar->type = IO::VariableType::VolumeVariable;
|
||||
SignDistVar->dim = 1;
|
||||
SignDistVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
|
||||
visData[0].vars.push_back(SignDistVar);
|
||||
|
||||
VxVar->name = "Velocity_x";
|
||||
VxVar->type = IO::VariableType::VolumeVariable;
|
||||
VxVar->dim = 1;
|
||||
VxVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
|
||||
visData[0].vars.push_back(VxVar);
|
||||
VyVar->name = "Velocity_y";
|
||||
VyVar->type = IO::VariableType::VolumeVariable;
|
||||
VyVar->dim = 1;
|
||||
VyVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
|
||||
visData[0].vars.push_back(VyVar);
|
||||
VzVar->name = "Velocity_z";
|
||||
VzVar->type = IO::VariableType::VolumeVariable;
|
||||
VzVar->dim = 1;
|
||||
VzVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
|
||||
visData[0].vars.push_back(VzVar);
|
||||
|
||||
Array<double> &SignData = visData[0].vars[0]->data;
|
||||
Array<double> &VelxData = visData[0].vars[1]->data;
|
||||
Array<double> &VelyData = visData[0].vars[2]->data;
|
||||
Array<double> &VelzData = visData[0].vars[3]->data;
|
||||
|
||||
ASSERT(visData[0].vars[0]->name == "SignDist");
|
||||
ASSERT(visData[0].vars[1]->name == "Velocity_x");
|
||||
ASSERT(visData[0].vars[2]->name == "Velocity_y");
|
||||
ASSERT(visData[0].vars[3]->name == "Velocity_z");
|
||||
|
||||
fillData.copy(Distance, SignData);
|
||||
fillData.copy(Velocity_x, VelxData);
|
||||
fillData.copy(Velocity_y, VelyData);
|
||||
fillData.copy(Velocity_z, VelzData);
|
||||
|
||||
IO::writeData(timestep, visData, Dm->Comm);
|
||||
}
|
||||
}
|
||||
94
models/BGKModel.h
Normal file
94
models/BGKModel.h
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University
|
||||
Copyright Equinor ASA
|
||||
|
||||
This file is part of the Open Porous Media project (OPM).
|
||||
OPM is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
OPM is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with OPM. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
/*
|
||||
 * BGK (single-relaxation time) LBM Model
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
#include <iostream>
|
||||
#include <exception>
|
||||
#include <stdexcept>
|
||||
#include <fstream>
|
||||
|
||||
#include "common/ScaLBL.h"
|
||||
#include "common/Communication.h"
|
||||
#include "common/MPI.h"
|
||||
#include "analysis/Minkowski.h"
|
||||
#include "ProfilerApp.h"
|
||||
|
||||
class ScaLBL_BGKModel {
|
||||
public:
|
||||
ScaLBL_BGKModel(int RANK, int NP, const Utilities::MPI &COMM);
|
||||
~ScaLBL_BGKModel();
|
||||
|
||||
// functions in they should be run
|
||||
void ReadParams(string filename);
|
||||
void ReadParams(std::shared_ptr<Database> db0);
|
||||
void SetDomain();
|
||||
void ReadInput();
|
||||
void Create();
|
||||
void Initialize();
|
||||
void Run();
|
||||
void VelocityField();
|
||||
|
||||
bool Restart, pBC;
|
||||
int timestep, timestepMax;
|
||||
int ANALYSIS_INTERVAL;
|
||||
int BoundaryCondition;
|
||||
double tau, mu;
|
||||
double Fx, Fy, Fz, flux;
|
||||
double din, dout;
|
||||
double tolerance;
|
||||
|
||||
int Nx, Ny, Nz, N, Np;
|
||||
int rank, nprocx, nprocy, nprocz, nprocs;
|
||||
double Lx, Ly, Lz;
|
||||
|
||||
std::shared_ptr<Domain> Dm; // this domain is for analysis
|
||||
std::shared_ptr<Domain> Mask; // this domain is for lbm
|
||||
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
|
||||
// input database
|
||||
std::shared_ptr<Database> db;
|
||||
std::shared_ptr<Database> domain_db;
|
||||
std::shared_ptr<Database> mrt_db;
|
||||
std::shared_ptr<Database> vis_db;
|
||||
|
||||
IntArray Map;
|
||||
DoubleArray Distance;
|
||||
int *NeighborList;
|
||||
double *fq;
|
||||
double *Velocity;
|
||||
double *Pressure;
|
||||
|
||||
//Minkowski Morphology;
|
||||
|
||||
DoubleArray Velocity_x;
|
||||
DoubleArray Velocity_y;
|
||||
DoubleArray Velocity_z;
|
||||
|
||||
private:
|
||||
Utilities::MPI comm;
|
||||
|
||||
// filenames
|
||||
char LocalRankString[8];
|
||||
char LocalRankFilename[40];
|
||||
char LocalRestartFile[40];
|
||||
|
||||
//int rank,nprocs;
|
||||
void LoadParams(std::shared_ptr<Database> db0);
|
||||
};
|
||||
@@ -115,8 +115,7 @@ void ScaLBL_ColorModel::ReadParams(string filename) {
|
||||
inletB = 0.f;
|
||||
outletA = 0.f;
|
||||
outletB = 1.f;
|
||||
|
||||
|
||||
|
||||
BoundaryCondition = 0;
|
||||
if (color_db->keyExists("BC")) {
|
||||
BoundaryCondition = color_db->getScalar<int>("BC");
|
||||
@@ -388,6 +387,10 @@ void ScaLBL_ColorModel::AssignComponentLabels(double *phase) {
|
||||
AFFINITY, volume_fraction);
|
||||
}
|
||||
}
|
||||
|
||||
// clean up
|
||||
delete [] label_count;
|
||||
delete [] label_count_global;
|
||||
}
|
||||
|
||||
void ScaLBL_ColorModel::Create() {
|
||||
@@ -483,12 +486,22 @@ void ScaLBL_ColorModel::Create() {
|
||||
|
||||
// copy the neighbor list
|
||||
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
|
||||
ScaLBL_Comm->Barrier();
|
||||
delete[] neighborList;
|
||||
|
||||
// initialize phi based on PhaseLabel (include solid component labels)
|
||||
double *PhaseLabel;
|
||||
PhaseLabel = new double[N];
|
||||
PhaseLabel = new double[Nx*Ny*Nz];
|
||||
ScaLBL_Comm->Barrier();
|
||||
|
||||
AssignComponentLabels(PhaseLabel);
|
||||
ScaLBL_CopyToDevice(Phi, PhaseLabel, N * sizeof(double));
|
||||
ScaLBL_Comm->Barrier();
|
||||
|
||||
ScaLBL_CopyToDevice(Phi, PhaseLabel, Nx*Ny*Nz * sizeof(double));
|
||||
ScaLBL_Comm->Barrier();
|
||||
|
||||
if (rank == 0)
|
||||
printf("Model created \n");
|
||||
delete[] PhaseLabel;
|
||||
}
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ ScaLBL_IonModel::~ScaLBL_IonModel() {}
|
||||
|
||||
void ScaLBL_IonModel::ReadParams(string filename, vector<int> &num_iter) {
|
||||
|
||||
USE_MEMBRANE = true;
|
||||
// read the input database
|
||||
db = std::make_shared<Database>(filename);
|
||||
domain_db = db->getDatabase("Domain");
|
||||
@@ -421,7 +422,25 @@ void ScaLBL_IonModel::ReadParams(string filename) {
|
||||
1.0e-18); //LB ion concentration has unit [mol/lu^3]
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (ion_db->keyExists("MembraneIonConcentrationList")) {
|
||||
if (rank == 0) printf(".... Read MembraneIonConcentrationList \n");
|
||||
MembraneIonConcentration.clear();
|
||||
MembraneIonConcentration = ion_db->getVector<double>("MembraneIonConcentrationList");
|
||||
if (MembraneIonConcentration.size() != number_ion_species) {
|
||||
ERROR("Error: number_ion_species and MembraneIonConcentrationList must be "
|
||||
"the same length! \n");
|
||||
}
|
||||
else {
|
||||
for (size_t i = 0; i < MembraneIonConcentration.size(); i++) {
|
||||
MembraneIonConcentration[i] =
|
||||
MembraneIonConcentration[i] *
|
||||
(h * h * h *
|
||||
1.0e-18); //LB ion concentration has unit [mol/lu^3]
|
||||
}
|
||||
}
|
||||
}
|
||||
//Read solid boundary condition specific to Ion model
|
||||
BoundaryConditionSolid = 0;
|
||||
if (ion_db->keyExists("BC_Solid")) {
|
||||
@@ -583,6 +602,73 @@ void ScaLBL_IonModel::SetDomain() {
|
||||
nprocz = Dm->nprocz();
|
||||
}
|
||||
|
||||
void ScaLBL_IonModel::SetMembrane() {
|
||||
size_t NLABELS = 0;
|
||||
|
||||
membrane_db = db->getDatabase("Membrane");
|
||||
|
||||
/* set distance based on labels inside the membrane (all other labels will be outside) */
|
||||
auto MembraneLabels = membrane_db->getVector<int>("MembraneLabels");
|
||||
|
||||
IonMembrane = std::shared_ptr<Membrane>(new Membrane(Dm, NeighborList, Np));
|
||||
|
||||
signed char LABEL = 0;
|
||||
double *label_count;
|
||||
double *label_count_global;
|
||||
Array<char> membrane_id(Nx,Ny,Nz);
|
||||
label_count = new double[NLABELS];
|
||||
label_count_global = new double[NLABELS];
|
||||
// Assign the labels
|
||||
for (size_t idx = 0; idx < NLABELS; idx++)
|
||||
label_count[idx] = 0;
|
||||
/* set the distance to the membrane */
|
||||
MembraneDistance.resize(Nx, Ny, Nz);
|
||||
MembraneDistance.fill(0);
|
||||
for (int k = 0; k < Nz; k++) {
|
||||
for (int j = 0; j < Ny; j++) {
|
||||
for (int i = 0; i < Nx; i++) {
|
||||
membrane_id(i,j,k) = 1; // default value
|
||||
LABEL = Dm->id[k*Nx*Ny + j*Nx + i];
|
||||
for (size_t m=0; m<MembraneLabels.size(); m++){
|
||||
if (LABEL == MembraneLabels[m]) {
|
||||
label_count[m] += 1.0;
|
||||
membrane_id(i,j,k) = 0; // inside
|
||||
m = MembraneLabels.size(); //exit loop
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (size_t m=0; m<MembraneLabels.size(); m++){
|
||||
label_count_global[m] = Dm->Comm.sumReduce(label_count[m]);
|
||||
}
|
||||
if (rank == 0) {
|
||||
printf(" Membrane labels: %lu \n", MembraneLabels.size());
|
||||
for (size_t m=0; m<MembraneLabels.size(); m++){
|
||||
LABEL = MembraneLabels[m];
|
||||
double volume_fraction = double(label_count_global[m]) /
|
||||
double((Nx - 2) * (Ny - 2) * (Nz - 2) * nprocs);
|
||||
printf(" label=%d, volume fraction = %f\n", LABEL, volume_fraction);
|
||||
}
|
||||
}
|
||||
/* signed distance to the membrane ( - inside / + outside) */
|
||||
for (int k = 0; k < Nz; k++) {
|
||||
for (int j = 0; j < Ny; j++) {
|
||||
for (int i = 0; i < Nx; i++) {
|
||||
MembraneDistance(i, j, k) = 2.0 * double(membrane_id(i, j, k)) - 1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
CalcDist(MembraneDistance, membrane_id, *Dm);
|
||||
/* create the membrane data structure */
|
||||
if (rank==0) printf("Creating membrane data structure...\n");
|
||||
MembraneCount = IonMembrane->Create(Dm, MembraneDistance, Map);
|
||||
|
||||
// clean up
|
||||
delete [] label_count;
|
||||
delete [] label_count_global;
|
||||
}
|
||||
|
||||
void ScaLBL_IonModel::ReadInput() {
|
||||
|
||||
sprintf(LocalRankString, "%05d", Dm->rank());
|
||||
@@ -709,6 +795,33 @@ void ScaLBL_IonModel::AssignSolidBoundary(double *ion_solid) {
|
||||
}
|
||||
}
|
||||
|
||||
void ScaLBL_IonModel::AssignIonConcentrationMembrane( double *Ci, int ic) {
|
||||
// double *Ci, const vector<double> MembraneIonConcentration, const vector<double> IonConcentration, int ic) {
|
||||
|
||||
double VALUE = 0.f;
|
||||
|
||||
if (rank == 0){
|
||||
printf(".... Set concentration(%i): inside=%f, outside=%f \n", ic, MembraneIonConcentration[ic], IonConcentration[ic]);
|
||||
}
|
||||
for (int k = 0; k < Nz; k++) {
|
||||
for (int j = 0; j < Ny; j++) {
|
||||
for (int i = 0; i < Nx; i++) {
|
||||
int idx = Map(i, j, k);
|
||||
if (!(idx < 0)) {
|
||||
if (MembraneDistance(i,j,k) < 0.0) {
|
||||
VALUE = MembraneIonConcentration[ic];//* (h * h * h * 1.0e-18);
|
||||
} else {
|
||||
VALUE = IonConcentration[ic];//* (h * h * h * 1.0e-18);
|
||||
|
||||
}
|
||||
Ci[idx] = VALUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ScaLBL_IonModel::AssignIonConcentration_FromFile(
|
||||
double *Ci, const vector<std::string> &File_ion, int ic) {
|
||||
double *Ci_host;
|
||||
@@ -764,7 +877,7 @@ void ScaLBL_IonModel::Create() {
|
||||
Map.fill(-2);
|
||||
auto neighborList = new int[18 * Npad];
|
||||
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map, neighborList,
|
||||
Mask->id.data(), Np, 1);
|
||||
Mask->id.data(), Npad, 1);
|
||||
comm.barrier();
|
||||
|
||||
//...........................................................................
|
||||
@@ -778,6 +891,7 @@ void ScaLBL_IonModel::Create() {
|
||||
int neighborSize = 18 * (Np * sizeof(int));
|
||||
//...........................................................................
|
||||
ScaLBL_AllocateDeviceMemory((void **)&NeighborList, neighborSize);
|
||||
ScaLBL_AllocateDeviceMemory((void **)&dvcMap, sizeof(int) * Np);
|
||||
ScaLBL_AllocateDeviceMemory((void **)&fq,
|
||||
number_ion_species * 7 * dist_mem_size);
|
||||
ScaLBL_AllocateDeviceMemory((void **)&Ci,
|
||||
@@ -794,6 +908,37 @@ void ScaLBL_IonModel::Create() {
|
||||
if (rank == 0)
|
||||
printf("LB Ion Solver: Setting up device map and neighbor list \n");
|
||||
// copy the neighbor list
|
||||
int *TmpMap;
|
||||
TmpMap = new int[Np];
|
||||
for (int k = 1; k < Nz - 1; k++) {
|
||||
for (int j = 1; j < Ny - 1; j++) {
|
||||
for (int i = 1; i < Nx - 1; i++) {
|
||||
int idx = Map(i, j, k);
|
||||
if (!(idx < 0))
|
||||
TmpMap[idx] = k * Nx * Ny + j * Nx + i;
|
||||
}
|
||||
}
|
||||
}
|
||||
// check that TmpMap is valid
|
||||
for (int idx = 0; idx < ScaLBL_Comm->LastExterior(); idx++) {
|
||||
auto n = TmpMap[idx];
|
||||
if (n > Nx * Ny * Nz) {
|
||||
printf("Bad value! idx=%i \n", n);
|
||||
TmpMap[idx] = Nx * Ny * Nz - 1;
|
||||
}
|
||||
}
|
||||
for (int idx = ScaLBL_Comm->FirstInterior();
|
||||
idx < ScaLBL_Comm->LastInterior(); idx++) {
|
||||
auto n = TmpMap[idx];
|
||||
if (n > Nx * Ny * Nz) {
|
||||
printf("Bad value! idx=%i \n", n);
|
||||
TmpMap[idx] = Nx * Ny * Nz - 1;
|
||||
}
|
||||
}
|
||||
ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int) * Np);
|
||||
ScaLBL_Comm->Barrier();
|
||||
delete[] TmpMap;
|
||||
|
||||
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
|
||||
comm.barrier();
|
||||
|
||||
@@ -822,10 +967,26 @@ void ScaLBL_IonModel::Initialize() {
|
||||
*/
|
||||
if (rank == 0)
|
||||
printf("LB Ion Solver: initializing D3Q7 distributions\n");
|
||||
if (ion_db->keyExists("IonConcentrationFile")) {
|
||||
USE_MEMBRANE = true;
|
||||
if (USE_MEMBRANE){
|
||||
double *Ci_host;
|
||||
if (rank == 0)
|
||||
printf(" ...initializing based on membrane list \n");
|
||||
Ci_host = new double[number_ion_species * Np];
|
||||
for (size_t ic = 0; ic < number_ion_species; ic++) {
|
||||
AssignIonConcentrationMembrane( &Ci_host[ic * Np], ic);
|
||||
}
|
||||
ScaLBL_CopyToDevice(Ci, Ci_host, number_ion_species * sizeof(double) * Np);
|
||||
comm.barrier();
|
||||
for (size_t ic = 0; ic < number_ion_species; ic++) {
|
||||
ScaLBL_D3Q7_Ion_Init_FromFile(&fq[ic * Np * 7], &Ci[ic * Np], Np);
|
||||
}
|
||||
delete[] Ci_host;
|
||||
}
|
||||
else if (ion_db->keyExists("IonConcentrationFile")) {
|
||||
//NOTE: "IonConcentrationFile" is a vector, including "file_name, datatype"
|
||||
auto File_ion = ion_db->getVector<std::string>("IonConcentrationFile");
|
||||
if (File_ion.size() == 2 * number_ion_species) {
|
||||
if (File_ion.size() == 2*number_ion_species) {
|
||||
double *Ci_host;
|
||||
Ci_host = new double[number_ion_species * Np];
|
||||
for (size_t ic = 0; ic < number_ion_species; ic++) {
|
||||
@@ -844,7 +1005,8 @@ void ScaLBL_IonModel::Initialize() {
|
||||
ERROR("Error: Number of user-input ion concentration files should "
|
||||
"be equal to number of ion species!\n");
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
for (size_t ic = 0; ic < number_ion_species; ic++) {
|
||||
ScaLBL_D3Q7_Ion_Init(&fq[ic * Np * 7], &Ci[ic * Np],
|
||||
IonConcentration[ic], Np);
|
||||
@@ -1181,6 +1343,152 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField) {
|
||||
//if (rank==0) printf("********************************************************\n");
|
||||
}
|
||||
|
||||
void ScaLBL_IonModel::RunMembrane(double *Velocity, double *ElectricField, double *Psi) {
|
||||
|
||||
//Input parameter:
|
||||
//1. Velocity is from StokesModel
|
||||
//2. ElectricField is from Poisson model
|
||||
|
||||
//LB-related parameter
|
||||
vector<double> rlx;
|
||||
for (size_t ic = 0; ic < tau.size(); ic++) {
|
||||
rlx.push_back(1.0 / tau[ic]);
|
||||
}
|
||||
|
||||
//.......create and start timer............
|
||||
//double starttime,stoptime,cputime;
|
||||
//ScaLBL_Comm->Barrier(); comm.barrier();
|
||||
//auto t1 = std::chrono::system_clock::now();
|
||||
|
||||
for (size_t ic = 0; ic < number_ion_species; ic++) {
|
||||
/* set the mass transfer coefficients for the membrane */
|
||||
if (ic == 0)
|
||||
IonMembrane->AssignCoefficients(dvcMap, Psi, "Na+");
|
||||
else {
|
||||
IonMembrane->AssignCoefficients(dvcMap, Psi, "impermeable");
|
||||
}
|
||||
timestep = 0;
|
||||
while (timestep < timestepMax[ic]) {
|
||||
//************************************************************************/
|
||||
// *************ODD TIMESTEP*************//
|
||||
timestep++;
|
||||
|
||||
//LB-Ion collison
|
||||
IonMembrane->SendD3Q7AA(&fq[ic * Np * 7]); //READ FORM NORMAL
|
||||
|
||||
ScaLBL_D3Q7_AAodd_Ion(
|
||||
IonMembrane->NeighborList, &fq[ic * Np * 7], &Ci[ic * Np],
|
||||
&FluxDiffusive[3 * ic * Np], &FluxAdvective[3 * ic * Np],
|
||||
&FluxElectrical[3 * ic * Np], Velocity, ElectricField,
|
||||
IonDiffusivity[ic], IonValence[ic], rlx[ic], Vt,
|
||||
ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
|
||||
IonMembrane->RecvD3Q7AA(&fq[ic * Np * 7]); //WRITE INTO OPPOSITE
|
||||
|
||||
ScaLBL_D3Q7_AAodd_Ion(
|
||||
IonMembrane->NeighborList, &fq[ic * Np * 7], &Ci[ic * Np],
|
||||
&FluxDiffusive[3 * ic * Np], &FluxAdvective[3 * ic * Np],
|
||||
&FluxElectrical[3 * ic * Np], Velocity, ElectricField,
|
||||
IonDiffusivity[ic], IonValence[ic], rlx[ic], Vt, 0,
|
||||
ScaLBL_Comm->LastExterior(), Np);
|
||||
|
||||
|
||||
IonMembrane->IonTransport(&fq[ic * Np * 7],&Ci[ic * Np]);
|
||||
|
||||
|
||||
/* if (BoundaryConditionSolid == 1) {
|
||||
//TODO IonSolid may also be species-dependent
|
||||
ScaLBL_Comm->SolidDirichletD3Q7(&fq[ic * Np * 7], IonSolid);
|
||||
}
|
||||
ScaLBL_Comm->Barrier();
|
||||
comm.barrier();
|
||||
*/
|
||||
// *************EVEN TIMESTEP*************//
|
||||
timestep++;
|
||||
|
||||
//LB-Ion collison
|
||||
IonMembrane->SendD3Q7AA(&fq[ic * Np * 7]); //READ FORM NORMAL
|
||||
|
||||
ScaLBL_D3Q7_AAeven_Ion(
|
||||
&fq[ic * Np * 7], &Ci[ic * Np], &FluxDiffusive[3 * ic * Np],
|
||||
&FluxAdvective[3 * ic * Np], &FluxElectrical[3 * ic * Np],
|
||||
Velocity, ElectricField, IonDiffusivity[ic], IonValence[ic],
|
||||
rlx[ic], Vt, ScaLBL_Comm->FirstInterior(),
|
||||
ScaLBL_Comm->LastInterior(), Np);
|
||||
|
||||
|
||||
IonMembrane->RecvD3Q7AA(&fq[ic * Np * 7]); //WRITE INTO OPPOSITE
|
||||
|
||||
ScaLBL_D3Q7_AAeven_Ion(
|
||||
&fq[ic * Np * 7], &Ci[ic * Np], &FluxDiffusive[3 * ic * Np],
|
||||
&FluxAdvective[3 * ic * Np], &FluxElectrical[3 * ic * Np],
|
||||
Velocity, ElectricField, IonDiffusivity[ic], IonValence[ic],
|
||||
rlx[ic], Vt, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
|
||||
IonMembrane->IonTransport(&fq[ic * Np * 7],&Ci[ic * Np]);
|
||||
|
||||
ScaLBL_Comm->Barrier();
|
||||
comm.barrier();
|
||||
|
||||
/*
|
||||
if (BoundaryConditionSolid == 1) {
|
||||
//TODO IonSolid may also be species-dependent
|
||||
ScaLBL_Comm->SolidDirichletD3Q7(&fq[ic * Np * 7], IonSolid);
|
||||
}
|
||||
ScaLBL_Comm->Barrier();
|
||||
comm.barrier();
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
//Compute charge density for Poisson equation
|
||||
for (size_t ic = 0; ic < number_ion_species; ic++) {
|
||||
int Valence = IonValence[ic];
|
||||
if (rank==0) printf("compute charge density for ion %i, Valence =%i \n", ic,Valence);
|
||||
|
||||
ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, Valence, ic,
|
||||
ScaLBL_Comm->FirstInterior(),
|
||||
ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, Valence, ic, 0,
|
||||
ScaLBL_Comm->LastExterior(), Np);
|
||||
}
|
||||
|
||||
/* DoubleArray Charge(Nx,Ny,Nz);
|
||||
ScaLBL_Comm->RegularLayout(Map, ChargeDensity, Charge);
|
||||
double charge_sum=0.0;
|
||||
double charge_sum_total=0.0;
|
||||
for (int k=1; k<Nz-1; k++){
|
||||
for (int j=1; j<Ny-1; j++){
|
||||
for (int i=1; i<Nx-1; i++){
|
||||
charge_sum += Charge(i,j,k);
|
||||
}
|
||||
}
|
||||
}
|
||||
printf(" Local charge value = %.8g (rank=%i)\n",charge_sum, rank);
|
||||
ScaLBL_Comm->Barrier();
|
||||
comm.barrier();
|
||||
*/
|
||||
ScaLBL_Comm->Barrier();
|
||||
comm.barrier();
|
||||
//if (rank==0) printf(" IonMembrane: completeted full step \n");
|
||||
//fflush(stdout);
|
||||
//************************************************************************/
|
||||
//if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
//// Compute the walltime per timestep
|
||||
//auto t2 = std::chrono::system_clock::now();
|
||||
//double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
|
||||
//// Performance obtained from each node
|
||||
//double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
//if (rank==0) printf("********************************************************\n");
|
||||
//if (rank==0) printf("CPU time = %f \n", cputime);
|
||||
//if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
|
||||
//MLUPS *= nprocs;
|
||||
//if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
|
||||
//if (rank==0) printf("********************************************************\n");
|
||||
}
|
||||
|
||||
|
||||
void ScaLBL_IonModel::getIonConcentration(DoubleArray &IonConcentration,
|
||||
const size_t ic) {
|
||||
//This function wirte out the data in a normal layout (by aggregating all decomposed domains)
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
/*
|
||||
 * Ion transport LB Model
|
||||
*/
|
||||
|
||||
#ifndef ScaLBL_IonModel_INC
|
||||
#define ScaLBL_IonModel_INC
|
||||
|
||||
@@ -16,6 +15,7 @@
|
||||
|
||||
#include "common/ScaLBL.h"
|
||||
#include "common/Communication.h"
|
||||
#include "common/Membrane.h"
|
||||
#include "common/MPI.h"
|
||||
#include "analysis/Minkowski.h"
|
||||
#include "ProfilerApp.h"
|
||||
@@ -30,10 +30,12 @@ public:
|
||||
void ReadParams(string filename);
|
||||
void ReadParams(std::shared_ptr<Database> db0);
|
||||
void SetDomain();
|
||||
void SetMembrane();
|
||||
void ReadInput();
|
||||
void Create();
|
||||
void Initialize();
|
||||
void Run(double *Velocity, double *ElectricField);
|
||||
void RunMembrane(double *Velocity, double *ElectricField, double *Psi);
|
||||
void getIonConcentration(DoubleArray &IonConcentration, const size_t ic);
|
||||
void getIonConcentration_debug(int timestep);
|
||||
void getIonFluxDiffusive(DoubleArray &IonFlux_x, DoubleArray &IonFlux_y,
|
||||
@@ -66,6 +68,7 @@ public:
|
||||
vector<double> IonDiffusivity; //User input unit [m^2/sec]
|
||||
vector<int> IonValence;
|
||||
vector<double> IonConcentration; //unit [mol/m^3]
|
||||
vector<double> MembraneIonConcentration; //unit [mol/m^3]
|
||||
vector<double>
|
||||
Cin; //inlet boundary value, can be either concentration [mol/m^3] or flux [mol/m^2/sec]
|
||||
vector<double>
|
||||
@@ -88,6 +91,7 @@ public:
|
||||
IntArray Map;
|
||||
DoubleArray Distance;
|
||||
int *NeighborList;
|
||||
int *dvcMap;
|
||||
double *fq;
|
||||
double *Ci;
|
||||
double *ChargeDensity;
|
||||
@@ -97,7 +101,14 @@ public:
|
||||
double *FluxDiffusive;
|
||||
double *FluxAdvective;
|
||||
double *FluxElectrical;
|
||||
|
||||
|
||||
/* these support membrane capabilities */
|
||||
bool USE_MEMBRANE;
|
||||
std::shared_ptr<Database> membrane_db;
|
||||
std::shared_ptr<Membrane> IonMembrane;
|
||||
DoubleArray MembraneDistance;
|
||||
int MembraneCount; // number of links the cross the membrane
|
||||
|
||||
private:
|
||||
Utilities::MPI comm;
|
||||
|
||||
@@ -113,6 +124,8 @@ private:
|
||||
void AssignIonConcentration_FromFile(double *Ci,
|
||||
const vector<std::string> &File_ion,
|
||||
int ic);
|
||||
void AssignIonConcentrationMembrane( double *Ci, int ic);
|
||||
|
||||
void IonConcentration_LB_to_Phys(DoubleArray &Den_reg);
|
||||
void IonFlux_LB_to_Phys(DoubleArray &Den_reg, const size_t ic);
|
||||
};
|
||||
|
||||
@@ -36,6 +36,7 @@ void ScaLBL_MRTModel::ReadParams(string filename) {
|
||||
|
||||
tau = 1.0;
|
||||
timestepMax = 100000;
|
||||
ANALYSIS_INTERVAL = 1000;
|
||||
tolerance = 1.0e-8;
|
||||
Fx = Fy = 0.0;
|
||||
Fz = 1.0e-5;
|
||||
@@ -46,6 +47,9 @@ void ScaLBL_MRTModel::ReadParams(string filename) {
|
||||
if (mrt_db->keyExists("timestepMax")) {
|
||||
timestepMax = mrt_db->getScalar<int>("timestepMax");
|
||||
}
|
||||
if (mrt_db->keyExists("analysis_interval")) {
|
||||
ANALYSIS_INTERVAL = mrt_db->getScalar<int>("analysis_interval");
|
||||
}
|
||||
if (mrt_db->keyExists("tolerance")) {
|
||||
tolerance = mrt_db->getScalar<double>("tolerance");
|
||||
}
|
||||
@@ -318,7 +322,7 @@ void ScaLBL_MRTModel::Run() {
|
||||
comm.barrier();
|
||||
//************************************************************************/
|
||||
|
||||
if (timestep % 1000 == 0) {
|
||||
if (timestep % ANALYSIS_INTERVAL == 0) {
|
||||
ScaLBL_D3Q19_Momentum(fq, Velocity, Np);
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
|
||||
@@ -48,6 +48,7 @@ public:
|
||||
|
||||
bool Restart, pBC;
|
||||
int timestep, timestepMax;
|
||||
int ANALYSIS_INTERVAL;
|
||||
int BoundaryCondition;
|
||||
double tau, mu;
|
||||
double Fx, Fy, Fz, flux;
|
||||
|
||||
@@ -42,7 +42,7 @@ void ScaLBL_Poisson::ReadParams(string filename){
|
||||
domain_db = db->getDatabase( "Domain" );
|
||||
electric_db = db->getDatabase( "Poisson" );
|
||||
|
||||
k2_inv = 4.0;//speed of sound for D3Q7 lattice
|
||||
k2_inv = 3.0;//speed of sound for D3Q19 lattice
|
||||
tau = 0.5+k2_inv;
|
||||
timestepMax = 100000;
|
||||
tolerance = 1.0e-6;//stopping criterion for obtaining steady-state electricla potential
|
||||
@@ -63,6 +63,9 @@ void ScaLBL_Poisson::ReadParams(string filename){
|
||||
if (electric_db->keyExists( "timestepMax" )){
|
||||
timestepMax = electric_db->getScalar<int>( "timestepMax" );
|
||||
}
|
||||
if (electric_db->keyExists( "tau" )){
|
||||
tau = electric_db->getScalar<double>( "tau" );
|
||||
}
|
||||
if (electric_db->keyExists( "analysis_interval" )){
|
||||
analysis_interval = electric_db->getScalar<int>( "analysis_interval" );
|
||||
}
|
||||
@@ -330,7 +333,7 @@ void ScaLBL_Poisson::Create(){
|
||||
if (rank==0) printf ("LB-Poisson Solver: Set up memory efficient layout \n");
|
||||
Map.resize(Nx,Ny,Nz); Map.fill(-2);
|
||||
auto neighborList= new int[18*Npad];
|
||||
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np,1);
|
||||
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Npad,1);
|
||||
comm.barrier();
|
||||
|
||||
//...........................................................................
|
||||
@@ -345,7 +348,7 @@ void ScaLBL_Poisson::Create(){
|
||||
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np);
|
||||
//ScaLBL_AllocateDeviceMemory((void **) &dvcID, sizeof(signed char)*Nx*Ny*Nz);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &fq, 7*dist_mem_size);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &Psi, sizeof(double)*Nx*Ny*Nz);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &Psi_BCLabel, sizeof(int)*Nx*Ny*Nz);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &ElectricField, 3*sizeof(double)*Np);
|
||||
@@ -366,6 +369,8 @@ void ScaLBL_Poisson::Create(){
|
||||
}
|
||||
}
|
||||
}
|
||||
comm.barrier();
|
||||
if (rank==0) printf (" .... LB-Poisson Solver: check neighbor list \n");
|
||||
// check that TmpMap is valid
|
||||
for (int idx=0; idx<ScaLBL_Comm->LastExterior(); idx++){
|
||||
auto n = TmpMap[idx];
|
||||
@@ -381,6 +386,8 @@ void ScaLBL_Poisson::Create(){
|
||||
TmpMap[idx] = Nx*Ny*Nz-1;
|
||||
}
|
||||
}
|
||||
comm.barrier();
|
||||
if (rank==0) printf (" .... LB-Poisson Solver: copy neighbor list to GPU \n");
|
||||
ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np);
|
||||
ScaLBL_Comm->Barrier();
|
||||
delete [] TmpMap;
|
||||
@@ -547,7 +554,7 @@ void ScaLBL_Poisson::Initialize(double time_conv_from_Study){
|
||||
* "time_conv_from_Study" is the phys to LB time conversion factor, unit=[sec/lt]
|
||||
* which is used for periodic voltage input for inlet and outlet boundaries
|
||||
*/
|
||||
if (rank==0) printf ("LB-Poisson Solver: initializing D3Q7 distributions\n");
|
||||
if (rank==0) printf ("LB-Poisson Solver: initializing D3Q19 distributions\n");
|
||||
//NOTE the initialization involves two steps:
|
||||
//1. assign solid boundary value (surface potential or surface change density)
|
||||
//2. Initialize electric potential for pore nodes
|
||||
@@ -561,8 +568,9 @@ void ScaLBL_Poisson::Initialize(double time_conv_from_Study){
|
||||
ScaLBL_CopyToDevice(Psi, psi_host, Nx*Ny*Nz*sizeof(double));
|
||||
ScaLBL_CopyToDevice(Psi_BCLabel, psi_BCLabel_host, Nx*Ny*Nz*sizeof(int));
|
||||
ScaLBL_Comm->Barrier();
|
||||
ScaLBL_D3Q7_Poisson_Init(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_D3Q7_Poisson_Init(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
/* switch to d3Q19 model */
|
||||
ScaLBL_D3Q19_Poisson_Init(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_D3Q19_Poisson_Init(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
delete [] psi_host;
|
||||
delete [] psi_BCLabel_host;
|
||||
|
||||
@@ -583,6 +591,8 @@ void ScaLBL_Poisson::Run(double *ChargeDensity, bool UseSlippingVelBC, int times
|
||||
//double starttime,stoptime,cputime;
|
||||
//comm.barrier();
|
||||
//auto t1 = std::chrono::system_clock::now();
|
||||
double *host_Error;
|
||||
host_Error = new double [Np];
|
||||
|
||||
timestep=0;
|
||||
double error = 1.0;
|
||||
@@ -590,98 +600,40 @@ void ScaLBL_Poisson::Run(double *ChargeDensity, bool UseSlippingVelBC, int times
|
||||
//************************************************************************/
|
||||
// *************ODD TIMESTEP*************//
|
||||
timestep++;
|
||||
SolveElectricPotentialAAodd(timestep_from_Study);//update electric potential
|
||||
//SolveElectricPotentialAAodd(timestep_from_Study,ChargeDensity, UseSlippingVelBC);//update electric potential
|
||||
SolvePoissonAAodd(ChargeDensity, UseSlippingVelBC);//perform collision
|
||||
ScaLBL_Comm->Barrier(); comm.barrier();
|
||||
|
||||
// *************EVEN TIMESTEP*************//
|
||||
timestep++;
|
||||
SolveElectricPotentialAAeven(timestep_from_Study);//update electric potential
|
||||
//SolveElectricPotentialAAeven(timestep_from_Study,ChargeDensity, UseSlippingVelBC);//update electric potential
|
||||
SolvePoissonAAeven(ChargeDensity, UseSlippingVelBC);//perform collision
|
||||
ScaLBL_Comm->Barrier(); comm.barrier();
|
||||
//************************************************************************/
|
||||
|
||||
|
||||
// Check convergence of steady-state solution
|
||||
if (timestep==2){
|
||||
//save electric potential for convergence check
|
||||
ScaLBL_CopyToHost(Psi_previous.data(),Psi,sizeof(double)*Nx*Ny*Nz);
|
||||
}
|
||||
if (timestep%analysis_interval==0){
|
||||
if (tolerance_method.compare("MSE")==0){
|
||||
double count_loc=0;
|
||||
double count;
|
||||
double MSE_loc=0.0;
|
||||
ScaLBL_CopyToHost(Psi_host.data(),Psi,sizeof(double)*Nx*Ny*Nz);
|
||||
for (int k=1; k<Nz-1; k++){
|
||||
for (int j=1; j<Ny-1; j++){
|
||||
for (int i=1; i<Nx-1; i++){
|
||||
if (Distance(i,j,k) > 0){
|
||||
MSE_loc += (Psi_host(i,j,k) - Psi_previous(i,j,k))*(Psi_host(i,j,k) - Psi_previous(i,j,k));
|
||||
count_loc+=1.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
error=Dm->Comm.sumReduce(MSE_loc);
|
||||
count=Dm->Comm.sumReduce(count_loc);
|
||||
error /= count;
|
||||
}
|
||||
else if (tolerance_method.compare("MSE_max")==0){
|
||||
vector<double>MSE_loc;
|
||||
double MSE_loc_max;
|
||||
ScaLBL_CopyToHost(Psi_host.data(),Psi,sizeof(double)*Nx*Ny*Nz);
|
||||
for (int k=1; k<Nz-1; k++){
|
||||
for (int j=1; j<Ny-1; j++){
|
||||
for (int i=1; i<Nx-1; i++){
|
||||
if (Distance(i,j,k) > 0){
|
||||
MSE_loc.push_back((Psi_host(i,j,k) - Psi_previous(i,j,k))*(Psi_host(i,j,k) - Psi_previous(i,j,k)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
vector<double>::iterator it_max = max_element(MSE_loc.begin(),MSE_loc.end());
|
||||
unsigned int idx_max=distance(MSE_loc.begin(),it_max);
|
||||
MSE_loc_max=MSE_loc[idx_max];
|
||||
error=Dm->Comm.maxReduce(MSE_loc_max);
|
||||
}
|
||||
else{
|
||||
ERROR("Error: user-specified tolerance_method cannot be identified; check you input database! \n");
|
||||
}
|
||||
ScaLBL_CopyToHost(Psi_previous.data(),Psi,sizeof(double)*Nx*Ny*Nz);
|
||||
/* get the elecric potential */
|
||||
ScaLBL_CopyToHost(Psi_host.data(),Psi,sizeof(double)*Nx*Ny*Nz);
|
||||
if (rank==0) printf(" ... getting Poisson solver error \n");
|
||||
double err = 0.0;
|
||||
double max_error = 0.0;
|
||||
ScaLBL_CopyToHost(host_Error,ResidualError,sizeof(double)*Np);
|
||||
for (int idx=0; idx<Np; idx++){
|
||||
err = host_Error[idx]*host_Error[idx];
|
||||
if (err > max_error ){
|
||||
max_error = err;
|
||||
}
|
||||
}
|
||||
error=Dm->Comm.maxReduce(max_error);
|
||||
|
||||
/* compute the eletric field */
|
||||
//ScaLBL_D3Q19_Poisson_getElectricField(fq, ElectricField, tau, Np);
|
||||
|
||||
|
||||
|
||||
|
||||
//legacy code that tried to use residual to check convergence
|
||||
//ScaLBL_D3Q7_PoissonResidualError(NeighborList,dvcMap,ResidualError,Psi,ChargeDensity,epsilon_LB,Nx,Nx*Ny,ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior());
|
||||
//ScaLBL_D3Q7_PoissonResidualError(NeighborList,dvcMap,ResidualError,Psi,ChargeDensity,epsilon_LB,Nx,Nx*Ny,0, ScaLBL_Comm->LastExterior());
|
||||
//ScaLBL_Comm->Barrier(); comm.barrier();
|
||||
|
||||
//vector<double> ResidualError_host(Np);
|
||||
//double error_loc_max;
|
||||
////calculate the maximum residual error
|
||||
//ScaLBL_CopyToHost(&ResidualError_host[0],ResidualError,sizeof(double)*Np);
|
||||
|
||||
//vector<double>::iterator it_temp1,it_temp2;
|
||||
//it_temp1=ResidualError_host.begin();
|
||||
//advance(it_temp1,ScaLBL_Comm->LastExterior());
|
||||
//vector<double>::iterator it_max = max_element(ResidualError_host.begin(),it_temp1);
|
||||
//unsigned int idx_max1 = distance(ResidualError_host.begin(),it_max);
|
||||
|
||||
//it_temp1=ResidualError_host.begin();
|
||||
//it_temp2=ResidualError_host.begin();
|
||||
//advance(it_temp1,ScaLBL_Comm->FirstInterior());
|
||||
//advance(it_temp2,ScaLBL_Comm->LastInterior());
|
||||
//it_max = max_element(it_temp1,it_temp2);
|
||||
//unsigned int idx_max2 = distance(ResidualError_host.begin(),it_max);
|
||||
//if (ResidualError_host[idx_max1]>ResidualError_host[idx_max2]){
|
||||
// error_loc_max=ResidualError_host[idx_max1];
|
||||
//}
|
||||
//else{
|
||||
// error_loc_max=ResidualError_host[idx_max2];
|
||||
//}
|
||||
//error = Dm->Comm.maxReduce(error_loc_max);
|
||||
}
|
||||
}
|
||||
if(WriteLog==true){
|
||||
@@ -714,11 +666,12 @@ void ScaLBL_Poisson::getConvergenceLog(int timestep,double error){
|
||||
}
|
||||
}
|
||||
|
||||
void ScaLBL_Poisson::SolveElectricPotentialAAodd(int timestep_from_Study){
|
||||
ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL
|
||||
ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE
|
||||
void ScaLBL_Poisson::SolveElectricPotentialAAodd(int timestep_from_Study, double *ChargeDensity, bool UseSlippingVelBC){
|
||||
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
|
||||
//ScaLBL_D3Q19_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, ChargeDensity, Psi, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
|
||||
ScaLBL_Comm->Barrier();
|
||||
/*
|
||||
// Set boundary conditions
|
||||
if (BoundaryConditionInlet > 0){
|
||||
switch (BoundaryConditionInlet){
|
||||
@@ -743,15 +696,20 @@ void ScaLBL_Poisson::SolveElectricPotentialAAodd(int timestep_from_Study){
|
||||
}
|
||||
}
|
||||
//-------------------------//
|
||||
ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
* */
|
||||
//ScaLBL_D3Q19_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, ChargeDensity, Psi, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
}
|
||||
|
||||
void ScaLBL_Poisson::SolveElectricPotentialAAeven(int timestep_from_Study){
|
||||
ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL
|
||||
ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE
|
||||
void ScaLBL_Poisson::SolveElectricPotentialAAeven(int timestep_from_Study, double *ChargeDensity, bool UseSlippingVelBC){
|
||||
ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL
|
||||
//ScaLBL_D3Q19_AAeven_Poisson_ElectricPotential(dvcMap, fq, ChargeDensity, Psi, epsilon_LB, UseSlippingVelBC,
|
||||
// ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
|
||||
ScaLBL_Comm->Barrier();
|
||||
|
||||
|
||||
// Set boundary conditions
|
||||
/*
|
||||
if (BoundaryConditionInlet > 0){
|
||||
switch (BoundaryConditionInlet){
|
||||
case 1:
|
||||
@@ -774,35 +732,35 @@ void ScaLBL_Poisson::SolveElectricPotentialAAeven(int timestep_from_Study){
|
||||
break;
|
||||
}
|
||||
}
|
||||
*/
|
||||
//-------------------------//
|
||||
ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
//ScaLBL_D3Q19_AAeven_Poisson_ElectricPotential(dvcMap, fq, ChargeDensity, Psi, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
}
|
||||
|
||||
void ScaLBL_Poisson::SolvePoissonAAodd(double *ChargeDensity, bool UseSlippingVelBC){
|
||||
ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
|
||||
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
|
||||
ScaLBL_D3Q19_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
//ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
|
||||
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
|
||||
|
||||
ScaLBL_D3Q19_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
ScaLBL_Comm->Barrier();
|
||||
//TODO: perhaps add another ScaLBL_Comm routine to update Psi values on solid boundary nodes.
|
||||
//something like:
|
||||
//ScaLBL_Comm->SolidDirichletBoundaryUpdates(Psi, Psi_BCLabel, timestep);
|
||||
ScaLBL_Comm->SolidDirichletAndNeumannD3Q7(fq, Psi, Psi_BCLabel);
|
||||
//if (BoundaryConditionSolid==1){
|
||||
// ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi);
|
||||
//}
|
||||
//else if (BoundaryConditionSolid==2){
|
||||
// ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi);
|
||||
//}
|
||||
//ScaLBL_Comm->SolidDirichletAndNeumannD3Q7(fq, Psi, Psi_BCLabel);
|
||||
|
||||
}
|
||||
|
||||
void ScaLBL_Poisson::SolvePoissonAAeven(double *ChargeDensity, bool UseSlippingVelBC){
|
||||
ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
ScaLBL_Comm->SolidDirichletAndNeumannD3Q7(fq, Psi, Psi_BCLabel);
|
||||
//if (BoundaryConditionSolid==1){
|
||||
// ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi);
|
||||
//}
|
||||
//else if (BoundaryConditionSolid==2){
|
||||
// ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi);
|
||||
//}
|
||||
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
|
||||
ScaLBL_D3Q19_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, ResidualError, tau, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
|
||||
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
|
||||
// ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
|
||||
ScaLBL_D3Q19_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, ResidualError, tau, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
|
||||
ScaLBL_Comm->Barrier();
|
||||
|
||||
//ScaLBL_Comm->SolidDirichletAndNeumannD3Q7(fq, Psi, Psi_BCLabel);
|
||||
}
|
||||
|
||||
void ScaLBL_Poisson::DummyChargeDensity(){
|
||||
|
||||
@@ -108,8 +108,8 @@ private:
|
||||
void AssignSolidBoundary(double *poisson_solid, int *poisson_solid_label);
|
||||
void Potential_Init(double *psi_init);
|
||||
void ElectricField_LB_to_Phys(DoubleArray &Efield_reg);
|
||||
void SolveElectricPotentialAAodd(int timestep_from_Study);
|
||||
void SolveElectricPotentialAAeven(int timestep_from_Study);
|
||||
void SolveElectricPotentialAAeven(int timestep_from_Study, double *ChargeDensity, bool UseSlippingVelBC);
|
||||
void SolveElectricPotentialAAodd(int timestep_from_Study, double *ChargeDensity, bool UseSlippingVelBC);
|
||||
//void SolveElectricField();
|
||||
void SolvePoissonAAodd(double *ChargeDensity, bool UseSlippingVelBC);
|
||||
void SolvePoissonAAeven(double *ChargeDensity, bool UseSlippingVelBC);
|
||||
|
||||
47
sample_scripts/configure_crusher_cpu
Executable file
47
sample_scripts/configure_crusher_cpu
Executable file
@@ -0,0 +1,47 @@
|
||||
#module load cmake/3.21.3
|
||||
#module load PrgEnv-gnu
|
||||
module load PrgEnv-amd
|
||||
module load rocm/4.5.0
|
||||
module load cray-mpich
|
||||
module load cray-hdf5-parallel
|
||||
#module load craype-accel-amd-gfx908
|
||||
|
||||
## These must be set before compiling so the executable picks up GTL
|
||||
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
|
||||
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
|
||||
|
||||
export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
|
||||
|
||||
# Need a new version of cmake
|
||||
export CMAKE_DIR=/gpfs/alpine/csc380/proj-shared/LBPM/cmake-3.21.0/bin
|
||||
|
||||
#-I${MPICH_DIR}/include
|
||||
#-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa
|
||||
|
||||
# Target GPU architecture flag for HIP compilation.
# Shell variable assignments must not have spaces around '=': the previous
# form `HIPFLAGS = ...` was parsed as running a command named HIPFLAGS and
# failed with "command not found" without setting anything.
# NOTE(review): exported so that cmake can see it; this configuration passes
# USE_HIP=0 below, so the value is presumably unused here — confirm whether
# the line is wanted at all in the CPU script.
export HIPFLAGS="--amdgpu-target=gfx90a"
|
||||
|
||||
# configure
|
||||
rm -rf CMake*
|
||||
${CMAKE_DIR}/cmake \
|
||||
-D CMAKE_BUILD_TYPE:STRING=Release \
|
||||
-D CMAKE_C_COMPILER:PATH=cc \
|
||||
-D CMAKE_CXX_COMPILER:PATH=CC \
|
||||
-D CMAKE_CXX_STANDARD=14 \
|
||||
-D DISABLE_GOLD:BOOL=TRUE \
|
||||
-D DISABLE_LTO:BOOL=TRUE \
|
||||
-D CMAKE_C_FLAGS="-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -I${HDF5_DIR}/include" \
|
||||
-D CMAKE_CXX_FLAGS="-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -I${HDF5_DIR}/include" \
|
||||
-D LINK_LIBRARIES="${ROCM_PATH}/lib/libamdhip64.so;${CRAY_MPICH_ROOTDIR}/gtl/lib/libmpi_gtl_hsa.so" \
|
||||
-D USE_HIP=0 \
|
||||
-D CMAKE_HIP_COMPILER_TOOLKIT_ROOT=$ROCM_PATH/hip \
|
||||
-D USE_MPI=1 \
|
||||
-D MPI_SKIP_SEARCH=1 \
|
||||
-D MPIEXEC="srun" \
|
||||
-D USE_HDF5=1 \
|
||||
-D HDF5_DIRECTORY="${HDF5_DIR}" \
|
||||
-D USE_SILO=0 \
|
||||
-D USE_TIMER=0 \
|
||||
-D USE_DOXYGEN:BOOL=false \
|
||||
~/LBPM-WIA
|
||||
|
||||
|
||||
52
sample_scripts/configure_crusher_hip
Executable file
52
sample_scripts/configure_crusher_hip
Executable file
@@ -0,0 +1,52 @@
|
||||
#module load cmake/3.21.3
|
||||
#module load PrgEnv-gnu
|
||||
module load PrgEnv-amd
|
||||
module load rocm/4.5.0
|
||||
module load cray-mpich
|
||||
module load cray-hdf5-parallel
|
||||
module load craype-accel-amd-gfx908
|
||||
|
||||
## These must be set before compiling so the executable picks up GTL
|
||||
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
|
||||
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
|
||||
|
||||
export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
|
||||
|
||||
# Need a new version of cmake
|
||||
export CMAKE_DIR=/gpfs/alpine/csc380/proj-shared/LBPM/cmake-3.21.0/bin
|
||||
|
||||
#-I${MPICH_DIR}/include
|
||||
#-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa
|
||||
|
||||
#export HIPFLAGS="--amdgpu-target=gfx90a --save-temps"
|
||||
#--amdgpu-spill-vgpr-to-agpr=0
|
||||
|
||||
#THIS IS HOW TO CHECK FOR SPILLS (example)
|
||||
# hipcc -c -g -ggdb --save-temps Color.hip
|
||||
|
||||
# -munsafe-fp-atomics
|
||||
# configure
|
||||
rm -rf CMake*
|
||||
${CMAKE_DIR}/cmake \
|
||||
-D CMAKE_BUILD_TYPE:STRING=Release \
|
||||
-D CMAKE_C_COMPILER:PATH=cc \
|
||||
-D CMAKE_CXX_COMPILER:PATH=CC \
|
||||
-D CMAKE_CXX_STANDARD=14 \
|
||||
-D DISABLE_GOLD:BOOL=TRUE \
|
||||
-D DISABLE_LTO:BOOL=TRUE \
|
||||
-D CMAKE_C_FLAGS="-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -I${HDF5_DIR}/include" \
|
||||
-D CMAKE_CXX_FLAGS="-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -I${HDF5_DIR}/include" \
|
||||
-D LINK_LIBRARIES="${ROCM_PATH}/lib/libamdhip64.so;${CRAY_MPICH_ROOTDIR}/gtl/lib/libmpi_gtl_hsa.so" \
|
||||
-D USE_HIP=1 \
|
||||
-D CMAKE_HIP_COMPILER_TOOLKIT_ROOT=$ROCM_PATH/hip \
|
||||
-D USE_MPI=1 \
|
||||
-D MPI_SKIP_SEARCH=1 \
|
||||
-D MPIEXEC="srun" \
|
||||
-D USE_HDF5=1 \
|
||||
-D HDF5_DIRECTORY="${HDF5_DIR}" \
|
||||
-D USE_SILO=0 \
|
||||
-D USE_TIMER=0 \
|
||||
-D USE_DOXYGEN:BOOL=false \
|
||||
~/LBPM-WIA
|
||||
|
||||
|
||||
@@ -1,14 +1,13 @@
|
||||
export TPL_ROOT=/ccs/home/mbt/repos
|
||||
export TPL_BUILDER=/ccs/home/mbt/repos/TPL-builder
|
||||
export TPL_ROOT=/ccs/proj/csc380/mcclurej/spock
|
||||
export TPL_BUILDER=/ccs/home/mcclurej/tpl-builder
|
||||
export TPL_WEBPAGE=http://bitbucket.org/AdvancedMultiPhysics/tpl-builder/downloads
|
||||
|
||||
export INSTALL_DIR=/ccs/home/mbt/spock/install
|
||||
export INSTALL_DIR=/ccs/proj/csc380/mcclurej/spock/install
|
||||
|
||||
module load cmake
|
||||
module load llvm-amdgpu
|
||||
module load hip
|
||||
|
||||
|
||||
cmake \
|
||||
-D CMAKE_BUILD_TYPE=Release \
|
||||
-D CXX_STD=14 \
|
||||
@@ -24,7 +23,7 @@ cmake \
|
||||
-D ENABLE_SHARED:BOOL=OFF \
|
||||
-D PROCS_INSTALL=8 \
|
||||
-D TPL_LIST:STRING="TIMER;ZLIB;HDF5;SILO" \
|
||||
-D TIMER_URL="${TPL_ROOT}/TimerUtility" \
|
||||
-D TIMER_URL="${TPL_ROOT}/timerutility" \
|
||||
-D ZLIB_URL="http://zlib.net/zlib-1.2.11.tar.gz" \
|
||||
-D HDF5_URL="${TPL_ROOT}/hdf5-1.8.12.tar.gz" \
|
||||
-D BUILD_TYPE=x86_64 \
|
||||
|
||||
@@ -1,30 +1,40 @@
|
||||
module load cmake
|
||||
module load llvm-amdgpu
|
||||
module load hip
|
||||
module load PrgEnv-gnu
|
||||
module load rocm/4.2.0
|
||||
module load cray-mpich
|
||||
module load cray-hdf5-parallel
|
||||
#module load craype-accel-amd-gfx908
|
||||
|
||||
|
||||
export TPL_DIR=/gpfs/alpine/stf006/proj-shared/mbt/spock/install
|
||||
## These must be set before compiling so the executable picks up GTL
|
||||
export PE_MPICH_GTL_DIR_amd_gfx908="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
|
||||
|
||||
export PE_MPICH_GTL_LIBS_amd_gfx908="-lmpi_gtl_hsa"
|
||||
|
||||
|
||||
# Need a new version of cmake
|
||||
export CMAKE_DIR=/gpfs/alpine/csc380/proj-shared/LBPM/cmake-3.21.0/bin
|
||||
|
||||
|
||||
# configure
|
||||
rm -rf CMake*
|
||||
cmake \
|
||||
${CMAKE_DIR}/cmake \
|
||||
-D CMAKE_BUILD_TYPE:STRING=Release \
|
||||
-D CMAKE_C_COMPILER:PATH=cc \
|
||||
-D CMAKE_C_COMPILER:PATH=cc \
|
||||
-D CMAKE_CXX_COMPILER:PATH=CC \
|
||||
-D CMAKE_CXX_STANDARD=14 \
|
||||
-D DISABLE_GOLD:BOOL=TRUE \
|
||||
-D DISABLE_LTO:BOOL=TRUE \
|
||||
-D LINK_LIBRARIES="${ROCM_PATH}/lib/libamdhip64.so;${CRAY_MPICH_ROOTDIR}/gtl/lib/libmpi_gtl_hsa.so" \
|
||||
-D USE_HIP=1 \
|
||||
-D LINK_LIBRARIES=${HIP_PATH}/lib/libamdhip64.so \
|
||||
-D USE_CUDA=0 \
|
||||
-D CMAKE_CUDA_FLAGS="-arch sm_70 -Xptxas=-v -Xptxas -dlcm=cg -lineinfo" \
|
||||
-D CMAKE_CUDA_HOST_COMPILER="gcc" \
|
||||
-D USE_MPI=0 \
|
||||
-D CMAKE_HIP_COMPILER_TOOLKIT_ROOT=$ROCM_PATH/hip \
|
||||
-D USE_MPI=1 \
|
||||
-D MPI_SKIP_SEARCH=1 \
|
||||
-D MPIEXEC="srun" \
|
||||
-D USE_HDF5=1 \
|
||||
-D HDF5_DIRECTORY="${TPL_DIR}/hdf5" \
|
||||
-D USE_SILO=0 \
|
||||
-D SILO_DIRECTORY="${TPL_DIR}/silo" \
|
||||
-D USE_DOXYGEN:BOOL=false \
|
||||
-D HDF5_DIRECTORY="${HDF5_DIR}" \
|
||||
-D USE_SILO=0 \
|
||||
-D USE_TIMER=0 \
|
||||
~/repos/LBPM-WIA
|
||||
-D USE_DOXYGEN:BOOL=false \
|
||||
~/LBPM-WIA
|
||||
|
||||
|
||||
|
||||
39
sample_scripts/configure_spock_hip_mark
Executable file
39
sample_scripts/configure_spock_hip_mark
Executable file
@@ -0,0 +1,39 @@
|
||||
## Load the desired modules
|
||||
module load PrgEnv-gcc
|
||||
module load rocm/4.3.0
|
||||
module load cray-mpich
|
||||
module load cray-hdf5-parallel
|
||||
|
||||
## These must be set before compiling so the executable picks up GTL
|
||||
export PE_MPICH_GTL_DIR_amd_gfx908="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
|
||||
export PE_MPICH_GTL_LIBS_amd_gfx908="-lmpi_gtl_hsa"
|
||||
|
||||
## These must be set before running
|
||||
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
|
||||
export MPICH_GPU_SUPPORT_ENABLED=1
|
||||
export MPICH_SMP_SINGLE_COPY_MODE=CMA
|
||||
|
||||
#export CMAKE_DIR=/gpfs/alpine/csc380/proj-shared/LBPM/cmake-3.21.3/bin
|
||||
export CMAKE_DIR=/ccs/home/mbt/spock/cmake-3.21.3/bin
|
||||
|
||||
# configure
|
||||
rm -rf CMake*
|
||||
${CMAKE_DIR}/cmake \
|
||||
-D CMAKE_BUILD_TYPE:STRING=Release \
|
||||
-D CMAKE_CXX_COMPILER:PATH=CC \
|
||||
-D CMAKE_CXX_STANDARD=14 \
|
||||
-D DISABLE_GOLD:BOOL=TRUE \
|
||||
-D DISABLE_LTO:BOOL=TRUE \
|
||||
-D USE_HIP=1 \
|
||||
-D LINK_LIBRARIES="${ROCM_PATH}/lib/libamdhip64.so;${CRAY_MPICH_ROOTDIR}/gtl/lib/libmpi_gtl_hsa.so" \
|
||||
-D USE_MPI=1 \
|
||||
-D MPI_SKIP_SEARCH=1 \
|
||||
-D MPIEXEC="srun" \
|
||||
-D USE_HDF5=1 \
|
||||
-D HDF5_DIRECTORY="${HDF5_DIR}" \
|
||||
-D USE_SILO=0 \
|
||||
-D USE_TIMER=0 \
|
||||
-D USE_DOXYGEN:BOOL=false \
|
||||
~/repos/LBPM-WIA
|
||||
|
||||
|
||||
@@ -1,19 +1,28 @@
|
||||
# load the module for cmake
|
||||
#module load cmake
|
||||
module load cmake
|
||||
|
||||
# gcc/7.5.0
|
||||
|
||||
module load gcc/7.5.0
|
||||
module load cuda/10.2.89
|
||||
module load hdf5/1.10.7
|
||||
|
||||
#source /gpfs/gpfs_stage1/b6p315aa/setup/setup-mpi.sh
|
||||
module load cmake gcc/7.5.0
|
||||
module load cuda
|
||||
module load hdf5
|
||||
|
||||
#/ccs/proj/csc380/mcclurej
|
||||
|
||||
#export HDF5_DIR=/ccs/proj/csc380/mcclurej/install/hdf5/1.8.12/
|
||||
|
||||
#export SILO_DIR=/ccs/proj/csc380/mcclurej/install/silo/4.10.2/
|
||||
|
||||
#export NETCDF_DIR=/ccs/proj/geo136/install/netcdf/4.6.1
|
||||
|
||||
export HDF5_DIR="$OLCF_HDF5_ROOT"
|
||||
|
||||
# configure
|
||||
|
||||
|
||||
|
||||
rm -rf CMake*
|
||||
|
||||
cmake \
|
||||
-D CMAKE_BUILD_TYPE:STRING=Release \
|
||||
-D CMAKE_C_COMPILER:PATH=mpicc \
|
||||
@@ -22,7 +31,7 @@ cmake \
|
||||
-D CMAKE_CXX_STANDARD=14 \
|
||||
-D USE_CUDA=1 \
|
||||
-D CMAKE_CUDA_FLAGS="-arch sm_70 -Xptxas=-v -Xptxas -dlcm=cg -lineinfo" \
|
||||
-D CMAKE_CUDA_HOST_COMPILER="/sw/summit/gcc/6.4.0/bin/gcc" \
|
||||
-D CMAKE_CUDA_HOST_COMPILER="/sw/summit/gcc/7.5.0-2/bin/gcc" \
|
||||
-D USE_MPI=1 \
|
||||
-D MPIEXEC=mpirun \
|
||||
-D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \
|
||||
@@ -38,4 +47,6 @@ cmake \
|
||||
-D USE_TIMER=0 \
|
||||
~/LBPM-WIA
|
||||
|
||||
make VERBOSE=1 -j1 && make install
|
||||
|
||||
|
||||
make VERBOSE=1 -j8 && make install
|
||||
|
||||
@@ -7,9 +7,10 @@ ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator )
|
||||
ADD_LBPM_EXECUTABLE( lbpm_greyscaleColor_simulator )
|
||||
ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator )
|
||||
ADD_LBPM_EXECUTABLE( lbpm_nernst_planck_simulator )
|
||||
ADD_LBPM_EXECUTABLE( lbpm_cell_simulator )
|
||||
ADD_LBPM_EXECUTABLE( lbpm_freelee_simulator )
|
||||
ADD_LBPM_EXECUTABLE( lbpm_freelee_SingleFluidBGK_simulator )
|
||||
#ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator )
|
||||
ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator )
|
||||
#ADD_LBPM_EXECUTABLE( lbpm_color_macro_simulator )
|
||||
ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator )
|
||||
#ADD_LBPM_EXECUTABLE( lbpm_sphere_pp )
|
||||
@@ -48,7 +49,6 @@ ADD_LBPM_EXECUTABLE( TestPNP_Stokes )
|
||||
ADD_LBPM_EXECUTABLE( TestMixedGrad )
|
||||
|
||||
|
||||
|
||||
CONFIGURE_FILE( ${CMAKE_CURRENT_SOURCE_DIR}/cylindertest ${CMAKE_CURRENT_BINARY_DIR}/cylindertest COPYONLY )
|
||||
|
||||
# Add the tests
|
||||
@@ -60,6 +60,7 @@ ADD_LBPM_TEST( TestTopo3D )
|
||||
ADD_LBPM_TEST( TestFluxBC )
|
||||
ADD_LBPM_TEST( TestFlowAdaptor )
|
||||
ADD_LBPM_TEST( TestMap )
|
||||
ADD_LBPM_TEST( TestMembrane )
|
||||
#ADD_LBPM_TEST( TestMRT )
|
||||
#ADD_LBPM_TEST( TestColorGrad )
|
||||
ADD_LBPM_TEST( TestWideHalo )
|
||||
|
||||
@@ -183,11 +183,12 @@ int main(int argc, char **argv)
|
||||
int i,j,k;
|
||||
|
||||
// Load inputs
|
||||
auto db = loadInputs( nprocs );
|
||||
/* auto filename = argv[1];
|
||||
auto input_db = std::make_shared<Database>( filename );
|
||||
auto db = input_db->getDatabase( "Domain" );
|
||||
*/
|
||||
auto filename = argv[1];
|
||||
auto input_db = std::make_shared<Database>( filename );
|
||||
auto db = input_db->getDatabase( "Domain" );
|
||||
//else {
|
||||
// auto db = loadInputs( nprocs );
|
||||
//}
|
||||
int Nx = db->getVector<int>( "n" )[0];
|
||||
int Ny = db->getVector<int>( "n" )[1];
|
||||
int Nz = db->getVector<int>( "n" )[2];
|
||||
@@ -269,17 +270,19 @@ int main(int argc, char **argv)
|
||||
//.......................................................................
|
||||
|
||||
//...........................................................................
|
||||
comm.barrier();
|
||||
//comm.barrier();
|
||||
if (rank == 0) cout << "Domain set." << endl;
|
||||
//...........................................................................
|
||||
|
||||
cout << flush;
|
||||
//...........................................................................
|
||||
if (rank==0) printf ("Create ScaLBL_Communicator \n");
|
||||
cout << flush;
|
||||
// Create a communicator for the device (will use optimized layout)
|
||||
ScaLBL_Communicator ScaLBL_Comm(Dm);
|
||||
|
||||
int Npad=(Np/16 + 2)*16;
|
||||
if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N);
|
||||
cout << flush;
|
||||
auto neighborList= new int[18*Npad];
|
||||
IntArray Map(Nx,Ny,Nz);
|
||||
Map.fill(-2);
|
||||
@@ -290,7 +293,8 @@ int main(int argc, char **argv)
|
||||
//......................device distributions.................................
|
||||
dist_mem_size = Np*sizeof(double);
|
||||
if (rank==0) printf ("Allocating distributions \n");
|
||||
|
||||
cout << flush;
|
||||
|
||||
int *NeighborList;
|
||||
int *dvcMap;
|
||||
double *fq;
|
||||
@@ -320,6 +324,9 @@ int main(int argc, char **argv)
|
||||
ScaLBL_DeviceBarrier();
|
||||
delete [] TmpMap;
|
||||
|
||||
if (rank==0) printf("Map is copied to GPU \n");
|
||||
cout << flush;
|
||||
|
||||
//...........................................................................
|
||||
|
||||
/* // Write the communication structure into a file for debugging
|
||||
@@ -351,11 +358,13 @@ int main(int argc, char **argv)
|
||||
fclose(CommFile);
|
||||
*/
|
||||
if (rank==0) printf("Setting the distributions, size = : %i\n", Np);
|
||||
cout << flush;
|
||||
|
||||
//...........................................................................
|
||||
GlobalFlipScaLBL_D3Q19_Init(fq_host, Map, Np, Nx-2, Ny-2, Nz-2, iproc,jproc,kproc,nprocx,nprocy,nprocz);
|
||||
ScaLBL_CopyToDevice(fq, fq_host, 19*dist_mem_size);
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
//comm.barrier();
|
||||
//*************************************************************************
|
||||
// First timestep
|
||||
ScaLBL_Comm.SendD3Q19AA(fq); //READ FROM NORMAL
|
||||
@@ -375,6 +384,7 @@ int main(int argc, char **argv)
|
||||
int timestep = 0;
|
||||
if (rank==0) printf("********************************************************\n");
|
||||
if (rank==0) printf("No. of timesteps for timing: %i \n", 100);
|
||||
cout << flush;
|
||||
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
@@ -420,13 +430,16 @@ int main(int argc, char **argv)
|
||||
// 18 reads and 18 writes for each lattice site
|
||||
double MemoryRefs = double(Np)*36;
|
||||
// number of memory references for the swap algorithm - GigaBytes / second
|
||||
if (rank==0) printf("DRAM bandwidth (per process)= %f GB/sec \n",MemoryRefs*8*double(timestep)*1e-9);
|
||||
if (rank==0) printf("DRAM bandwidth (per process)= %f GB/sec \n",MemoryRefs*8*double(timestep)/cputime*1e-9);
|
||||
// Report bandwidth in Gigabits per second
|
||||
// communication bandwidth includes both send and receive
|
||||
if (rank==0) printf("Communication bandwidth (per process)= %f Gbit/sec \n",ScaLBL_Comm.CommunicationCount*64*timestep/1e9);
|
||||
if (rank==0) printf("Aggregated communication bandwidth = %f Gbit/sec \n",nprocs*ScaLBL_Comm.CommunicationCount*64*timestep/1e9);
|
||||
if (rank==0) printf("Communication bandwidth (per process)= %f Gbit/sec \n",ScaLBL_Comm.CommunicationCount*64*timestep/cputime*1e-9);
|
||||
if (rank==0) printf("Aggregated communication bandwidth = %f Gbit/sec \n",nprocs*ScaLBL_Comm.CommunicationCount*64*timestep/cputime*1e-9);
|
||||
cout << flush;
|
||||
|
||||
}
|
||||
// ****************************************************
|
||||
cout << fflush;
|
||||
comm.barrier();
|
||||
Utilities::shutdown();
|
||||
// ****************************************************
|
||||
|
||||
348
tests/TestMembrane.cpp
Normal file
348
tests/TestMembrane.cpp
Normal file
@@ -0,0 +1,348 @@
|
||||
|
||||
//*************************************************************************
|
||||
// Lattice Boltzmann Simulator for Single Phase Flow in Porous Media
|
||||
// James E. McCLure
|
||||
//*************************************************************************
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include "common/MPI.h"
|
||||
#include "common/Membrane.h"
|
||||
#include "common/ScaLBL.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Build the domain database used by this unit test.
// The settings are hard-coded in memory rather than read from a file:
// BC = 0, nproc = {1,1,1}, n = {32,32,32}, nspheres = 1, L = {1,1,1}.
// The nprocs argument is accepted but not used by the body, so the
// process grid is always 1x1x1 regardless of the value passed in.
// Returns a shared pointer to the newly created Database.
std::shared_ptr<Database> loadInputs( int nprocs )
|
||||
{
|
||||
// Disabled alternative: load the settings from the file "Domain.in" instead.
//auto db = std::make_shared<Database>( "Domain.in" );
|
||||
auto db = std::make_shared<Database>();
|
||||
db->putScalar<int>( "BC", 0 );
|
||||
db->putVector<int>( "nproc", { 1, 1, 1 } );
|
||||
db->putVector<int>( "n", { 32, 32, 32 } );
|
||||
db->putScalar<int>( "nspheres", 1 );
|
||||
db->putVector<double>( "L", { 1, 1, 1 } );
|
||||
return db;
|
||||
}
|
||||
|
||||
//***************************************************************************************
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
// Initialize MPI
|
||||
Utilities::startup( argc, argv );
|
||||
Utilities::MPI comm( MPI_COMM_WORLD );
|
||||
int check=0;
|
||||
{
|
||||
|
||||
int i,j,k,n;
|
||||
|
||||
int rank = comm.getRank();
|
||||
if (rank == 0){
|
||||
printf("********************************************************\n");
|
||||
printf("Running unit test: TestMembrane \n");
|
||||
printf("********************************************************\n");
|
||||
}
|
||||
|
||||
// Load inputs
|
||||
auto db = loadInputs( comm.getSize() );
|
||||
int Nx = db->getVector<int>( "n" )[0];
|
||||
int Ny = db->getVector<int>( "n" )[1];
|
||||
int Nz = db->getVector<int>( "n" )[2];
|
||||
auto Dm = std::make_shared<Domain>(db,comm);
|
||||
|
||||
Nx += 2;
|
||||
Ny += 2;
|
||||
Nz += 2;
|
||||
int N = Nx*Ny*Nz;
|
||||
//.......................................................................
|
||||
int Np = 0;
|
||||
double distance,radius;
|
||||
DoubleArray Distance(Nx,Ny,Nz);
|
||||
for (k=0;k<Nz;k++){
|
||||
for (j=0;j<Ny;j++){
|
||||
for (i=0;i<Nx;i++){
|
||||
n = k*Nx*Ny+j*Nx+i;
|
||||
Dm->id[n] = 1;
|
||||
radius = double(Nx)/4;
|
||||
distance = sqrt(double((i-0.5*Nx)*(i-0.5*Nx)+ (j-0.5*Ny)*(j-0.5*Ny)+ (k-0.5*Nz)*(k-0.5*Nz)))-radius;
|
||||
if (distance < 0.0 ){
|
||||
Dm->id[n] = 1;
|
||||
}
|
||||
Distance(i,j,k) = distance;
|
||||
Np++;
|
||||
}
|
||||
}
|
||||
}
|
||||
Dm->CommInit();
|
||||
|
||||
// Create a communicator for the device (will use optimized layout)
|
||||
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm(new ScaLBL_Communicator(Dm));
|
||||
//Create a second communicator based on the regular data layout
|
||||
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm_Regular(new ScaLBL_Communicator(Dm));
|
||||
|
||||
if (rank==0){
|
||||
printf("Total domain size = %i \n",N);
|
||||
printf("Reduced domain size = %i \n",Np);
|
||||
}
|
||||
|
||||
// LBM variables
|
||||
if (rank==0) printf ("Set up the neighborlist \n");
|
||||
int Npad=Np+32;
|
||||
int neighborSize=18*Npad*sizeof(int);
|
||||
int *neighborList;
|
||||
IntArray Map(Nx,Ny,Nz);
|
||||
neighborList= new int[18*Npad];
|
||||
|
||||
//......................device distributions.................................
|
||||
int *NeighborList;
|
||||
int *dvcMap;
|
||||
//...........................................................................
|
||||
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Npad);
|
||||
|
||||
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id.data(),Np,1);
|
||||
comm.barrier();
|
||||
ScaLBL_CopyToDevice(NeighborList, neighborList, 18*Np*sizeof(int));
|
||||
|
||||
double *dist;
|
||||
dist = new double [19*Np];
|
||||
|
||||
// Check the neighborlist
|
||||
printf("Check neighborlist: exterior %i, first interior %i last interior %i \n",ScaLBL_Comm->LastExterior(),ScaLBL_Comm->FirstInterior(),ScaLBL_Comm->LastInterior());
|
||||
for (int idx=0; idx<ScaLBL_Comm->LastExterior(); idx++){
|
||||
for (int q=0; q<18; q++){
|
||||
int nn = neighborList[q*Np+idx]%Np;
|
||||
if (nn>Np) printf("neighborlist error (exterior) at q=%i, idx=%i \n",q,idx);
|
||||
dist[q*Np + idx] = 0.0;
|
||||
}
|
||||
}
|
||||
for (int idx=ScaLBL_Comm->FirstInterior(); idx<ScaLBL_Comm->LastInterior(); idx++){
|
||||
for (int q=0; q<18; q++){
|
||||
int nn = neighborList[q*Np+idx]%Np;
|
||||
if (nn>Np) printf("neighborlist error (exterior) at q=%i, idx=%i \n",q,idx);
|
||||
dist[q*Np + idx] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
/* create a membrane data structure */
|
||||
Membrane M(Dm, NeighborList, Np);
|
||||
|
||||
int MembraneCount = M.Create(Dm, Distance, Map);
|
||||
if (rank==0) printf (" Number of membrane links: %i \n", MembraneCount);
|
||||
|
||||
/* create a tagged array to show where the membrane is*/
|
||||
double *MembraneLinks;
|
||||
MembraneLinks = new double [Nx*Ny*Nz];
|
||||
for (int n=0; n<Nx*Ny*Nz; n++) {
|
||||
MembraneLinks[n] = 0.0;
|
||||
}
|
||||
for (int mlink=0; mlink<MembraneCount; mlink++){
|
||||
int iq = M.membraneLinks[2*mlink];
|
||||
int jq = M.membraneLinks[2*mlink+1];
|
||||
dist[iq] = -1.0; // set these distributions to non-zero
|
||||
dist[jq] = 1.0;
|
||||
}
|
||||
for (k=1;k<Nz-1;k++){
|
||||
for (j=1;j<Ny-1;j++){
|
||||
for (i=1;i<Nx-1;i++){
|
||||
int idx = Map(i,j,k);
|
||||
double sum = 0.0;
|
||||
for (int q=0; q<19; q++){
|
||||
sum += dist[q*Np + idx];
|
||||
}
|
||||
int n = k*Nx*Ny + j*Nx + i;
|
||||
MembraneLinks[n] = sum;
|
||||
if (sum > 0.f){
|
||||
Dm->id[n] = 127;
|
||||
}
|
||||
if (sum < 0.f){
|
||||
Dm->id[n] = 64;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (argc > 1)
|
||||
Dm->AggregateLabels("membrane.raw");
|
||||
|
||||
|
||||
/* create a pair of distributions to test membrane mass transport routine */
|
||||
double *fq, *gq, *Ci, *Cj, *Psi, *Ci_host;
|
||||
Ci_host = new double [Np];
|
||||
|
||||
ScaLBL_AllocateDeviceMemory((void **)&fq, 19 * sizeof(double) * Np);
|
||||
ScaLBL_AllocateDeviceMemory((void **)&gq, 19 * sizeof(double) * Np);
|
||||
ScaLBL_AllocateDeviceMemory((void **)&Ci, sizeof(double) * Np);
|
||||
ScaLBL_AllocateDeviceMemory((void **)&Cj, sizeof(double) * Np);
|
||||
ScaLBL_AllocateDeviceMemory((void **)&Psi, sizeof(double) * Np);
|
||||
|
||||
/* initialize concentration inside membrane */
|
||||
for (k=1;k<Nz-1;k++){
|
||||
for (j=1;j<Ny-1;j++){
|
||||
for (i=1;i<Nx-1;i++){
|
||||
n = k*Nx*Ny+j*Nx+i;
|
||||
int idx = Map(i,j,k);
|
||||
if (Distance(i,j,k) > 0.0)
|
||||
Ci_host[idx] = 1.0;
|
||||
else
|
||||
Ci_host[idx] = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
ScaLBL_CopyToDevice(Ci, Ci_host, sizeof(double) * Np);
|
||||
|
||||
/* initialize the distributions */
|
||||
ScaLBL_D3Q7_Ion_Init_FromFile(fq, Ci, Np);
|
||||
ScaLBL_D3Q7_Ion_Init_FromFile(gq, Ci, Np);
|
||||
|
||||
/* Streaming with the usual neighborlist */
|
||||
ScaLBL_D3Q19_AAodd_Compact(NeighborList, fq, Np);
|
||||
|
||||
/* Streaming with the membrane neighborlist*/
|
||||
ScaLBL_D3Q19_AAodd_Compact(M.NeighborList, gq, Np);
|
||||
|
||||
/* explicit mass transfer step with the membrane*/
|
||||
M.AssignCoefficients(dvcMap, Psi, "ones");
|
||||
M.IonTransport(gq, Cj);
|
||||
ScaLBL_CopyToHost(Ci_host, Cj, sizeof(double) * Np);
|
||||
|
||||
double ionError = 0.0;
|
||||
for (int n=0; n<Np; n++){
|
||||
ionError += Ci_host[n];
|
||||
}
|
||||
if (fabs(ionError) > 1e-12) {
|
||||
printf(" Failed error tolerance in membrane ion transport routine! \n");
|
||||
check = 2;
|
||||
}
|
||||
|
||||
DoubleArray Ions(Nx,Ny,Nz);
|
||||
ScaLBL_Comm->RegularLayout(Map, Cj, Ions);
|
||||
if (argc > 1)
|
||||
Dm->AggregateLabels("membrane2.raw",Ions);
|
||||
|
||||
/* now compare streaming */
|
||||
ScaLBL_D3Q7_Ion_Init_FromFile(gq, Ci, Np);
|
||||
M.IonTransport(gq, Cj);
|
||||
ScaLBL_D3Q19_AAodd_Compact(M.NeighborList, gq, Np);
|
||||
M.IonTransport(gq, Cj);
|
||||
|
||||
/* now check that the two results agree*/
|
||||
double *fq_h, *gq_h;
|
||||
fq_h = new double [7*Np];
|
||||
gq_h = new double [7*Np];
|
||||
ScaLBL_CopyToHost(fq_h, fq, 7*sizeof(double) * Np);
|
||||
ScaLBL_CopyToHost(gq_h, gq, 7*sizeof(double) * Np);
|
||||
for (int n = 0; n<Np; n++){
|
||||
for (int q=0; q<7; q++){
|
||||
double gval = gq_h[q*Np + n];
|
||||
double fval = fq_h[q*Np + n];
|
||||
if (gval != fval ){
|
||||
printf(" Membrane streaming mismatch at q=%i, n=%i \n",q,n);
|
||||
printf(" .... gq = %f, fq = %f \n",gval, fval);
|
||||
printf(" (unit test will fail) \n");
|
||||
check = 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DoubleArray MembraneErrors(Nx,Ny,Nz);
|
||||
for (k=1;k<Nz-1;k++){
|
||||
for (j=1;j<Ny-1;j++){
|
||||
for (i=1;i<Nx-1;i++){
|
||||
n = k*Nx*Ny+j*Nx+i;
|
||||
int idx = Map(i,j,k);
|
||||
MembraneErrors(i,j,k) = 0.0;
|
||||
for (int q=0; q<7; q++){
|
||||
double gval = gq_h[q*Np + idx];
|
||||
double fval = fq_h[q*Np + idx];
|
||||
MembraneErrors(i,j,k) += gval - fval;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Dm->AggregateLabels("membrane3.raw",MembraneErrors);
|
||||
|
||||
|
||||
//...........................................................................
|
||||
// Update GPU data structures
|
||||
if (rank==0) printf ("Setting up device map and neighbor list \n");
|
||||
int *TmpMap;
|
||||
TmpMap=new int[Np*sizeof(int)];
|
||||
for (k=1; k<Nz-1; k++){
|
||||
for (j=1; j<Ny-1; j++){
|
||||
for (i=1; i<Nx-1; i++){
|
||||
int idx=Map(i,j,k);
|
||||
if (!(idx < 0))
|
||||
TmpMap[idx] = k*Nx*Ny+j*Nx+i;
|
||||
}
|
||||
}
|
||||
}
|
||||
ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np);
|
||||
ScaLBL_DeviceBarrier();
|
||||
|
||||
// Create a dummy distribution data structure
|
||||
double *fq_host;
|
||||
fq_host = new double[19*Np];
|
||||
if (rank==0) printf ("Setting up Np=%i distributions \n",Np);
|
||||
for (k=1; k<Nz-1; k++){
|
||||
for (j=1; j<Ny-1; j++){
|
||||
for (i=1; i<Nx-1; i++){
|
||||
int idx=Map(i,j,k);
|
||||
if (!(idx<0)){
|
||||
for (int q=0; q<19; q++){
|
||||
fq_host[q*Np+idx]=(k*Nx*Ny+j*Nx+i)+0.01*q;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Run dummy communications */
|
||||
/*initialize fq from host data */
|
||||
ScaLBL_CopyToDevice(fq, fq_host, sizeof(double)*7*Np);
|
||||
|
||||
M.SendD3Q7AA(&fq[0]);
|
||||
M.RecvD3Q7AA(&gq[0]);
|
||||
// this has only the communicated values
|
||||
//ScaLBL_CopyToHost(fq_host, gq, sizeof(double)*7*Np);
|
||||
if (rank==0) printf ("Sum result \n");
|
||||
|
||||
ScaLBL_D3Q7_AAeven_IonConcentration(&gq[0 * Np * 7], &Ci[0 * Np],
|
||||
0, ScaLBL_Comm->LastExterior(),
|
||||
Np);
|
||||
DoubleArray Result(Nx,Ny,Nz);
|
||||
|
||||
ScaLBL_Comm->RegularLayout(Map, Ci, Result);
|
||||
|
||||
/* for (k=1; k<Nz-1; k++){
|
||||
for (j=1; j<Ny-1; j++){
|
||||
for (i=1; i<Nx-1; i++){
|
||||
int idx=Map(i,j,k);
|
||||
double sum = 0.0;
|
||||
if (!(idx<0)){
|
||||
for (int q=1; q<3; q++){
|
||||
sum += fq_host[q*Np+idx];
|
||||
}
|
||||
Result[k*Nx*Ny+j*Nx+i] = sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
FILE *OUTFILE;
|
||||
OUTFILE = fopen("D3Q7.raw","wb");
|
||||
fwrite(Result.data(),8,Nx*Ny*Nz,OUTFILE);
|
||||
fclose(OUTFILE);
|
||||
|
||||
FILE *MAPFILE;
|
||||
MAPFILE = fopen("Map.raw","wb");
|
||||
fwrite(Map.data(),4,Nx*Ny*Nz,MAPFILE);
|
||||
fclose(MAPFILE);
|
||||
|
||||
delete [] TmpMap;
|
||||
delete [] fq_host;
|
||||
|
||||
}
|
||||
Utilities::shutdown();
|
||||
|
||||
return check;
|
||||
}
|
||||
|
||||
@@ -9,8 +9,8 @@
|
||||
#include "common/ScaLBL.h"
|
||||
#include "common/Communication.h"
|
||||
#include "analysis/TwoPhase.h"
|
||||
#include "common/MPI_Helpers.h"
|
||||
|
||||
#include "common/MPI.h"
|
||||
#include "models/BGKModel.h"
|
||||
//#define WRITE_SURFACES
|
||||
|
||||
/*
|
||||
@@ -23,414 +23,33 @@ using namespace std;
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
//*****************************************
|
||||
// ***** MPI STUFF ****************
|
||||
//*****************************************
|
||||
// Initialize MPI
|
||||
int rank,nprocs;
|
||||
Utilities::startup( argc, argv );
|
||||
Utilities::MPI comm( MPI_COMM_WORLD );
|
||||
int rank = comm.getRank();
|
||||
int nprocs = comm.getSize();
|
||||
Utilities::startup( argc, argv );
|
||||
Utilities::MPI comm( MPI_COMM_WORLD );
|
||||
int rank = comm.getRank();
|
||||
int nprocs = comm.getSize();
|
||||
{
|
||||
// parallel domain size (# of sub-domains)
|
||||
int nprocx,nprocy,nprocz;
|
||||
|
||||
if (rank == 0){
|
||||
printf("********************************************************\n");
|
||||
printf("Running Single Phase Permeability Calculation \n");
|
||||
printf("********************************************************\n");
|
||||
}
|
||||
|
||||
// Variables that specify the computational domain
|
||||
string FILENAME;
|
||||
int Nx,Ny,Nz; // local sub-domain size
|
||||
int nspheres; // number of spheres in the packing
|
||||
double Lx,Ly,Lz; // Domain length
|
||||
double D = 1.0; // reference length for non-dimensionalization
|
||||
// Color Model parameters
|
||||
int timestepMax, interval;
|
||||
double tau,Fx,Fy,Fz,tol,err;
|
||||
double din,dout;
|
||||
bool pBC,Restart;
|
||||
int i,j,k,n;
|
||||
|
||||
int RESTART_INTERVAL=20000;
|
||||
|
||||
if (rank==0){
|
||||
//.............................................................
|
||||
// READ SIMULATION PARMAETERS FROM INPUT FILE
|
||||
//.............................................................
|
||||
ifstream input("Permeability.in");
|
||||
// Line 1: model parameters (tau, alpha, beta, das, dbs)
|
||||
input >> tau; // Viscosity parameter
|
||||
// Line 2: External force components (Fx,Fy, Fz)
|
||||
input >> Fx;
|
||||
input >> Fy;
|
||||
input >> Fz;
|
||||
// Line 3: Pressure Boundary conditions
|
||||
input >> Restart;
|
||||
input >> pBC;
|
||||
input >> din;
|
||||
input >> dout;
|
||||
// Line 4: time-stepping criteria
|
||||
input >> timestepMax; // max no. of timesteps
|
||||
input >> interval; // restart interval
|
||||
input >> tol; // error tolerance
|
||||
//.............................................................
|
||||
|
||||
//.......................................................................
|
||||
// Reading the domain information file
|
||||
//.......................................................................
|
||||
ifstream domain("Domain.in");
|
||||
domain >> nprocx;
|
||||
domain >> nprocy;
|
||||
domain >> nprocz;
|
||||
domain >> Nx;
|
||||
domain >> Ny;
|
||||
domain >> Nz;
|
||||
//domain >> nspheres;
|
||||
domain >> Lx;
|
||||
domain >> Ly;
|
||||
domain >> Lz;
|
||||
//.......................................................................
|
||||
|
||||
}
|
||||
// **************************************************************
|
||||
// Broadcast simulation parameters from rank 0 to all other procs
|
||||
MPI_Barrier(comm);
|
||||
//.................................................
|
||||
MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm);
|
||||
//MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm);
|
||||
// MPI_Bcast(&Restart,1,MPI_LOGICAL,0,comm);
|
||||
MPI_Bcast(&din,1,MPI_DOUBLE,0,comm);
|
||||
MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm);
|
||||
MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm);
|
||||
MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm);
|
||||
MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm);
|
||||
MPI_Bcast(&timestepMax,1,MPI_INT,0,comm);
|
||||
MPI_Bcast(&interval,1,MPI_INT,0,comm);
|
||||
MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm);
|
||||
// Computational domain
|
||||
MPI_Bcast(&Nx,1,MPI_INT,0,comm);
|
||||
MPI_Bcast(&Ny,1,MPI_INT,0,comm);
|
||||
MPI_Bcast(&Nz,1,MPI_INT,0,comm);
|
||||
MPI_Bcast(&nprocx,1,MPI_INT,0,comm);
|
||||
MPI_Bcast(&nprocy,1,MPI_INT,0,comm);
|
||||
MPI_Bcast(&nprocz,1,MPI_INT,0,comm);
|
||||
//MPI_Bcast(&nspheres,1,MPI_INT,0,comm);
|
||||
MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm);
|
||||
MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm);
|
||||
MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm);
|
||||
//.................................................
|
||||
MPI_Barrier(comm);
|
||||
|
||||
RESTART_INTERVAL=interval;
|
||||
// **************************************************************
|
||||
// **************************************************************
|
||||
double rlx = 1.f/tau;
|
||||
|
||||
if (nprocs != nprocx*nprocy*nprocz){
|
||||
printf("nprocx = %i \n",nprocx);
|
||||
printf("nprocy = %i \n",nprocy);
|
||||
printf("nprocz = %i \n",nprocz);
|
||||
INSIST(nprocs == nprocx*nprocy*nprocz,"Fatal error in processor count!");
|
||||
}
|
||||
|
||||
if (rank==0){
|
||||
printf("********************************************************\n");
|
||||
printf("tau = %f \n", tau);
|
||||
printf("Force(x) = %.5g \n", Fx);
|
||||
printf("Force(y) = %.5g \n", Fy);
|
||||
printf("Force(z) = %.5g \n", Fz);
|
||||
printf("Sub-domain size = %i x %i x %i\n",Nx,Ny,Nz);
|
||||
printf("Process grid = %i x %i x %i\n",nprocx,nprocy,nprocz);
|
||||
printf("********************************************************\n");
|
||||
}
|
||||
|
||||
double viscosity=(tau-0.5)/3.0;
|
||||
// Initialized domain and averaging framework for Two-Phase Flow
|
||||
int BC=pBC;
|
||||
Domain Dm(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BC);
|
||||
for (i=0; i<Dm.Nx*Dm.Ny*Dm.Nz; i++) Dm.id[i] = 1;
|
||||
Dm.CommInit();
|
||||
TwoPhase Averages(Dm);
|
||||
|
||||
// Mask that excludes the solid phase
|
||||
Domain Mask(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BC);
|
||||
MPI_Barrier(comm);
|
||||
|
||||
Nx += 2; Ny += 2; Nz += 2;
|
||||
int N = Nx*Ny*Nz;
|
||||
|
||||
//.......................................................................
|
||||
if (rank == 0) printf("Read input media... \n");
|
||||
//.......................................................................
|
||||
|
||||
//.......................................................................
|
||||
// Filenames used
|
||||
char LocalRankString[8];
|
||||
char LocalRankFilename[40];
|
||||
char LocalRestartFile[40];
|
||||
char tmpstr[10];
|
||||
sprintf(LocalRankString,"%05d",rank);
|
||||
sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString);
|
||||
sprintf(LocalRestartFile,"%s%s","Restart.",LocalRankString);
|
||||
|
||||
// printf("Local File Name = %s \n",LocalRankFilename);
|
||||
// .......... READ THE INPUT FILE .......................................
|
||||
// char value;
|
||||
char *id;
|
||||
id = new char[N];
|
||||
double sum, sum_local;
|
||||
double iVol_global = 1.0/(1.0*(Nx-2)*(Ny-2)*(Nz-2)*nprocs);
|
||||
//if (BoundaryCondition > 0) iVol_global = 1.0/(1.0*(Nx-2)*nprocx*(Ny-2)*nprocy*((Nz-2)*nprocz-6));
|
||||
double porosity, pore_vol;
|
||||
//...........................................................................
|
||||
if (rank == 0) cout << "Reading in domain from signed distance function..." << endl;
|
||||
|
||||
//.......................................................................
|
||||
// Read the signed distance
|
||||
sprintf(LocalRankString,"%05d",rank);
|
||||
sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString);
|
||||
ReadBinaryFile(LocalRankFilename, Averages.SDs.data(), N);
|
||||
MPI_Barrier(comm);
|
||||
if (rank == 0) cout << "Domain set." << endl;
|
||||
|
||||
//.......................................................................
|
||||
// Assign the phase ID field based on the signed distance
|
||||
//.......................................................................
|
||||
|
||||
for (k=0;k<Nz;k++){
|
||||
for (j=0;j<Ny;j++){
|
||||
for (i=0;i<Nx;i++){
|
||||
int n = k*Nx*Ny+j*Nx+i;
|
||||
id[n] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
sum=0.f;
|
||||
pore_vol = 0.0;
|
||||
for ( k=0;k<Nz;k++){
|
||||
for ( j=0;j<Ny;j++){
|
||||
for ( i=0;i<Nx;i++){
|
||||
int n = k*Nx*Ny+j*Nx+i;
|
||||
if (Averages.SDs(n) > 0.0){
|
||||
id[n] = 2;
|
||||
}
|
||||
// compute the porosity (actual interface location used)
|
||||
if (Averages.SDs(n) > 0.0){
|
||||
sum++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (rank==0) printf("Initialize from segmented data: solid=0, NWP=1, WP=2 \n");
|
||||
sprintf(LocalRankFilename,"ID.%05i",rank);
|
||||
size_t readID;
|
||||
FILE *IDFILE = fopen(LocalRankFilename,"rb");
|
||||
if (IDFILE==NULL) ERROR("lbpm_permeability_simulator: Error opening file: ID.xxxxx");
|
||||
readID=fread(id,1,N,IDFILE);
|
||||
if (readID != size_t(N)) printf("lbpm_permeability_simulator: Error reading ID (rank=%i) \n",rank);
|
||||
fclose(IDFILE);
|
||||
|
||||
//.......................................................................
|
||||
// Compute the media porosity, assign phase labels and solid composition
|
||||
//.......................................................................
|
||||
sum_local=0.0;
|
||||
int Np=0; // number of local pore nodes
|
||||
//.......................................................................
|
||||
for (k=1;k<Nz-1;k++){
|
||||
for (j=1;j<Ny-1;j++){
|
||||
for (i=1;i<Nx-1;i++){
|
||||
n = k*Nx*Ny+j*Nx+i;
|
||||
if (id[n] > 0){
|
||||
sum_local+=1.0;
|
||||
Np++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm);
|
||||
porosity = sum*iVol_global;
|
||||
if (rank==0) printf("Media porosity = %f \n",porosity);
|
||||
|
||||
//.........................................................
|
||||
// don't perform computations at the eight corners
|
||||
id[0] = id[Nx-1] = id[(Ny-1)*Nx] = id[(Ny-1)*Nx + Nx-1] = 0;
|
||||
id[(Nz-1)*Nx*Ny] = id[(Nz-1)*Nx*Ny+Nx-1] = id[(Nz-1)*Nx*Ny+(Ny-1)*Nx] = id[(Nz-1)*Nx*Ny+(Ny-1)*Nx + Nx-1] = 0;
|
||||
//.........................................................
|
||||
MPI_Barrier(comm);
|
||||
|
||||
// Initialize communication structures in averaging domain
|
||||
for (i=0; i<Mask.Nx*Mask.Ny*Mask.Nz; i++) Mask.id[i] = id[i];
|
||||
Mask.CommInit(comm);
|
||||
|
||||
//...........................................................................
|
||||
if (rank==0) printf ("Create ScaLBL_Communicator \n");
|
||||
// Create a communicator for the device
|
||||
|
||||
int Npad=(Np/16 + 2)*16;
|
||||
ScaLBL_Communicator ScaLBL_Comm(Mask);
|
||||
int *neighborList;
|
||||
IntArray Map(Nx,Ny,Nz);
|
||||
neighborList= new int[18*Npad];
|
||||
Np = ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Mask.id,Np);
|
||||
MPI_Barrier(comm);
|
||||
|
||||
// LBM variables
|
||||
if (rank==0) printf ("Allocating distributions \n");
|
||||
//......................device distributions.................................
|
||||
int dist_mem_size = Np*sizeof(double);
|
||||
int neighborSize=18*(Np*sizeof(int));
|
||||
|
||||
int *NeighborList;
|
||||
// double *f_even,*f_odd;
|
||||
double * dist;
|
||||
double * Velocity;
|
||||
double * Pressure;
|
||||
//...........................................................................
|
||||
ScaLBL_AllocateDeviceMemory((void **) &dist, 19*dist_mem_size);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np);
|
||||
ScaLBL_AllocateDeviceMemory((void **) &Pressure, 3*sizeof(double)*Np);
|
||||
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
|
||||
//...........................................................................
|
||||
|
||||
//...........................................................................
|
||||
if (rank==0) printf("Setting the distributions, size = %i\n", N);
|
||||
//...........................................................................
|
||||
|
||||
// Finalize setup for averaging domain
|
||||
//Averages.SetupCubes(Dm);
|
||||
Averages.UpdateSolid();
|
||||
// Initialize two phase flow variables (all wetting phase)
|
||||
for (k=0;k<Nz;k++){
|
||||
for (j=0;j<Ny;j++){
|
||||
for (i=0;i<Nx;i++){
|
||||
n=k*Nx*Ny+j*Nx+i;
|
||||
Averages.Phase(i,j,k) = -1.0;
|
||||
Averages.SDn(i,j,k) = Averages.Phase(i,j,k);
|
||||
Averages.Phase_tplus(i,j,k) = Averages.SDn(i,j,k);
|
||||
Averages.Phase_tminus(i,j,k) = Averages.SDn(i,j,k);
|
||||
Averages.DelPhi(i,j,k) = 0.0;
|
||||
Averages.Press(i,j,k) = 0.0;
|
||||
Averages.Vel_x(i,j,k) = 0.0;
|
||||
Averages.Vel_y(i,j,k) = 0.0;
|
||||
Averages.Vel_z(i,j,k) = 0.0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//.......................................................................
|
||||
|
||||
ScaLBL_D3Q19_Init(dist, Np);
|
||||
|
||||
int timestep = 0;
|
||||
if (rank==0) printf("********************************************************\n");
|
||||
if (rank==0) printf("No. of timesteps: %i \n", timestepMax);
|
||||
|
||||
//.......create and start timer............
|
||||
double starttime,stoptime,cputime;
|
||||
MPI_Barrier(comm);
|
||||
starttime = MPI_Wtime();
|
||||
//.........................................
|
||||
|
||||
double D32,Fo,Re,velocity,err1D,mag_force,vel_prev;
|
||||
err = vel_prev = 1.0;
|
||||
if (rank==0) printf("Begin timesteps: error tolerance is %f \n", tol);
|
||||
//************ MAIN ITERATION LOOP ***************************************/
|
||||
while (timestep < timestepMax && err > tol ){
|
||||
|
||||
timestep++;
|
||||
ScaLBL_Comm.SendD3Q19AA(dist); //READ FROM NORMAL
|
||||
ScaLBL_D3Q19_AAodd_BGK(NeighborList, dist, ScaLBL_Comm.first_interior, ScaLBL_Comm.last_interior, Np, rlx, Fx, Fy, Fz);
|
||||
ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
|
||||
ScaLBL_D3Q19_AAodd_BGK(NeighborList, dist, 0, ScaLBL_Comm.next, Np, rlx, Fx, Fy, Fz);
|
||||
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
|
||||
|
||||
timestep++;
|
||||
ScaLBL_Comm.SendD3Q19AA(dist); //READ FORM NORMAL
|
||||
ScaLBL_D3Q19_AAeven_BGK(dist, ScaLBL_Comm.first_interior, ScaLBL_Comm.last_interior, Np, rlx, Fx, Fy, Fz);
|
||||
ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
|
||||
ScaLBL_D3Q19_AAeven_BGK(dist, 0, ScaLBL_Comm.next, Np, rlx, Fx, Fy, Fz);
|
||||
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
|
||||
//************************************************************************/
|
||||
|
||||
if (timestep%500 == 0){
|
||||
//...........................................................................
|
||||
// Copy the data for for the analysis timestep
|
||||
//...........................................................................
|
||||
// Copy the phase from the GPU -> CPU
|
||||
//...........................................................................
|
||||
ScaLBL_DeviceBarrier();
|
||||
ScaLBL_D3Q19_Pressure(dist,Pressure,Np);
|
||||
ScaLBL_D3Q19_Momentum(dist,Velocity,Np);
|
||||
|
||||
ScaLBL_Comm.RegularLayout(Map,Pressure,Averages.Press);
|
||||
ScaLBL_Comm.RegularLayout(Map,&Velocity[0],Averages.Vel_x);
|
||||
ScaLBL_Comm.RegularLayout(Map,&Velocity[Np],Averages.Vel_y);
|
||||
ScaLBL_Comm.RegularLayout(Map,&Velocity[2*Np],Averages.Vel_z);
|
||||
|
||||
// Way more work than necessary -- this is just to get the solid interfacial area!!
|
||||
Averages.Initialize();
|
||||
Averages.UpdateMeshValues();
|
||||
Averages.ComputeLocal();
|
||||
Averages.Reduce();
|
||||
|
||||
double vawx = Averages.vaw_global(0);
|
||||
double vawy = Averages.vaw_global(1);
|
||||
double vawz = Averages.vaw_global(2);
|
||||
if (rank==0){
|
||||
// ************* DIMENSIONLESS FORCHEIMER EQUATION *************************
|
||||
// Dye, A.L., McClure, J.E., Gray, W.G. and C.T. Miller
|
||||
// Description of Non-Darcy Flows in Porous Medium Systems
|
||||
// Physical Review E 87 (3), 033012
|
||||
// Fo := density*D32^3*(density*force) / (viscosity^2)
|
||||
// Re := density*D32*velocity / viscosity
|
||||
// Fo = a*Re + b*Re^2
|
||||
// *************************************************************************
|
||||
//viscosity = (tau-0.5)*0.333333333333333333;
|
||||
D32 = 6.0*(Dm.Volume-Averages.vol_w_global)/Averages.As_global;
|
||||
printf("Sauter Mean Diameter = %f \n",D32);
|
||||
mag_force = sqrt(Fx*Fx+Fy*Fy+Fz*Fz);
|
||||
Fo = D32*D32*D32*mag_force/viscosity/viscosity;
|
||||
// .... 1-D flow should be aligned with force ...
|
||||
velocity = vawx*Fx/mag_force + vawy*Fy/mag_force + vawz*Fz/mag_force;
|
||||
err1D = fabs(velocity-sqrt(vawx*vawx+vawy*vawy+vawz*vawz))/velocity;
|
||||
//.......... Computation of the Reynolds number Re ..............
|
||||
Re = D32*velocity/viscosity;
|
||||
printf("Force: %.5g,%.5g,%.5g \n",Fx,Fy,Fz);
|
||||
printf("Velocity: %.5g,%.5g,%.5g \n",vawx,vawy,vawz);
|
||||
printf("Relative error for 1D representation: %.5g \n",err1D);
|
||||
printf("Dimensionless force: %5g \n", Fo);
|
||||
printf("Reynolds number: %.5g \n", Re);
|
||||
printf("Dimensionless Permeability (k/D^2): %.5g \n", Re/Fo);
|
||||
}
|
||||
}
|
||||
}
|
||||
//************************************************************************/
|
||||
// Initialize compute device
|
||||
int device=ScaLBL_SetDevice(rank);
|
||||
NULL_USE( device );
|
||||
ScaLBL_DeviceBarrier();
|
||||
MPI_Barrier(comm);
|
||||
stoptime = MPI_Wtime();
|
||||
if (rank==0) printf("-------------------------------------------------------------------\n");
|
||||
// Compute the walltime per timestep
|
||||
cputime = (stoptime - starttime)/timestep;
|
||||
// Performance obtained from each node
|
||||
double MLUPS = double(Np)/cputime/1000000;
|
||||
|
||||
if (rank==0) printf("********************************************************\n");
|
||||
if (rank==0) printf("CPU time = %f \n", cputime);
|
||||
if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
|
||||
MLUPS *= nprocs;
|
||||
if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
|
||||
if (rank==0) printf("********************************************************\n");
|
||||
|
||||
NULL_USE(RESTART_INTERVAL);
|
||||
comm.barrier();
|
||||
|
||||
ScaLBL_BGKModel BGK(rank,nprocs,comm);
|
||||
auto filename = argv[1];
|
||||
BGK.ReadParams(filename);
|
||||
BGK.SetDomain(); // this reads in the domain
|
||||
BGK.ReadInput();
|
||||
BGK.Create(); // creating the model will create data structure to match the pore structure and allocate variables
|
||||
BGK.Initialize(); // initializing the model will set initial conditions for variables
|
||||
BGK.Run();
|
||||
BGK.VelocityField();
|
||||
cout << flush;
|
||||
}
|
||||
// ****************************************************
|
||||
comm.barrier();
|
||||
Utilities::shutdown();
|
||||
// ****************************************************
|
||||
Utilities::shutdown();
|
||||
}
|
||||
|
||||
156
tests/lbpm_cell_simulator.cpp
Normal file
156
tests/lbpm_cell_simulator.cpp
Normal file
@@ -0,0 +1,156 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
#include <iostream>
|
||||
#include <exception>
|
||||
#include <stdexcept>
|
||||
#include <fstream>
|
||||
#include <math.h>
|
||||
|
||||
#include "models/IonModel.h"
|
||||
#include "models/StokesModel.h"
|
||||
#include "models/PoissonSolver.h"
|
||||
#include "models/MultiPhysController.h"
|
||||
#include "common/Utilities.h"
|
||||
#include "analysis/ElectroChemistry.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
//***************************************************************************
|
||||
// Test lattice-Boltzmann Ion Model coupled with Poisson equation
|
||||
//***************************************************************************
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
// Initialize MPI and error handlers
|
||||
Utilities::startup( argc, argv );
|
||||
Utilities::MPI comm( MPI_COMM_WORLD );
|
||||
int rank = comm.getRank();
|
||||
int nprocs = comm.getSize();
|
||||
|
||||
{ // Limit scope so variables that contain communicators will free before MPI_Finialize
|
||||
|
||||
if (rank == 0){
|
||||
printf("********************************************************\n");
|
||||
printf("Running LBPM electrokinetic single-fluid solver \n");
|
||||
printf("********************************************************\n");
|
||||
}
|
||||
// Initialize compute device
|
||||
int device=ScaLBL_SetDevice(rank);
|
||||
NULL_USE( device );
|
||||
ScaLBL_DeviceBarrier();
|
||||
comm.barrier();
|
||||
|
||||
PROFILE_ENABLE(1);
|
||||
//PROFILE_ENABLE_TRACE();
|
||||
//PROFILE_ENABLE_MEMORY();
|
||||
PROFILE_SYNCHRONIZE();
|
||||
PROFILE_START("Main");
|
||||
Utilities::setErrorHandlers();
|
||||
|
||||
auto filename = argv[1];
|
||||
ScaLBL_StokesModel StokesModel(rank,nprocs,comm);
|
||||
ScaLBL_IonModel IonModel(rank,nprocs,comm);
|
||||
ScaLBL_Poisson PoissonSolver(rank,nprocs,comm);
|
||||
ScaLBL_Multiphys_Controller Study(rank,nprocs,comm);//multiphysics controller coordinating multi-model coupling
|
||||
|
||||
bool SlipBC = false;
|
||||
|
||||
// Load controller information
|
||||
Study.ReadParams(filename);
|
||||
|
||||
// Load user input database files for Navier-Stokes and Ion solvers
|
||||
StokesModel.ReadParams(filename);
|
||||
|
||||
// Setup other model specific structures
|
||||
StokesModel.SetDomain();
|
||||
StokesModel.ReadInput();
|
||||
StokesModel.Create(); // creating the model will create data structure to match the pore structure and allocate variables
|
||||
comm.barrier();
|
||||
if (rank == 0) printf("Stokes model setup complete\n");
|
||||
|
||||
IonModel.ReadParams(filename);
|
||||
IonModel.SetDomain();
|
||||
IonModel.ReadInput();
|
||||
IonModel.Create();
|
||||
IonModel.SetMembrane();
|
||||
comm.barrier();
|
||||
if (rank == 0) printf("Ion model setup complete\n");
|
||||
fflush(stdout);
|
||||
|
||||
// Create analysis object
|
||||
ElectroChemistryAnalyzer Analysis(IonModel.Dm);
|
||||
|
||||
// Get internal iteration number
|
||||
StokesModel.timestepMax = Study.getStokesNumIter_PNP_coupling(StokesModel.time_conv,IonModel.time_conv);
|
||||
StokesModel.Initialize(); // initializing the model will set initial conditions for variables
|
||||
comm.barrier();
|
||||
if (rank == 0) printf("Stokes model initialized \n");
|
||||
fflush(stdout);
|
||||
|
||||
IonModel.timestepMax = Study.getIonNumIter_PNP_coupling(StokesModel.time_conv,IonModel.time_conv);
|
||||
IonModel.Initialize();
|
||||
comm.barrier();
|
||||
if (rank == 0) printf("Ion model initialized \n");
|
||||
// Get maximal time converting factor based on Sotkes and Ion solvers
|
||||
Study.getTimeConvMax_PNP_coupling(StokesModel.time_conv,IonModel.time_conv);
|
||||
|
||||
// Initialize LB-Poisson model
|
||||
PoissonSolver.ReadParams(filename);
|
||||
PoissonSolver.SetDomain();
|
||||
PoissonSolver.ReadInput();
|
||||
PoissonSolver.Create();
|
||||
comm.barrier();
|
||||
if (rank == 0) printf("Poisson solver created \n");
|
||||
fflush(stdout);
|
||||
PoissonSolver.Initialize(Study.time_conv_max);
|
||||
comm.barrier();
|
||||
if (rank == 0) printf("Poisson solver initialized \n");
|
||||
fflush(stdout);
|
||||
|
||||
int timestep=0;
|
||||
while (timestep < Study.timestepMax){
|
||||
|
||||
timestep++;
|
||||
PoissonSolver.Run(IonModel.ChargeDensity,SlipBC,timestep);//solve Poisson equtaion to get steady-state electrical potental
|
||||
comm.barrier();
|
||||
//if (rank == 0) printf(" Poisson step %i \n",timestep);
|
||||
StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity
|
||||
//fflush(stdout);
|
||||
|
||||
IonModel.RunMembrane(StokesModel.Velocity,PoissonSolver.ElectricField,PoissonSolver.Psi); //solve for ion transport with membrane
|
||||
comm.barrier();
|
||||
//if (rank == 0) printf(" Membrane step %i \n",timestep);
|
||||
//fflush(stdout);
|
||||
|
||||
timestep++;//AA operations
|
||||
|
||||
if (timestep%Study.analysis_interval==0){
|
||||
Analysis.Basic(IonModel,PoissonSolver,StokesModel,timestep);
|
||||
}
|
||||
if (timestep%Study.visualization_interval==0){
|
||||
Analysis.WriteVis(IonModel,PoissonSolver,StokesModel,Study.db,timestep);
|
||||
// PoissonSolver.getElectricPotential(timestep);
|
||||
//PoissonSolver.getElectricField(timestep);
|
||||
//IonModel.getIonConcentration(timestep);
|
||||
//StokesModel.getVelocity(timestep);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if (rank==0) printf("Save simulation raw data at maximum timestep\n");
|
||||
Analysis.WriteVis(IonModel,PoissonSolver,StokesModel,Study.db,timestep);
|
||||
|
||||
if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n");
|
||||
if (rank==0) printf("*************************************************************\n");
|
||||
|
||||
PROFILE_STOP("Main");
|
||||
PROFILE_SAVE("lbpm_electrokinetic_SingleFluid_simulator",1);
|
||||
// ****************************************************
|
||||
|
||||
} // Limit scope so variables that contain communicators will free before MPI_Finialize
|
||||
|
||||
Utilities::shutdown();
|
||||
}
|
||||
|
||||
|
||||
@@ -23,7 +23,7 @@ int main( int argc, char **argv )
|
||||
{
|
||||
|
||||
// Initialize
|
||||
Utilities::startup( argc, argv );
|
||||
Utilities::startup( argc, argv, true );
|
||||
|
||||
{ // Limit scope so variables that contain communicators will free before MPI_Finalize
|
||||
|
||||
@@ -198,7 +198,7 @@ int main( int argc, char **argv )
|
||||
|
||||
|
||||
} // Limit scope so variables that contain communicators will free before MPI_Finalize
|
||||
|
||||
cout << flush;
|
||||
Utilities::shutdown();
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -24,7 +24,7 @@ using namespace std;
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
// Initialize MPI
|
||||
Utilities::startup( argc, argv );
|
||||
Utilities::startup( argc, argv, false );
|
||||
Utilities::MPI comm( MPI_COMM_WORLD );
|
||||
int rank = comm.getRank();
|
||||
int nprocs = comm.getSize();
|
||||
@@ -49,6 +49,7 @@ int main(int argc, char **argv)
|
||||
MRT.Initialize(); // initializing the model will set initial conditions for variables
|
||||
MRT.Run();
|
||||
MRT.VelocityField();
|
||||
cout << flush;
|
||||
}
|
||||
Utilities::shutdown();
|
||||
}
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "common/UnitTest.h"
|
||||
#include "common/Utilities.h"
|
||||
#include "common/Utilities.hpp"
|
||||
#include "common/ScaLBL.h"
|
||||
#include "ProfilerApp.h"
|
||||
|
||||
|
||||
@@ -20,7 +21,6 @@
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
||||
|
||||
#undef MPI_CLASS
|
||||
#define MPI_CLASS Utilities::MPI
|
||||
#define MPI_ASSERT ASSERT
|
||||
@@ -1195,6 +1195,144 @@ void testCommDup( UnitTest *ut )
|
||||
#endif
|
||||
}
|
||||
|
||||
class gpuWrapper{
|
||||
public:
|
||||
gpuWrapper(MPI_CLASS MPI_COMM, int MSG_SIZE);
|
||||
~gpuWrapper();
|
||||
void Send(double *values);
|
||||
void Recv(double *values);
|
||||
double *sendbuf, *recvbuf;
|
||||
private:
|
||||
MPI_Request req1[1],req2[1];
|
||||
MPI_CLASS comm;
|
||||
int sendCount;
|
||||
int recvCount;
|
||||
int rank, rank_x, rank_X, nprocs;
|
||||
int sendtag, recvtag;
|
||||
};
|
||||
|
||||
/**
 * Duplicate the communicator, allocate the two message buffers and work
 * out the neighbouring ranks.
 *
 * @param MPI_COMM  communicator to duplicate
 * @param MSG_SIZE  number of doubles per message (used for both directions)
 */
gpuWrapper::gpuWrapper(MPI_CLASS MPI_COMM, int MSG_SIZE){
    comm = MPI_COMM.dup();
    rank = comm.getRank();
    nprocs = comm.getSize();
    sendCount = MSG_SIZE;
    recvCount = MSG_SIZE;
    // Each buffer is sized from its own element count, so the allocation
    // stays correct if the two counts ever differ.
    ScaLBL_AllocateZeroCopy((void **) &sendbuf, sendCount*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf, recvCount*sizeof(double));
    // Neighbouring ranks with wrap-around: the upper neighbour of the last
    // rank is rank 0, the lower neighbour of rank 0 is the last rank.
    rank_X = (rank + 1) % nprocs;
    rank_x = (rank - 1 + nprocs) % nprocs;
}
|
||||
|
||||
/// Release the send buffer and then the receive buffer.
gpuWrapper::~gpuWrapper(){
    double *owned[] = { sendbuf, recvbuf };
    for (double *buffer : owned)
        ScaLBL_FreeDeviceMemory(buffer);
}
|
||||
|
||||
/**
 * Stage the caller's data and start the non-blocking exchange.
 *
 * @param values  source array holding at least sendCount doubles
 */
void gpuWrapper::Send(double *values){
    // Both directions use the same fixed tag
    const int tag = 130;
    recvtag = tag;
    sendtag = tag;
    // Copy the outgoing values into the send buffer
    const size_t sendBytes = sendCount*sizeof(double);
    ScaLBL_CopyToDevice(sendbuf, values, sendBytes);
    // Post the send to the lower neighbour and the receive from the upper
    // neighbour; both are completed later in Recv()
    req1[0] = comm.Isend(sendbuf, sendCount, rank_x, sendtag);
    req2[0] = comm.Irecv(recvbuf, recvCount, rank_X, recvtag);
}
|
||||
|
||||
/**
 * Complete the exchange started by Send() and hand back the received data.
 *
 * @param values  destination array with room for at least recvCount doubles
 */
void gpuWrapper::Recv(double *values){
    // Wait for the pending send, then for the pending receive
    comm.waitAll(1, &req1[0]);
    comm.waitAll(1, &req2[0]);
    ScaLBL_DeviceBarrier();
    // Copy the received message out of the receive buffer
    const size_t recvBytes = recvCount*sizeof(double);
    ScaLBL_CopyToHost(values, recvbuf, recvBytes);
}
|
||||
|
||||
// Test GPU aware MPI
|
||||
void test_GPU_aware( UnitTest *ut )
|
||||
{
|
||||
constexpr size_t N = 1024*1024;
|
||||
constexpr size_t N_msg = 64;
|
||||
bool test = true;
|
||||
// Get the comm to use
|
||||
MPI_CLASS comm( MPI_COMM_WORLD );
|
||||
int rank = comm.getRank();
|
||||
int size = comm.getSize();
|
||||
try {
|
||||
// Initialize the device
|
||||
int device = ScaLBL_SetDevice(rank);
|
||||
NULL_USE( device );
|
||||
// create wrapper for communications
|
||||
gpuWrapper gpuComm(comm, N);
|
||||
// Allocate and initialize the buffers
|
||||
size_t bytes = N*sizeof(double);
|
||||
double *device_send[N_msg] = { nullptr };
|
||||
double *device_recv[N_msg] = { nullptr };
|
||||
double *host_send[N_msg] = { nullptr };
|
||||
double *host_recv[N_msg] = { nullptr };
|
||||
for ( size_t k=0; k<N_msg; k++ ) {
|
||||
ScaLBL_AllocateDeviceMemory((void**)&device_send[k],bytes);
|
||||
ScaLBL_AllocateDeviceMemory((void**)&device_recv[k],bytes);
|
||||
host_send[k] = new double[N];
|
||||
host_recv[k] = new double[N];
|
||||
// Initialize the data
|
||||
for ( size_t i=0; i<N; i++ ) {
|
||||
host_send[k][i] = 1000 * k * rank + i;
|
||||
host_recv[k][i] = 0;
|
||||
}
|
||||
ScaLBL_CopyToDevice(device_send[k],host_send[k],bytes);
|
||||
ScaLBL_CopyToDevice(device_recv[k],host_recv[k],bytes);
|
||||
}
|
||||
ScaLBL_DeviceBarrier();
|
||||
// Send/recieve the data
|
||||
int rank_send = ( rank + 1 ) % size;
|
||||
int rank_recv = ( rank - 1 + size ) % size;
|
||||
MPI_Request req1[N_msg];
|
||||
MPI_Request req2[N_msg];
|
||||
for ( size_t k=0; k<N_msg; k++ ) {
|
||||
req1[k] = comm.Isend( device_send[k], N, rank_send, k );
|
||||
req2[k] = comm.Irecv( device_recv[k], N, rank_recv, k );
|
||||
}
|
||||
comm.waitAll(N_msg,req1);
|
||||
comm.waitAll(N_msg,req2);
|
||||
// Copy
|
||||
for ( size_t k=0; k<N_msg; k++ ) {
|
||||
ScaLBL_CopyToHost(host_send[k],device_send[k],bytes);
|
||||
ScaLBL_CopyToHost(host_recv[k],device_recv[k],bytes);
|
||||
}
|
||||
ScaLBL_DeviceBarrier();
|
||||
// Check the data
|
||||
for ( size_t k=0; k<N_msg; k++ ) {
|
||||
for ( size_t i=0; i<N; i++ )
|
||||
test = test && host_recv[k][i] == 1000 * k * rank_recv + i;
|
||||
}
|
||||
// Check the gpu wrapper communications the same way
|
||||
for ( size_t k=0; k<N_msg; k++ ) {
|
||||
gpuComm.Send(host_send[k]);
|
||||
gpuComm.Recv(host_recv[k]);
|
||||
}
|
||||
// Check the data
|
||||
for ( size_t k=0; k<N_msg; k++ ) {
|
||||
for ( size_t i=0; i<N; i++ )
|
||||
test = test && host_recv[k][i] == 1000 * k * rank_recv + i;
|
||||
}
|
||||
|
||||
// Free buffers
|
||||
for ( size_t k=0; k<N_msg; k++ ) {
|
||||
ScaLBL_FreeDeviceMemory(device_send[k]);
|
||||
ScaLBL_FreeDeviceMemory(device_recv[k]);
|
||||
delete [] host_send[k];
|
||||
delete [] host_recv[k];
|
||||
}
|
||||
} catch ( ... ) {
|
||||
test = false;
|
||||
}
|
||||
comm.barrier();
|
||||
if ( test ) {
|
||||
std::cout << "MPI is GPU aware" << std::endl;
|
||||
ut->passes("GPU aware MPI" );
|
||||
} else {
|
||||
std::cout << "MPI is NOT GPU aware" << std::endl;
|
||||
ut->failure("GPU aware MPI" );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// This test will test the MPI class
|
||||
int main( int argc, char *argv[] )
|
||||
@@ -1513,6 +1651,9 @@ int main( int argc, char *argv[] )
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
// Test GPU aware MPI
|
||||
test_GPU_aware( &ut );
|
||||
|
||||
} // Limit the scope so objects are destroyed
|
||||
|
||||
// Finished testing, report the results
|
||||
|
||||
Reference in New Issue
Block a user