merge the latest membrane into test_poisson

This commit is contained in:
Zhe Rex Li 2022-04-26 11:16:44 +10:00
commit 17f80b4637
91 changed files with 9709 additions and 3561 deletions

View File

@ -124,21 +124,7 @@ IF ( USE_CUDA )
ADD_DEFINITIONS( -DUSE_CUDA )
ENABLE_LANGUAGE( CUDA )
ELSEIF ( USE_HIP )
IF ( NOT DEFINED HIP_PATH )
IF ( NOT DEFINED ENV{HIP_PATH} )
SET( HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed" )
ELSE()
SET( HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed" )
ENDIF()
ENDIF()
SET( CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH} )
FIND_PACKAGE( HIP REQUIRED )
FIND_PACKAGE( CUDA QUIET )
MESSAGE( "HIP Found")
MESSAGE( " HIP version: ${HIP_VERSION_STRING}")
MESSAGE( " HIP platform: ${HIP_PLATFORM}")
MESSAGE( " HIP Include Path: ${HIP_INCLUDE_DIRS}")
MESSAGE( " HIP Libraries: ${HIP_LIBRARIES}")
ENABLE_LANGUAGE( HIP )
ADD_DEFINITIONS( -DUSE_HIP )
ENDIF()
@ -180,8 +166,7 @@ IF ( NOT ONLY_BUILD_DOCS )
IF ( USE_CUDA )
ADD_PACKAGE_SUBDIRECTORY( cuda )
ELSEIF ( USE_HIP )
ADD_SUBDIRECTORY( hip )
SET( LBPM_LIBRARIES lbpm-hip lbpm-wia )
ADD_PACKAGE_SUBDIRECTORY( hip )
ELSE()
ADD_PACKAGE_SUBDIRECTORY( cpu )
ENDIF()

View File

@ -1,5 +1,4 @@
#include "IO/PackData.h"
#include <string>

View File

@ -5,7 +5,7 @@
#include <map>
#include <set>
#include <vector>
#include <cstddef>
//! Template function to return the buffer size required to pack a class
template<class TYPE>

View File

@ -1263,7 +1263,7 @@ static int backtrace_thread(
if ( tid == pthread_self() ) {
count = ::backtrace( buffer, size );
} else {
// Note: this will get the backtrace, but terminates the thread in the process!!!
// Send a signal to the desired thread to get the call stack
StackTrace_mutex.lock();
struct sigaction sa;
sigfillset( &sa.sa_mask );

View File

@ -193,6 +193,9 @@ MACRO( FIND_FILES )
# Find the CUDA sources
SET( T_CUDASOURCES "" )
FILE( GLOB T_CUDASOURCES "*.cu" )
# Find the HIP sources
SET( T_HIPSOURCES "" )
FILE( GLOB T_HIPSOURCES "*.hip" )
# Find the C sources
SET( T_CSOURCES "" )
FILE( GLOB T_CSOURCES "*.c" )
@ -212,10 +215,11 @@ MACRO( FIND_FILES )
SET( HEADERS ${HEADERS} ${T_HEADERS} )
SET( CXXSOURCES ${CXXSOURCES} ${T_CXXSOURCES} )
SET( CUDASOURCES ${CUDASOURCES} ${T_CUDASOURCES} )
SET( HIPSOURCES ${HIPSOURCES} ${T_HIPSOURCES} )
SET( CSOURCES ${CSOURCES} ${T_CSOURCES} )
SET( FSOURCES ${FSOURCES} ${T_FSOURCES} )
SET( M4FSOURCES ${M4FSOURCES} ${T_M4FSOURCES} )
SET( SOURCES ${SOURCES} ${T_CXXSOURCES} ${T_CSOURCES} ${T_FSOURCES} ${T_M4FSOURCES} ${CUDASOURCES} )
SET( SOURCES ${SOURCES} ${T_CXXSOURCES} ${T_CSOURCES} ${T_FSOURCES} ${T_M4FSOURCES} ${CUDASOURCES} ${HIPSOURCES} )
ENDMACRO()
@ -227,6 +231,9 @@ MACRO( FIND_FILES_PATH IN_PATH )
# Find the CUDA sources
SET( T_CUDASOURCES "" )
FILE( GLOB T_CUDASOURCES "${IN_PATH}/*.cu" )
# Find the HIP sources
SET( T_HIPSOURCES "" )
FILE( GLOB T_HIPSOURCES "${IN_PATH}/*.hip" )
# Find the C sources
SET( T_CSOURCES "" )
FILE( GLOB T_CSOURCES "${IN_PATH}/*.c" )
@ -246,9 +253,10 @@ MACRO( FIND_FILES_PATH IN_PATH )
SET( HEADERS ${HEADERS} ${T_HEADERS} )
SET( CXXSOURCES ${CXXSOURCES} ${T_CXXSOURCES} )
SET( CUDASOURCES ${CUDASOURCES} ${T_CUDASOURCES} )
SET( HIPSOURCES ${HIPSOURCES} ${T_HIPSOURCES} )
SET( CSOURCES ${CSOURCES} ${T_CSOURCES} )
SET( FSOURCES ${FSOURCES} ${T_FSOURCES} )
SET( SOURCES ${SOURCES} ${T_CXXSOURCES} ${T_CSOURCES} ${T_FSOURCES} ${CUDASOURCES} )
SET( SOURCES ${SOURCES} ${T_CXXSOURCES} ${T_CSOURCES} ${T_FSOURCES} ${CUDASOURCES} ${HIPSOURCES} )
ENDMACRO()

View File

@ -20,10 +20,12 @@
#include "common/ArraySize.h"
#include <array>
#include <cstdint>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <memory>
#include <stdint.h>
#include <string>
#include <vector>

View File

@ -4,11 +4,13 @@
#include "common/Utilities.h"
#include <array>
#include <cstdint>
#include <cmath>
#include <complex>
#include <cstdlib>
#include <cstring>
#include <initializer_list>
#include <stdexcept>
#include <vector>
#if defined(__CUDA_ARCH__)

View File

@ -208,72 +208,68 @@ inline void CommunicateSendRecvCounts(
}
//***************************************************************************************
inline void CommunicateRecvLists(
const Utilities::MPI &comm, int sendtag, int recvtag, int *sendList_x,
int *sendList_y, int *sendList_z, int *sendList_X, int *sendList_Y,
int *sendList_Z, int *sendList_xy, int *sendList_XY, int *sendList_xY,
int *sendList_Xy, int *sendList_xz, int *sendList_XZ, int *sendList_xZ,
int *sendList_Xz, int *sendList_yz, int *sendList_YZ, int *sendList_yZ,
int *sendList_Yz, int sendCount_x, int sendCount_y, int sendCount_z,
int sendCount_X, int sendCount_Y, int sendCount_Z, int sendCount_xy,
int sendCount_XY, int sendCount_xY, int sendCount_Xy, int sendCount_xz,
int sendCount_XZ, int sendCount_xZ, int sendCount_Xz, int sendCount_yz,
int sendCount_YZ, int sendCount_yZ, int sendCount_Yz, int *recvList_x,
int *recvList_y, int *recvList_z, int *recvList_X, int *recvList_Y,
int *recvList_Z, int *recvList_xy, int *recvList_XY, int *recvList_xY,
int *recvList_Xy, int *recvList_xz, int *recvList_XZ, int *recvList_xZ,
int *recvList_Xz, int *recvList_yz, int *recvList_YZ, int *recvList_yZ,
int *recvList_Yz, int recvCount_x, int recvCount_y, int recvCount_z,
int recvCount_X, int recvCount_Y, int recvCount_Z, int recvCount_xy,
int recvCount_XY, int recvCount_xY, int recvCount_Xy, int recvCount_xz,
int recvCount_XZ, int recvCount_xZ, int recvCount_Xz, int recvCount_yz,
int recvCount_YZ, int recvCount_yZ, int recvCount_Yz, int rank_x,
int rank_y, int rank_z, int rank_X, int rank_Y, int rank_Z, int rank_xy,
int rank_XY, int rank_xY, int rank_Xy, int rank_xz, int rank_XZ,
int rank_xZ, int rank_Xz, int rank_yz, int rank_YZ, int rank_yZ,
int rank_Yz) {
MPI_Request req1[18], req2[18];
req1[0] = comm.Isend(sendList_x, sendCount_x, rank_x, sendtag);
req2[0] = comm.Irecv(recvList_X, recvCount_X, rank_X, recvtag);
req1[1] = comm.Isend(sendList_X, sendCount_X, rank_X, sendtag);
req2[1] = comm.Irecv(recvList_x, recvCount_x, rank_x, recvtag);
req1[2] = comm.Isend(sendList_y, sendCount_y, rank_y, sendtag);
req2[2] = comm.Irecv(recvList_Y, recvCount_Y, rank_Y, recvtag);
req1[3] = comm.Isend(sendList_Y, sendCount_Y, rank_Y, sendtag);
req2[3] = comm.Irecv(recvList_y, recvCount_y, rank_y, recvtag);
req1[4] = comm.Isend(sendList_z, sendCount_z, rank_z, sendtag);
req2[4] = comm.Irecv(recvList_Z, recvCount_Z, rank_Z, recvtag);
req1[5] = comm.Isend(sendList_Z, sendCount_Z, rank_Z, sendtag);
req2[5] = comm.Irecv(recvList_z, recvCount_z, rank_z, recvtag);
inline void CommunicateRecvLists( const Utilities::MPI& comm, int sendtag, int recvtag,
int *sendList_x, int *sendList_y, int *sendList_z, int *sendList_X, int *sendList_Y, int *sendList_Z,
int *sendList_xy, int *sendList_XY, int *sendList_xY, int *sendList_Xy,
int *sendList_xz, int *sendList_XZ, int *sendList_xZ, int *sendList_Xz,
int *sendList_yz, int *sendList_YZ, int *sendList_yZ, int *sendList_Yz,
int sendCount_x, int sendCount_y, int sendCount_z, int sendCount_X, int sendCount_Y, int sendCount_Z,
int sendCount_xy, int sendCount_XY, int sendCount_xY, int sendCount_Xy,
int sendCount_xz, int sendCount_XZ, int sendCount_xZ, int sendCount_Xz,
int sendCount_yz, int sendCount_YZ, int sendCount_yZ, int sendCount_Yz,
int *recvList_x, int *recvList_y, int *recvList_z, int *recvList_X, int *recvList_Y, int *recvList_Z,
int *recvList_xy, int *recvList_XY, int *recvList_xY, int *recvList_Xy,
int *recvList_xz, int *recvList_XZ, int *recvList_xZ, int *recvList_Xz,
int *recvList_yz, int *recvList_YZ, int *recvList_yZ, int *recvList_Yz,
int recvCount_x, int recvCount_y, int recvCount_z, int recvCount_X, int recvCount_Y, int recvCount_Z,
int recvCount_xy, int recvCount_XY, int recvCount_xY, int recvCount_Xy,
int recvCount_xz, int recvCount_XZ, int recvCount_xZ, int recvCount_Xz,
int recvCount_yz, int recvCount_YZ, int recvCount_yZ, int recvCount_Yz,
int rank_x, int rank_y, int rank_z, int rank_X, int rank_Y, int rank_Z, int rank_xy, int rank_XY, int rank_xY,
int rank_Xy, int rank_xz, int rank_XZ, int rank_xZ, int rank_Xz, int rank_yz, int rank_YZ, int rank_yZ, int rank_Yz)
{
MPI_Request req1[18], req2[18];
req1[0] = comm.Isend(sendList_x,sendCount_x,rank_x,sendtag+0);
req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_X,recvtag+0);
req1[1] = comm.Isend(sendList_X,sendCount_X,rank_X,sendtag+1);
req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_x,recvtag+1);
req1[2] = comm.Isend(sendList_y,sendCount_y,rank_y,sendtag+2);
req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_Y,recvtag+2);
req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_Y,sendtag+3);
req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_y,recvtag+3);
req1[4] = comm.Isend(sendList_z,sendCount_z,rank_z,sendtag+4);
req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_Z,recvtag+4);
req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_Z,sendtag+5);
req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_z,recvtag+5);
req1[6] = comm.Isend(sendList_xy, sendCount_xy, rank_xy, sendtag);
req2[6] = comm.Irecv(recvList_XY, recvCount_XY, rank_XY, recvtag);
req1[7] = comm.Isend(sendList_XY, sendCount_XY, rank_XY, sendtag);
req2[7] = comm.Irecv(recvList_xy, recvCount_xy, rank_xy, recvtag);
req1[8] = comm.Isend(sendList_Xy, sendCount_Xy, rank_Xy, sendtag);
req2[8] = comm.Irecv(recvList_xY, recvCount_xY, rank_xY, recvtag);
req1[9] = comm.Isend(sendList_xY, sendCount_xY, rank_xY, sendtag);
req2[9] = comm.Irecv(recvList_Xy, recvCount_Xy, rank_Xy, recvtag);
req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_xy,sendtag+6);
req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_XY,recvtag+6);
req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_XY,sendtag+7);
req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_xy,recvtag+7);
req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_Xy,sendtag+8);
req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_xY,recvtag+8);
req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_xY,sendtag+9);
req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_Xy,recvtag+9);
req1[10] = comm.Isend(sendList_xz, sendCount_xz, rank_xz, sendtag);
req2[10] = comm.Irecv(recvList_XZ, recvCount_XZ, rank_XZ, recvtag);
req1[11] = comm.Isend(sendList_XZ, sendCount_XZ, rank_XZ, sendtag);
req2[11] = comm.Irecv(recvList_xz, recvCount_xz, rank_xz, recvtag);
req1[12] = comm.Isend(sendList_Xz, sendCount_Xz, rank_Xz, sendtag);
req2[12] = comm.Irecv(recvList_xZ, recvCount_xZ, rank_xZ, recvtag);
req1[13] = comm.Isend(sendList_xZ, sendCount_xZ, rank_xZ, sendtag);
req2[13] = comm.Irecv(recvList_Xz, recvCount_Xz, rank_Xz, recvtag);
req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_xz,sendtag+10);
req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_XZ,recvtag+10);
req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_XZ,sendtag+11);
req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_xz,recvtag+11);
req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_Xz,sendtag+12);
req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_xZ,recvtag+12);
req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_xZ,sendtag+13);
req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_Xz,recvtag+13);
req1[14] = comm.Isend(sendList_yz, sendCount_yz, rank_yz, sendtag);
req2[14] = comm.Irecv(recvList_YZ, recvCount_YZ, rank_YZ, recvtag);
req1[15] = comm.Isend(sendList_YZ, sendCount_YZ, rank_YZ, sendtag);
req2[15] = comm.Irecv(recvList_yz, recvCount_yz, rank_yz, recvtag);
req1[16] = comm.Isend(sendList_Yz, sendCount_Yz, rank_Yz, sendtag);
req2[16] = comm.Irecv(recvList_yZ, recvCount_yZ, rank_yZ, recvtag);
req1[17] = comm.Isend(sendList_yZ, sendCount_yZ, rank_yZ, sendtag);
req2[17] = comm.Irecv(recvList_Yz, recvCount_Yz, rank_Yz, recvtag);
comm.waitAll(18, req1);
comm.waitAll(18, req2);
req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_yz,sendtag+14);
req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_YZ,recvtag+14);
req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_YZ,sendtag+15);
req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_yz,recvtag+15);
req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_Yz,sendtag+16);
req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_yZ,recvtag+16);
req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_yZ,sendtag+17);
req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_Yz,recvtag+17);
comm.waitAll( 18, req1 );
comm.waitAll( 18, req2 );
}
//***************************************************************************************

View File

@ -1543,7 +1543,7 @@ void Domain::ReadFromFile(const std::string &Filename,
} else {
// Recieve the subdomain from rank = 0
//printf("Ready to recieve data %i at process %i \n", N,rank);
Comm.recv(id.data(), N, 0, 15);
Comm.recv(UserData, N, 0, 15);
}
Comm.barrier();
}

View File

@ -93,12 +93,11 @@ template<> long double genRand<long double>()
* axpy *
********************************************************/
template <>
void call_axpy<float>(size_t N, const float alpha, const float *x, float *y) {
void call_axpy<float>(size_t, const float, const float*, float*) {
ERROR("Not finished");
}
template <>
void call_axpy<double>(size_t N, const double alpha, const double *x,
double *y) {
void call_axpy<double>(size_t, const double, const double*, double*) {
ERROR("Not finished");
}
@ -106,22 +105,22 @@ void call_axpy<double>(size_t N, const double alpha, const double *x,
* Multiply two arrays *
********************************************************/
template <>
void call_gemv<double>(size_t M, size_t N, double alpha, double beta,
const double *A, const double *x, double *y) {
void call_gemv<double>(size_t, size_t, double, double,
const double*, const double*, double*) {
ERROR("Not finished");
}
template <>
void call_gemv<float>(size_t M, size_t N, float alpha, float beta,
const float *A, const float *x, float *y) {
void call_gemv<float>(size_t, size_t, float, float,
const float*, const float*, float*) {
ERROR("Not finished");
}
template <>
void call_gemm<double>(size_t M, size_t N, size_t K, double alpha, double beta,
const double *A, const double *B, double *C) {
void call_gemm<double>(size_t, size_t, size_t, double, double,
const double*, const double*, double*) {
ERROR("Not finished");
}
template <>
void call_gemm<float>(size_t M, size_t N, size_t K, float alpha, float beta,
const float *A, const float *B, float *C) {
void call_gemm<float>(size_t, size_t, size_t, float, float,
const float*, const float*, float*) {
ERROR("Not finished");
}

View File

@ -297,10 +297,10 @@ TYPE FunctionTable::sum(const Array<TYPE, FUN, ALLOC> &A) {
}
template <class TYPE>
inline void FunctionTable::gemmWrapper(char TRANSA, char TRANSB, int M, int N,
int K, TYPE alpha, const TYPE *A,
int LDA, const TYPE *B, int LDB,
TYPE beta, TYPE *C, int LDC) {
inline void FunctionTable::gemmWrapper(char, char, int, int,
int, TYPE, const TYPE*,
int, const TYPE*, int,
TYPE, TYPE*, int) {
ERROR("Not finished");
}

File diff suppressed because it is too large Load Diff

View File

@ -1115,15 +1115,14 @@ bool MPI_CLASS::anyReduce(const bool value) const {
template <>
void MPI_CLASS::call_sumReduce<unsigned char>(const unsigned char *send,
unsigned char *recv,
const int n) const {
int n) const {
PROFILE_START("sumReduce1<unsigned char>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED_CHAR, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<unsigned char>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<unsigned char>(unsigned char *x,
const int n) const {
void MPI_CLASS::call_sumReduce<unsigned char>(unsigned char *x, int n) const {
PROFILE_START("sumReduce2<unsigned char>", profile_level);
auto send = x;
auto recv = new unsigned char[n];
@ -1136,13 +1135,13 @@ void MPI_CLASS::call_sumReduce<unsigned char>(unsigned char *x,
// char
template <>
void MPI_CLASS::call_sumReduce<char>(const char *send, char *recv,
const int n) const {
int n) const {
PROFILE_START("sumReduce1<char>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_SIGNED_CHAR, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<char>", profile_level);
}
template <> void MPI_CLASS::call_sumReduce<char>(char *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<char>(char *x, int n) const {
PROFILE_START("sumReduce2<char>", profile_level);
auto send = x;
auto recv = new char[n];
@ -1155,16 +1154,14 @@ template <> void MPI_CLASS::call_sumReduce<char>(char *x, const int n) const {
// unsigned int
template <>
void MPI_CLASS::call_sumReduce<unsigned int>(const unsigned int *send,
unsigned int *recv,
const int n) const {
unsigned int *recv, int n) const {
PROFILE_START("sumReduce1<unsigned int>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<unsigned int>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<unsigned int>(unsigned int *x,
const int n) const {
void MPI_CLASS::call_sumReduce<unsigned int>(unsigned int *x, int n) const {
PROFILE_START("sumReduce2<unsigned int>", profile_level);
auto send = x;
auto recv = new unsigned int[n];
@ -1176,14 +1173,13 @@ void MPI_CLASS::call_sumReduce<unsigned int>(unsigned int *x,
}
// int
template <>
void MPI_CLASS::call_sumReduce<int>(const int *send, int *recv,
const int n) const {
void MPI_CLASS::call_sumReduce<int>(const int *send, int *recv, int n) const {
PROFILE_START("sumReduce1<int>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_INT, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<int>", profile_level);
}
template <> void MPI_CLASS::call_sumReduce<int>(int *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<int>(int *x, int n) const {
PROFILE_START("sumReduce2<int>", profile_level);
auto send = x;
auto recv = new int[n];
@ -1196,14 +1192,13 @@ template <> void MPI_CLASS::call_sumReduce<int>(int *x, const int n) const {
// long int
template <>
void MPI_CLASS::call_sumReduce<long int>(const long int *send, long int *recv,
const int n) const {
int n) const {
PROFILE_START("sumReduce1<long int>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_LONG, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<long int>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<long int>(long int *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<long int>(long int *x, int n) const {
PROFILE_START("sumReduce2<long int>", profile_level);
auto send = x;
auto recv = new long int[n];
@ -1217,15 +1212,14 @@ void MPI_CLASS::call_sumReduce<long int>(long int *x, const int n) const {
template <>
void MPI_CLASS::call_sumReduce<unsigned long>(const unsigned long *send,
unsigned long *recv,
const int n) const {
int n) const {
PROFILE_START("sumReduce1<unsigned long>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED_LONG, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<unsigned long>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<unsigned long>(unsigned long *x,
const int n) const {
void MPI_CLASS::call_sumReduce<unsigned long>(unsigned long *x, int n) const {
PROFILE_START("sumReduce2<unsigned long>", profile_level);
auto send = x;
auto recv = new unsigned long int[n];
@ -1239,15 +1233,14 @@ void MPI_CLASS::call_sumReduce<unsigned long>(unsigned long *x,
#ifdef USE_WINDOWS
template <>
void MPI_CLASS::call_sumReduce<size_t>(const size_t *send, size_t *recv,
const int n) const {
int n) const {
MPI_ASSERT(MPI_SIZE_T != 0);
PROFILE_START("sumReduce1<size_t>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_SIZE_T, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<size_t>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<size_t>(size_t *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<size_t>(size_t *x, int n) const {
MPI_ASSERT(MPI_SIZE_T != 0);
PROFILE_START("sumReduce2<size_t>", profile_level);
auto send = x;
@ -1263,13 +1256,13 @@ void MPI_CLASS::call_sumReduce<size_t>(size_t *x, const int n) const {
// float
template <>
void MPI_CLASS::call_sumReduce<float>(const float *send, float *recv,
const int n) const {
int n) const {
PROFILE_START("sumReduce1<float>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_FLOAT, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<float>", profile_level);
}
template <> void MPI_CLASS::call_sumReduce<float>(float *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<float>(float *x, int n) const {
PROFILE_START("sumReduce2<float>", profile_level);
auto send = x;
auto recv = new float[n];
@ -1282,14 +1275,13 @@ template <> void MPI_CLASS::call_sumReduce<float>(float *x, const int n) const {
// double
template <>
void MPI_CLASS::call_sumReduce<double>(const double *send, double *recv,
const int n) const {
int n) const {
PROFILE_START("sumReduce1<double>", profile_level);
MPI_Allreduce((void *)send, (void *)recv, n, MPI_DOUBLE, MPI_SUM,
communicator);
PROFILE_STOP("sumReduce1<double>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<double>(double *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<double>(double *x, int n) const {
PROFILE_START("sumReduce2<double>", profile_level);
auto send = x;
auto recv = new double[n];
@ -1302,7 +1294,7 @@ void MPI_CLASS::call_sumReduce<double>(double *x, const int n) const {
// std::complex<double>
template <>
void MPI_CLASS::call_sumReduce<std::complex<double>>(
const std::complex<double> *x, std::complex<double> *y, const int n) const {
const std::complex<double> *x, std::complex<double> *y, int n) const {
PROFILE_START("sumReduce1<complex double>", profile_level);
auto send = new double[2 * n];
auto recv = new double[2 * n];
@ -1320,7 +1312,7 @@ void MPI_CLASS::call_sumReduce<std::complex<double>>(
}
template <>
void MPI_CLASS::call_sumReduce<std::complex<double>>(std::complex<double> *x,
const int n) const {
int n) const {
PROFILE_START("sumReduce2<complex double>", profile_level);
auto send = new double[2 * n];
auto recv = new double[2 * n];
@ -1345,7 +1337,7 @@ void MPI_CLASS::call_sumReduce<std::complex<double>>(std::complex<double> *x,
// unsigned char
template <>
void MPI_CLASS::call_minReduce<unsigned char>(const unsigned char *send,
unsigned char *recv, const int n,
unsigned char *recv, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce1<unsigned char>", profile_level);
@ -1363,7 +1355,7 @@ void MPI_CLASS::call_minReduce<unsigned char>(const unsigned char *send,
}
}
template <>
void MPI_CLASS::call_minReduce<unsigned char>(unsigned char *x, const int n,
void MPI_CLASS::call_minReduce<unsigned char>(unsigned char *x, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce2<unsigned char>", profile_level);
@ -1386,7 +1378,7 @@ void MPI_CLASS::call_minReduce<unsigned char>(unsigned char *x, const int n,
}
// char
template <>
void MPI_CLASS::call_minReduce<char>(const char *send, char *recv, const int n,
void MPI_CLASS::call_minReduce<char>(const char *send, char *recv, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce1<char>", profile_level);
@ -1404,7 +1396,7 @@ void MPI_CLASS::call_minReduce<char>(const char *send, char *recv, const int n,
}
}
template <>
void MPI_CLASS::call_minReduce<char>(char *x, const int n,
void MPI_CLASS::call_minReduce<char>(char *x, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce2<char>", profile_level);
@ -1428,7 +1420,7 @@ void MPI_CLASS::call_minReduce<char>(char *x, const int n,
// unsigned int
template <>
void MPI_CLASS::call_minReduce<unsigned int>(const unsigned int *send,
unsigned int *recv, const int n,
unsigned int *recv, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce1<unsigned int>", profile_level);
@ -1446,7 +1438,7 @@ void MPI_CLASS::call_minReduce<unsigned int>(const unsigned int *send,
}
}
template <>
void MPI_CLASS::call_minReduce<unsigned int>(unsigned int *x, const int n,
void MPI_CLASS::call_minReduce<unsigned int>(unsigned int *x, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce2<unsigned int>", profile_level);
@ -1469,7 +1461,7 @@ void MPI_CLASS::call_minReduce<unsigned int>(unsigned int *x, const int n,
}
// int
template <>
void MPI_CLASS::call_minReduce<int>(const int *x, int *y, const int n,
void MPI_CLASS::call_minReduce<int>(const int *x, int *y, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<int>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1492,7 +1484,7 @@ void MPI_CLASS::call_minReduce<int>(const int *x, int *y, const int n,
PROFILE_STOP("minReduce1<int>", profile_level);
}
template <>
void MPI_CLASS::call_minReduce<int>(int *x, const int n,
void MPI_CLASS::call_minReduce<int>(int *x, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce2<int>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1523,7 +1515,7 @@ void MPI_CLASS::call_minReduce<int>(int *x, const int n,
template <>
void MPI_CLASS::call_minReduce<unsigned long int>(const unsigned long int *send,
unsigned long int *recv,
const int n,
int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce1<unsigned long>", profile_level);
@ -1541,8 +1533,7 @@ void MPI_CLASS::call_minReduce<unsigned long int>(const unsigned long int *send,
}
}
template <>
void MPI_CLASS::call_minReduce<unsigned long int>(unsigned long int *x,
const int n,
void MPI_CLASS::call_minReduce<unsigned long int>(unsigned long int *x, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce2<unsigned long>", profile_level);
@ -1565,8 +1556,7 @@ void MPI_CLASS::call_minReduce<unsigned long int>(unsigned long int *x,
}
// long int
template <>
void MPI_CLASS::call_minReduce<long int>(const long int *x, long int *y,
const int n,
void MPI_CLASS::call_minReduce<long int>(const long int *x, long int *y, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<long int>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1589,7 +1579,7 @@ void MPI_CLASS::call_minReduce<long int>(const long int *x, long int *y,
PROFILE_STOP("minReduce1<long int>", profile_level);
}
template <>
void MPI_CLASS::call_minReduce<long int>(long int *x, const int n,
void MPI_CLASS::call_minReduce<long int>(long int *x, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce2<long int>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1619,8 +1609,8 @@ void MPI_CLASS::call_minReduce<long int>(long int *x, const int n,
// unsigned long long int
template <>
void MPI_CLASS::call_minReduce<unsigned long long int>(
const unsigned long long int *send, unsigned long long int *recv,
const int n, int *comm_rank_of_min) const {
const unsigned long long int *send, unsigned long long int *recv, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<long int>", profile_level);
if (comm_rank_of_min == nullptr) {
auto x = new long long int[n];
@ -1647,7 +1637,7 @@ void MPI_CLASS::call_minReduce<unsigned long long int>(
}
template <>
void MPI_CLASS::call_minReduce<unsigned long long int>(
unsigned long long int *x, const int n, int *comm_rank_of_min) const {
unsigned long long int *x, int n, int *comm_rank_of_min) const {
auto recv = new unsigned long long int[n];
call_minReduce<unsigned long long int>(x, recv, n, comm_rank_of_min);
for (int i = 0; i < n; i++)
@ -1657,7 +1647,7 @@ void MPI_CLASS::call_minReduce<unsigned long long int>(
// long long int
template <>
void MPI_CLASS::call_minReduce<long long int>(const long long int *x,
long long int *y, const int n,
long long int *y, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<long int>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1676,7 +1666,7 @@ void MPI_CLASS::call_minReduce<long long int>(const long long int *x,
PROFILE_STOP("minReduce1<long int>", profile_level);
}
template <>
void MPI_CLASS::call_minReduce<long long int>(long long int *x, const int n,
void MPI_CLASS::call_minReduce<long long int>(long long int *x, int n,
int *comm_rank_of_min) const {
auto recv = new long long int[n];
call_minReduce<long long int>(x, recv, n, comm_rank_of_min);
@ -1686,7 +1676,7 @@ void MPI_CLASS::call_minReduce<long long int>(long long int *x, const int n,
}
// float
template <>
void MPI_CLASS::call_minReduce<float>(const float *x, float *y, const int n,
void MPI_CLASS::call_minReduce<float>(const float *x, float *y, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<float>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1709,7 +1699,7 @@ void MPI_CLASS::call_minReduce<float>(const float *x, float *y, const int n,
PROFILE_STOP("minReduce1<float>", profile_level);
}
template <>
void MPI_CLASS::call_minReduce<float>(float *x, const int n,
void MPI_CLASS::call_minReduce<float>(float *x, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce2<float>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1738,7 +1728,7 @@ void MPI_CLASS::call_minReduce<float>(float *x, const int n,
}
// double
template <>
void MPI_CLASS::call_minReduce<double>(const double *x, double *y, const int n,
void MPI_CLASS::call_minReduce<double>(const double *x, double *y, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<double>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1762,7 +1752,7 @@ void MPI_CLASS::call_minReduce<double>(const double *x, double *y, const int n,
PROFILE_STOP("minReduce1<double>", profile_level);
}
template <>
void MPI_CLASS::call_minReduce<double>(double *x, const int n,
void MPI_CLASS::call_minReduce<double>(double *x, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce2<double>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1799,7 +1789,7 @@ void MPI_CLASS::call_minReduce<double>(double *x, const int n,
// unsigned char
template <>
void MPI_CLASS::call_maxReduce<unsigned char>(const unsigned char *send,
unsigned char *recv, const int n,
unsigned char *recv, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce1<unsigned char>", profile_level);
@ -1817,7 +1807,7 @@ void MPI_CLASS::call_maxReduce<unsigned char>(const unsigned char *send,
}
}
template <>
void MPI_CLASS::call_maxReduce<unsigned char>(unsigned char *x, const int n,
void MPI_CLASS::call_maxReduce<unsigned char>(unsigned char *x, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce2<unsigned char>", profile_level);
@ -1840,7 +1830,7 @@ void MPI_CLASS::call_maxReduce<unsigned char>(unsigned char *x, const int n,
}
// char
template <>
void MPI_CLASS::call_maxReduce<char>(const char *send, char *recv, const int n,
void MPI_CLASS::call_maxReduce<char>(const char *send, char *recv, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce1<char>", profile_level);
@ -1858,7 +1848,7 @@ void MPI_CLASS::call_maxReduce<char>(const char *send, char *recv, const int n,
}
}
template <>
void MPI_CLASS::call_maxReduce<char>(char *x, const int n,
void MPI_CLASS::call_maxReduce<char>(char *x, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce2<char>", profile_level);
@ -1882,7 +1872,7 @@ void MPI_CLASS::call_maxReduce<char>(char *x, const int n,
// unsigned int
template <>
void MPI_CLASS::call_maxReduce<unsigned int>(const unsigned int *send,
unsigned int *recv, const int n,
unsigned int *recv, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce1<unsigned int>", profile_level);
@ -1900,7 +1890,7 @@ void MPI_CLASS::call_maxReduce<unsigned int>(const unsigned int *send,
}
}
template <>
void MPI_CLASS::call_maxReduce<unsigned int>(unsigned int *x, const int n,
void MPI_CLASS::call_maxReduce<unsigned int>(unsigned int *x, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce2<unsigned int>", profile_level);
@ -1923,7 +1913,7 @@ void MPI_CLASS::call_maxReduce<unsigned int>(unsigned int *x, const int n,
}
// int
template <>
void MPI_CLASS::call_maxReduce<int>(const int *x, int *y, const int n,
void MPI_CLASS::call_maxReduce<int>(const int *x, int *y, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<int>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -1946,7 +1936,7 @@ void MPI_CLASS::call_maxReduce<int>(const int *x, int *y, const int n,
PROFILE_STOP("maxReduce1<int>", profile_level);
}
template <>
void MPI_CLASS::call_maxReduce<int>(int *x, const int n,
void MPI_CLASS::call_maxReduce<int>(int *x, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce2<int>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -1975,8 +1965,7 @@ void MPI_CLASS::call_maxReduce<int>(int *x, const int n,
}
// long int
template <>
void MPI_CLASS::call_maxReduce<long int>(const long int *x, long int *y,
const int n,
void MPI_CLASS::call_maxReduce<long int>(const long int *x, long int *y, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<lond int>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -1999,7 +1988,7 @@ void MPI_CLASS::call_maxReduce<long int>(const long int *x, long int *y,
PROFILE_STOP("maxReduce1<lond int>", profile_level);
}
template <>
void MPI_CLASS::call_maxReduce<long int>(long int *x, const int n,
void MPI_CLASS::call_maxReduce<long int>(long int *x, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce2<lond int>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2030,7 +2019,7 @@ void MPI_CLASS::call_maxReduce<long int>(long int *x, const int n,
template <>
void MPI_CLASS::call_maxReduce<unsigned long int>(const unsigned long int *send,
unsigned long int *recv,
const int n,
int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce1<unsigned long>", profile_level);
@ -2048,8 +2037,7 @@ void MPI_CLASS::call_maxReduce<unsigned long int>(const unsigned long int *send,
}
}
template <>
void MPI_CLASS::call_maxReduce<unsigned long int>(unsigned long int *x,
const int n,
void MPI_CLASS::call_maxReduce<unsigned long int>(unsigned long int *x, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce2<unsigned long>", profile_level);
@ -2073,8 +2061,8 @@ void MPI_CLASS::call_maxReduce<unsigned long int>(unsigned long int *x,
// unsigned long long int
template <>
void MPI_CLASS::call_maxReduce<unsigned long long int>(
const unsigned long long int *send, unsigned long long int *recv,
const int n, int *comm_rank_of_max) const {
const unsigned long long int *send, unsigned long long int *recv, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<long int>", profile_level);
if (comm_rank_of_max == nullptr) {
auto x = new long long int[n];
@ -2101,7 +2089,7 @@ void MPI_CLASS::call_maxReduce<unsigned long long int>(
}
template <>
void MPI_CLASS::call_maxReduce<unsigned long long int>(
unsigned long long int *x, const int n, int *comm_rank_of_max) const {
unsigned long long int *x, int n, int *comm_rank_of_max) const {
auto recv = new unsigned long long int[n];
call_maxReduce<unsigned long long int>(x, recv, n, comm_rank_of_max);
for (int i = 0; i < n; i++)
@ -2111,7 +2099,7 @@ void MPI_CLASS::call_maxReduce<unsigned long long int>(
// long long int
template <>
void MPI_CLASS::call_maxReduce<long long int>(const long long int *x,
long long int *y, const int n,
long long int *y, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<long int>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2130,7 +2118,7 @@ void MPI_CLASS::call_maxReduce<long long int>(const long long int *x,
PROFILE_STOP("maxReduce1<long int>", profile_level);
}
template <>
void MPI_CLASS::call_maxReduce<long long int>(long long int *x, const int n,
void MPI_CLASS::call_maxReduce<long long int>(long long int *x, int n,
int *comm_rank_of_max) const {
auto recv = new long long int[n];
call_maxReduce<long long int>(x, recv, n, comm_rank_of_max);
@ -2140,7 +2128,7 @@ void MPI_CLASS::call_maxReduce<long long int>(long long int *x, const int n,
}
// float
template <>
void MPI_CLASS::call_maxReduce<float>(const float *x, float *y, const int n,
void MPI_CLASS::call_maxReduce<float>(const float *x, float *y, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<float>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2164,7 +2152,7 @@ void MPI_CLASS::call_maxReduce<float>(const float *x, float *y, const int n,
PROFILE_STOP("maxReduce1<float>", profile_level);
}
template <>
void MPI_CLASS::call_maxReduce<float>(float *x, const int n,
void MPI_CLASS::call_maxReduce<float>(float *x, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce2<float>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2193,7 +2181,7 @@ void MPI_CLASS::call_maxReduce<float>(float *x, const int n,
}
// double
template <>
void MPI_CLASS::call_maxReduce<double>(const double *x, double *y, const int n,
void MPI_CLASS::call_maxReduce<double>(const double *x, double *y, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<double>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2217,7 +2205,7 @@ void MPI_CLASS::call_maxReduce<double>(const double *x, double *y, const int n,
PROFILE_STOP("maxReduce1<double>", profile_level);
}
template <>
void MPI_CLASS::call_maxReduce<double>(double *x, const int n,
void MPI_CLASS::call_maxReduce<double>(double *x, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce2<double>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2253,51 +2241,46 @@ void MPI_CLASS::call_maxReduce<double>(double *x, const int n,
#ifdef USE_MPI
// char
template <>
void MPI_CLASS::call_bcast<unsigned char>(unsigned char *x, const int n,
const int root) const {
void MPI_CLASS::call_bcast<unsigned char>(unsigned char *x, int n,
int root) const {
PROFILE_START("bcast<unsigned char>", profile_level);
MPI_Bcast(x, n, MPI_UNSIGNED_CHAR, root, communicator);
PROFILE_STOP("bcast<unsigned char>", profile_level);
}
template <>
void MPI_CLASS::call_bcast<char>(char *x, const int n, const int root) const {
template <> void MPI_CLASS::call_bcast<char>(char *x, int n, int root) const {
PROFILE_START("bcast<char>", profile_level);
MPI_Bcast(x, n, MPI_CHAR, root, communicator);
PROFILE_STOP("bcast<char>", profile_level);
}
// int
template <>
void MPI_CLASS::call_bcast<unsigned int>(unsigned int *x, const int n,
const int root) const {
void MPI_CLASS::call_bcast<unsigned int>(unsigned int *x, int n,
int root) const {
PROFILE_START("bcast<unsigned int>", profile_level);
MPI_Bcast(x, n, MPI_UNSIGNED, root, communicator);
PROFILE_STOP("bcast<unsigned int>", profile_level);
}
template <>
void MPI_CLASS::call_bcast<int>(int *x, const int n, const int root) const {
template <> void MPI_CLASS::call_bcast<int>(int *x, int n, int root) const {
PROFILE_START("bcast<int>", profile_level);
MPI_Bcast(x, n, MPI_INT, root, communicator);
PROFILE_STOP("bcast<int>", profile_level);
}
// float
template <>
void MPI_CLASS::call_bcast<float>(float *x, const int n, const int root) const {
template <> void MPI_CLASS::call_bcast<float>(float *x, int n, int root) const {
PROFILE_START("bcast<float>", profile_level);
MPI_Bcast(x, n, MPI_FLOAT, root, communicator);
PROFILE_STOP("bcast<float>", profile_level);
}
// double
template <>
void MPI_CLASS::call_bcast<double>(double *x, const int n,
const int root) const {
void MPI_CLASS::call_bcast<double>(double *x, int n, int root) const {
PROFILE_START("bcast<double>", profile_level);
MPI_Bcast(x, n, MPI_DOUBLE, root, communicator);
PROFILE_STOP("bcast<double>", profile_level);
}
#else
// We need a concrete instantiation of bcast<char>(x,n,root);
template <>
void MPI_CLASS::call_bcast<char>(char *, const int, const int) const {}
template <> void MPI_CLASS::call_bcast<char>(char *, int, int) const {}
#endif
/************************************************************************
@ -2316,8 +2299,8 @@ void MPI_CLASS::barrier() const {
#ifdef USE_MPI
// char
template <>
void MPI_CLASS::send<char>(const char *buf, const int length,
const int recv_proc_number, int tag) const {
void MPI_CLASS::send<char>(const char *buf, int length, int recv_proc_number,
int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
@ -2329,8 +2312,8 @@ void MPI_CLASS::send<char>(const char *buf, const int length,
}
// int
template <>
void MPI_CLASS::send<int>(const int *buf, const int length,
const int recv_proc_number, int tag) const {
void MPI_CLASS::send<int>(const int *buf, int length, int recv_proc_number,
int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
@ -2341,8 +2324,8 @@ void MPI_CLASS::send<int>(const int *buf, const int length,
}
// float
template <>
void MPI_CLASS::send<float>(const float *buf, const int length,
const int recv_proc_number, int tag) const {
void MPI_CLASS::send<float>(const float *buf, int length, int recv_proc_number,
int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
@ -2354,8 +2337,8 @@ void MPI_CLASS::send<float>(const float *buf, const int length,
}
// double
template <>
void MPI_CLASS::send<double>(const double *buf, const int length,
const int recv_proc_number, int tag) const {
void MPI_CLASS::send<double>(const double *buf, int length,
int recv_proc_number, int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
@ -2368,8 +2351,7 @@ void MPI_CLASS::send<double>(const double *buf, const int length,
#else
// We need a concrete instantiation of send for use without MPI
template <>
void MPI_CLASS::send<char>(const char *buf, const int length, const int,
int tag) const {
void MPI_CLASS::send<char>(const char *buf, int length, int, int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
PROFILE_START("send<char>", profile_level);
@ -2391,8 +2373,8 @@ void MPI_CLASS::send<char>(const char *buf, const int length, const int,
#ifdef USE_MPI
// char
template <>
MPI_Request MPI_CLASS::Isend<char>(const char *buf, const int length,
const int recv_proc, const int tag) const {
MPI_Request MPI_CLASS::Isend<char>(const char *buf, int length, int recv_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2404,8 +2386,8 @@ MPI_Request MPI_CLASS::Isend<char>(const char *buf, const int length,
}
// int
template <>
MPI_Request MPI_CLASS::Isend<int>(const int *buf, const int length,
const int recv_proc, const int tag) const {
MPI_Request MPI_CLASS::Isend<int>(const int *buf, int length, int recv_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2417,8 +2399,8 @@ MPI_Request MPI_CLASS::Isend<int>(const int *buf, const int length,
}
// float
template <>
MPI_Request MPI_CLASS::Isend<float>(const float *buf, const int length,
const int recv_proc, const int tag) const {
MPI_Request MPI_CLASS::Isend<float>(const float *buf, int length, int recv_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2430,8 +2412,8 @@ MPI_Request MPI_CLASS::Isend<float>(const float *buf, const int length,
}
// double
template <>
MPI_Request MPI_CLASS::Isend<double>(const double *buf, const int length,
const int recv_proc, const int tag) const {
MPI_Request MPI_CLASS::Isend<double>(const double *buf, int length,
int recv_proc, int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2444,8 +2426,8 @@ MPI_Request MPI_CLASS::Isend<double>(const double *buf, const int length,
#else
// We need a concrete instantiation of send for use without mpi
template <>
MPI_Request MPI_CLASS::Isend<char>(const char *buf, const int length, const int,
const int tag) const {
MPI_Request MPI_CLASS::Isend<char>(const char *buf, int length, int,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
PROFILE_START("Isend<char>", profile_level);
@ -2472,8 +2454,8 @@ MPI_Request MPI_CLASS::Isend<char>(const char *buf, const int length, const int,
/************************************************************************
* Send byte array to another processor. *
************************************************************************/
void MPI_CLASS::sendBytes(const void *buf, const int number_bytes,
const int recv_proc_number, int tag) const {
void MPI_CLASS::sendBytes(const void *buf, int number_bytes,
int recv_proc_number, int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
send<char>((const char *)buf, number_bytes, recv_proc_number, tag);
@ -2482,7 +2464,7 @@ void MPI_CLASS::sendBytes(const void *buf, const int number_bytes,
/************************************************************************
* Non-blocking send byte array to another processor. *
************************************************************************/
MPI_Request MPI_CLASS::IsendBytes(const void *buf, const int number_bytes,
MPI_Request MPI_CLASS::IsendBytes(const void *buf, int number_bytes,
const int recv_proc, const int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
@ -2496,7 +2478,7 @@ MPI_Request MPI_CLASS::IsendBytes(const void *buf, const int number_bytes,
#ifdef USE_MPI
// char
template <>
void MPI_CLASS::recv<char>(char *buf, int &length, const int send_proc_number,
void MPI_CLASS::recv<char>(char *buf, int &length, int send_proc_number,
const bool get_length, int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
@ -2518,7 +2500,7 @@ void MPI_CLASS::recv<char>(char *buf, int &length, const int send_proc_number,
}
// int
template <>
void MPI_CLASS::recv<int>(int *buf, int &length, const int send_proc_number,
void MPI_CLASS::recv<int>(int *buf, int &length, int send_proc_number,
const bool get_length, int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
@ -2540,7 +2522,7 @@ void MPI_CLASS::recv<int>(int *buf, int &length, const int send_proc_number,
}
// float
template <>
void MPI_CLASS::recv<float>(float *buf, int &length, const int send_proc_number,
void MPI_CLASS::recv<float>(float *buf, int &length, int send_proc_number,
const bool get_length, int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
@ -2562,9 +2544,8 @@ void MPI_CLASS::recv<float>(float *buf, int &length, const int send_proc_number,
}
// double
template <>
void MPI_CLASS::recv<double>(double *buf, int &length,
const int send_proc_number, const bool get_length,
int tag) const {
void MPI_CLASS::recv<double>(double *buf, int &length, int send_proc_number,
const bool get_length, int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
@ -2586,7 +2567,7 @@ void MPI_CLASS::recv<double>(double *buf, int &length,
#else
// We need a concrete instantiation of recv for use without mpi
template <>
void MPI_CLASS::recv<char>(char *buf, int &length, const int, const bool,
void MPI_CLASS::recv<char>(char *buf, int &length, int, const bool,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
@ -2609,8 +2590,8 @@ void MPI_CLASS::recv<char>(char *buf, int &length, const int, const bool,
#ifdef USE_MPI
// char
template <>
MPI_Request MPI_CLASS::Irecv<char>(char *buf, const int length,
const int send_proc, const int tag) const {
MPI_Request MPI_CLASS::Irecv<char>(char *buf, int length, int send_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2622,8 +2603,8 @@ MPI_Request MPI_CLASS::Irecv<char>(char *buf, const int length,
}
// int
template <>
MPI_Request MPI_CLASS::Irecv<int>(int *buf, const int length,
const int send_proc, const int tag) const {
MPI_Request MPI_CLASS::Irecv<int>(int *buf, int length, int send_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2635,8 +2616,8 @@ MPI_Request MPI_CLASS::Irecv<int>(int *buf, const int length,
}
// float
template <>
MPI_Request MPI_CLASS::Irecv<float>(float *buf, const int length,
const int send_proc, const int tag) const {
MPI_Request MPI_CLASS::Irecv<float>(float *buf, int length, int send_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2648,8 +2629,8 @@ MPI_Request MPI_CLASS::Irecv<float>(float *buf, const int length,
}
// double
template <>
MPI_Request MPI_CLASS::Irecv<double>(double *buf, const int length,
const int send_proc, const int tag) const {
MPI_Request MPI_CLASS::Irecv<double>(double *buf, int length, int send_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2662,8 +2643,7 @@ MPI_Request MPI_CLASS::Irecv<double>(double *buf, const int length,
#else
// We need a concrete instantiation of irecv for use without mpi
template <>
MPI_Request MPI_CLASS::Irecv<char>(char *buf, const int length, const int,
const int tag) const {
MPI_Request MPI_CLASS::Irecv<char>(char *buf, int length, int, int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
PROFILE_START("Irecv<char>", profile_level);
@ -2690,7 +2670,7 @@ MPI_Request MPI_CLASS::Irecv<char>(char *buf, const int length, const int,
/************************************************************************
* Recieve byte array to another processor. *
************************************************************************/
void MPI_CLASS::recvBytes(void *buf, int &number_bytes, const int send_proc,
void MPI_CLASS::recvBytes(void *buf, int &number_bytes, int send_proc,
int tag) const {
recv<char>((char *)buf, number_bytes, send_proc, false, tag);
}
@ -2698,8 +2678,8 @@ void MPI_CLASS::recvBytes(void *buf, int &number_bytes, const int send_proc,
/************************************************************************
* Recieve byte array to another processor. *
************************************************************************/
MPI_Request MPI_CLASS::IrecvBytes(void *buf, const int number_bytes,
const int send_proc, const int tag) const {
MPI_Request MPI_CLASS::IrecvBytes(void *buf, int number_bytes, int send_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
return Irecv<char>((char *)buf, number_bytes, send_proc, tag);
@ -2913,7 +2893,7 @@ void MPI_CLASS::call_allGather<char>(const char *, int, char *, int *,
************************************************************************/
#ifdef USE_MPI
template <>
void MPI_CLASS::allToAll<unsigned char>(const int n, const unsigned char *send,
void MPI_CLASS::allToAll<unsigned char>(int n, const unsigned char *send,
unsigned char *recv) const {
PROFILE_START("allToAll<unsigned char>", profile_level);
MPI_Alltoall((void *)send, n, MPI_UNSIGNED_CHAR, (void *)recv, n,
@ -2921,15 +2901,14 @@ void MPI_CLASS::allToAll<unsigned char>(const int n, const unsigned char *send,
PROFILE_STOP("allToAll<unsigned char>", profile_level);
}
template <>
void MPI_CLASS::allToAll<char>(const int n, const char *send,
char *recv) const {
void MPI_CLASS::allToAll<char>(int n, const char *send, char *recv) const {
PROFILE_START("allToAll<char>", profile_level);
MPI_Alltoall((void *)send, n, MPI_CHAR, (void *)recv, n, MPI_CHAR,
communicator);
PROFILE_STOP("allToAll<char>", profile_level);
}
template <>
void MPI_CLASS::allToAll<unsigned int>(const int n, const unsigned int *send,
void MPI_CLASS::allToAll<unsigned int>(int n, const unsigned int *send,
unsigned int *recv) const {
PROFILE_START("allToAll<unsigned int>", profile_level);
MPI_Alltoall((void *)send, n, MPI_UNSIGNED, (void *)recv, n, MPI_UNSIGNED,
@ -2937,14 +2916,14 @@ void MPI_CLASS::allToAll<unsigned int>(const int n, const unsigned int *send,
PROFILE_STOP("allToAll<unsigned int>", profile_level);
}
template <>
void MPI_CLASS::allToAll<int>(const int n, const int *send, int *recv) const {
void MPI_CLASS::allToAll<int>(int n, const int *send, int *recv) const {
PROFILE_START("allToAll<int>", profile_level);
MPI_Alltoall((void *)send, n, MPI_INT, (void *)recv, n, MPI_INT,
communicator);
PROFILE_STOP("allToAll<int>", profile_level);
}
template <>
void MPI_CLASS::allToAll<unsigned long int>(const int n,
void MPI_CLASS::allToAll<unsigned long int>(int n,
const unsigned long int *send,
unsigned long int *recv) const {
PROFILE_START("allToAll<unsigned long>", profile_level);
@ -2953,7 +2932,7 @@ void MPI_CLASS::allToAll<unsigned long int>(const int n,
PROFILE_STOP("allToAll<unsigned long>", profile_level);
}
template <>
void MPI_CLASS::allToAll<long int>(const int n, const long int *send,
void MPI_CLASS::allToAll<long int>(int n, const long int *send,
long int *recv) const {
PROFILE_START("allToAll<long int>", profile_level);
MPI_Alltoall((void *)send, n, MPI_LONG, (void *)recv, n, MPI_LONG,
@ -2961,15 +2940,14 @@ void MPI_CLASS::allToAll<long int>(const int n, const long int *send,
PROFILE_STOP("allToAll<long int>", profile_level);
}
template <>
void MPI_CLASS::allToAll<float>(const int n, const float *send,
float *recv) const {
void MPI_CLASS::allToAll<float>(int n, const float *send, float *recv) const {
PROFILE_START("allToAll<float>", profile_level);
MPI_Alltoall((void *)send, n, MPI_FLOAT, (void *)recv, n, MPI_FLOAT,
communicator);
PROFILE_STOP("allToAll<float>", profile_level);
}
template <>
void MPI_CLASS::allToAll<double>(const int n, const double *send,
void MPI_CLASS::allToAll<double>(int n, const double *send,
double *recv) const {
PROFILE_START("allToAll<double>", profile_level);
MPI_Alltoall((void *)send, n, MPI_DOUBLE, (void *)recv, n, MPI_DOUBLE,
@ -3713,4 +3691,28 @@ MPI MPI::loadBalance(double local, std::vector<double> work) {
return split(0, key[getRank()]);
}
/****************************************************************************
* Function Persistent Communication *
****************************************************************************/
template <>
std::shared_ptr<MPI_Request> MPI::Isend_init<double>(const double *buf, int N, int proc, int tag) const
{
std::shared_ptr<MPI_Request> obj(new MPI_Request, []( MPI_Request *req ) { MPI_Request_free( req ); } );
MPI_Send_init( buf, N, MPI_DOUBLE, proc, tag, communicator, obj.get() );
return obj;
}
template<>
std::shared_ptr<MPI_Request> MPI::Irecv_init<double>(double *buf, int N, int proc, int tag) const
{
std::shared_ptr<MPI_Request> obj(new MPI_Request, []( MPI_Request *req ) { MPI_Request_free( req ); } );
MPI_Recv_init( buf, N, MPI_DOUBLE, proc, tag, communicator, obj.get() );
return obj;
}
void MPI::Start( MPI_Request &request )
{
MPI_Start( &request );
}
} // namespace Utilities

View File

@ -26,6 +26,7 @@ redistribution is prohibited.
#include <atomic>
#include <complex>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
@ -173,10 +174,9 @@ public: // Member functions
*
*/
static void
balanceProcesses(const MPI &comm = MPI(MPI_COMM_WORLD),
const int method = 1,
balanceProcesses(const MPI &comm = MPI(MPI_COMM_WORLD), int method = 1,
const std::vector<int> &procs = std::vector<int>(),
const int N_min = 1, const int N_max = -1);
int N_min = 1, int N_max = -1);
//! Query the level of thread support
static ThreadSupport queryThreadSupport();
@ -420,7 +420,7 @@ public: // Member functions
* \param x The input/output array for the reduce
* \param n The number of values in the array (must match on all nodes)
*/
template <class type> void sumReduce(type *x, const int n = 1) const;
template <class type> void sumReduce(type *x, int n = 1) const;
/**
* \brief Sum Reduce
@ -432,7 +432,7 @@ public: // Member functions
* \param n The number of values in the array (must match on all nodes)
*/
template <class type>
void sumReduce(const type *x, type *y, const int n = 1) const;
void sumReduce(const type *x, type *y, int n = 1) const;
/**
* \brief Min Reduce
@ -457,7 +457,7 @@ public: // Member functions
* minimum value
*/
template <class type>
void minReduce(type *x, const int n = 1, int *rank_of_min = nullptr) const;
void minReduce(type *x, int n = 1, int *rank_of_min = nullptr) const;
/**
* \brief Sum Reduce
@ -475,7 +475,7 @@ public: // Member functions
* minimum value
*/
template <class type>
void minReduce(const type *x, type *y, const int n = 1,
void minReduce(const type *x, type *y, int n = 1,
int *rank_of_min = nullptr) const;
/**
@ -501,7 +501,7 @@ public: // Member functions
* minimum value
*/
template <class type>
void maxReduce(type *x, const int n = 1, int *rank_of_max = nullptr) const;
void maxReduce(type *x, int n = 1, int *rank_of_max = nullptr) const;
/**
* \brief Sum Reduce
@ -519,7 +519,7 @@ public: // Member functions
* minimum value
*/
template <class type>
void maxReduce(const type *x, type *y, const int n = 1,
void maxReduce(const type *x, type *y, int n = 1,
int *rank_of_max = nullptr) const;
/**
@ -530,8 +530,7 @@ public: // Member functions
* \param y The output array for the scan
* \param n The number of values in the array (must match on all nodes)
*/
template <class type>
void sumScan(const type *x, type *y, const int n = 1) const;
template <class type> void sumScan(const type *x, type *y, int n = 1) const;
/**
* \brief Scan Min Reduce
@ -541,8 +540,7 @@ public: // Member functions
* \param y The output array for the scan
* \param n The number of values in the array (must match on all nodes)
*/
template <class type>
void minScan(const type *x, type *y, const int n = 1) const;
template <class type> void minScan(const type *x, type *y, int n = 1) const;
/**
* \brief Scan Max Reduce
@ -552,8 +550,7 @@ public: // Member functions
* \param y The output array for the scan
* \param n The number of values in the array (must match on all nodes)
*/
template <class type>
void maxScan(const type *x, type *y, const int n = 1) const;
template <class type> void maxScan(const type *x, type *y, int n = 1) const;
/**
* \brief Broadcast
@ -561,7 +558,7 @@ public: // Member functions
* \param value The input value for the broadcast.
* \param root The processor performing the broadcast
*/
template <class type> type bcast(const type &value, const int root) const;
template <class type> type bcast(const type &value, int root) const;
/**
* \brief Broadcast
@ -570,8 +567,7 @@ public: // Member functions
* \param n The number of values in the array (must match on all nodes)
* \param root The processor performing the broadcast
*/
template <class type>
void bcast(type *value, const int n, const int root) const;
template <class type> void bcast(type *value, int n, int root) const;
/**
* Perform a global barrier across all processors.
@ -595,8 +591,7 @@ public: // Member functions
* The matching recv must share this tag.
*/
template <class type>
void send(const type *buf, const int length, const int recv,
int tag = 0) const;
void send(const type *buf, int length, int recv, int tag = 0) const;
/*!
* @brief This function sends an MPI message with an array of bytes
@ -611,8 +606,7 @@ public: // Member functions
* to be sent with this message. Default tag is 0.
* The matching recv must share this tag.
*/
void sendBytes(const void *buf, const int N_bytes, const int recv,
int tag = 0) const;
void sendBytes(const void *buf, int N_bytes, int recv, int tag = 0) const;
/*!
* @brief This function sends an MPI message with an array
@ -627,8 +621,8 @@ public: // Member functions
* to be sent with this message.
*/
template <class type>
MPI_Request Isend(const type *buf, const int length, const int recv_proc,
const int tag) const;
MPI_Request Isend(const type *buf, int length, int recv_proc,
int tag) const;
/*!
* @brief This function sends an MPI message with an array of bytes
@ -642,8 +636,8 @@ public: // Member functions
* @param tag Integer argument specifying an integer tag
* to be sent with this message.
*/
MPI_Request IsendBytes(const void *buf, const int N_bytes,
const int recv_proc, const int tag) const;
MPI_Request IsendBytes(const void *buf, int N_bytes, int recv_proc,
int tag) const;
/*!
* @brief This function receives an MPI message with a data
@ -662,7 +656,7 @@ public: // Member functions
* by the tag of the incoming message. Default tag is 0.
*/
template <class type>
inline void recv(type *buf, int length, const int send, int tag) const {
inline void recv(type *buf, int length, int send, int tag) const {
int length2 = length;
recv(buf, length2, send, false, tag);
}
@ -687,7 +681,7 @@ public: // Member functions
* by the tag of the incoming message. Default tag is 0.
*/
template <class type>
void recv(type *buf, int &length, const int send, const bool get_length,
void recv(type *buf, int &length, int send, const bool get_length,
int tag) const;
/*!
@ -703,7 +697,7 @@ public: // Member functions
* must be matched by the tag of the incoming message. Default
* tag is 0.
*/
void recvBytes(void *buf, int &N_bytes, const int send, int tag = 0) const;
void recvBytes(void *buf, int &N_bytes, int send, int tag = 0) const;
/*!
* @brief This function receives an MPI message with a data
@ -716,8 +710,7 @@ public: // Member functions
* be matched by the tag of the incoming message.
*/
template <class type>
MPI_Request Irecv(type *buf, const int length, const int send_proc,
const int tag) const;
MPI_Request Irecv(type *buf, int length, int send_proc, int tag) const;
/*!
* @brief This function receives an MPI message with an array of
@ -731,8 +724,8 @@ public: // Member functions
* @param tag Integer argument specifying a tag which must
* be matched by the tag of the incoming message.
*/
MPI_Request IrecvBytes(void *buf, const int N_bytes, const int send_proc,
const int tag) const;
MPI_Request IrecvBytes(void *buf, int N_bytes, int send_proc,
int tag) const;
/*!
* @brief This function sends and recieves data using a blocking call
@ -741,6 +734,39 @@ public: // Member functions
void sendrecv(const type *sendbuf, int sendcount, int dest, int sendtag,
type *recvbuf, int recvcount, int source, int recvtag) const;
/*!
* @brief This function sets up an Isend call (see MPI_Send_init)
* @param buf Pointer to array buffer with length integers.
* @param length Number of integers in buf that we want to send.
* @param recv_proc Receiving processor number.
* @param tag Tag to send
* @return Returns an MPI_Request.
* Note this returns a unique pointer so the user does not
* need to manually free the request
*/
template <class type>
std::shared_ptr<MPI_Request> Isend_init(const type *buf, int length, int recv_proc,
int tag) const;
/*!
* @brief This function sets up an Irecv call (see MPI_Recv_init)
* @param buf Pointer to integer array buffer with capacity of length integers.
* @param length Maximum number of values that can be stored in buf.
* @param send_proc Processor number of sender.
* @param tag Tag to match
* @return Returns an MPI_Request.
* Note this returns a unique pointer so the user does not
* need to manually free the request
*/
template <class type>
std::shared_ptr<MPI_Request> Irecv_init(type *buf, int length, int send_proc, int tag) const;
/*!
* @brief Start the MPI communication
* @param request Request to start
*/
void Start( MPI_Request &request );
/*!
* Each processor sends every other processor a single value.
* @param[in] x Input value for allGather
@ -792,7 +818,7 @@ public: // Member functions
* and the sizes and displacements will be returned (if desired).
*/
template <class type>
int allGather(const type *send_data, const int send_cnt, type *recv_data,
int allGather(const type *send_data, int send_cnt, type *recv_data,
int *recv_cnt = nullptr, int *recv_disp = nullptr,
bool known_recv = false) const;
@ -822,7 +848,7 @@ public: // Member functions
* @param recv_data Output array of received values (nxN)
*/
template <class type>
void allToAll(const int n, const type *send_data, type *recv_data) const;
void allToAll(int n, const type *send_data, type *recv_data) const;
/*!
* Each processor sends an array of data to the different processors.
@ -995,23 +1021,20 @@ public: // Member functions
MPI loadBalance(double localPerformance, std::vector<double> work);
private: // Private helper functions for templated MPI operations;
template <class type> void call_sumReduce(type *x, const int n = 1) const;
template <class type> void call_sumReduce(type *x, int n = 1) const;
template <class type>
void call_sumReduce(const type *x, type *y, const int n = 1) const;
void call_sumReduce(const type *x, type *y, int n = 1) const;
template <class type>
void call_minReduce(type *x, const int n = 1,
void call_minReduce(type *x, int n = 1, int *rank_of_min = nullptr) const;
template <class type>
void call_minReduce(const type *x, type *y, int n = 1,
int *rank_of_min = nullptr) const;
template <class type>
void call_minReduce(const type *x, type *y, const int n = 1,
int *rank_of_min = nullptr) const;
void call_maxReduce(type *x, int n = 1, int *rank_of_max = nullptr) const;
template <class type>
void call_maxReduce(type *x, const int n = 1,
void call_maxReduce(const type *x, type *y, int n = 1,
int *rank_of_max = nullptr) const;
template <class type>
void call_maxReduce(const type *x, type *y, const int n = 1,
int *rank_of_max = nullptr) const;
template <class type>
void call_bcast(type *x, const int n, const int root) const;
template <class type> void call_bcast(type *x, int n, int root) const;
template <class type>
void call_allGather(const type &x_in, type *x_out) const;
template <class type>

1429
common/Membrane.cpp Normal file

File diff suppressed because it is too large Load Diff

186
common/Membrane.h Normal file
View File

@ -0,0 +1,186 @@
/* Flow adaptor class for multiphase flow methods */
#ifndef ScaLBL_Membrane_INC
#define ScaLBL_Membrane_INC
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <iostream>
#include <exception>
#include <stdexcept>
#include <fstream>
#include "common/ScaLBL.h"
/**
* \brief Unpack D3Q19 distributions after communication using links determined based on membrane location
* @param q - index for distribution based on D3Q19 discrete velocity structure
* @param list - list of distributions to communicate
* @param links - list of active links based on the membrane location
* @param start - index to start parsing the list
* @param count - number of values to unppack
* @param recvbuf - memory buffer where recieved values have been stored
* @param dist - memory buffer to hold the distributions
* @param N - size of the distributions (derived from Domain structure)
*/
extern "C" void Membrane_D3Q19_Unpack(int q, int *list, int *links, int start, int count, double *recvbuf, double *dist, int N);
/**
* \brief Set custom link rules for D3Q19 distribution based on membrane location
* @param q - index for distribution based on D3Q19 discrete velocity structure
* @param list - list of distributions to communicate
* @param links - list of active links based on the membrane location
* @param coef - coefficient to determine the local mass transport for each membrane link
* @param start - index to start parsing the list
* @param offset - offset to start reading membrane links
* @param count - number of values to unppack
* @param recvbuf - memory buffer where recieved values have been stored
* @param dist - memory buffer to hold the distributions
* @param N - size of the distributions (derived from Domain structure)
*/
extern "C" void Membrane_D3Q19_Transport(int q, int *list, int *links, double *coef, int start, int offset,
int linkCount, double *recvbuf, double *dist, int N);
/**
* \class Membrane
* @brief
* The Membrane class operates on ScaLBL data structures to insert membrane
*
*/
class Membrane {
public:
int Np;
int Nx,Ny,Nz,N;
int membraneLinkCount;
int *initialNeighborList; // original neighborlist
int *NeighborList; // modified neighborlist
/* host data structures */
int *membraneLinks; // D3Q19 links that cross membrane
int *membraneTag; // label each link in the membrane
double *membraneDist; // distance to membrane for each linked site
double *membraneOrientation; // distance to membrane for each linked site
/*
* Device data structures
*/
int *MembraneLinks;
double *MembraneCoef; // mass transport coefficient for the membrane
double *MembraneDistance;
double *MembraneOrientation;
/**
* \brief Create a flow adaptor to operate on the LB model
* @param ScaLBL - originating data structures
* @param neighborList - list of neighbors for each site
*/
Membrane(std::shared_ptr <Domain> Dm, int *initialNeighborList, int Nsites);
/**
* \brief Destructor
*/
~Membrane();
/**
* \brief Create membrane
* \details Create membrane structure from signed distance function
* @param Dm - domain structure
* @param Distance - signed distance to membrane
* @param Map - mapping between regular layout and compact layout
*/
int Create(std::shared_ptr <Domain> Dm, DoubleArray &Distance, IntArray &Map);
void SendD3Q19AA(double *dist);
void RecvD3Q19AA(double *dist);
void SendD3Q7AA(double *dist);
void RecvD3Q7AA(double *dist);
void AssignCoefficients(int *Map, double *Psi, std::string method);
void IonTransport(double *dist, double *den);
//......................................................................................
// Buffers to store data sent and recieved by this MPI process
double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, *sendbuf_Z;
double *sendbuf_xy, *sendbuf_yz, *sendbuf_xz, *sendbuf_Xy, *sendbuf_Yz, *sendbuf_xZ;
double *sendbuf_xY, *sendbuf_yZ, *sendbuf_Xz, *sendbuf_XY, *sendbuf_YZ, *sendbuf_XZ;
double *recvbuf_x, *recvbuf_y, *recvbuf_z, *recvbuf_X, *recvbuf_Y, *recvbuf_Z;
double *recvbuf_xy, *recvbuf_yz, *recvbuf_xz, *recvbuf_Xy, *recvbuf_Yz, *recvbuf_xZ;
double *recvbuf_xY, *recvbuf_yZ, *recvbuf_Xz, *recvbuf_XY, *recvbuf_YZ, *recvbuf_XZ;
//......................................................................................
private:
bool Lock; // use Lock to make sure only one call at a time to protect data in transit
int sendtag, recvtag;
int iproc,jproc,kproc;
int nprocx,nprocy,nprocz;
// Give the object it's own MPI communicator
RankInfoStruct rank_info;
Utilities::MPI MPI_COMM_SCALBL; // MPI Communicator for this domain
MPI_Request req1[18],req2[18];
/**
* \brief Set up membrane communication
* \details associate p2p communication links to membrane where necessary
* returns the number of membrane links
* regular communications are stored in the first part of the list
* membrane communications are stored in the last part of the list
* @param Cqx - discrete velocity (x)
* @param Cqy - discrete velocity (y)
* @param Cqz - discrete velocity (z)
* @param list - list of recieved values
* @param count - number recieved values
* @param d3q19_recvlist - device array with the saved list
* @param d3q19_linkList - sorted list with regular and membrane links
* @param Distance - signed distance to membrane
* @param Map - data structure used to define mapping between dense and sparse representation
* */
int D3Q19_MapRecv(int Cqx, int Cqy, int Cqz, const int *list, int start, int count,
int *d3q19_recvlist, int *d3q19_linkList, DoubleArray &Distance, IntArray &Map);
//......................................................................................
// MPI ranks for all 18 neighbors
//......................................................................................
// These variables are all private to prevent external things from modifying them!!
//......................................................................................
int rank;
int rank_x,rank_y,rank_z,rank_X,rank_Y,rank_Z;
int rank_xy,rank_XY,rank_xY,rank_Xy;
int rank_xz,rank_XZ,rank_xZ,rank_Xz;
int rank_yz,rank_YZ,rank_yZ,rank_Yz;
//......................................................................................
int SendCount, RecvCount, CommunicationCount;
//......................................................................................
int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z;
int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ;
int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ;
//......................................................................................
int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z;
int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ;
int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ;
//......................................................................................
int linkCount_x[5], linkCount_y[5], linkCount_z[5], linkCount_X[5], linkCount_Y[5], linkCount_Z[5];
int linkCount_xy, linkCount_yz, linkCount_xz, linkCount_Xy, linkCount_Yz, linkCount_xZ;
int linkCount_xY, linkCount_yZ, linkCount_Xz, linkCount_XY, linkCount_YZ, linkCount_XZ;
//......................................................................................
// Send buffers that reside on the compute device
int *dvcSendList_x, *dvcSendList_y, *dvcSendList_z, *dvcSendList_X, *dvcSendList_Y, *dvcSendList_Z;
int *dvcSendList_xy, *dvcSendList_yz, *dvcSendList_xz, *dvcSendList_Xy, *dvcSendList_Yz, *dvcSendList_xZ;
int *dvcSendList_xY, *dvcSendList_yZ, *dvcSendList_Xz, *dvcSendList_XY, *dvcSendList_YZ, *dvcSendList_XZ;
// Recieve buffers that reside on the compute device
int *dvcRecvList_x, *dvcRecvList_y, *dvcRecvList_z, *dvcRecvList_X, *dvcRecvList_Y, *dvcRecvList_Z;
int *dvcRecvList_xy, *dvcRecvList_yz, *dvcRecvList_xz, *dvcRecvList_Xy, *dvcRecvList_Yz, *dvcRecvList_xZ;
int *dvcRecvList_xY, *dvcRecvList_yZ, *dvcRecvList_Xz, *dvcRecvList_XY, *dvcRecvList_YZ, *dvcRecvList_XZ;
// Link lists that reside on the compute device
int *dvcRecvLinks_x, *dvcRecvLinks_y, *dvcRecvLinks_z, *dvcRecvLinks_X, *dvcRecvLinks_Y, *dvcRecvLinks_Z;
int *dvcRecvLinks_xy, *dvcRecvLinks_yz, *dvcRecvLinks_xz, *dvcRecvLinks_Xy, *dvcRecvLinks_Yz, *dvcRecvLinks_xZ;
int *dvcRecvLinks_xY, *dvcRecvLinks_yZ, *dvcRecvLinks_Xz, *dvcRecvLinks_XY, *dvcRecvLinks_YZ, *dvcRecvLinks_XZ;
// Recieve buffers for the distributions
int *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, *dvcRecvDist_Y, *dvcRecvDist_Z;
int *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ;
int *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, *dvcRecvDist_YZ, *dvcRecvDist_XZ;
//......................................................................................
// mass transfer coefficient arrays
double *coefficient_x, *coefficient_X, *coefficient_y, *coefficient_Y, *coefficient_z, *coefficient_Z;
//......................................................................................
};
#endif

View File

@ -1,5 +1,5 @@
/*
Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University
Copyright 2013--2022 James E. McClure, Virginia Polytechnic & State University
Copyright Equnior ASA
This file is part of the Open Porous Media project (OPM).
@ -15,10 +15,8 @@
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
#include "common/ScaLBL.h"
#include <chrono>
ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr <Domain> Dm){
//......................................................................................
Lock=false; // unlock the communicator
@ -309,12 +307,12 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr <Domain> Dm){
MPI_COMM_SCALBL.barrier();
ScaLBL_DeviceBarrier();
//......................................................................................
SendCount = sendCount_x+sendCount_X+sendCount_y+sendCount_Y+sendCount_z+sendCount_Z+
SendCount = 5*(sendCount_x+sendCount_X+sendCount_y+sendCount_Y+sendCount_z+sendCount_Z)+
sendCount_xy+sendCount_Xy+sendCount_xY+sendCount_XY+
sendCount_xZ+sendCount_Xz+sendCount_xZ+sendCount_XZ+
sendCount_yz+sendCount_Yz+sendCount_yZ+sendCount_YZ;
RecvCount = recvCount_x+recvCount_X+recvCount_y+recvCount_Y+recvCount_z+recvCount_Z+
RecvCount = 5*(recvCount_x+recvCount_X+recvCount_y+recvCount_Y+recvCount_z+recvCount_Z)+
recvCount_xy+recvCount_Xy+recvCount_xY+recvCount_XY+
recvCount_xZ+recvCount_Xz+recvCount_xZ+recvCount_XZ+
recvCount_yz+recvCount_Yz+recvCount_yZ+recvCount_YZ;
@ -322,8 +320,49 @@ ScaLBL_Communicator::ScaLBL_Communicator(std::shared_ptr <Domain> Dm){
CommunicationCount = SendCount+RecvCount;
//......................................................................................
}
//...................................................................................
// Set up the persistent communication for D3Q19AA (use tags 130-145)
//...................................................................................
req_D3Q19AA.clear();
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_x, 5*sendCount_x, rank_x, 130 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_X, 5*recvCount_X, rank_X, 130 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_X, 5*sendCount_X, rank_X, 131 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_x, 5*recvCount_x, rank_x, 131 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_y, 5*sendCount_y, rank_y, 132 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Y, 5*recvCount_Y, rank_Y, 132 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Y, 5*sendCount_Y, rank_Y, 133 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_y, 5*recvCount_y, rank_y, 133 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_z, 5*sendCount_z, rank_z, 134 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Z, 5*recvCount_Z, rank_Z, 134 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Z, 5*sendCount_Z, rank_Z, 135 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_z, 5*recvCount_z, rank_z, 135 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xy, sendCount_xy, rank_xy, 136 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_XY, recvCount_XY, rank_XY, 136 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_XY, sendCount_XY, rank_XY, 137 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xy, recvCount_xy, rank_xy, 137 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Xy, sendCount_Xy, rank_Xy, 138 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xY, recvCount_xY, rank_xY, 138 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xY, sendCount_xY, rank_xY, 139 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Xy, recvCount_Xy, rank_Xy, 139 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xz, sendCount_xz, rank_xz, 140 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_XZ, recvCount_XZ, rank_XZ, 140 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_xZ, sendCount_xZ, rank_xZ, 143 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Xz, recvCount_Xz, rank_Xz, 143 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Xz, sendCount_Xz, rank_Xz, 142 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xZ, recvCount_xZ, rank_xZ, 142 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_XZ, sendCount_XZ, rank_XZ, 141 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_xz, recvCount_xz, rank_xz, 141 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_yz, sendCount_yz, rank_yz, 144 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_YZ, recvCount_YZ, rank_YZ, 144 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_yZ, sendCount_yZ, rank_yZ, 147 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_Yz, recvCount_Yz, rank_Yz, 147 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_Yz, sendCount_Yz, rank_Yz, 146 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_yZ, recvCount_yZ, rank_yZ, 146 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Isend_init( sendbuf_YZ, sendCount_YZ, rank_YZ, 145 ) );
req_D3Q19AA.push_back( MPI_COMM_SCALBL.Irecv_init( recvbuf_yz, recvCount_yz, rank_yz, 145 ) );
}
ScaLBL_Communicator::~ScaLBL_Communicator()
{
@ -419,6 +458,22 @@ ScaLBL_Communicator::~ScaLBL_Communicator()
ScaLBL_FreeDeviceMemory( dvcRecvDist_Yz );
ScaLBL_FreeDeviceMemory( dvcRecvDist_YZ );
}
void ScaLBL_Communicator::start( std::vector<std::shared_ptr<MPI_Request>>& requests )
{
for ( auto& req : requests )
MPI_COMM_SCALBL.Start( *req );
}
void ScaLBL_Communicator::wait( std::vector<std::shared_ptr<MPI_Request>>& requests )
{
std::vector<MPI_Request> request2;
for ( auto& req : requests )
request2.push_back( *req );
MPI_COMM_SCALBL.waitAll( request2.size(), request2.data() );
}
double ScaLBL_Communicator::GetPerformance(int *NeighborList, double *fq, int Np){
/* EACH MPI PROCESS GETS ITS OWN MEASUREMENT*/
/* use MRT kernels to check performance without communication / synchronization */
@ -830,8 +885,6 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis
idx=Map(n);
//if (rank == 0) printf("r: mapped n=%d\n",idx);
TempBuffer[i]=idx;
}
ScaLBL_CopyToDevice(dvcRecvDist_x,TempBuffer,5*recvCount_x*sizeof(int));
@ -988,7 +1041,6 @@ int ScaLBL_Communicator::MemoryOptimizedLayoutAA(IntArray &Map, int *neighborLis
return(Np);
}
void ScaLBL_Communicator::SetupBounceBackList(IntArray &Map, signed char *id, int Np, bool SlippingVelBC)
{
@ -1390,15 +1442,12 @@ void ScaLBL_Communicator::SolidSlippingVelocityBCD3Q19(double *fq, double *zeta_
void ScaLBL_Communicator::SendD3Q19AA(double *dist){
// NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2
if (Lock==true){
ERROR("ScaLBL Error (SendD3Q19): ScaLBL_Communicator is locked -- did you forget to match Send/Recv calls?");
}
else{
Lock=true;
}
// assign tag of 19 to D3Q19 communication
sendtag = recvtag = 19;
ScaLBL_DeviceBarrier();
// Pack the distributions
//...Packing for x face(2,8,10,12,14)................................
@ -1408,8 +1457,6 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){
ScaLBL_D3Q19_Pack(12,dvcSendList_x,3*sendCount_x,sendCount_x,sendbuf_x,dist,N);
ScaLBL_D3Q19_Pack(14,dvcSendList_x,4*sendCount_x,sendCount_x,sendbuf_x,dist,N);
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 5*sendCount_x,rank_x,sendtag);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 5*recvCount_X,rank_X,recvtag);
//...Packing for X face(1,7,9,11,13)................................
ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,dist,N);
ScaLBL_D3Q19_Pack(7,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,dist,N);
@ -1417,8 +1464,6 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){
ScaLBL_D3Q19_Pack(11,dvcSendList_X,3*sendCount_X,sendCount_X,sendbuf_X,dist,N);
ScaLBL_D3Q19_Pack(13,dvcSendList_X,4*sendCount_X,sendCount_X,sendbuf_X,dist,N);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 5*sendCount_X,rank_X,sendtag);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 5*recvCount_x,rank_x,recvtag);
//...Packing for y face(4,8,9,16,18).................................
ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,dist,N);
ScaLBL_D3Q19_Pack(8,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,dist,N);
@ -1426,8 +1471,6 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){
ScaLBL_D3Q19_Pack(16,dvcSendList_y,3*sendCount_y,sendCount_y,sendbuf_y,dist,N);
ScaLBL_D3Q19_Pack(18,dvcSendList_y,4*sendCount_y,sendCount_y,sendbuf_y,dist,N);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 5*sendCount_y,rank_y,sendtag);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 5*recvCount_Y,rank_Y,recvtag);
//...Packing for Y face(3,7,10,15,17).................................
ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,dist,N);
ScaLBL_D3Q19_Pack(7,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,dist,N);
@ -1435,78 +1478,52 @@ void ScaLBL_Communicator::SendD3Q19AA(double *dist){
ScaLBL_D3Q19_Pack(15,dvcSendList_Y,3*sendCount_Y,sendCount_Y,sendbuf_Y,dist,N);
ScaLBL_D3Q19_Pack(17,dvcSendList_Y,4*sendCount_Y,sendCount_Y,sendbuf_Y,dist,N);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 5*sendCount_Y,rank_Y,sendtag);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 5*recvCount_y,rank_y,recvtag);
//...Packing for z face(6,12,13,16,17)................................
ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,dist,N);
ScaLBL_D3Q19_Pack(12,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,dist,N);
ScaLBL_D3Q19_Pack(13,dvcSendList_z,2*sendCount_z,sendCount_z,sendbuf_z,dist,N);
ScaLBL_D3Q19_Pack(16,dvcSendList_z,3*sendCount_z,sendCount_z,sendbuf_z,dist,N);
ScaLBL_D3Q19_Pack(17,dvcSendList_z,4*sendCount_z,sendCount_z,sendbuf_z,dist,N);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 5*sendCount_z,rank_z,sendtag);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 5*recvCount_Z,rank_Z,recvtag);
//...Packing for Z face(5,11,14,15,18)................................
ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,dist,N);
ScaLBL_D3Q19_Pack(11,dvcSendList_Z,sendCount_Z,sendCount_Z,sendbuf_Z,dist,N);
ScaLBL_D3Q19_Pack(14,dvcSendList_Z,2*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N);
ScaLBL_D3Q19_Pack(15,dvcSendList_Z,3*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N);
ScaLBL_D3Q19_Pack(18,dvcSendList_Z,4*sendCount_Z,sendCount_Z,sendbuf_Z,dist,N);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 5*sendCount_Z,rank_Z,sendtag);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 5*recvCount_z,rank_z,recvtag);
//...Pack the xy edge (8)................................
//...Pack the xy edge (8)................................
ScaLBL_D3Q19_Pack(8,dvcSendList_xy,0,sendCount_xy,sendbuf_xy,dist,N);
req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag);
req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag);
//...Pack the Xy edge (9)................................
ScaLBL_D3Q19_Pack(9,dvcSendList_Xy,0,sendCount_Xy,sendbuf_Xy,dist,N);
req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag);
req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag);
//...Pack the xY edge (10)................................
ScaLBL_D3Q19_Pack(10,dvcSendList_xY,0,sendCount_xY,sendbuf_xY,dist,N);
req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag);
req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag);
//...Pack the XY edge (7)................................
ScaLBL_D3Q19_Pack(7,dvcSendList_XY,0,sendCount_XY,sendbuf_XY,dist,N);
req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag);
req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag);
//...Pack the xz edge (12)................................
ScaLBL_D3Q19_Pack(12,dvcSendList_xz,0,sendCount_xz,sendbuf_xz,dist,N);
req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag);
req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag);
//...Pack the xZ edge (14)................................
ScaLBL_D3Q19_Pack(14,dvcSendList_xZ,0,sendCount_xZ,sendbuf_xZ,dist,N);
req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag);
req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag);
//...Pack the Xz edge (13)................................
ScaLBL_D3Q19_Pack(13,dvcSendList_Xz,0,sendCount_Xz,sendbuf_Xz,dist,N);
req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag);
req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag);
//...Pack the XZ edge (11)................................
ScaLBL_D3Q19_Pack(11,dvcSendList_XZ,0,sendCount_XZ,sendbuf_XZ,dist,N);
req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag);
req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag);
//...Pack the yz edge (16)................................
ScaLBL_D3Q19_Pack(16,dvcSendList_yz,0,sendCount_yz,sendbuf_yz,dist,N);
req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag);
req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag);
//...Pack the yZ edge (18)................................
ScaLBL_D3Q19_Pack(18,dvcSendList_yZ,0,sendCount_yZ,sendbuf_yZ,dist,N);
req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag);
req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag);
//...Pack the Yz edge (17)................................
ScaLBL_D3Q19_Pack(17,dvcSendList_Yz,0,sendCount_Yz,sendbuf_Yz,dist,N);
req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag);
req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag);
//...Pack the YZ edge (15)................................
ScaLBL_D3Q19_Pack(15,dvcSendList_YZ,0,sendCount_YZ,sendbuf_YZ,dist,N);
req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag);
req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag);
//...................................................................................
ScaLBL_DeviceBarrier();
start( req_D3Q19AA );
}
void ScaLBL_Communicator::RecvD3Q19AA(double *dist){
@ -1514,8 +1531,7 @@ void ScaLBL_Communicator::RecvD3Q19AA(double *dist){
// NOTE: the center distribution f0 must NOT be at the start of feven, provide offset to start of f2
//...................................................................................
// Wait for completion of D3Q19 communication
MPI_COMM_SCALBL.waitAll(18,req1);
MPI_COMM_SCALBL.waitAll(18,req2);
wait( req_D3Q19AA );
ScaLBL_DeviceBarrier();
//...................................................................................
@ -1685,43 +1701,43 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){
Lock=true;
}
// assign tag of 19 to D3Q19 communication
sendtag = recvtag = 14;
sendtag = recvtag = 148;
ScaLBL_DeviceBarrier();
// Pack the distributions
//...Packing for x face(2,8,10,12,14)................................
ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,Aq,N);
ScaLBL_D3Q19_Pack(2,dvcSendList_x,sendCount_x,sendCount_x,sendbuf_x,Bq,N);
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 2*sendCount_x,rank_x,sendtag);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 2*recvCount_X,rank_X,recvtag);
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 2*sendCount_x, rank_x,sendtag+0);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 2*recvCount_X, rank_X,recvtag+0);
//...Packing for X face(1,7,9,11,13)................................
ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,Aq,N);
ScaLBL_D3Q19_Pack(1,dvcSendList_X,sendCount_X,sendCount_X,sendbuf_X,Bq,N);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 2*sendCount_X,rank_X,sendtag);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 2*recvCount_x,rank_x,recvtag);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 2*sendCount_X, rank_X,sendtag+1);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 2*recvCount_x, rank_x,recvtag+1);
//...Packing for y face(4,8,9,16,18).................................
ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,Aq,N);
ScaLBL_D3Q19_Pack(4,dvcSendList_y,sendCount_y,sendCount_y,sendbuf_y,Bq,N);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 2*sendCount_y,rank_y,sendtag);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 2*recvCount_Y,rank_Y,recvtag);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 2*sendCount_y, rank_y,sendtag+2);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 2*recvCount_Y, rank_Y,recvtag+2);
//...Packing for Y face(3,7,10,15,17).................................
ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,Aq,N);
ScaLBL_D3Q19_Pack(3,dvcSendList_Y,sendCount_Y,sendCount_Y,sendbuf_Y,Bq,N);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 2*sendCount_Y,rank_Y,sendtag);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 2*recvCount_y,rank_y,recvtag);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 2*sendCount_Y, rank_Y,sendtag+3);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 2*recvCount_y, rank_y,recvtag+3);
//...Packing for z face(6,12,13,16,17)................................
ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,Aq,N);
ScaLBL_D3Q19_Pack(6,dvcSendList_z,sendCount_z,sendCount_z,sendbuf_z,Bq,N);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 2*sendCount_z,rank_z,sendtag);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 2*recvCount_Z,rank_Z,recvtag);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 2*sendCount_z, rank_z,sendtag+4);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 2*recvCount_Z, rank_Z,recvtag+4);
//...Packing for Z face(5,11,14,15,18)................................
ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,Aq,N);
@ -1729,8 +1745,8 @@ void ScaLBL_Communicator::BiSendD3Q7AA(double *Aq, double *Bq){
//...................................................................................
// Send all the distributions
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 2*sendCount_Z,rank_Z,sendtag);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 2*recvCount_z,rank_z,recvtag);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 2*sendCount_Z, rank_Z,sendtag+5);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 2*recvCount_z, rank_z,recvtag+5);
}
@ -1801,39 +1817,39 @@ void ScaLBL_Communicator::SendD3Q7AA(double *Aq, int Component){
else{
Lock=true;
}
// assign tag of 19 to D3Q19 communication
sendtag = recvtag = 7;
// assign tag of 154 to D3Q19 communication
sendtag = recvtag = 154;
ScaLBL_DeviceBarrier();
// Pack the distributions
//...Packing for x face(2,8,10,12,14)................................
ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,&Aq[Component*7*N],N);
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x,rank_x,sendtag);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag);
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x, rank_x,sendtag+0);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X, rank_X,recvtag+0);
//...Packing for X face(1,7,9,11,13)................................
ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,&Aq[Component*7*N],N);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X,rank_X,sendtag);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X, rank_X,sendtag+1);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x, rank_x,recvtag+1);
//...Packing for y face(4,8,9,16,18).................................
ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,&Aq[Component*7*N],N);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y,rank_y,sendtag);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y, rank_y,sendtag+2);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y, rank_Y,recvtag+2);
//...Packing for Y face(3,7,10,15,17).................................
ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,&Aq[Component*7*N],N);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y, rank_Y,sendtag+3);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y, rank_y,recvtag+3);
//...Packing for z face(6,12,13,16,17)................................
ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,&Aq[Component*7*N],N);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z,rank_z,sendtag);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z, rank_z,sendtag+4);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z, rank_Z,recvtag+4);
//...Packing for Z face(5,11,14,15,18)................................
ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,&Aq[Component*7*N],N);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z, rank_Z,sendtag+5);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z, rank_z,recvtag+5);
}
@ -1896,7 +1912,7 @@ void ScaLBL_Communicator::TriSendD3Q7AA(double *Aq, double *Bq, double *Cq){
Lock=true;
}
// assign tag of 19 to D3Q19 communication
sendtag = recvtag = 15;
sendtag = recvtag = 162;
ScaLBL_DeviceBarrier();
// Pack the distributions
//...Packing for x face(2,8,10,12,14)................................
@ -1926,18 +1942,18 @@ void ScaLBL_Communicator::TriSendD3Q7AA(double *Aq, double *Bq, double *Cq){
//...................................................................................
// Send all the distributions
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 3*sendCount_x,rank_x,sendtag);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 3*recvCount_X,rank_X,recvtag);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 3*sendCount_X,rank_X,sendtag);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 3*recvCount_x,rank_x,recvtag);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 3*sendCount_y,rank_y,sendtag);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 3*recvCount_Y,rank_Y,recvtag);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 3*sendCount_Y,rank_Y,sendtag);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 3*recvCount_y,rank_y,recvtag);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 3*sendCount_z,rank_z,sendtag);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 3*recvCount_Z,rank_Z,recvtag);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 3*sendCount_Z,rank_Z,sendtag);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 3*recvCount_z,rank_z,recvtag);
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, 3*sendCount_x, rank_x,sendtag+0);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, 3*recvCount_X, rank_X,recvtag+0);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, 3*sendCount_X, rank_X,sendtag+1);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, 3*recvCount_x, rank_x,recvtag+1);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, 3*sendCount_y, rank_y,sendtag+2);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, 3*recvCount_Y, rank_Y,recvtag+2);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, 3*sendCount_Y, rank_Y,sendtag+3);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, 3*recvCount_y, rank_y,recvtag+3);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, 3*sendCount_z, rank_z,sendtag+4);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, 3*recvCount_Z, rank_Z,recvtag+4);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, 3*sendCount_Z, rank_Z,sendtag+5);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, 3*recvCount_z, rank_z,recvtag+5);
}
@ -2018,7 +2034,7 @@ void ScaLBL_Communicator::SendHalo(double *data){
}
ScaLBL_DeviceBarrier();
//...................................................................................
sendtag = recvtag = 1;
sendtag = recvtag = 168;
//...................................................................................
ScaLBL_Scalar_Pack(dvcSendList_x, sendCount_x,sendbuf_x, data, N);
ScaLBL_Scalar_Pack(dvcSendList_y, sendCount_y,sendbuf_y, data, N);
@ -2042,42 +2058,42 @@ void ScaLBL_Communicator::SendHalo(double *data){
// Send / Recv all the phase indcator field values
//...................................................................................
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x,rank_x,sendtag);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X,rank_X,sendtag);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y,rank_y,sendtag);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z,rank_z,sendtag);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag);
req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy,rank_xy,sendtag);
req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY,rank_XY,recvtag);
req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY,rank_XY,sendtag);
req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy,rank_xy,recvtag);
req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy,rank_Xy,sendtag);
req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY,rank_xY,recvtag);
req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY,rank_xY,sendtag);
req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy,rank_Xy,recvtag);
req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz,rank_xz,sendtag);
req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ,rank_XZ,recvtag);
req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ,rank_XZ,sendtag);
req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz,rank_xz,recvtag);
req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz,rank_Xz,sendtag);
req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ,rank_xZ,recvtag);
req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ,rank_xZ,sendtag);
req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz,rank_Xz,recvtag);
req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz,rank_yz,sendtag);
req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ,rank_YZ,recvtag);
req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ,rank_YZ,sendtag);
req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz,rank_yz,recvtag);
req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz,rank_Yz,sendtag);
req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ,rank_yZ,recvtag);
req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ,rank_yZ,sendtag);
req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz,rank_Yz,recvtag);
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x, rank_x,sendtag+0);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X, rank_X,recvtag+0);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X, rank_X,sendtag+1);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x, rank_x,recvtag+1);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y, rank_y,sendtag+2);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y, rank_Y,recvtag+2);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y, rank_Y,sendtag+3);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y, rank_y,recvtag+3);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z, rank_z,sendtag+4);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z, rank_Z,recvtag+4);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z, rank_Z,sendtag+5);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z, rank_z,recvtag+5);
req1[6] = MPI_COMM_SCALBL.Isend(sendbuf_xy, sendCount_xy, rank_xy,sendtag+6);
req2[6] = MPI_COMM_SCALBL.Irecv(recvbuf_XY, recvCount_XY, rank_XY,recvtag+6);
req1[7] = MPI_COMM_SCALBL.Isend(sendbuf_XY, sendCount_XY, rank_XY,sendtag+7);
req2[7] = MPI_COMM_SCALBL.Irecv(recvbuf_xy, recvCount_xy, rank_xy,recvtag+7);
req1[8] = MPI_COMM_SCALBL.Isend(sendbuf_Xy, sendCount_Xy, rank_Xy,sendtag+8);
req2[8] = MPI_COMM_SCALBL.Irecv(recvbuf_xY, recvCount_xY, rank_xY,recvtag+8);
req1[9] = MPI_COMM_SCALBL.Isend(sendbuf_xY, sendCount_xY, rank_xY,sendtag+9);
req2[9] = MPI_COMM_SCALBL.Irecv(recvbuf_Xy, recvCount_Xy, rank_Xy,recvtag+9);
req1[10] = MPI_COMM_SCALBL.Isend(sendbuf_xz, sendCount_xz, rank_xz,sendtag+10);
req2[10] = MPI_COMM_SCALBL.Irecv(recvbuf_XZ, recvCount_XZ, rank_XZ,recvtag+10);
req1[11] = MPI_COMM_SCALBL.Isend(sendbuf_XZ, sendCount_XZ, rank_XZ,sendtag+11);
req2[11] = MPI_COMM_SCALBL.Irecv(recvbuf_xz, recvCount_xz, rank_xz,recvtag+11);
req1[12] = MPI_COMM_SCALBL.Isend(sendbuf_Xz, sendCount_Xz, rank_Xz,sendtag+12);
req2[12] = MPI_COMM_SCALBL.Irecv(recvbuf_xZ, recvCount_xZ, rank_xZ,recvtag+12);
req1[13] = MPI_COMM_SCALBL.Isend(sendbuf_xZ, sendCount_xZ, rank_xZ,sendtag+13);
req2[13] = MPI_COMM_SCALBL.Irecv(recvbuf_Xz, recvCount_Xz, rank_Xz,recvtag+13);
req1[14] = MPI_COMM_SCALBL.Isend(sendbuf_yz, sendCount_yz, rank_yz,sendtag+14);
req2[14] = MPI_COMM_SCALBL.Irecv(recvbuf_YZ, recvCount_YZ, rank_YZ,recvtag+14);
req1[15] = MPI_COMM_SCALBL.Isend(sendbuf_YZ, sendCount_YZ, rank_YZ,sendtag+15);
req2[15] = MPI_COMM_SCALBL.Irecv(recvbuf_yz, recvCount_yz, rank_yz,recvtag+15);
req1[16] = MPI_COMM_SCALBL.Isend(sendbuf_Yz, sendCount_Yz, rank_Yz,sendtag+16);
req2[16] = MPI_COMM_SCALBL.Irecv(recvbuf_yZ, recvCount_yZ, rank_yZ,recvtag+16);
req1[17] = MPI_COMM_SCALBL.Isend(sendbuf_yZ, sendCount_yZ, rank_yZ,sendtag+17);
req2[17] = MPI_COMM_SCALBL.Irecv(recvbuf_Yz, recvCount_Yz, rank_Yz,recvtag+17);
//...................................................................................
}
void ScaLBL_Communicator::RecvHalo(double *data){

View File

@ -217,6 +217,25 @@ extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int
*/
extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz);
// MEMBRANE MODEL
extern "C" void ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef, double *dist, double *Den, int memLinks, int Np);
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int memLinks, int Nx, int Ny, int Nz, int Np);
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
const int Cqx, const int Cqy, int const Cqz,
int *Map, double *Distance, double *Psi, double Threshold,
double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
const int N, const int Nx, const int Ny, const int Nz);
extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q,
int *d3q7_recvlist, int *d3q7_linkList, int start, int nlinks, int count,
double *recvbuf, double *dist, int N, double *coef);
// GREYSCALE MODEL (Single-component)
extern "C" void ScaLBL_D3Q19_GreyIMRT_Init(double *Dist, int Np, double Den);
@ -262,6 +281,10 @@ extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor_CP(int *d_neighborList, int *M
//extern "C" void ScaLBL_Update_GreyscalePotential(int *Map, double *Phi, double *Psi, double *Poro, double *Perm, double alpha, double W,
// int start, int finish, int Np);
extern "C" void ScaLBL_D3Q19_AAeven_Compact( double *d_dist, int Np);
extern "C" void ScaLBL_D3Q19_AAodd_Compact( int *d_neighborList, double *d_dist, int Np);
// ION TRANSPORT MODEL
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np);
@ -278,7 +301,8 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDi
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np);
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np);
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, double IonValence, int ion_component, int start, int finish, int Np);
// LBM Poisson solver
@ -349,7 +373,22 @@ extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *d
* @param finish - lattice node to finish loop
* @param Np - size of local sub-domain (derived from Domain structure)
*/
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
double *dist, double *Den_charge,
double *Psi, double *ElectricField,
double tau, double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np);
extern "C" void ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
double *Den_charge, double *Psi,
double *ElectricField, double *Error, double tau,
double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np);
extern "C" void ScaLBL_D3Q19_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np);
// LBM Stokes Model (adapted from MRT model)
extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB,
@ -702,6 +741,14 @@ public:
double GetPerformance(int *NeighborList, double *fq, int Np);
int MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, signed char *id, int Np, int width);
/**
* \brief Create membrane data structure
* - cut lattice links based on distance map
* @param Distance - signed distance to membrane
* @param neighborList - data structure that retains lattice links
* @param Np - number of lattice sites
* @param width - halo width for the model
*/
void Barrier(){
ScaLBL_DeviceBarrier();
MPI_COMM_SCALBL.barrier();
@ -782,7 +829,6 @@ private:
int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ;
int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ;
//......................................................................................
int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z;
int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ;
int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ;
@ -799,6 +845,12 @@ private:
int *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, *dvcRecvDist_Y, *dvcRecvDist_Z;
int *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ;
int *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, *dvcRecvDist_YZ, *dvcRecvDist_XZ;
// MPI requests for persistent communications
std::vector<std::shared_ptr<MPI_Request>> req_D3Q19AA;
std::vector<std::shared_ptr<MPI_Request>> req_BiD3Q19AA;
std::vector<std::shared_ptr<MPI_Request>> req_TriD3Q19AA;
void start( std::vector<std::shared_ptr<MPI_Request>>& requests );
void wait( std::vector<std::shared_ptr<MPI_Request>>& requests );
//......................................................................................
int *bb_dist;
int *bb_interactions;

View File

@ -69,7 +69,7 @@ void Utilities::startup(int argc, char **argv, bool multiple) {
"thread support, thread support will be disabled"
<< std::endl;
}
StackTrace::globalCallStackInitialize(MPI_COMM_WORLD);
//StackTrace::globalCallStackInitialize(MPI_COMM_WORLD);
} else {
MPI_Init(&argc, &argv);
}
@ -86,7 +86,7 @@ void Utilities::shutdown() {
int rank = 0;
#ifdef USE_MPI
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
StackTrace::globalCallStackFinalize();
//StackTrace::globalCallStackFinalize();
MPI_Barrier(MPI_COMM_WORLD);
MPI_Finalize();
#endif

View File

@ -173,8 +173,7 @@
_Pragma( "GCC diagnostic ignored \"-Wunused-local-typedefs\"" ) \
_Pragma( "GCC diagnostic ignored \"-Woverloaded-virtual\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wunused-parameter\"" ) \
_Pragma( "GCC diagnostic ignored \"-Warray-bounds\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wterminate\"" )
_Pragma( "GCC diagnostic ignored \"-Warray-bounds\"" )
#define ENABLE_WARNINGS _Pragma( "GCC diagnostic pop" )
#else
#define DISABLE_WARNINGS

View File

@ -48,6 +48,7 @@ extern "C" void ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count,
}
}
extern "C" void ScaLBL_D3Q19_AA_Init(double *f_even, double *f_odd, int Np) {
int n;
for (n = 0; n < Np; n++) {
@ -1883,7 +1884,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_MRT(int *neighborList, double *dist,
}
}
extern "C" void ScaLBL_D3Q19_AAeven_Compact(char *ID, double *dist, int Np) {
extern "C" void ScaLBL_D3Q19_AAeven_Compact(double *dist, int Np) {
for (int n = 0; n < Np; n++) {
@ -1941,7 +1942,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_Compact(char *ID, double *dist, int Np) {
}
}
extern "C" void ScaLBL_D3Q19_AAodd_Compact(char *ID, int *neighborList,
extern "C" void ScaLBL_D3Q19_AAodd_Compact(int *neighborList,
double *dist, int Np) {
int nread;

View File

@ -1,4 +1,161 @@
#include <stdio.h>
#include <math.h>
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int memLinks, int Nx, int Ny, int Nz, int Np){
int link,iq,ip,nq,np,nqm,npm;
double aq, ap, membranePotential;
//double dq, dp, dist, orientation;
/* Interior Links */
for (link=0; link<memLinks; link++){
// inside //outside
aq = MassFractionIn; ap = MassFractionOut;
iq = membrane[2*link]; ip = membrane[2*link+1];
nq = iq%Np; np = ip%Np;
nqm = Map[nq]; npm = Map[np]; // strided layout
//dq = Distance[nqm]; dp = Distance[npm];
/* orientation for link to distance gradient*/
//orientation = 1.0/fabs(dq - dp);
/* membrane potential for this link */
membranePotential = Psi[nqm] - Psi[npm];
if (membranePotential > Threshold){
aq = ThresholdMassFractionIn; ap = ThresholdMassFractionOut;
}
/* Save the mass transfer coefficients */
//coef[2*link] = aq*orientation; coef[2*link+1] = ap*orientation;
coef[2*link] = aq; coef[2*link+1] = ap;
}
}
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
const int Cqx, const int Cqy, int const Cqz,
int *Map, double *Distance, double *Psi, double Threshold,
double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
const int N, const int Nx, const int Ny, const int Nz) {
//....................................................................................
// Unack distribution from the recv buffer
// Distribution q matche Cqx, Cqy, Cqz
// swap rule means that the distributions in recvbuf are OPPOSITE of q
// dist may be even or odd distributions stored by stream layout
//....................................................................................
int n, idx, link, nqm, npm, i, j, k;
double distanceLocal, distanceNonlocal;
double psiLocal, psiNonlocal, membranePotential;
double ap,aq; // coefficient
/* second enforce custom rule for membrane links */
for (link = nlinks; link < count; link++) {
// get the index for the recv list (deal with reordering of links)
idx = d3q7_linkList[link]; // THINK start NEEDS TO BE HERE
// get the distribution index
n = d3q7_recvlist[start+idx];
// get the index in strided layout
nqm = Map[n];
distanceLocal = Distance[nqm];
psiLocal = Psi[nqm];
// Get the 3-D indices from the send process
k = nqm/(Nx*Ny); j = (nqm-Nx*Ny*k)/Nx; i = nqm-Nx*Ny*k-Nx*j;
// Streaming link the non-local distribution
i -= Cqx; j -= Cqy; k -= Cqz;
npm = k*Nx*Ny + j*Nx + i;
distanceNonlocal = Distance[npm];
psiNonlocal = Psi[npm];
membranePotential = psiLocal - psiNonlocal;
aq = MassFractionIn;
ap = MassFractionOut;
/* link is inside membrane */
if (distanceLocal > 0.0){
if (membranePotential < Threshold*(-1.0)){
ap = MassFractionIn;
aq = MassFractionOut;
}
else {
ap = ThresholdMassFractionIn;
aq = ThresholdMassFractionOut;
}
}
else if (membranePotential > Threshold){
aq = ThresholdMassFractionIn;
ap = ThresholdMassFractionOut;
}
// update link based on mass transfer coefficients
coef[2*(link-nlinks)] = aq;
coef[2*(link-nlinks)+1] = ap;
}
}
extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q,
int *d3q7_recvlist, int *d3q7_linkList, int start, int nlinks, int count,
double *recvbuf, double *dist, int N, double *coef) {
//....................................................................................
// Unack distribution from the recv buffer
// Distribution q matche Cqx, Cqy, Cqz
// swap rule means that the distributions in recvbuf are OPPOSITE of q
// dist may be even or odd distributions stored by stream layout
//....................................................................................
int n, idx, link;
double fq,fp,fqq,ap,aq; // coefficient
/* First unpack the regular links */
for (link = 0; link < nlinks; link++) {
// get the index for the recv list (deal with reordering of links)
idx = d3q7_linkList[link];
// get the distribution index
n = d3q7_recvlist[start+idx];
if (!(n < 0)){
fp = recvbuf[start + idx];
dist[q * N + n] = fp;
}
//printf(" site=%i, index=%i, value = %e \n",n,idx,fp);
}
/* second enforce custom rule for membrane links */
for (link = nlinks; link < count; link++) {
// get the index for the recv list (deal with reordering of links)
idx = d3q7_linkList[link];
// get the distribution index
n = d3q7_recvlist[start+idx];
// update link based on mass transfer coefficients
if (!(n < 0)){
aq = coef[2*(link-nlinks)];
ap = coef[2*(link-nlinks)+1];
fq = dist[q * N + n];
fp = recvbuf[start + idx];
fqq = (1-aq)*fq+ap*fp;
dist[q * N + n] = fqq;
}
//printf(" LINK: site=%i, index=%i \n", n, idx);
}
}
extern "C" void ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef,
double *dist, double *Den, int memLinks, int Np){
int link,iq,ip,nq,np;
double aq, ap, fq, fp, fqq, fpp, Cq, Cp;
for (link=0; link<memLinks; link++){
// inside //outside
aq = coef[2*link]; ap = coef[2*link+1];
iq = membrane[2*link]; ip = membrane[2*link+1];
nq = iq%Np; np = ip%Np;
fq = dist[iq]; fp = dist[ip];
fqq = (1-aq)*fq+ap*fp; fpp = (1-ap)*fp+aq*fq;
Cq = Den[nq]; Cp = Den[np];
Cq += fqq - fq; Cp += fpp - fp;
Den[nq] = Cq; Den[np] = Cp;
dist[iq] = fqq; dist[ip] = fpp;
}
}
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList,
double *dist, double *Den,
@ -99,12 +256,12 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist,
double Ex, Ey, Ez; //electrical field
double flux_diffusive_x, flux_diffusive_y, flux_diffusive_z;
double f0, f1, f2, f3, f4, f5, f6;
double X,Y,Z,factor_x, factor_y, factor_z;
int nr1, nr2, nr3, nr4, nr5, nr6;
for (n = start; n < finish; n++) {
//Load data
Ci = Den[n];
Ex = ElectricField[n + 0 * Np];
Ey = ElectricField[n + 1 * Np];
Ez = ElectricField[n + 2 * Np];
@ -137,6 +294,7 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist,
f6 = dist[nr6];
// compute diffusive flux
Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
@ -149,33 +307,50 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist,
FluxElectrical[n + 0 * Np] = uEPx * Ci;
FluxElectrical[n + 1 * Np] = uEPy * Ci;
FluxElectrical[n + 2 * Np] = uEPz * Ci;
Den[n] = Ci;
/* use logistic function to prevent negative distributions*/
X = 4.0 * (ux + uEPx);
Y = 4.0 * (uy + uEPy);
Z = 4.0 * (uz + uEPz);
factor_x = X / sqrt(1 + X*X);
factor_y = Y / sqrt(1 + Y*Y);
factor_z = Z / sqrt(1 + Z*Z);
// q=0
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
// q = 1
dist[nr2] =
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
//f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
// q=2
dist[nr1] =
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
//f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
// q = 3
dist[nr4] =
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y );
//f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
// q = 4
dist[nr3] =
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
//f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
// q = 5
dist[nr6] =
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
//f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
// q = 6
dist[nr5] =
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
}
}
@ -190,11 +365,12 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion(
double Ex, Ey, Ez; //electrical field
double flux_diffusive_x, flux_diffusive_y, flux_diffusive_z;
double f0, f1, f2, f3, f4, f5, f6;
double X,Y,Z, factor_x, factor_y, factor_z;
for (n = start; n < finish; n++) {
//Load data
Ci = Den[n];
//Ci = Den[n];
Ex = ElectricField[n + 0 * Np];
Ey = ElectricField[n + 1 * Np];
Ez = ElectricField[n + 2 * Np];
@ -214,6 +390,7 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion(
f6 = dist[5 * Np + n];
// compute diffusive flux
Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
@ -226,33 +403,49 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion(
FluxElectrical[n + 0 * Np] = uEPx * Ci;
FluxElectrical[n + 1 * Np] = uEPy * Ci;
FluxElectrical[n + 2 * Np] = uEPz * Ci;
Den[n] = Ci;
/* use logistic function to prevent negative distributions*/
X = 4.0 * (ux + uEPx);
Y = 4.0 * (uy + uEPy);
Z = 4.0 * (uz + uEPz);
factor_x = X / sqrt(1 + X*X);
factor_y = Y / sqrt(1 + Y*Y);
factor_z = Z / sqrt(1 + Z*Z);
// q=0
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
// q = 1
dist[1 * Np + n] =
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
//f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
// q=2
dist[2 * Np + n] =
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
//f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
// q = 3
dist[3 * Np + n] =
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y);
//f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
// q = 4
dist[4 * Np + n] =
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
//f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
// q = 5
dist[5 * Np + n] =
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
//f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
// q = 6
dist[6 * Np + n] =
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
//f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
}
}

42
cpu/MembraneHelper.cpp Normal file
View File

@ -0,0 +1,42 @@
extern "C" void Membrane_D3Q19_Unpack(int q, int *list, int *links, int start, int linkCount,
double *recvbuf, double *dist, int N) {
//....................................................................................
// Unack distribution from the recv buffer
// Distribution q matche Cqx, Cqy, Cqz
// swap rule means that the distributions in recvbuf are OPPOSITE of q
// dist may be even or odd distributions stored by stream layout
//....................................................................................
int n, idx, link;
for (link=0; link<linkCount; link++){
idx = links[start+link];
// Get the value from the list -- note that n is the index is from the send (non-local) process
n = list[start + idx];
// unpack the distribution to the proper location
if (!(n < 0))
dist[q * N + n] = recvbuf[start + idx];
}
}
extern "C" void Membrane_D3Q19_Transport(int q, int *list, int *links, double *coef, int start, int offset,
int linkCount, double *recvbuf, double *dist, int N){
//....................................................................................
// Unack distribution from the recv buffer
// Distribution q matche Cqx, Cqy, Cqz
// swap rule means that the distributions in recvbuf are OPPOSITE of q
// dist may be even or odd distributions stored by stream layout
//....................................................................................
int n, idx, link;
double alpha;
for (link=offset; link<linkCount; link++){
idx = list[start+link];
// Get the value from the list -- note that n is the index is from the send (non-local) process
n = list[start + idx];
alpha = coef[start + idx];
// unpack the distribution to the proper location
if (!(n < 0))
dist[q * N + n] = alpha*recvbuf[start + idx];
}
}

View File

@ -1,3 +1,4 @@
#include <math.h>
extern "C" void
ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList, int *Map,
@ -150,25 +151,25 @@ extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map,
ElectricField[n + 2 * Np] = Ez;
// q = 0
dist[n] = f0 * (1.0 - rlx) + 0.25 * (rlx * psi + rho_e);
dist[n] = f0 * (1.0 - rlx) + 0.25 * (rlx * psi) - rho_e;
// q = 1
dist[nr2] = f1 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
dist[nr2] = f1 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
// q = 2
dist[nr1] = f2 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
dist[nr1] = f2 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
// q = 3
dist[nr4] = f3 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
dist[nr4] = f3 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
// q = 4
dist[nr3] = f4 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
dist[nr3] = f4 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
// q = 5
dist[nr6] = f5 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
dist[nr6] = f5 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
// q = 6
dist[nr5] = f6 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
dist[nr5] = f6 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
//........................................................................
}
}
@ -213,25 +214,25 @@ extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist,
ElectricField[n + 2 * Np] = Ez;
// q = 0
dist[n] = f0 * (1.0 - rlx) + 0.25 * (rlx * psi + rho_e);
dist[n] = f0 * (1.0 - rlx) + 0.25 * (rlx * psi) - rho_e;
// q = 1
dist[1 * Np + n] = f1 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
dist[1 * Np + n] = f1 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
// q = 2
dist[2 * Np + n] = f2 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
dist[2 * Np + n] = f2 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
// q = 3
dist[3 * Np + n] = f3 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
dist[3 * Np + n] = f3 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
// q = 4
dist[4 * Np + n] = f4 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
dist[4 * Np + n] = f4 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
// q = 5
dist[5 * Np + n] = f5 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
dist[5 * Np + n] = f5 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
// q = 6
dist[6 * Np + n] = f6 * (1.0 - rlx) + 0.125 * (rlx * psi + rho_e);
dist[6 * Np + n] = f6 * (1.0 - rlx) + 0.125 * (rlx * psi) - rho_e;
//........................................................................
}
}
@ -444,34 +445,503 @@ extern "C" void ScaLBL_D3Q7_PoissonResidualError(
// }
//}
//extern "C" void ScaLBL_D3Q7_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np){
// int n;
// // distributions
// double f1,f2,f3,f4,f5,f6;
// double Ex,Ey,Ez;
// double rlx=1.0/tau;
//
// for (n=0; n<Np; n++){
// //........................................................................
// // Registers to store the distributions
// //........................................................................
// f1 = dist[Np+n];
// f2 = dist[2*Np+n];
// f3 = dist[3*Np+n];
// f4 = dist[4*Np+n];
// f5 = dist[5*Np+n];
// f6 = dist[6*Np+n];
// //.................Compute the Electric Field...................................
// //Ex = (f1-f2)*rlx*4.5;//NOTE the unit of electric field here is V/lu
// //Ey = (f3-f4)*rlx*4.5;
// //Ez = (f5-f6)*rlx*4.5;
// Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu
// Ey = (f3-f4)*rlx*4.0;
// Ez = (f5-f6)*rlx*4.0;
// //..................Write the Electric Field.....................................
// ElectricField[0*Np+n] = Ex;
// ElectricField[1*Np+n] = Ey;
// ElectricField[2*Np+n] = Ez;
// //........................................................................
// }
//}
extern "C" void ScaLBL_D3Q19_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np){
int n;
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
f16, f17, f18;
double Ex,Ey,Ez;
double rlx=1.0/tau;
for (n=0; n<Np; n++){
//........................................................................
// Registers to store the distributions
//........................................................................
f1 = dist[2 * Np + n];
f2 = dist[1 * Np + n];
f3 = dist[4 * Np + n];
f4 = dist[3 * Np + n];
f5 = dist[6 * Np + n];
f6 = dist[5 * Np + n];
f7 = dist[8 * Np + n];
f8 = dist[7 * Np + n];
f9 = dist[10 * Np + n];
f10 = dist[9 * Np + n];
f11 = dist[12 * Np + n];
f12 = dist[11 * Np + n];
f13 = dist[14 * Np + n];
f14 = dist[13 * Np + n];
f15 = dist[16 * Np + n];
f16 = dist[15 * Np + n];
f17 = dist[18 * Np + n];
f18 = dist[17 * Np + n];
//.................Compute the Electric Field...................................
Ex = (f1 - f2 + f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14)*rlx*3.0;//NOTE the unit of electric field here is V/lu
Ey = (f3 - f4 + f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18)*rlx*3.0;
Ez = (f5 - f6 + f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18)*rlx*3.0;
//..................Write the Electric Field.....................................
ElectricField[0*Np+n] = Ex;
ElectricField[1*Np+n] = Ey;
ElectricField[2*Np+n] = Ez;
//........................................................................
}
}
extern "C" void
ScaLBL_D3Q19_AAodd_Poisson_ElectricPotential(int *neighborList, int *Map,
double *dist, double *Den_charge, double *Psi,
double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
int n;
double psi,sum; //electric potential
double rho_e; //local charge density
double Gs;
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
f16, f17, f18;
int nr1, nr2, nr3, nr4, nr5, nr6, nr7, nr8, nr9, nr10, nr11, nr12, nr13,
nr14, nr15, nr16, nr17, nr18;
int idx;
for (n = start; n < finish; n++) {
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
nr2 = neighborList[n + Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n + 2 * Np]; // neighbor 4
f3 = dist[nr3];
// q = 4
nr4 = neighborList[n + 3 * Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n + 4 * Np];
f5 = dist[nr5];
// q = 6
nr6 = neighborList[n + 5 * Np];
f6 = dist[nr6];
// q=7
nr7 = neighborList[n + 6 * Np];
f7 = dist[nr7];
// q = 8
nr8 = neighborList[n + 7 * Np];
f8 = dist[nr8];
// q=9
nr9 = neighborList[n + 8 * Np];
f9 = dist[nr9];
// q = 10
nr10 = neighborList[n + 9 * Np];
f10 = dist[nr10];
// q=11
nr11 = neighborList[n + 10 * Np];
f11 = dist[nr11];
// q=12
nr12 = neighborList[n + 11 * Np];
f12 = dist[nr12];
// q=13
nr13 = neighborList[n + 12 * Np];
f13 = dist[nr13];
// q=14
nr14 = neighborList[n + 13 * Np];
f14 = dist[nr14];
// q=15
nr15 = neighborList[n + 14 * Np];
f15 = dist[nr15];
// q=16
nr16 = neighborList[n + 15 * Np];
f16 = dist[nr16];
// q=17
//fq = dist[18*Np+n];
nr17 = neighborList[n + 16 * Np];
f17 = dist[nr17];
// q=18
nr18 = neighborList[n + 17 * Np];
f18 = dist[nr18];
psi = f0 + f2 + f1 + f4 + f3 + f6 + f5 + f8 + f7 + f10 + f9 + f12 +
f11 + f14 + f13 + f16 + f15 + f18 + f17;
idx = Map[n];
Psi[idx] = psi - 0.5*rho_e;
}
}
extern "C" void ScaLBL_D3Q19_AAeven_Poisson_ElectricPotential(
int *Map, double *dist, double *Den_charge, double *Psi, double epsilon_LB, bool UseSlippingVelBC, int start, int finish, int Np) {
int n;
double psi,sum; //electric potential
double rho_e; //local charge density
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
f16, f17, f18;
double Gs;
int idx;
for (n = start; n < finish; n++) {
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
//........................................................................
// q=0
f0 = dist[n];
f1 = dist[2 * Np + n];
f2 = dist[1 * Np + n];
f3 = dist[4 * Np + n];
f4 = dist[3 * Np + n];
f5 = dist[6 * Np + n];
f6 = dist[5 * Np + n];
f7 = dist[8 * Np + n];
f8 = dist[7 * Np + n];
f9 = dist[10 * Np + n];
f10 = dist[9 * Np + n];
f11 = dist[12 * Np + n];
f12 = dist[11 * Np + n];
f13 = dist[14 * Np + n];
f14 = dist[13 * Np + n];
f15 = dist[16 * Np + n];
f16 = dist[15 * Np + n];
f17 = dist[18 * Np + n];
f18 = dist[17 * Np + n];
psi = f0 + f2 + f1 + f4 + f3 + f6 + f5 + f8 + f7 + f10 + f9 + f12 +
f11 + f14 + f13 + f16 + f15 + f18 + f17;
idx = Map[n];
Psi[idx] = psi - 0.5*rho_e;
}
}
extern "C" void ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
double *dist, double *Den_charge,
double *Psi, double *ElectricField,
double tau, double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
int n;
double psi; //electric potential
double Ex, Ey, Ez; //electric field
double rho_e; //local charge density
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
f16, f17, f18;
int nr1, nr2, nr3, nr4, nr5, nr6, nr7, nr8, nr9, nr10, nr11, nr12, nr13,
nr14, nr15, nr16, nr17, nr18;
double error,sum_q;
double rlx = 1.0 / tau;
int idx;
double W0 = 0.5;
double W1 = 1.0/24.0;
double W2 = 1.0/48.0;
for (n = start; n < finish; n++) {
//Load data
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
//and thus the net space charge density is zero.
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
nr2 = neighborList[n + Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n + 2 * Np]; // neighbor 4
f3 = dist[nr3];
// q = 4
nr4 = neighborList[n + 3 * Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n + 4 * Np];
f5 = dist[nr5];
// q = 6
nr6 = neighborList[n + 5 * Np];
f6 = dist[nr6];
// q=7
nr7 = neighborList[n + 6 * Np];
f7 = dist[nr7];
// q = 8
nr8 = neighborList[n + 7 * Np];
f8 = dist[nr8];
// q=9
nr9 = neighborList[n + 8 * Np];
f9 = dist[nr9];
// q = 10
nr10 = neighborList[n + 9 * Np];
f10 = dist[nr10];
// q=11
nr11 = neighborList[n + 10 * Np];
f11 = dist[nr11];
// q=12
nr12 = neighborList[n + 11 * Np];
f12 = dist[nr12];
// q=13
nr13 = neighborList[n + 12 * Np];
f13 = dist[nr13];
// q=14
nr14 = neighborList[n + 13 * Np];
f14 = dist[nr14];
// q=15
nr15 = neighborList[n + 14 * Np];
f15 = dist[nr15];
// q=16
nr16 = neighborList[n + 15 * Np];
f16 = dist[nr16];
// q=17
//fq = dist[18*Np+n];
nr17 = neighborList[n + 16 * Np];
f17 = dist[nr17];
// q=18
nr18 = neighborList[n + 17 * Np];
f18 = dist[nr18];
sum_q = f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15+f16+f17+f18;
error = 8.0*(sum_q - f0) + rho_e;
psi = 2.0*(f0*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));
idx = Map[n];
Psi[idx] = psi;
Ex = (f1 - f2 + 0.5*(f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14))*4.0; //NOTE the unit of electric field here is V/lu
Ey = (f3 - f4 + 0.5*(f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18))*4.0;
Ez = (f5 - f6 + 0.5*(f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18))*4.0;
ElectricField[n + 0 * Np] = Ex;
ElectricField[n + 1 * Np] = Ey;
ElectricField[n + 2 * Np] = Ez;
// q = 0
dist[n] = W0*psi; //f0 * (1.0 - rlx) - (1.0-0.5*rlx)*W0*rho_e;
// q = 1
dist[nr2] = W1*psi; //f1 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 2
dist[nr1] = W1*psi; //f2 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 3
dist[nr4] = W1*psi; //f3 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 4
dist[nr3] = W1*psi; //f4 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 5
dist[nr6] = W1*psi; //f5 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 6
dist[nr5] = W1*psi; //f6 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
//........................................................................
// q = 7
dist[nr8] = W2*psi; //f7 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 8
dist[nr7] = W2*psi; //f8 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 9
dist[nr10] = W2*psi; //f9 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 10
dist[nr9] = W2*psi; //f10 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 11
dist[nr12] = W2*psi; //f11 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 12
dist[nr11] = W2*psi; //f12 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 13
dist[nr14] = W2*psi; //f13 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q= 14
dist[nr13] = W2*psi; //f14 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 15
dist[nr16] = W2*psi; //f15 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 16
dist[nr15] = W2*psi; //f16 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 17
dist[nr18] = W2*psi; //f17 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 18
dist[nr17] = W2*psi; //f18 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
}
}
extern "C" void ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
double *Den_charge, double *Psi,
double *ElectricField, double *Error, double tau,
double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
int n;
double psi; //electric potential
double Ex, Ey, Ez; //electric field
double rho_e; //local charge density
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
f16, f17, f18;
double error,sum_q;
double rlx = 1.0 / tau;
int idx;
double W0 = 0.5;
double W1 = 1.0/24.0;
double W2 = 1.0/48.0;
for (n = start; n < finish; n++) {
//Load data
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
//and thus the net space charge density is zero.
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
f0 = dist[n];
f1 = dist[2 * Np + n];
f2 = dist[1 * Np + n];
f3 = dist[4 * Np + n];
f4 = dist[3 * Np + n];
f5 = dist[6 * Np + n];
f6 = dist[5 * Np + n];
f7 = dist[8 * Np + n];
f8 = dist[7 * Np + n];
f9 = dist[10 * Np + n];
f10 = dist[9 * Np + n];
f11 = dist[12 * Np + n];
f12 = dist[11 * Np + n];
f13 = dist[14 * Np + n];
f14 = dist[13 * Np + n];
f15 = dist[16 * Np + n];
f16 = dist[15 * Np + n];
f17 = dist[18 * Np + n];
f18 = dist[17 * Np + n];
/* Ex = (f1 - f2) * rlx *
4.0; //NOTE the unit of electric field here is V/lu
Ey = (f3 - f4) * rlx *
4.0; //factor 4.0 is D3Q7 lattice squared speed of sound
Ez = (f5 - f6) * rlx * 4.0;
*/
Ex = (f1 - f2 + 0.5*(f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14))*4.0; //NOTE the unit of electric field here is V/lu
Ey = (f3 - f4 + 0.5*(f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18))*4.0;
Ez = (f5 - f6 + 0.5*(f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18))*4.0;
ElectricField[n + 0 * Np] = Ex;
ElectricField[n + 1 * Np] = Ey;
ElectricField[n + 2 * Np] = Ez;
sum_q = f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15+f16+f17+f18;
error = 8.0*(sum_q - f0) + rho_e;
Error[n] = error;
psi = 2.0*(f0*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));
idx = Map[n];
Psi[idx] = psi;
// q = 0
dist[n] = W0*psi;//
// q = 1
dist[1 * Np + n] = W1*psi;//f1 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 2
dist[2 * Np + n] = W1*psi;//f2 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 3
dist[3 * Np + n] = W1*psi;//f3 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 4
dist[4 * Np + n] = W1*psi;//f4 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 5
dist[5 * Np + n] = W1*psi;//f5 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 6
dist[6 * Np + n] = W1*psi;//f6 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
dist[7 * Np + n] = W2*psi;//f7 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[8 * Np + n] = W2*psi;//f8* (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[9 * Np + n] = W2*psi;//f9 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[10 * Np + n] = W2*psi;//f10 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[11 * Np + n] = W2*psi;//f11 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[12 * Np + n] = W2*psi;//f12 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[13 * Np + n] = W2*psi;//f13 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[14 * Np + n] = W2*psi;//f14 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[15 * Np + n] = W2*psi;//f15 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[16 * Np + n] = W2*psi;//f16 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[17 * Np + n] = W2*psi;//f17 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[18 * Np + n] = W2*psi;//f18 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
//........................................................................
}
}
extern "C" void ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
int start, int finish, int Np) {
int n;
int ijk;
double W0 = 0.5;
double W1 = 1.0/24.0;
double W2 = 1.0/48.0;
for (n = start; n < finish; n++) {
ijk = Map[n];
dist[0 * Np + n] = W0 * Psi[ijk];//3333333333333333* Psi[ijk];
dist[1 * Np + n] = W1 * Psi[ijk];
dist[2 * Np + n] = W1 * Psi[ijk];
dist[3 * Np + n] = W1 * Psi[ijk];
dist[4 * Np + n] = W1 * Psi[ijk];
dist[5 * Np + n] = W1 * Psi[ijk];
dist[6 * Np + n] = W1 * Psi[ijk];
dist[7 * Np + n] = W2* Psi[ijk];
dist[8 * Np + n] = W2* Psi[ijk];
dist[9 * Np + n] = W2* Psi[ijk];
dist[10 * Np + n] = W2* Psi[ijk];
dist[11 * Np + n] = W2* Psi[ijk];
dist[12 * Np + n] = W2* Psi[ijk];
dist[13 * Np + n] = W2* Psi[ijk];
dist[14 * Np + n] = W2* Psi[ijk];
dist[15 * Np + n] = W2* Psi[ijk];
dist[16 * Np + n] = W2* Psi[ijk];
dist[17 * Np + n] = W2* Psi[ijk];
dist[18 * Np + n] = W2* Psi[ijk];
}
}

View File

@ -1,7 +1,7 @@
#include <stdio.h>
#define NBLOCKS 1024
#define NTHREADS 256
#define NTHREADS 512
__global__ void dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
int n;

View File

@ -290,7 +290,7 @@ __global__ void dvc_ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *distev
//__launch_bounds__(512,4)
__global__ void
dvc_ScaLBL_AAodd_Compact(char * ID, int *d_neighborList, double *dist, int Np) {
dvc_ScaLBL_AAodd_Compact(int *d_neighborList, double *dist, int Np) {
int n;
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
@ -1321,7 +1321,7 @@ dvc_ScaLBL_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_se
//__launch_bounds__(512,4)
__global__ void dvc_ScaLBL_AAeven_Compact(char * ID, double *dist, int Np) {
__global__ void dvc_ScaLBL_AAeven_Compact( double *dist, int Np) {
int n;
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
@ -2390,18 +2390,18 @@ extern "C" void ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *disteven, d
}
}
extern "C" void ScaLBL_D3Q19_AAeven_Compact(char * ID, double *d_dist, int Np) {
extern "C" void ScaLBL_D3Q19_AAeven_Compact( double *d_dist, int Np) {
cudaFuncSetCacheConfig(dvc_ScaLBL_AAeven_Compact, cudaFuncCachePreferL1);
dvc_ScaLBL_AAeven_Compact<<<NBLOCKS,NTHREADS>>>(ID, d_dist, Np);
dvc_ScaLBL_AAeven_Compact<<<NBLOCKS,NTHREADS>>>(d_dist, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q19_AAodd_Compact(char * ID, int *d_neighborList, double *d_dist, int Np) {
extern "C" void ScaLBL_D3Q19_AAodd_Compact( int *d_neighborList, double *d_dist, int Np) {
cudaFuncSetCacheConfig(dvc_ScaLBL_AAodd_Compact, cudaFuncCachePreferL1);
dvc_ScaLBL_AAodd_Compact<<<NBLOCKS,NTHREADS>>>(ID,d_neighborList, d_dist,Np);
dvc_ScaLBL_AAodd_Compact<<<NBLOCKS,NTHREADS>>>(d_neighborList, d_dist,Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err));

View File

@ -6,6 +6,16 @@
#define NTHREADS 256
#define CHECK_ERROR(KERNEL) \
do { \
auto err = cudaGetLastError(); \
if ( cudaSuccess != err ){ \
auto errString = cudaGetErrorString(err); \
printf("error in %s (kernel): %s \n",KERNEL,errString); \
} \
} while(0)
__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
@ -740,28 +750,19 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z(int *d_neighbor
extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Solid_Dirichlet_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_Solid_Dirichlet_D3Q7 (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_Solid_Dirichlet_D3Q7");
}
extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Solid_Neumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_Solid_Neumann_D3Q7 (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_Solid_Neumann_D3Q7");
}
extern "C" void ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist, double *BoundaryValue,int *BoundaryLabel, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BoundaryLabel, BounceBackDist_list, BounceBackSolid_list, count);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_Solid_DirichletAndNeumann_D3Q7 (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_Solid_DirichletAndNeumann_D3Q7");
}
extern "C" void ScaLBL_Solid_SlippingVelocityBC_D3Q19(double *dist, double *zeta_potential, double *ElectricField, double *SolidGrad,
@ -775,211 +776,142 @@ extern "C" void ScaLBL_Solid_SlippingVelocityBC_D3Q19(double *dist, double *zeta
BounceBackDist_list, BounceBackSolid_list, FluidBoundary_list,
lattice_weight, lattice_cx, lattice_cy, lattice_cz,
count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_Solid_SlippingVelocityBC_D3Q19 (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_Solid_SlippingVelocityBC_D3Q19");
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<<GRID,512>>>(list, dist, Vin, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<<GRID,512>>>(list, dist, Vout, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Vin, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Vout, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z");
}
extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Poisson_D3Q7_BC_z<<<GRID,512>>>(list, Map, Psi, Vin, count);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_Poisson_D3Q7_BC_z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_Poisson_D3Q7_BC_z");
}
extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Poisson_D3Q7_BC_Z<<<GRID,512>>>(list, Map, Psi, Vout, count);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_Poisson_D3Q7_BC_Z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_Poisson_D3Q7_BC_Z");
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<<GRID,512>>>(list, dist, Cin, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<<GRID,512>>>(list, dist, Cout, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Cin, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Cout, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z");
}
//------------Diff-----------------
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z");
}
//----------DiffAdvc-------------
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z");
}
//----------DiffAdvcElec-------------
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
double Di, double zi, double Vt, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
double Di, double zi, double Vt, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
double Di, double zi, double Vt, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
double Di, double zi, double Vt, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z");
}
//-------------------------------

View File

@ -5,6 +5,224 @@
#define NBLOCKS 1024
#define NTHREADS 256
extern "C" void Membrane_D3Q19_Unpack(int q, int *list, int *links, int start, int linkCount,
double *recvbuf, double *dist, int N) {
//....................................................................................
// Unack distribution from the recv buffer
// Distribution q matche Cqx, Cqy, Cqz
// swap rule means that the distributions in recvbuf are OPPOSITE of q
// dist may be even or odd distributions stored by stream layout
//....................................................................................
int n, idx, link;
for (link=0; link<linkCount; link++){
idx = links[start+link];
// Get the value from the list -- note that n is the index is from the send (non-local) process
n = list[start + idx];
// unpack the distribution to the proper location
if (!(n < 0))
dist[q * N + n] = recvbuf[start + idx];
}
}
extern "C" void Membrane_D3Q19_Transport(int q, int *list, int *links, double *coef, int start, int offset,
int linkCount, double *recvbuf, double *dist, int N){
//....................................................................................
// Unack distribution from the recv buffer
// Distribution q matche Cqx, Cqy, Cqz
// swap rule means that the distributions in recvbuf are OPPOSITE of q
// dist may be even or odd distributions stored by stream layout
//....................................................................................
int n, idx, link;
double alpha;
for (link=offset; link<linkCount; link++){
idx = list[start+link];
// Get the value from the list -- note that n is the index is from the send (non-local) process
n = list[start + idx];
alpha = coef[start + idx];
// unpack the distribution to the proper location
if (!(n < 0))
dist[q * N + n] = alpha*recvbuf[start + idx];
}
}
__global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int memLinks, int Nx, int Ny, int Nz, int Np){
int link,iq,ip,nq,np,nqm,npm;
double aq, ap, membranePotential;
/* Interior Links */
int S = memLinks/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (link < memLinks) {
// inside //outside
aq = MassFractionIn; ap = MassFractionOut;
iq = membrane[2*link]; ip = membrane[2*link+1];
nq = iq%Np; np = ip%Np;
nqm = Map[nq]; npm = Map[np]; // strided layout
/* membrane potential for this link */
membranePotential = Psi[nqm] - Psi[npm];
if (membranePotential > Threshold){
aq = ThresholdMassFractionIn; ap = ThresholdMassFractionOut;
}
/* Save the mass transfer coefficients */
coef[2*link] = aq; coef[2*link+1] = ap;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
const int Cqx, const int Cqy, int const Cqz,
int *Map, double *Distance, double *Psi, double Threshold,
double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
const int N, const int Nx, const int Ny, const int Nz) {
//....................................................................................
// Unack distribution from the recv buffer
// Distribution q matche Cqx, Cqy, Cqz
// swap rule means that the distributions in recvbuf are OPPOSITE of q
// dist may be even or odd distributions stored by stream layout
//....................................................................................
int n, idx, link, nqm, npm, i, j, k;
double distanceLocal, distanceNonlocal;
double psiLocal, psiNonlocal, membranePotential;
double ap,aq; // coefficient
/* second enforce custom rule for membrane links */
int S = (count-nlinks)/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + nlinks;
if (link < count) {
// get the index for the recv list (deal with reordering of links)
idx = d3q7_linkList[link]; // THINK start NEEDS TO BE HERE
// get the distribution index
n = d3q7_recvlist[start+idx];
// get the index in strided layout
nqm = Map[n];
distanceLocal = Distance[nqm];
psiLocal = Psi[nqm];
// Get the 3-D indices from the send process
k = nqm/(Nx*Ny); j = (nqm-Nx*Ny*k)/Nx; i = nqm-Nx*Ny*k-Nx*j;
// Streaming link the non-local distribution
i -= Cqx; j -= Cqy; k -= Cqz;
npm = k*Nx*Ny + j*Nx + i;
distanceNonlocal = Distance[npm];
psiNonlocal = Psi[npm];
membranePotential = psiLocal - psiNonlocal;
aq = MassFractionIn;
ap = MassFractionOut;
/* link is inside membrane */
if (distanceLocal > 0.0){
if (membranePotential < Threshold*(-1.0)){
ap = MassFractionIn;
aq = MassFractionOut;
}
else {
ap = ThresholdMassFractionIn;
aq = ThresholdMassFractionOut;
}
}
else if (membranePotential > Threshold){
aq = ThresholdMassFractionIn;
ap = ThresholdMassFractionOut;
}
// update link based on mass transfer coefficients
coef[2*(link-nlinks)] = aq;
coef[2*(link-nlinks)+1] = ap;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Membrane_Unpack(int q,
int *d3q7_recvlist, int *d3q7_linkList, int start, int nlinks, int count,
double *recvbuf, double *dist, int N, double *coef) {
//....................................................................................
// Unack distribution from the recv buffer
// Distribution q matche Cqx, Cqy, Cqz
// swap rule means that the distributions in recvbuf are OPPOSITE of q
// dist may be even or odd distributions stored by stream layout
//....................................................................................
int n, idx, link;
double fq,fp,fqq,ap,aq; // coefficient
/* second enforce custom rule for membrane links */
int S = count/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
/* First unpack the regular links */
if (link < nlinks) {
// get the index for the recv list (deal with reordering of links)
idx = d3q7_linkList[link];
// get the distribution index
n = d3q7_recvlist[start+idx];
if (!(n < 0)){
fp = recvbuf[start + idx];
dist[q * N + n] = fp;
}
}
else if (link < count){
/* second enforce custom rule for membrane links */
// get the index for the recv list (deal with reordering of links)
idx = d3q7_linkList[link];
// get the distribution index
n = d3q7_recvlist[start+idx];
// update link based on mass transfer coefficients
if (!(n < 0)){
aq = coef[2*(link-nlinks)];
ap = coef[2*(link-nlinks)+1];
fq = dist[q * N + n];
fp = recvbuf[start + idx];
fqq = (1-aq)*fq+ap*fp;
dist[q * N + n] = fqq;
}
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef,
double *dist, double *Den, int memLinks, int Np){
int link,iq,ip,nq,np;
double aq, ap, fq, fp, fqq, fpp, Cq, Cp;
int S = memLinks/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (link < memLinks){
// inside //outside
aq = coef[2*link]; ap = coef[2*link+1];
iq = membrane[2*link]; ip = membrane[2*link+1];
nq = iq%Np; np = ip%Np;
fq = dist[iq]; fp = dist[ip];
fqq = (1-aq)*fq+ap*fp; fpp = (1-ap)*fp+aq*fq;
Cq = Den[nq]; Cp = Den[np];
Cq += fqq - fq; Cp += fpp - fp;
Den[nq] = Cq; Den[np] = Cp;
dist[iq] = fqq; dist[ip] = fpp;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
int n,nread;
double fq,Ci;
@ -106,6 +324,7 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, doub
double Ex,Ey,Ez;//electrical field
double flux_diffusive_x,flux_diffusive_y,flux_diffusive_z;
double f0,f1,f2,f3,f4,f5,f6;
double X,Y,Z,factor_x,factor_y,factor_z;
int nr1,nr2,nr3,nr4,nr5,nr6;
int S = Np/NBLOCKS/NTHREADS + 1;
@ -114,80 +333,96 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, doub
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
Ci=Den[n];
Ex=ElectricField[n+0*Np];
Ey=ElectricField[n+1*Np];
Ez=ElectricField[n+2*Np];
ux=Velocity[n+0*Np];
uy=Velocity[n+1*Np];
uz=Velocity[n+2*Np];
uEPx=zi*Di/Vt*Ex;
uEPy=zi*Di/Vt*Ey;
uEPz=zi*Di/Vt*Ez;
//Load data
Ex = ElectricField[n + 0 * Np];
Ey = ElectricField[n + 1 * Np];
Ez = ElectricField[n + 2 * Np];
ux = Velocity[n + 0 * Np];
uy = Velocity[n + 1 * Np];
uz = Velocity[n + 2 * Np];
uEPx = zi * Di / Vt * Ex;
uEPy = zi * Di / Vt * Ey;
uEPz = zi * Di / Vt * Ez;
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
// q=2
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n+2*Np]; // neighbor 4
f3 = dist[nr3];
// q=4
nr4 = neighborList[n+3*Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n+4*Np];
f5 = dist[nr5];
// q=6
nr6 = neighborList[n+5*Np];
f6 = dist[nr6];
// compute diffusive flux
flux_diffusive_x = (1.0-0.5*rlx)*((f1-f2)-ux*Ci);
flux_diffusive_y = (1.0-0.5*rlx)*((f3-f4)-uy*Ci);
flux_diffusive_z = (1.0-0.5*rlx)*((f5-f6)-uz*Ci);
FluxDiffusive[n+0*Np] = flux_diffusive_x;
FluxDiffusive[n+1*Np] = flux_diffusive_y;
FluxDiffusive[n+2*Np] = flux_diffusive_z;
FluxAdvective[n+0*Np] = ux*Ci;
FluxAdvective[n+1*Np] = uy*Ci;
FluxAdvective[n+2*Np] = uz*Ci;
FluxElectrical[n+0*Np] = uEPx*Ci;
FluxElectrical[n+1*Np] = uEPy*Ci;
FluxElectrical[n+2*Np] = uEPz*Ci;
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
// q=2
nr2 = neighborList[n + Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n + 2 * Np]; // neighbor 4
f3 = dist[nr3];
// q=4
nr4 = neighborList[n + 3 * Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n + 4 * Np];
f5 = dist[nr5];
// q=6
nr6 = neighborList[n + 5 * Np];
f6 = dist[nr6];
// q=0
dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci;
//dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// compute diffusive flux
Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
FluxDiffusive[n + 0 * Np] = flux_diffusive_x;
FluxDiffusive[n + 1 * Np] = flux_diffusive_y;
FluxDiffusive[n + 2 * Np] = flux_diffusive_z;
FluxAdvective[n + 0 * Np] = ux * Ci;
FluxAdvective[n + 1 * Np] = uy * Ci;
FluxAdvective[n + 2 * Np] = uz * Ci;
FluxElectrical[n + 0 * Np] = uEPx * Ci;
FluxElectrical[n + 1 * Np] = uEPy * Ci;
FluxElectrical[n + 2 * Np] = uEPz * Ci;
Den[n] = Ci;
// q = 1
dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));
//dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
/* use logistic function to prevent negative distributions*/
X = 4.0 * (ux + uEPx);
Y = 4.0 * (uy + uEPy);
Z = 4.0 * (uz + uEPz);
factor_x = X / sqrt(1 + X*X);
factor_y = Y / sqrt(1 + Y*Y);
factor_z = Z / sqrt(1 + Z*Z);
// q=2
dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));
//dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q=0
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
// q = 3
dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));
//dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 1
dist[nr2] =
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
//f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
// q = 4
dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));
//dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 5
dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));
//dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q=2
dist[nr1] =
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
//f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
// q = 3
dist[nr4] =
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y );
//f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
// q = 4
dist[nr3] =
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
//f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
// q = 5
dist[nr6] =
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
//f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
// q = 6
dist[nr5] =
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
// q = 6
dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));
//dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
}
}
}
@ -201,6 +436,7 @@ __global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *F
double Ex,Ey,Ez;//electrical field
double flux_diffusive_x,flux_diffusive_y,flux_diffusive_z;
double f0,f1,f2,f3,f4,f5,f6;
double X,Y,Z,factor_x,factor_y,factor_z;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
@ -208,67 +444,83 @@ __global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *F
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
Ci=Den[n];
Ex=ElectricField[n+0*Np];
Ey=ElectricField[n+1*Np];
Ez=ElectricField[n+2*Np];
ux=Velocity[n+0*Np];
uy=Velocity[n+1*Np];
uz=Velocity[n+2*Np];
uEPx=zi*Di/Vt*Ex;
uEPy=zi*Di/Vt*Ey;
uEPz=zi*Di/Vt*Ez;
//Load data
//Ci = Den[n];
Ex = ElectricField[n + 0 * Np];
Ey = ElectricField[n + 1 * Np];
Ez = ElectricField[n + 2 * Np];
ux = Velocity[n + 0 * Np];
uy = Velocity[n + 1 * Np];
uz = Velocity[n + 2 * Np];
uEPx = zi * Di / Vt * Ex;
uEPy = zi * Di / Vt * Ey;
uEPz = zi * Di / Vt * Ez;
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
f6 = dist[5*Np+n];
// compute diffusive flux
flux_diffusive_x = (1.0-0.5*rlx)*((f1-f2)-ux*Ci);
flux_diffusive_y = (1.0-0.5*rlx)*((f3-f4)-uy*Ci);
flux_diffusive_z = (1.0-0.5*rlx)*((f5-f6)-uz*Ci);
FluxDiffusive[n+0*Np] = flux_diffusive_x;
FluxDiffusive[n+1*Np] = flux_diffusive_y;
FluxDiffusive[n+2*Np] = flux_diffusive_z;
FluxAdvective[n+0*Np] = ux*Ci;
FluxAdvective[n+1*Np] = uy*Ci;
FluxAdvective[n+2*Np] = uz*Ci;
FluxElectrical[n+0*Np] = uEPx*Ci;
FluxElectrical[n+1*Np] = uEPy*Ci;
FluxElectrical[n+2*Np] = uEPz*Ci;
f0 = dist[n];
f1 = dist[2 * Np + n];
f2 = dist[1 * Np + n];
f3 = dist[4 * Np + n];
f4 = dist[3 * Np + n];
f5 = dist[6 * Np + n];
f6 = dist[5 * Np + n];
// q=0
dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci;
//dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// compute diffusive flux
Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
FluxDiffusive[n + 0 * Np] = flux_diffusive_x;
FluxDiffusive[n + 1 * Np] = flux_diffusive_y;
FluxDiffusive[n + 2 * Np] = flux_diffusive_z;
FluxAdvective[n + 0 * Np] = ux * Ci;
FluxAdvective[n + 1 * Np] = uy * Ci;
FluxAdvective[n + 2 * Np] = uz * Ci;
FluxElectrical[n + 0 * Np] = uEPx * Ci;
FluxElectrical[n + 1 * Np] = uEPy * Ci;
FluxElectrical[n + 2 * Np] = uEPz * Ci;
Den[n] = Ci;
/* use logistic function to prevent negative distributions*/
X = 4.0 * (ux + uEPx);
Y = 4.0 * (uy + uEPy);
Z = 4.0 * (uz + uEPz);
factor_x = X / sqrt(1 + X*X);
factor_y = Y / sqrt(1 + Y*Y);
factor_z = Z / sqrt(1 + Z*Z);
// q = 1
dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));
//dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q=0
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
// q=2
dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));
//dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 1
dist[1 * Np + n] =
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
//f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
// q = 3
dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));
//dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q=2
dist[2 * Np + n] =
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
//f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
// q = 4
dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));
//dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 3
dist[3 * Np + n] =
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y);
//f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
// q = 5
dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));
//dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 4
dist[4 * Np + n] =
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
//f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
// q = 6
dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));
//dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 5
dist[5 * Np + n] =
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
//f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
// q = 6
dist[6 * Np + n] =
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
//f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
}
}
}
@ -314,7 +566,7 @@ __global__ void dvc_ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, in
}
}
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, double IonValence, int ion_component, int start, int finish, int Np){
int n;
double Ci;//ion concentration of species i
@ -327,10 +579,17 @@ __global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDe
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
Ci = Den[n+ion_component*Np];
CD = ChargeDensity[n];
if (ion_component == 0) CD=0.0;
CD_tmp = F*IonValence*Ci;
ChargeDensity[n] = CD*(ion_component>0) + CD_tmp;
ChargeDensity[n] = CD + CD_tmp;
// Ci = Den[n+ion_component*Np];
// CD = ChargeDensity[n];
// CD_tmp = F*IonValence*Ci;
// ChargeDensity[n] = CD*(ion_component>0) + CD_tmp;
}
}
}
@ -408,7 +667,7 @@ extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np)
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, double IonValence, int ion_component, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<<NBLOCKS,NTHREADS >>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np);
@ -419,3 +678,61 @@ extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int memLinks, int Nx, int Ny, int Nz, int Np){
dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef<<<NBLOCKS,NTHREADS >>>(membrane, Map, Distance, Psi, coef,
Threshold, MassFractionIn, MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut,
memLinks, Nx, Ny, Nz, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef: %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
const int Cqx, const int Cqy, int const Cqz,
int *Map, double *Distance, double *Psi, double Threshold,
double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
const int N, const int Nx, const int Ny, const int Nz) {
dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo<<<NBLOCKS,NTHREADS >>>(
Cqx, Cqy, Cqz, Map, Distance, Psi, Threshold,
MassFractionIn, MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut,
d3q7_recvlist, d3q7_linkList, coef, start, nlinks, count, N, Nx, Ny, Nz);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo: %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q,
int *d3q7_recvlist, int *d3q7_linkList, int start, int nlinks, int count,
double *recvbuf, double *dist, int N, double *coef) {
dvc_ScaLBL_D3Q7_Membrane_Unpack<<<NBLOCKS,NTHREADS >>>(q, d3q7_recvlist, d3q7_linkList, start, nlinks, count,
recvbuf, dist, N, coef) ;
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_Unpack: %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef,
double *dist, double *Den, int memLinks, int Np){
dvc_ScaLBL_D3Q7_Membrane_IonTransport<<<NBLOCKS,NTHREADS >>>(membrane, coef, dist, Den, memLinks, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_IonTransport: %s \n",cudaGetErrorString(err));
}
}

View File

@ -4,8 +4,8 @@
//*************************************************************************
#include <cuda.h>
#define NBLOCKS 560
#define NTHREADS 128
#define NBLOCKS 1024
#define NTHREADS 512
__global__ void INITIALIZE(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
{

View File

@ -271,6 +271,413 @@ __global__ void dvc_ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Ps
}
}
__global__ void dvc_ScaLBL_D3Q19_AAeven_Poisson_ElectricPotential(
int *Map, double *dist, double *Den_charge, double *Psi, double epsilon_LB, bool UseSlippingVelBC, int start, int finish, int Np) {
int n;
double psi,sum; //electric potential
double rho_e; //local charge density
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
f16, f17, f18;
double Gs;
int idx;
for (n = start; n < finish; n++) {
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
//........................................................................
// q=0
f0 = dist[n];
f1 = dist[2 * Np + n];
f2 = dist[1 * Np + n];
f3 = dist[4 * Np + n];
f4 = dist[3 * Np + n];
f5 = dist[6 * Np + n];
f6 = dist[5 * Np + n];
f7 = dist[8 * Np + n];
f8 = dist[7 * Np + n];
f9 = dist[10 * Np + n];
f10 = dist[9 * Np + n];
f11 = dist[12 * Np + n];
f12 = dist[11 * Np + n];
f13 = dist[14 * Np + n];
f14 = dist[13 * Np + n];
f15 = dist[16 * Np + n];
f16 = dist[15 * Np + n];
f17 = dist[18 * Np + n];
f18 = dist[17 * Np + n];
psi = f0 + f2 + f1 + f4 + f3 + f6 + f5 + f8 + f7 + f10 + f9 + f12 +
f11 + f14 + f13 + f16 + f15 + f18 + f17;
idx = Map[n];
Psi[idx] = psi - 0.5*rho_e;
}
}
__global__ void dvc_ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
double *dist, double *Den_charge,
double *Psi, double *ElectricField,
double tau, double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
int n;
double psi; //electric potential
double Ex, Ey, Ez; //electric field
double rho_e; //local charge density
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
f16, f17, f18;
int nr1, nr2, nr3, nr4, nr5, nr6, nr7, nr8, nr9, nr10, nr11, nr12, nr13,
nr14, nr15, nr16, nr17, nr18;
double error,sum_q;
double rlx = 1.0 / tau;
int idx;
double W0 = 0.5;
double W1 = 1.0/24.0;
double W2 = 1.0/48.0;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
//and thus the net space charge density is zero.
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
nr2 = neighborList[n + Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n + 2 * Np]; // neighbor 4
f3 = dist[nr3];
// q = 4
nr4 = neighborList[n + 3 * Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n + 4 * Np];
f5 = dist[nr5];
// q = 6
nr6 = neighborList[n + 5 * Np];
f6 = dist[nr6];
// q=7
nr7 = neighborList[n + 6 * Np];
f7 = dist[nr7];
// q = 8
nr8 = neighborList[n + 7 * Np];
f8 = dist[nr8];
// q=9
nr9 = neighborList[n + 8 * Np];
f9 = dist[nr9];
// q = 10
nr10 = neighborList[n + 9 * Np];
f10 = dist[nr10];
// q=11
nr11 = neighborList[n + 10 * Np];
f11 = dist[nr11];
// q=12
nr12 = neighborList[n + 11 * Np];
f12 = dist[nr12];
// q=13
nr13 = neighborList[n + 12 * Np];
f13 = dist[nr13];
// q=14
nr14 = neighborList[n + 13 * Np];
f14 = dist[nr14];
// q=15
nr15 = neighborList[n + 14 * Np];
f15 = dist[nr15];
// q=16
nr16 = neighborList[n + 15 * Np];
f16 = dist[nr16];
// q=17
//fq = dist[18*Np+n];
nr17 = neighborList[n + 16 * Np];
f17 = dist[nr17];
// q=18
nr18 = neighborList[n + 17 * Np];
f18 = dist[nr18];
sum_q = f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15+f16+f17+f18;
error = 8.0*(sum_q - f0) + rho_e;
psi = 2.0*(f0*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));
idx = Map[n];
Psi[idx] = psi;
Ex = (f1 - f2 + 0.5*(f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14))*4.0; //NOTE the unit of electric field here is V/lu
Ey = (f3 - f4 + 0.5*(f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18))*4.0;
Ez = (f5 - f6 + 0.5*(f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18))*4.0;
ElectricField[n + 0 * Np] = Ex;
ElectricField[n + 1 * Np] = Ey;
ElectricField[n + 2 * Np] = Ez;
// q = 0
dist[n] = W0*psi; //f0 * (1.0 - rlx) - (1.0-0.5*rlx)*W0*rho_e;
// q = 1
dist[nr2] = W1*psi; //f1 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 2
dist[nr1] = W1*psi; //f2 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 3
dist[nr4] = W1*psi; //f3 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 4
dist[nr3] = W1*psi; //f4 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 5
dist[nr6] = W1*psi; //f5 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 6
dist[nr5] = W1*psi; //f6 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
//........................................................................
// q = 7
dist[nr8] = W2*psi; //f7 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 8
dist[nr7] = W2*psi; //f8 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 9
dist[nr10] = W2*psi; //f9 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 10
dist[nr9] = W2*psi; //f10 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 11
dist[nr12] = W2*psi; //f11 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 12
dist[nr11] = W2*psi; //f12 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 13
dist[nr14] = W2*psi; //f13 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q= 14
dist[nr13] = W2*psi; //f14 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 15
dist[nr16] = W2*psi; //f15 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 16
dist[nr15] = W2*psi; //f16 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 17
dist[nr18] = W2*psi; //f17 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 18
dist[nr17] = W2*psi; //f18 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
}
}
}
__global__ void dvc_ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
double *Den_charge, double *Psi,
double *ElectricField, double *Error, double tau,
double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
int n;
double psi; //electric potential
double Ex, Ey, Ez; //electric field
double rho_e; //local charge density
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
f16, f17, f18;
double error,sum_q;
double rlx = 1.0 / tau;
int idx;
double W0 = 0.5;
double W1 = 1.0/24.0;
double W2 = 1.0/48.0;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
//and thus the net space charge density is zero.
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
f0 = dist[n];
f1 = dist[2 * Np + n];
f2 = dist[1 * Np + n];
f3 = dist[4 * Np + n];
f4 = dist[3 * Np + n];
f5 = dist[6 * Np + n];
f6 = dist[5 * Np + n];
f7 = dist[8 * Np + n];
f8 = dist[7 * Np + n];
f9 = dist[10 * Np + n];
f10 = dist[9 * Np + n];
f11 = dist[12 * Np + n];
f12 = dist[11 * Np + n];
f13 = dist[14 * Np + n];
f14 = dist[13 * Np + n];
f15 = dist[16 * Np + n];
f16 = dist[15 * Np + n];
f17 = dist[18 * Np + n];
f18 = dist[17 * Np + n];
Ex = (f1 - f2 + 0.5*(f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14))*4.0; //NOTE the unit of electric field here is V/lu
Ey = (f3 - f4 + 0.5*(f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18))*4.0;
Ez = (f5 - f6 + 0.5*(f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18))*4.0;
ElectricField[n + 0 * Np] = Ex;
ElectricField[n + 1 * Np] = Ey;
ElectricField[n + 2 * Np] = Ez;
sum_q = f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15+f16+f17+f18;
error = 8.0*(sum_q - f0) + rho_e;
psi = 2.0*(f0*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));
idx = Map[n];
Psi[idx] = psi;
// q = 0
dist[n] = W0*psi;//
// q = 1
dist[1 * Np + n] = W1*psi;//f1 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 2
dist[2 * Np + n] = W1*psi;//f2 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 3
dist[3 * Np + n] = W1*psi;//f3 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 4
dist[4 * Np + n] = W1*psi;//f4 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 5
dist[5 * Np + n] = W1*psi;//f5 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 6
dist[6 * Np + n] = W1*psi;//f6 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
dist[7 * Np + n] = W2*psi;//f7 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[8 * Np + n] = W2*psi;//f8* (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[9 * Np + n] = W2*psi;//f9 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[10 * Np + n] = W2*psi;//f10 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[11 * Np + n] = W2*psi;//f11 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[12 * Np + n] = W2*psi;//f12 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[13 * Np + n] = W2*psi;//f13 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[14 * Np + n] = W2*psi;//f14 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[15 * Np + n] = W2*psi;//f15 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[16 * Np + n] = W2*psi;//f16 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[17 * Np + n] = W2*psi;//f17 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[18 * Np + n] = W2*psi;//f18 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
//........................................................................
}
}
}
__global__ void dvc_ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
int start, int finish, int Np) {
int n;
int ijk;
double W0 = 0.5;
double W1 = 1.0/24.0;
double W2 = 1.0/48.0;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
ijk = Map[n];
dist[0 * Np + n] = W0 * Psi[ijk];//3333333333333333* Psi[ijk];
dist[1 * Np + n] = W1 * Psi[ijk];
dist[2 * Np + n] = W1 * Psi[ijk];
dist[3 * Np + n] = W1 * Psi[ijk];
dist[4 * Np + n] = W1 * Psi[ijk];
dist[5 * Np + n] = W1 * Psi[ijk];
dist[6 * Np + n] = W1 * Psi[ijk];
dist[7 * Np + n] = W2* Psi[ijk];
dist[8 * Np + n] = W2* Psi[ijk];
dist[9 * Np + n] = W2* Psi[ijk];
dist[10 * Np + n] = W2* Psi[ijk];
dist[11 * Np + n] = W2* Psi[ijk];
dist[12 * Np + n] = W2* Psi[ijk];
dist[13 * Np + n] = W2* Psi[ijk];
dist[14 * Np + n] = W2* Psi[ijk];
dist[15 * Np + n] = W2* Psi[ijk];
dist[16 * Np + n] = W2* Psi[ijk];
dist[17 * Np + n] = W2* Psi[ijk];
dist[18 * Np + n] = W2* Psi[ijk];
}
}
}
extern "C" void ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
double *dist, double *Den_charge,
double *Psi, double *ElectricField,
double tau, double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
//cudaProfilerStart();
dvc_ScaLBL_D3Q19_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList, Map,
dist, Den_charge, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, start, finish, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q19_AAodd_Poisson: %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
double *Den_charge, double *Psi,
double *ElectricField, double *Error, double tau,
double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
dvc_ScaLBL_D3Q19_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>( Map, dist, Den_charge, Psi,
ElectricField, Error, tau, epsilon_LB, UseSlippingVelBC, start, finish, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q19_AAeven_Poisson: %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q19_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map, dist, Psi, start, finish, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q19_Poisson_Init: %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
//cudaProfilerStart();

View File

@ -0,0 +1,18 @@
import numpy as np
import matplotlib.pylab as plt
D=np.ones((40,40,40),dtype="uint8")
cx = 20
cy = 20
cz = 20
for i in range(0, 40):
for j in range (0, 40):
for k in range (0,40):
dist = np.sqrt((i-cx)*(i-cx) + (j-cx)*(j-cx) + (k-cz)*(k-cz))
if (dist < 12.5 ) :
D[i,j,k] = 2
D.tofile("bubble_40x40x40.raw")

View File

@ -0,0 +1,77 @@
import numpy as np
import matplotlib.pylab as plt
D=np.ones((40,40,40),dtype="uint8")
cx = 20
cy = 20
cz = 20
for i in range(0, 40):
for j in range (0, 40):
for k in range (0,40):
dist = np.sqrt((i-cx)*(i-cx) + (j-cx)*(j-cx) + (k-cz)*(k-cz))
if (dist < 15.5 ) :
D[i,j,k] = 2
D.tofile("cell_40x40x40.raw")
C1=np.zeros((40,40,40),dtype="double")
C2=np.zeros((40,40,40),dtype="double")
C3=np.zeros((40,40,40),dtype="double")
C4=np.zeros((40,40,40),dtype="double")
C5=np.zeros((40,40,40),dtype="double")
C6=np.zeros((40,40,40),dtype="double")
for i in range(0, 40):
for j in range (0, 40):
for k in range (0,40):
#outside the cell
C1[i,j,k] = 4.0e-6 # K
C2[i,j,k] = 150.0e-6 # Na
C3[i,j,k] = 116.0e-6 # Cl
C4[i,j,k] = 29.0e-6 # HC03
#C5[i,j,k] = 2.4e-6 # Ca
dist = np.sqrt((i-cx)*(i-cx) + (j-cx)*(j-cx) + (k-cz)*(k-cz))
# inside the cell
if (dist < 15.5 ) :
C1[i,j,k] = 145.0e-6
C2[i,j,k] = 12.0e-6
C3[i,j,k] = 4.0e-6
C4[i,j,k] = 12.0e-6 # 12 mmol / L
#C5[i,j,k] = 0.10e-6 # 100 nmol / L
# add up the total charge to make sure it is zero
TotalCharge = 0
for i in range(0, 40):
for j in range (0, 40):
for k in range (0,40):
TotalCharge += C1[i,j,k] + C2[i,j,k] - C3[i,j,k] - C4[i,j,k]
TotalCharge /= (40*40*40)
print("Total charge " + str(TotalCharge))
for i in range(0, 40):
for j in range (0, 40):
for k in range (0,40):
if TotalCharge < 0 :
# need more cation
C5[i,j,k] = abs(TotalCharge)
C6[i,j,k] = 0.0
else :
# need more anion
C5[i,j,k] = 0.0
C6[i,j,k] = abs(TotalCharge)
C1.tofile("cell_concentration_K_40x40x40.raw")
C2.tofile("cell_concentration_Na_40x40x40.raw")
C3.tofile("cell_concentration_Cl_40x40x40.raw")
C4.tofile("cell_concentration_HCO3_40x40x40.raw")
C5.tofile("cell_concentration_cation_40x40x40.raw")
C6.tofile("cell_concentration_anion_40x40x40.raw")

75
example/Bubble/cell.db Normal file
View File

@ -0,0 +1,75 @@
MultiphysController {
timestepMax = 60
num_iter_Ion_List = 2
analysis_interval = 50
tolerance = 1.0e-9
visualization_interval = 100 // Frequency to write visualization data
analysis_interval = 50 // Frequency to perform analysis
}
Stokes {
tau = 1.0
F = 0, 0, 0
ElectricField = 0, 0, 0 //body electric field; user-input unit: [V/m]
nu_phys = 0.889e-6 //fluid kinematic viscosity; user-input unit: [m^2/sec]
}
Ions {
IonConcentrationFile = "cell_concentration_K_40x40x40.raw", "double", "cell_concentration_Na_40x40x40.raw", "double", "cell_concentration_Cl_40x40x40.raw", "double", "cell_concentration_HCO3_40x40x40.raw", "double", "cell_concentration_anion_40x40x40.raw", "double", "cell_concentration_cation_40x40x40.raw", "double"
temperature = 293.15 //unit [K]
number_ion_species = 6 //number of ions
tauList = 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
IonDiffusivityList = 1.0e-9, 1.0e-9, 1.0e-9, 1.0e-9, 1.0e-9, 1.0e-9 //user-input unit: [m^2/sec]
IonValenceList = 1, 1, -1, -1, 1, -1 //valence charge of ions; dimensionless; positive/negative integer
IonConcentrationList = 1.0e-6, 1.0e-6, 1.0e-6, 1.0e-6, 1.0e-6, 1.0e-6 //user-input unit: [mol/m^3]
BC_Solid = 0 //solid boundary condition; 0=non-flux BC; 1=surface ion concentration
//SolidLabels = 0 //solid labels for assigning solid boundary condition; ONLY for BC_Solid=1
//SolidValues = 1.0e-5 // user-input surface ion concentration unit: [mol/m^2]; ONLY for BC_Solid=1
FluidVelDummy = 0.0, 0.0, 1.0e-2 // dummy fluid velocity for debugging
}
Poisson {
epsilonR = 78.5 //fluid dielectric constant [dimensionless]
BC_Inlet = 0 // ->1: fixed electric potential; ->2: sine/cosine periodic electric potential
BC_Outlet = 0 // ->1: fixed electric potential; ->2: sine/cosine periodic electric potential
//--------------------------------------------------------------------------
//--------------------------------------------------------------------------
BC_Solid = 2 //solid boundary condition; 1=surface potential; 2=surface charge density
SolidLabels = 0 //solid labels for assigning solid boundary condition
SolidValues = 0 //if surface potential, unit=[V]; if surface charge density, unit=[C/m^2]
WriteLog = true //write convergence log for LB-Poisson solver
// ------------------------------- Testing Utilities ----------------------------------------
// ONLY for code debugging; the followings test sine/cosine voltage BCs; disabled by default
TestPeriodic = false
TestPeriodicTime = 1.0 //unit:[sec]
TestPeriodicTimeConv = 0.01 //unit:[sec]
TestPeriodicSaveInterval = 0.2 //unit:[sec]
//------------------------------ advanced setting ------------------------------------
timestepMax = 100000 //max timestep for obtaining steady-state electrical potential
analysis_interval = 200 //timestep checking steady-state convergence
tolerance = 1.0e-6 //stopping criterion for steady-state solution
}
Domain {
Filename = "cell_40x40x40.raw"
nproc = 1, 1, 1 // Number of processors (Npx,Npy,Npz)
n = 40, 40, 40 // Size of local domain (Nx,Ny,Nz)
N = 40, 40, 40 // size of the input image
voxel_length = 1.0 //resolution; user-input unit: [um]
BC = 0 // Boundary condition type
ReadType = "8bit"
ReadValues = 0, 1, 2
WriteValues = 0, 1, 2
}
Analysis {
analysis_interval = 100
subphase_analysis_interval = 50 // Frequency to perform analysis
restart_interval = 5000 // Frequency to write restart data
restart_file = "Restart" // Filename to use for restart file (will append rank)
N_threads = 4 // Number of threads to use
load_balance = "independent" // Load balance method to use: "none", "default", "independent"
}
Visualization {
save_electric_potential = true
save_concentration = true
save_velocity = true
}
Membrane {
MembraneLabels = 2
}

View File

@ -0,0 +1,41 @@
import numpy as np
import matplotlib.pylab as plt
Nx = 64
Ny = 64
Nz = 64
cx = Nx/2
cy = Ny/2
cz = Nz/2
radius = 12
D=np.ones((Nx,Ny,Nz),dtype="uint8")
for i in range(0, Nx):
for j in range (0, Ny):
for k in range (0,Nz):
dist = np.sqrt((i-cx)*(i-cx) + (j-cx)*(j-cx) + (k-cz)*(k-cz))
if (dist < radius ) :
D[i,j,k] = 2
D.tofile("cell_64x64x64.raw")
C1=np.zeros((Nx,Ny,Nz),dtype="double")
C2=np.zeros((Nx,Ny,Nz),dtype="double")
for i in range(0, Nx):
for j in range (0, Ny):
for k in range (0,Nz):
#outside the cell
C1[i,j,k] = 125.0e-6 # Na
C2[i,j,k] = 125.0e-6 # Cl
dist = np.sqrt((i-cx)*(i-cx) + (j-cx)*(j-cx) + (k-cz)*(k-cz))
# inside the cell
if (dist < radius ) :
C1[i,j,k] = 110.0e-6
C2[i,j,k] = 110.0e-6
C1.tofile("cell_concentration_Na_64x64x64.raw")
C2.tofile("cell_concentration_Cl_64x64x64.raw")

View File

@ -0,0 +1,74 @@
MultiphysController {
timestepMax = 2000
num_iter_Ion_List = 2
analysis_interval = 40
tolerance = 1.0e-9
visualization_interval = 40 // Frequency to write visualization data
}
Stokes {
tau = 1.0
F = 0, 0, 0
ElectricField = 0, 0, 0 //body electric field; user-input unit: [V/m]
nu_phys = 0.889e-6 //fluid kinematic viscosity; user-input unit: [m^2/sec]
}
Ions {
IonConcentrationFile = "cell_concentration_Na_64x64x64.raw", "double", "cell_concentration_Cl_64x64x64.raw", "double"
temperature = 293.15 //unit [K]
number_ion_species = 2 //number of ions
tauList = 1.0, 1.0
IonDiffusivityList = 1.0e-9, 1.0e-9 //user-input unit: [m^2/sec]
IonValenceList = 1, -1 //valence charge of ions; dimensionless; positive/negative integer
IonConcentrationList = 1.0e-6, 1.0e-6 //user-input unit: [mol/m^3]
BC_Solid = 0 //solid boundary condition; 0=non-flux BC; 1=surface ion concentration
//SolidLabels = 0 //solid labels for assigning solid boundary condition; ONLY for BC_Solid=1
//SolidValues = 1.0e-5 // user-input surface ion concentration unit: [mol/m^2]; ONLY for BC_Solid=1
FluidVelDummy = 0.0, 0.0, 0.0 // dummy fluid velocity for debugging
}
Poisson {
epsilonR = 78.5 //fluid dielectric constant [dimensionless]
BC_Inlet = 0 // ->1: fixed electric potential; ->2: sine/cosine periodic electric potential
BC_Outlet = 0 // ->1: fixed electric potential; ->2: sine/cosine periodic electric potential
//--------------------------------------------------------------------------
//--------------------------------------------------------------------------
BC_Solid = 2 //solid boundary condition; 1=surface potential; 2=surface charge density
SolidLabels = 0 //solid labels for assigning solid boundary condition
SolidValues = 0 //if surface potential, unit=[V]; if surface charge density, unit=[C/m^2]
WriteLog = true //write convergence log for LB-Poisson solver
// ------------------------------- Testing Utilities ----------------------------------------
// ONLY for code debugging; the followings test sine/cosine voltage BCs; disabled by default
TestPeriodic = false
TestPeriodicTime = 1.0 //unit:[sec]
TestPeriodicTimeConv = 0.01 //unit:[sec]
TestPeriodicSaveInterval = 0.2 //unit:[sec]
//------------------------------ advanced setting ------------------------------------
timestepMax = 4000 //max timestep for obtaining steady-state electrical potential
analysis_interval = 25 //timestep checking steady-state convergence
tolerance = 1.0e-10 //stopping criterion for steady-state solution
}
Domain {
Filename = "cell_64x64x64.raw"
nproc = 1, 1, 1 // Number of processors (Npx,Npy,Npz)
n = 64, 64, 64 // Size of local domain (Nx,Ny,Nz)
N = 64, 64, 64 // size of the input image
voxel_length = 0.1 //resolution; user-input unit: [um]
BC = 0 // Boundary condition type
ReadType = "8bit"
ReadValues = 0, 1, 2
WriteValues = 0, 1, 2
}
Analysis {
analysis_interval = 100
subphase_analysis_interval = 50 // Frequency to perform analysis
restart_interval = 5000 // Frequency to write restart data
restart_file = "Restart" // Filename to use for restart file (will append rank)
N_threads = 4 // Number of threads to use
load_balance = "independent" // Load balance method to use: "none", "default", "independent"
}
Visualization {
save_electric_potential = true
save_concentration = true
save_velocity = true
}
Membrane {
MembraneLabels = 2
}

View File

@ -0,0 +1,36 @@
#!/bin/bash
#SBATCH -A CSC380
#SBATCH -J Color-dense
#SBATCH -o %x-%j.out
#SBATCH -t 0:10:00
#SBATCH -p batch
#SBATCH -N 1
#SBATCH --exclusive
# MODULE ENVIRONMENT
module load PrgEnv-amd
module load rocm/4.5.0
module load cray-mpich
module load cray-hdf5-parallel
#module load craype-accel-amd-gfx908
## These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
export MPICH_GPU_SUPPORT_ENABLED=1
#export MPL_MBX_SIZE=1024000000
export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
export LBPM_BIN=/ccs/proj/csc380/mcclurej/crusher/LBPM/bin
echo "Running Color LBM"
MYCPUBIND="--cpu-bind=verbose,map_cpu:57,33,25,1,9,17,41,49"
srun --verbose -N1 -n8 --cpus-per-gpu=8 --gpus-per-task=1 --gpu-bind=closest ${MYCPUBIND} $LBPM_BIN/lbpm_color_simulator input.db
#srun --verbose -N1 -n2 --mem-per-gpu=8g --cpus-per-gpu=1 --gpus-per-node=2 --gpu-bind=closest $LBPM_BIN/lbpm_permeability_simulator input.db
exit;

View File

@ -0,0 +1,36 @@
#!/bin/bash
#SBATCH -A CSC380
#SBATCH -J MRT-a2
#SBATCH -o %x-%j.out
#SBATCH -t 0:10:00
#SBATCH -p batch
#SBATCH -N 1
#SBATCH --exclusive
# MODULE ENVIRONMENT
module load PrgEnv-amd
module load rocm/4.5.0
module load cray-mpich
module load cray-hdf5-parallel
#module load craype-accel-amd-gfx908
## These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
export MPICH_GPU_SUPPORT_ENABLED=1
#export MPL_MBX_SIZE=1024000000
export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
export LBPM_BIN=/ccs/proj/csc380/mcclurej/crusher/LBPM/bin
echo "Running Color LBM"
MYCPUBIND="--cpu-bind=verbose,map_cpu:57,33,25,1,9,17,41,49"
srun --verbose -N1 -n8 --cpus-per-gpu=8 --gpus-per-task=1 --gpu-bind=closest ${MYCPUBIND} $LBPM_BIN/lbpm_permeability_simulator input.db
#srun --verbose -N1 -n2 --mem-per-gpu=8g --cpus-per-gpu=1 --gpus-per-node=2 --gpu-bind=closest $LBPM_BIN/lbpm_permeability_simulator input.db
exit;

View File

@ -0,0 +1,69 @@
MRT {
timestepMax = 10000
analysis_interval = 20000
tau = 0.7
F = 0, 0, 5.0e-5
Restart = false
din = 1.0
dout = 1.0
flux = 0.0
}
Color {
tauA = 0.7;
tauB = 0.7;
rhoA = 1.0;
rhoB = 1.0;
alpha = 1e-2;
beta = 0.95;
F = 0, 0, 1.0e-5
Restart = false
flux = 0.0 // voxels per timestep
timestepMax = 10000
// rescale_force_after_timestep = 100000
ComponentLabels = 0, -1, -2
ComponentAffinity = -1.0, -1.0, -0.9
// protocol = "image sequence"
// capillary_number = 1e-5
}
Domain {
Filename = "a2_2048x2048x8192.raw"
nproc = 2, 2, 2 // Number of processors (Npx,Npy,Npz)
offset = 0, 0, 0
n = 382, 382, 382 // Size of local domain (Nx,Ny,Nz)
N = 2048, 2048, 1024 // size of the input image
voxel_length = 1.0 // Length of domain (x,y,z)
BC = 0 // Boundary condition type
//Sw = 0.2
ReadType = "8bit"
ReadValues = 0, 1, 2, -1, -2
WriteValues = 0, 1, 2, -1, -2
ComponentLabels = 0, -1, -2
InletLayers = 0, 0, 5
OutletLayers = 0, 0, 5
}
Analysis {
visualization_interval = 1000000
//morph_interval = 100000
//morph_delta = -0.08
analysis_interval = 20000 // Frequency to perform analysis
min_steady_timesteps = 15000000
max_steady_timesteps = 15000000
restart_interval = 500000 // Frequency to write restart data
restart_file = "Restart" // Filename to use for restart file (will append rank)
N_threads = 0 // Number of threads to use
load_balance = "default" // Load balance method to use: "none", "default", "independent"
}
Visualization {
save_8bit_raw = true
write_silo = true
}
FlowAdaptor {
}

View File

@ -0,0 +1,36 @@
#!/bin/bash
#SBATCH -A CSC380
#SBATCH -J MPI-multinode
#SBATCH -o %x-%j.out
#SBATCH -t 6:00:00
#SBATCH -p batch
#SBATCH -N 8
#SBATCH --exclusive
# MODULE ENVIRONMENT
module load PrgEnv-amd
module load rocm/4.5.0
module load cray-mpich
module load cray-hdf5-parallel
#module load craype-accel-amd-gfx908
## These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
export MPICH_GPU_SUPPORT_ENABLED=1
#export MPL_MBX_SIZE=1024000000
export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
export LBPM_BIN=/ccs/proj/csc380/mcclurej/crusher/LBPM/tests
echo "Running Color LBM"
MYCPUBIND="--cpu-bind=verbose,map_cpu:57"
srun --verbose -N8 -n8 --cpus-per-gpu=8 --gpus-per-task=1 --gpu-bind=closest ${MYCPUBIND} $LBPM_BIN/TestCommD3Q19 multinode.db
#srun --verbose -N1 -n2 --mem-per-gpu=8g --cpus-per-gpu=1 --gpus-per-node=2 --gpu-bind=closest $LBPM_BIN/lbpm_permeability_simulator input.db
exit;

View File

@ -0,0 +1,36 @@
#!/bin/bash
#SBATCH -A CSC380
#SBATCH -J MPI-singlenode
#SBATCH -o %x-%j.out
#SBATCH -t 0:10:00
#SBATCH -p batch
#SBATCH -N 1
#SBATCH --exclusive
# MODULE ENVIRONMENT
module load PrgEnv-amd
module load rocm/4.5.0
module load cray-mpich
module load cray-hdf5-parallel
#module load craype-accel-amd-gfx908
## These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
export MPICH_GPU_SUPPORT_ENABLED=1
#export MPL_MBX_SIZE=1024000000
export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
export LBPM_BIN=/ccs/proj/csc380/mcclurej/crusher/LBPM/tests
echo "Running Color LBM"
MYCPUBIND="--cpu-bind=verbose,map_cpu:57,33,25,1,9,17,41,49"
srun --verbose -N1 -n8 --cpus-per-gpu=8 --gpus-per-task=1 --gpu-bind=closest ${MYCPUBIND} $LBPM_BIN/TestCommD3Q19 multinode.db
#srun --verbose -N1 -n2 --mem-per-gpu=8g --cpus-per-gpu=1 --gpus-per-node=2 --gpu-bind=closest $LBPM_BIN/lbpm_permeability_simulator input.db
exit;

View File

@ -0,0 +1,9 @@
import numpy as np
N = 1024
data = np.random.randint(low=1,high=3,size=(N,N,N),dtype=np.uint8)
data.tofile("dense_1024x1024x1024.raw")

View File

@ -0,0 +1,69 @@
MRT {
timestepMax = 100
analysis_interval = 20000
tau = 0.7
F = 0, 0, 5.0e-5
Restart = false
din = 1.0
dout = 1.0
flux = 0.0
}
Color {
tauA = 0.7;
tauB = 0.7;
rhoA = 1.0;
rhoB = 1.0;
alpha = 1e-2;
beta = 0.95;
F = 0, 0, 0.0
Restart = false
flux = 0.0 // voxels per timestep
timestepMax = 10
// rescale_force_after_timestep = 100000
ComponentLabels = 0, -1, -2
ComponentAffinity = -1.0, -1.0, -0.9
// protocol = "image sequence"
// capillary_number = 1e-5
}
Domain {
Filename = "dense_1024x1024x1024.raw"
nproc = 2, 2, 2 // Number of processors (Npx,Npy,Npz)
offset = 0, 0, 0
n = 222, 222, 222 // Size of local domain (Nx,Ny,Nz)
N = 1024, 1024, 1024 // size of the input image
voxel_length = 1.0 // Length of domain (x,y,z)
BC = 0 // Boundary condition type
//Sw = 0.2
ReadType = "8bit"
ReadValues = 0, 1, 2, -1, -2
WriteValues = 0, 1, 2, -1, -2
ComponentLabels = 0, -1, -2
InletLayers = 0, 0, 5
OutletLayers = 0, 0, 5
}
Analysis {
visualization_interval = 1000000
//morph_interval = 100000
//morph_delta = -0.08
analysis_interval = 20000 // Frequency to perform analysis
min_steady_timesteps = 15000000
max_steady_timesteps = 15000000
restart_interval = 500000 // Frequency to write restart data
restart_file = "Restart" // Filename to use for restart file (will append rank)
N_threads = 0 // Number of threads to use
load_balance = "default" // Load balance method to use: "none", "default", "independent"
}
Visualization {
save_8bit_raw = true
write_silo = true
}
FlowAdaptor {
}

View File

@ -0,0 +1,14 @@
#!/bin/bash
#SBATCH -A CSC380
#SBATCH -J sphere_test
#SBATCH -o %x-%j.out
#SBATCH -t 00:05:00
#SBATCH -p caar
#SBATCH -N 1
module load rocm/4.2.0
export LBPM_DIR=/ccs/proj/csc380/mcclurej/spock/install/lbpm/tests
srun -n1 --ntasks-per-node=1 $LBPM_DIR/GenerateSphereTest input.db

View File

@ -0,0 +1,17 @@
#!/bin/bash
#SBATCH -A CSC380
#SBATCH -J sphere_test
#SBATCH -o %x-%j.out
#SBATCH -t 00:05:00
#SBATCH -p caar
#SBATCH -N 1
module load rocm/4.2.0
export LBPM_DIR=/ccs/proj/csc380/mcclurej/spock/install/lbpm/tests
export MPICH_SMP_SINGLE_COPY_MODE=CMA
#srun -n1 --ntasks-per-node=1 --accel-bind=g --gpus-per-task=1 $LBPM_DIR/lbpm_color_simulator spheres322.db
srun -n1 --ntasks-per-node=1 --accel-bind=g --gpus-per-task=1 $LBPM_DIR/TestCommD3Q19 spheres322.db

View File

@ -0,0 +1,32 @@
#!/bin/bash
#SBATCH -A CSC380
#SBATCH -J sphere_test
#SBATCH -o %x-%j.out
#SBATCH -e %x-%j.err
#SBATCH -t 00:05:00
#SBATCH -p caar
#SBATCH -N 1
module load craype-accel-amd-gfx908
module load PrgEnv-cray
#module load rocm
module load rocm/4.2.0
export LBPM_DIR=/ccs/proj/csc380/mcclurej/spock/install/lbpm/tests
#export MPICH_RDMA_ENABLED_CUDA=1
#export MPICH_ENV_DISPLAY=1
#export MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_GPU_NO_ASYNC_MEMCPY=0
export MPICH_SMP_SINGLE_COPY_MODE=CMA
#export MPICH_DBG_FILENAME="./mpich-dbg.log"
export MPICH_DBG_CLASS=ALL
export MPICH_DBG_LEVEL=VERBOSE
export MPICH_DBG=yes
#export PMI_DEBUG=1
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPICH_GPU_SUPPORT_ENABLED=1
#srun -n1 --ntasks-per-node=1 --accel-bind=g --gpus-per-task=1 $LBPM_DIR/lbpm_color_simulator spheres322.db
srun -n1 --ntasks-per-node=1 --accel-bind=g --gpus-per-task=1 --verbose --export=ALL $LBPM_DIR/TestCommD3Q19 test.db

View File

@ -0,0 +1,54 @@
MRT {
tau = 1.0 // relaxation time
F = 0, 0, 1e-4 // external body force applied to system
timestepMax = 1000 // max number of timesteps
din = 1.0
dout = 1.0
Restart = false
flux = 0.0
}
Color {
tauA = 0.7; // relaxation time for fluid A
tauB = 0.7; // relaxation time for fluid B
rhoA = 1.0; // mass density for fluid A
rhoB = 1.0; // mass density for fluid B
alpha = 1e-3; // controls interfacial tension between fluids
beta = 0.95; // controls interface width
F = 0, 0, 1.0e-5 // external body force applied to the system
Restart = false // restart from checkpoint file?
din = 1.0 // density at inlet (if external BC is applied)
dout = 1.0 // density at outlet (if external BC is applied )
timestepMax = 10 // maximum number of timesteps to simulate
flux = 0.0 // volumetric flux in voxels per timestep (if flux BC is applied)
ComponentLabels = 0 // comma separated list of solid mineral labels
ComponentAffinity = -1.0 // comma separated list of phase indicato field value to assign for each mineral label
}
Domain {
nproc = 1, 1, 1 // Number of processors (Npx,Npy,Npz)
n = 318, 320, 320 // Size of local domain (Nx,Ny,Nz)
N = 320, 320, 320
nspheres = 1896 // Number of spheres (only needed if using a sphere packing)
L = 1, 1, 1 // Length of domain (x,y,z)
BC = 0 // Boundary condition type
// BC = 0 for periodic BC
// BC = 1 for pressure BC (applied in z direction)
// BC = 4 for flux BC (applied in z direction
ReadType = "8bit"
ReadValues = 0, 1, 2 // list of labels within the binary file (read)
WriteValues = 0, 1, 2 // list of labels within the output files (write)
}
Analysis {
analysis_interval = 1000 // Frequency to perform analysis
restart_interval = 50000 // Frequency to write restart data
visualization_interval = 50000 // Frequency to write visualization data
restart_file = "Restart" // Filename to use for restart file (will append rank)
N_threads = 4 // Number of threads to use
load_balance = "independent" // Load balance method to use: "none", "default", "independent"
}
Visualization {
}

View File

View File

@ -0,0 +1,54 @@
MRT {
tau = 1.0 // relaxation time
F = 0, 0, 1e-4 // external body force applied to system
timestepMax = 1000 // max number of timesteps
din = 1.0
dout = 1.0
Restart = false
flux = 0.0
}
Color {
tauA = 0.7; // relaxation time for fluid A
tauB = 0.7; // relaxation time for fluid B
rhoA = 1.0; // mass density for fluid A
rhoB = 1.0; // mass density for fluid B
alpha = 1e-3; // controls interfacial tension between fluids
beta = 0.95; // controls interface width
F = 0, 0, 1.0e-5 // external body force applied to the system
Restart = false // restart from checkpoint file?
din = 1.0 // density at inlet (if external BC is applied)
dout = 1.0 // density at outlet (if external BC is applied )
timestepMax = 10 // maximum number of timesteps to simulate
flux = 0.0 // volumetric flux in voxels per timestep (if flux BC is applied)
ComponentLabels = 0 // comma separated list of solid mineral labels
ComponentAffinity = -1.0 // comma separated list of phase indicato field value to assign for each mineral label
}
Domain {
nproc = 1, 1, 1 // Number of processors (Npx,Npy,Npz)
n = 240, 240, 240 // Size of local domain (Nx,Ny,Nz)
N = 320, 320, 320
nspheres = 1896 // Number of spheres (only needed if using a sphere packing)
L = 1, 1, 1 // Length of domain (x,y,z)
BC = 0 // Boundary condition type
// BC = 0 for periodic BC
// BC = 1 for pressure BC (applied in z direction)
// BC = 4 for flux BC (applied in z direction
ReadType = "8bit"
ReadValues = 0, 1, 2 // list of labels within the binary file (read)
WriteValues = 0, 1, 2 // list of labels within the output files (write)
}
Analysis {
analysis_interval = 1000 // Frequency to perform analysis
restart_interval = 50000 // Frequency to write restart data
visualization_interval = 50000 // Frequency to write visualization data
restart_file = "Restart" // Filename to use for restart file (will append rank)
N_threads = 4 // Number of threads to use
load_balance = "independent" // Load balance method to use: "none", "default", "independent"
}
Visualization {
}

View File

@ -18,9 +18,11 @@
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 256
#define NTHREADS 512
__global__ void dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
__global__ void
__launch_bounds__(512,1) dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
int n;
// conserved momemnts
double rho,ux,uy,uz,uu;
@ -138,7 +140,8 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish,
}
}
__global__ void dvc_ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
__global__ void
__launch_bounds__(512,1) dvc_ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
int n;
// conserved momemnts
double rho,ux,uy,uz,uu;

View File

@ -1,10 +0,0 @@
SET( HIP_SEPERABLE_COMPILATION ON )
FILE( GLOB HIP_SOURCES "*.cu" )
SET_SOURCE_FILES_PROPERTIES( ${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 )
HIP_ADD_LIBRARY( lbpm-hip ${HIP_SOURCES} SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} )
#TARGET_LINK_LIBRARIES( lbpm-hip /opt/rocm-3.3.0/lib/libhip_hcc.so )
#TARGET_LINK_LIBRARIES( lbpm-wia lbpm-hip )
#ADD_DEPENDENCIES( lbpm-hip copy-include )

View File

@ -21,6 +21,21 @@
#define NBLOCKS 1024
#define NTHREADS 256
__device__ __constant__ double mrt_V1=0.05263157894736842;
__device__ __constant__ double mrt_V2=0.012531328320802;
__device__ __constant__ double mrt_V3=0.04761904761904762;
__device__ __constant__ double mrt_V4=0.004594820384294068;
__device__ __constant__ double mrt_V5=0.01587301587301587;
__device__ __constant__ double mrt_V6=0.0555555555555555555555555;
__device__ __constant__ double mrt_V7=0.02777777777777778;
__device__ __constant__ double mrt_V8=0.08333333333333333;
__device__ __constant__ double mrt_V9=0.003341687552213868;
__device__ __constant__ double mrt_V10=0.003968253968253968;
__device__ __constant__ double mrt_V11=0.01388888888888889;
__device__ __constant__ double mrt_V12=0.04166666666666666;
__global__ void dvc_ScaLBL_Color_Init(char *ID, double *Den, double *Phi, double das, double dbs, int Nx, int Ny, int Nz)
{
//int i,j,k;
@ -541,7 +556,7 @@ __global__ void dvc_ColorCollide( char *ID, double *disteven, double *distodd,
}
__global__ void
__launch_bounds__(512,2)
__launch_bounds__(256,1)
dvc_ScaLBL_D3Q19_ColorCollide( char *ID, double *disteven, double *distodd, double *phi, double *ColorGrad,
double *Velocity, int Nx, int Ny, int Nz, double rlx_setA, double rlx_setB,
double alpha, double beta, double Fx, double Fy, double Fz)
@ -1257,7 +1272,8 @@ __global__ void dvc_ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny
__global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi,
__global__ void
__launch_bounds__(256,1) dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi,
double *Velocity, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){
int ijk,nn,n;
@ -1273,19 +1289,6 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
double ux,uy,uz;
double phi,tau,rho0,rlx_setA,rlx_setB;
const double mrt_V1=0.05263157894736842;
const double mrt_V2=0.012531328320802;
const double mrt_V3=0.04761904761904762;
const double mrt_V4=0.004594820384294068;
const double mrt_V5=0.01587301587301587;
const double mrt_V6=0.0555555555555555555555555;
const double mrt_V7=0.02777777777777778;
const double mrt_V8=0.08333333333333333;
const double mrt_V9=0.003341687552213868;
const double mrt_V10=0.003968253968253968;
const double mrt_V11=0.01388888888888889;
const double mrt_V12=0.04166666666666666;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
@ -1295,9 +1298,10 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
// read the component number densities
nA = Den[n];
nB = Den[Np + n];
nAB = 1.0/(nA+nB);
// compute phase indicator field
phi=(nA-nB)/(nA+nB);
phi=(nA-nB)*nAB;
// local density
rho0=rhoA + 0.5*(1.0-phi)*(rhoB-rhoA);
@ -1372,11 +1376,11 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
//...........Normalize the Color Gradient.................................
C = sqrt(nx*nx+ny*ny+nz*nz);
double ColorMag = C;
if (C==0.0) ColorMag=1.0;
nx = nx/ColorMag;
ny = ny/ColorMag;
nz = nz/ColorMag;
double iColorMag = 1.0/C;
if (C==0.0) iColorMag=1.0;
nx = nx*iColorMag;
ny = ny*iColorMag;
nz = nz*iColorMag;
// q=0
fq = dist[n];
@ -1651,19 +1655,20 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
//........................................................................
//..............carry out relaxation process..............................
//..........Toelke, Fruediger et. al. 2006................................
double irho0 = 1.0/rho0;
if (C == 0.0) nx = ny = nz = 0.0;
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) -19*alpha*C - m1);
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0)- m2);
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)*irho0 - 11*rho) -19*alpha*C - m1);
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)*irho0)- m2);
m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4);
m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6);
m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8);
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)*irho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
m10 = m10 + rlx_setA*( - m10);
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)*irho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
m12 = m12 + rlx_setA*( - m12);
m13 = m13 + rlx_setA*( (jx*jy/rho0) + 0.5*alpha*C*nx*ny - m13);
m14 = m14 + rlx_setA*( (jy*jz/rho0) + 0.5*alpha*C*ny*nz - m14);
m15 = m15 + rlx_setA*( (jx*jz/rho0) + 0.5*alpha*C*nx*nz - m15);
m13 = m13 + rlx_setA*( (jx*jy*irho0) + 0.5*alpha*C*nx*ny - m13);
m14 = m14 + rlx_setA*( (jy*jz*irho0) + 0.5*alpha*C*ny*nz - m14);
m15 = m15 + rlx_setA*( (jx*jz*irho0) + 0.5*alpha*C*nx*nz - m15);
m16 = m16 + rlx_setB*( - m16);
m17 = m17 + rlx_setB*( - m17);
m18 = m18 + rlx_setB*( - m18);
@ -1776,9 +1781,9 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
//........................................................................
// write the velocity
ux = jx / rho0;
uy = jy / rho0;
uz = jz / rho0;
ux = jx*irho0;
uy = jy*irho0;
uz = jz*irho0;
Velocity[n] = ux;
Velocity[Np+n] = uy;
Velocity[2*Np+n] = uz;
@ -1786,7 +1791,6 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
// Instantiate mass transport distributions
// Stationary value - distribution 0
nAB = 1.0/(nA+nB);
Aq[n] = 0.3333333333333333*nA;
Bq[n] = 0.3333333333333333*nB;
@ -1839,8 +1843,8 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
}
}
__global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den,
__global__ void
__launch_bounds__(256,1) dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den,
double *Phi, double *Velocity, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){
@ -1861,19 +1865,6 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double
double ux,uy,uz;
double phi,tau,rho0,rlx_setA,rlx_setB;
const double mrt_V1=0.05263157894736842;
const double mrt_V2=0.012531328320802;
const double mrt_V3=0.04761904761904762;
const double mrt_V4=0.004594820384294068;
const double mrt_V5=0.01587301587301587;
const double mrt_V6=0.0555555555555555555555555;
const double mrt_V7=0.02777777777777778;
const double mrt_V8=0.08333333333333333;
const double mrt_V9=0.003341687552213868;
const double mrt_V10=0.003968253968253968;
const double mrt_V11=0.01388888888888889;
const double mrt_V12=0.04166666666666666;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
@ -1882,9 +1873,10 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double
// read the component number densities
nA = Den[n];
nB = Den[Np + n];
nAB = 1.0/(nA+nB);
// compute phase indicator field
phi=(nA-nB)/(nA+nB);
phi=(nA-nB)*nAB;
// local density
rho0=rhoA + 0.5*(1.0-phi)*(rhoB-rhoA);
@ -1959,11 +1951,11 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double
//...........Normalize the Color Gradient.................................
C = sqrt(nx*nx+ny*ny+nz*nz);
double ColorMag = C;
if (C==0.0) ColorMag=1.0;
nx = nx/ColorMag;
ny = ny/ColorMag;
nz = nz/ColorMag;
double iColorMag = 1.0/C;
if (C==0.0) iColorMag=1.0;
nx = nx*iColorMag;
ny = ny*iColorMag;
nz = nz*iColorMag;
// q=0
fq = dist[n];
@ -2290,18 +2282,19 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double
//..............carry out relaxation process..............................
//..........Toelke, Fruediger et. al. 2006................................
if (C == 0.0) nx = ny = nz = 0.0;
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) -19*alpha*C - m1);
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0)- m2);
double irho0=1.0/rho0;
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)*irho0 - 11*rho) -19*alpha*C - m1);
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)*irho0)- m2);
m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4);
m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6);
m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8);
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)*irho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
m10 = m10 + rlx_setA*( - m10);
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)*irho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
m12 = m12 + rlx_setA*( - m12);
m13 = m13 + rlx_setA*( (jx*jy/rho0) + 0.5*alpha*C*nx*ny - m13);
m14 = m14 + rlx_setA*( (jy*jz/rho0) + 0.5*alpha*C*ny*nz - m14);
m15 = m15 + rlx_setA*( (jx*jz/rho0) + 0.5*alpha*C*nx*nz - m15);
m13 = m13 + rlx_setA*( (jx*jy*irho0) + 0.5*alpha*C*nx*ny - m13);
m14 = m14 + rlx_setA*( (jy*jz*irho0) + 0.5*alpha*C*ny*nz - m14);
m15 = m15 + rlx_setA*( (jx*jz*irho0) + 0.5*alpha*C*nx*nz - m15);
m16 = m16 + rlx_setB*( - m16);
m17 = m17 + rlx_setB*( - m17);
m18 = m18 + rlx_setB*( - m18);
@ -2426,16 +2419,15 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double
dist[nread] = fq;
// write the velocity
ux = jx / rho0;
uy = jy / rho0;
uz = jz / rho0;
ux = jx*irho0;
uy = jy*irho0;
uz = jz*irho0;
Velocity[n] = ux;
Velocity[Np+n] = uy;
Velocity[2*Np+n] = uz;
// Instantiate mass transport distributions
// Stationary value - distribution 0
nAB = 1.0/(nA+nB);
Aq[n] = 0.3333333333333333*nA;
Bq[n] = 0.3333333333333333*nB;
@ -3677,7 +3669,8 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_ColorMass(int *neighborList, double *Aq,
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq,
__global__ void
__launch_bounds__(256,1) dvc_ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq,
double *Den, double *Phi, int start, int finish, int Np){
int idx,n,nread;
double fq,nA,nB;
@ -3747,7 +3740,8 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, d
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, double *Den, double *Phi,
__global__ void
__launch_bounds__(256,1) dvc_ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, double *Den, double *Phi,
int start, int finish, int Np){
int idx,n;
double fq,nA,nB;

View File

@ -19,7 +19,7 @@
#include "hip/hip_cooperative_groups.h"
#define NBLOCKS 1024
#define NTHREADS 256
#define NTHREADS 512
/*
1. constants that are known at compile time should be defined using preprocessor macros (e.g. #define) or via C/C++ const variables at global/file scope.
@ -321,10 +321,10 @@ __global__ void dvc_ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *distev
}
}
//__launch_bounds__(512,4)
//__launch_bounds__(512,1)
__global__ void
dvc_ScaLBL_AAodd_Compact(char * ID, int *d_neighborList, double *dist, int Np) {
dvc_ScaLBL_AAodd_Compact( int *d_neighborList, double *dist, int Np) {
int n;
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
@ -463,7 +463,8 @@ dvc_ScaLBL_AAodd_Compact(char * ID, int *d_neighborList, double *dist, int Np) {
}
__global__ void
__global__ void
__launch_bounds__(512,1)
dvc_ScaLBL_AAodd_MRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz) {
int n;
@ -932,7 +933,8 @@ dvc_ScaLBL_AAodd_MRT(int *neighborList, double *dist, int start, int finish, int
//__launch_bounds__(512,1)
__global__ void
__global__ void
__launch_bounds__(512,1)
dvc_ScaLBL_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz) {
int n;
@ -1353,9 +1355,9 @@ dvc_ScaLBL_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_se
}
}
//__launch_bounds__(512,4)
//__launch_bounds__(512,1)
__global__ void dvc_ScaLBL_AAeven_Compact(char * ID, double *dist, int Np) {
__global__ void dvc_ScaLBL_AAeven_Compact( double *dist, int Np) {
int n;
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
@ -2374,12 +2376,12 @@ __global__ void dvc_ScaLBL_D3Q19_Init_Simple(char *ID, double *f_even, double *f
extern "C" void ScaLBL_D3Q19_Pack(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q19_Pack <<<GRID,512 >>>(q, list, start, count, sendbuf, dist, N);
dvc_ScaLBL_D3Q19_Pack <<<NBLOCKS,NTHREADS >>>(q, list, start, count, sendbuf, dist, N);
}
extern "C" void ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count, double *recvbuf, double *dist, int N){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q19_Unpack <<<GRID,512 >>>(q, list, start, count, recvbuf, dist, N);
dvc_ScaLBL_D3Q19_Unpack <<<NBLOCKS,NTHREADS >>>(q, list, start, count, recvbuf, dist, N);
}
//*************************************************************************
@ -2423,19 +2425,17 @@ extern "C" void ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *disteven, d
printf("CUDA error in ScaLBL_D3Q19_Swap: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q19_AAeven_Compact(char * ID, double *d_dist, int Np) {
extern "C" void ScaLBL_D3Q19_AAeven_Compact( double *d_dist, int Np) {
hipFuncSetCacheConfig( (void*) dvc_ScaLBL_AAeven_Compact, hipFuncCachePreferL1);
dvc_ScaLBL_AAeven_Compact<<<NBLOCKS,NTHREADS>>>(ID, d_dist, Np);
dvc_ScaLBL_AAeven_Compact<<<NBLOCKS,NTHREADS>>>( d_dist, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q19_AAodd_Compact(char * ID, int *d_neighborList, double *d_dist, int Np) {
extern "C" void ScaLBL_D3Q19_AAodd_Compact( int *d_neighborList, double *d_dist, int Np) {
hipFuncSetCacheConfig( (void*) dvc_ScaLBL_AAodd_Compact, hipFuncCachePreferL1);
dvc_ScaLBL_AAodd_Compact<<<NBLOCKS,NTHREADS>>>(ID,d_neighborList, d_dist,Np);
dvc_ScaLBL_AAodd_Compact<<<NBLOCKS,NTHREADS>>>(d_neighborList, d_dist,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",hipGetErrorString(err));

View File

@ -1,567 +0,0 @@
#include <math.h>
#include <stdio.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 560
#define NTHREADS 128
__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
int idx;
int iq,ib;
double value_b,value_q;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
iq = BounceBackDist_list[idx];
ib = BounceBackSolid_list[idx];
value_b = BoundaryValue[ib];//get boundary value from a solid site
value_q = dist[iq];
dist[iq] = -1.0*value_q + value_b*0.25;//NOTE 0.25 is the speed of sound for D3Q7 lattice
}
}
__global__ void dvc_ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
int idx;
int iq,ib;
double value_b,value_q;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
iq = BounceBackDist_list[idx];
ib = BounceBackSolid_list[idx];
value_b = BoundaryValue[ib];//get boundary value from a solid site
value_q = dist[iq];
dist[iq] = value_q + value_b;
}
}
__global__ void dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist, double *BoundaryValue,int *BoundaryLabel, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
int idx;
int iq,ib;
double value_b,value_b_label,value_q;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
iq = BounceBackDist_list[idx];
ib = BounceBackSolid_list[idx];
value_b = BoundaryValue[ib];//get boundary value from a solid site
value_b_label = BoundaryLabel[ib];//get boundary label (i.e. type of BC) from a solid site
value_q = dist[iq];
if (value_b_label==1){//Dirichlet BC
dist[iq] = -1.0*value_q + value_b*0.25;//NOTE 0.25 is the speed of sound for D3Q7 lattice
}
if (value_b_label==2){//Neumann BC
dist[iq] = value_q + value_b;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np)
{
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f6 = dist[5*Np+n];
//...................................................
f5 = Vin - (f0+f1+f2+f3+f4+f6);
dist[6*Np+n] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np)
{
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
//...................................................
f6 = Vout - (f0+f1+f2+f3+f4+f5);
dist[5*Np+n] = f6;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np)
{
int idx, n;
int nread,nr5;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
nread = d_neighborList[n+5*Np];
f6 = dist[nread];
// Unknown distributions
nr5 = d_neighborList[n+4*Np];
f5 = Vin - (f0+f1+f2+f3+f4+f6);
dist[nr5] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np)
{
int idx, n;
int nread,nr6;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+4*Np];
f5 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
// unknown distributions
nr6 = d_neighborList[n+5*Np];
f6 = Vout - (f0+f1+f2+f3+f4+f5);
dist[nr6] = f6;
}
}
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count)
{
int idx,n,nm;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
nm = Map[n];
Psi[nm] = Vin;
}
}
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count)
{
int idx,n,nm;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
nm = Map[n];
Psi[nm] = Vout;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np)
{
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f6 = dist[5*Np+n];
//...................................................
f5 = Cin - (f0+f1+f2+f3+f4+f6);
dist[6*Np+n] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np)
{
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
//...................................................
f6 = Cout - (f0+f1+f2+f3+f4+f5);
dist[5*Np+n] = f6;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np)
{
int idx, n;
int nread,nr5;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
nread = d_neighborList[n+5*Np];
f6 = dist[nread];
// Unknown distributions
nr5 = d_neighborList[n+4*Np];
f5 = Cin - (f0+f1+f2+f3+f4+f6);
dist[nr5] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np)
{
int idx, n;
int nread,nr6;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+4*Np];
f5 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
// unknown distributions
nr6 = d_neighborList[n+5*Np];
f6 = Cout - (f0+f1+f2+f3+f4+f5);
dist[nr6] = f6;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f6 = dist[5*Np+n];
fsum_partial = f0+f1+f2+f3+f4+f6;
uz = VelocityZ[n];
//...................................................
f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau);
dist[6*Np+n] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
fsum_partial = f0+f1+f2+f3+f4+f5;
uz = VelocityZ[n];
//...................................................
f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau);
dist[5*Np+n] = f6;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx, n;
int nread,nr5;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
nread = d_neighborList[n+5*Np];
f6 = dist[nread];
fsum_partial = f0+f1+f2+f3+f4+f6;
uz = VelocityZ[n];
//...................................................
f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau);
// Unknown distributions
nr5 = d_neighborList[n+4*Np];
dist[nr5] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx, n;
int nread,nr6;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+4*Np];
f5 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
fsum_partial = f0+f1+f2+f3+f4+f5;
uz = VelocityZ[n];
//...................................................
f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau);
// unknown distributions
nr6 = d_neighborList[n+5*Np];
dist[nr6] = f6;
}
}
//*************************************************************************
extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Solid_Dirichlet_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_Solid_Dirichlet_D3Q7 (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Solid_Neumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_Solid_Neumann_D3Q7 (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist, double *BoundaryValue,int *BoundaryLabel, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BoundaryLabel, BounceBackDist_list, BounceBackSolid_list, count);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("hip error in ScaLBL_Solid_DirichletAndNeumann_D3Q7 (kernel): %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<<GRID,512>>>(list, dist, Vin, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<<GRID,512>>>(list, dist, Vout, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Vin, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Vout, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Poisson_D3Q7_BC_z<<<GRID,512>>>(list, Map, Psi, Vin, count);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_Poisson_D3Q7_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Poisson_D3Q7_BC_Z<<<GRID,512>>>(list, Map, Psi, Vout, count);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_Poisson_D3Q7_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<<GRID,512>>>(list, dist, Cin, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<<GRID,512>>>(list, dist, Cout, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Cin, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Cout, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}

917
hip/D3Q7BC.hip Normal file
View File

@ -0,0 +1,917 @@
#include <math.h>
#include <stdio.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 256
#define CHECK_ERROR(KERNEL) \
do { \
auto err = hipGetLastError(); \
if ( hipSuccess != err ){ \
auto errString = hipGetErrorString(err); \
printf("error in %s (kernel): %s \n",KERNEL,errString); \
} \
} while(0)
__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
int idx;
int iq,ib;
double value_b,value_q;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
iq = BounceBackDist_list[idx];
ib = BounceBackSolid_list[idx];
value_b = BoundaryValue[ib];//get boundary value from a solid site
value_q = dist[iq];
dist[iq] = -1.0*value_q + value_b*0.25;//NOTE 0.25 is the speed of sound for D3Q7 lattice
}
}
__global__ void dvc_ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
int idx;
int iq,ib;
double value_b,value_q;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
iq = BounceBackDist_list[idx];
ib = BounceBackSolid_list[idx];
value_b = BoundaryValue[ib];//get boundary value from a solid site
value_q = dist[iq];
dist[iq] = value_q + value_b;
}
}
__global__ void dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist, double *BoundaryValue,int *BoundaryLabel, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
int idx;
int iq,ib;
double value_b,value_b_label,value_q;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
iq = BounceBackDist_list[idx];
ib = BounceBackSolid_list[idx];
value_b = BoundaryValue[ib];//get boundary value from a solid site
value_b_label = BoundaryLabel[ib];//get boundary label (i.e. type of BC) from a solid site
value_q = dist[iq];
if (value_b_label==1){//Dirichlet BC
dist[iq] = -1.0*value_q + value_b*0.25;//NOTE 0.25 is the speed of sound for D3Q7 lattice
}
if (value_b_label==2){//Neumann BC
dist[iq] = value_q + value_b;
}
}
}
__global__ void dvc_ScaLBL_Solid_SlippingVelocityBC_D3Q19(double *dist, double *zeta_potential, double *ElectricField, double *SolidGrad,
double epsilon_LB, double tau, double rho0,double den_scale, double h, double time_conv,
int *BounceBackDist_list, int *BounceBackSolid_list, int *FluidBoundary_list,
double *lattice_weight, float *lattice_cx, float *lattice_cy, float *lattice_cz,
int count, int Np)
{
int idx;
int iq,ib,ifluidBC;
double value_b,value_q;
double Ex,Ey,Ez;
double Etx,Ety,Etz;//tangential part of electric field
double E_mag_normal;
double nsx,nsy,nsz;//unit normal solid gradient
double ubx,uby,ubz;//slipping velocity at fluid boundary nodes
float cx,cy,cz;//lattice velocity (D3Q19)
double LB_weight;//lattice weighting coefficient (D3Q19)
double cs2_inv = 3.0;//inverse of cs^2 for D3Q19
double nu_LB = (tau-0.5)/cs2_inv;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
iq = BounceBackDist_list[idx];
ib = BounceBackSolid_list[idx];
ifluidBC = FluidBoundary_list[idx];
value_b = zeta_potential[ib];//get zeta potential from a solid site
value_q = dist[iq];
//Load electric field and compute its tangential componet
Ex = ElectricField[ifluidBC+0*Np];
Ey = ElectricField[ifluidBC+1*Np];
Ez = ElectricField[ifluidBC+2*Np];
nsx = SolidGrad[ifluidBC+0*Np];
nsy = SolidGrad[ifluidBC+1*Np];
nsz = SolidGrad[ifluidBC+2*Np];
E_mag_normal = Ex*nsx+Ey*nsy+Ez*nsz;//magnitude of electric field in the direction normal to solid nodes
//compute tangential electric field
Etx = Ex - E_mag_normal*nsx;
Ety = Ey - E_mag_normal*nsy;
Etz = Ez - E_mag_normal*nsz;
ubx = -epsilon_LB*value_b*Etx/(nu_LB*rho0)*time_conv*time_conv/(h*h*1.0e-12)/den_scale;
uby = -epsilon_LB*value_b*Ety/(nu_LB*rho0)*time_conv*time_conv/(h*h*1.0e-12)/den_scale;
ubz = -epsilon_LB*value_b*Etz/(nu_LB*rho0)*time_conv*time_conv/(h*h*1.0e-12)/den_scale;
//compute bounce-back distribution
LB_weight = lattice_weight[idx];
cx = lattice_cx[idx];
cy = lattice_cy[idx];
cz = lattice_cz[idx];
dist[iq] = value_q - 2.0*LB_weight*rho0*cs2_inv*(cx*ubx+cy*uby+cz*ubz);
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np)
{
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f6 = dist[5*Np+n];
//...................................................
f5 = Vin - (f0+f1+f2+f3+f4+f6);
dist[6*Np+n] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np)
{
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
//...................................................
f6 = Vout - (f0+f1+f2+f3+f4+f5);
dist[5*Np+n] = f6;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np)
{
int idx, n;
int nread,nr5;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
nread = d_neighborList[n+5*Np];
f6 = dist[nread];
// Unknown distributions
nr5 = d_neighborList[n+4*Np];
f5 = Vin - (f0+f1+f2+f3+f4+f6);
dist[nr5] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np)
{
int idx, n;
int nread,nr6;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+4*Np];
f5 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
// unknown distributions
nr6 = d_neighborList[n+5*Np];
f6 = Vout - (f0+f1+f2+f3+f4+f5);
dist[nr6] = f6;
}
}
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count)
{
int idx,n,nm;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
nm = Map[n];
Psi[nm] = Vin;
}
}
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count)
{
int idx,n,nm;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
nm = Map[n];
Psi[nm] = Vout;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np)
{
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f6 = dist[5*Np+n];
//...................................................
f5 = Cin - (f0+f1+f2+f3+f4+f6);
dist[6*Np+n] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np)
{
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
//...................................................
f6 = Cout - (f0+f1+f2+f3+f4+f5);
dist[5*Np+n] = f6;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np)
{
int idx, n;
int nread,nr5;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
nread = d_neighborList[n+5*Np];
f6 = dist[nread];
// Unknown distributions
nr5 = d_neighborList[n+4*Np];
f5 = Cin - (f0+f1+f2+f3+f4+f6);
dist[nr5] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np)
{
int idx, n;
int nread,nr6;
double f0,f1,f2,f3,f4,f5,f6;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+4*Np];
f5 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
// unknown distributions
nr6 = d_neighborList[n+5*Np];
f6 = Cout - (f0+f1+f2+f3+f4+f5);
dist[nr6] = f6;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f6 = dist[5*Np+n];
fsum_partial = f0+f1+f2+f3+f4+f6;
uz = VelocityZ[n];
//...................................................
f5 =(FluxIn+(1.0-0.5/tau)*(f6+uz*fsum_partial))/(1.0-0.5/tau)/(1.0-uz);
dist[6*Np+n] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
fsum_partial = f0+f1+f2+f3+f4+f5;
uz = VelocityZ[n];
//...................................................
f6 =(FluxIn+(1.0-0.5/tau)*(f5-uz*fsum_partial))/(1.0-0.5/tau)/(1.0+uz);
dist[5*Np+n] = f6;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx, n;
int nread,nr5;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
nread = d_neighborList[n+5*Np];
f6 = dist[nread];
fsum_partial = f0+f1+f2+f3+f4+f6;
uz = VelocityZ[n];
//...................................................
f5 =(FluxIn+(1.0-0.5/tau)*(f6+uz*fsum_partial))/(1.0-0.5/tau)/(1.0-uz);
// Unknown distributions
nr5 = d_neighborList[n+4*Np];
dist[nr5] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx, n;
int nread,nr6;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+4*Np];
f5 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
fsum_partial = f0+f1+f2+f3+f4+f5;
uz = VelocityZ[n];
//...................................................
f6 =(FluxIn+(1.0-0.5/tau)*(f5-uz*fsum_partial))/(1.0-0.5/tau)/(1.0+uz);
// unknown distributions
nr6 = d_neighborList[n+5*Np];
dist[nr6] = f6;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f6 = dist[5*Np+n];
fsum_partial = f0+f1+f2+f3+f4+f6;
uz = VelocityZ[n];
//...................................................
f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau);
dist[6*Np+n] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
fsum_partial = f0+f1+f2+f3+f4+f5;
uz = VelocityZ[n];
//...................................................
f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau);
dist[5*Np+n] = f6;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx, n;
int nread,nr5;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
nread = d_neighborList[n+5*Np];
f6 = dist[nread];
fsum_partial = f0+f1+f2+f3+f4+f6;
uz = VelocityZ[n];
//...................................................
f5 =(FluxIn+(1.0-0.5/tau)*f6-0.5*uz*fsum_partial/tau)/(1.0-0.5/tau+0.5*uz/tau);
// Unknown distributions
nr5 = d_neighborList[n+4*Np];
dist[nr5] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx, n;
int nread,nr6;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+4*Np];
f5 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
fsum_partial = f0+f1+f2+f3+f4+f5;
uz = VelocityZ[n];
//...................................................
f6 =(FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau)/(1.0-0.5/tau-0.5*uz/tau);
// unknown distributions
nr6 = d_neighborList[n+5*Np];
dist[nr6] = f6;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
double Di, double zi, double Vt, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
double uEPz;//electrochemical induced velocity
double Ez;//electrical field
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f6 = dist[5*Np+n];
fsum_partial = f0+f1+f2+f3+f4+f6;
uz = VelocityZ[n];
Ez = ElectricField_Z[n];
uEPz=zi*Di/Vt*Ez;
//...................................................
f5 =(FluxIn+(1.0-0.5/tau)*f6-(0.5*uz/tau+uEPz)*fsum_partial)/(1.0-0.5/tau+0.5*uz/tau+uEPz);
dist[6*Np+n] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
double Di, double zi, double Vt, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx,n;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
double uEPz;//electrochemical induced velocity
double Ez;//electrical field
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
fsum_partial = f0+f1+f2+f3+f4+f5;
uz = VelocityZ[n];
Ez = ElectricField_Z[n];
uEPz=zi*Di/Vt*Ez;
//...................................................
f6 =(FluxIn+(1.0-0.5/tau)*f5+(0.5*uz/tau+uEPz)*fsum_partial)/(1.0-0.5/tau-0.5*uz/tau-uEPz);
dist[5*Np+n] = f6;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
double Di, double zi, double Vt, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx, n;
int nread,nr5;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
double uEPz;//electrochemical induced velocity
double Ez;//electrical field
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
nread = d_neighborList[n+5*Np];
f6 = dist[nread];
fsum_partial = f0+f1+f2+f3+f4+f6;
uz = VelocityZ[n];
Ez = ElectricField_Z[n];
uEPz=zi*Di/Vt*Ez;
//...................................................
f5 =(FluxIn+(1.0-0.5/tau)*f6-(0.5*uz/tau+uEPz)*fsum_partial)/(1.0-0.5/tau+0.5*uz/tau+uEPz);
// Unknown distributions
nr5 = d_neighborList[n+4*Np];
dist[nr5] = f5;
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
double Di, double zi, double Vt, int count, int Np)
{
//NOTE: FluxIn is the inward flux
int idx, n;
int nread,nr6;
double f0,f1,f2,f3,f4,f5,f6;
double fsum_partial;
double uz;
double uEPz;//electrochemical induced velocity
double Ez;//electrical field
idx = blockIdx.x*blockDim.x + threadIdx.x;
if (idx < count){
n = list[idx];
f0 = dist[n];
nread = d_neighborList[n];
f1 = dist[nread];
nread = d_neighborList[n+2*Np];
f3 = dist[nread];
nread = d_neighborList[n+4*Np];
f5 = dist[nread];
nread = d_neighborList[n+Np];
f2 = dist[nread];
nread = d_neighborList[n+3*Np];
f4 = dist[nread];
fsum_partial = f0+f1+f2+f3+f4+f5;
uz = VelocityZ[n];
Ez = ElectricField_Z[n];
uEPz=zi*Di/Vt*Ez;
//...................................................
f6 =(FluxIn+(1.0-0.5/tau)*f5+(0.5*uz/tau+uEPz)*fsum_partial)/(1.0-0.5/tau-0.5*uz/tau-uEPz);
// unknown distributions
nr6 = d_neighborList[n+5*Np];
dist[nr6] = f6;
}
}
//*************************************************************************
extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Solid_Dirichlet_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
CHECK_ERROR("ScaLBL_Solid_Dirichlet_D3Q7");
}
extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Solid_Neumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
CHECK_ERROR("ScaLBL_Solid_Neumann_D3Q7");
}
extern "C" void ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist, double *BoundaryValue,int *BoundaryLabel, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BoundaryLabel, BounceBackDist_list, BounceBackSolid_list, count);
CHECK_ERROR("ScaLBL_Solid_DirichletAndNeumann_D3Q7");
}
extern "C" void ScaLBL_Solid_SlippingVelocityBC_D3Q19(double *dist, double *zeta_potential, double *ElectricField, double *SolidGrad,
double epsilon_LB, double tau, double rho0,double den_scale, double h, double time_conv,
int *BounceBackDist_list, int *BounceBackSolid_list, int *FluidBoundary_list,
double *lattice_weight, float *lattice_cx, float *lattice_cy, float *lattice_cz,
int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_Solid_SlippingVelocityBC_D3Q19<<<GRID,512>>>(dist, zeta_potential, ElectricField, SolidGrad,
epsilon_LB, tau, rho0, den_scale, h, time_conv,
BounceBackDist_list, BounceBackSolid_list, FluidBoundary_list,
lattice_weight, lattice_cx, lattice_cy, lattice_cz,
count, Np);
CHECK_ERROR("ScaLBL_Solid_SlippingVelocityBC_D3Q19");
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<<GRID,512>>>(list, dist, Vin, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<<GRID,512>>>(list, dist, Vout, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Vin, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Vout, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z");
}
extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Poisson_D3Q7_BC_z<<<GRID,512>>>(list, Map, Psi, Vin, count);
CHECK_ERROR("ScaLBL_Poisson_D3Q7_BC_z");
}
extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Poisson_D3Q7_BC_Z<<<GRID,512>>>(list, Map, Psi, Vout, count);
CHECK_ERROR("ScaLBL_Poisson_D3Q7_BC_Z");
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<<GRID,512>>>(list, dist, Cin, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<<GRID,512>>>(list, dist, Cout, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Cin, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Cout, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z");
}
//------------Diff-----------------
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z");
}
//----------DiffAdvc-------------
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z");
}
//----------DiffAdvcElec-------------
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
double Di, double zi, double Vt, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
double Di, double zi, double Vt, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
double Di, double zi, double Vt, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
double Di, double zi, double Vt, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z");
}
//-------------------------------

View File

@ -2726,10 +2726,10 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined(int *Map, double *
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi,
__global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField_alt(int *neighborList, int *Map, double *hq, double *Den, double *Phi,
double rhoA, double rhoB, int start, int finish, int Np){
int idx,nread;
int n,idx,nread;
double fq,phi;
// for (int n=start; n<finish; n++){
@ -2787,7 +2787,7 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList,
__global__ void dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq, double *Den, double *Phi,
double rhoA, double rhoB, int start, int finish, int Np){
int idx;
int n,idx;
double fq,phi;
// for (int n=start; n<finish; n++){
int S = Np/NBLOCKS/NTHREADS + 1;
@ -2833,7 +2833,6 @@ __global__ void dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double
idx = Map[n];
Phi[idx] = phi;
}
}
}
__global__ void dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double *Pressure,
@ -3396,7 +3395,7 @@ extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, doubl
extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel,
double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np)
{
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField, hipFuncCachePreferL1);
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField<<<NBLOCKS,NTHREADS >>>(neighborList, Map, hq, Den, Phi, ColorGrad, Vel,
rhoA, rhoB, tauM, W, start, finish, Np);
hipError_t err = hipGetLastError();
@ -3406,9 +3405,9 @@ extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map
}
extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel,
double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np){
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField, hipFuncCachePreferL1);
double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np)
{
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField<<<NBLOCKS,NTHREADS >>>( Map, hq, Den, Phi, ColorGrad, Vel, rhoA, rhoB, tauM, W, start, finish, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
@ -3419,7 +3418,7 @@ extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( int *Map, double *hq, dou
extern "C" void ScaLBL_D3Q7_ComputePhaseField(int *Map, double *hq, double *Den, double *Phi, double rhoA, double rhoB, int start, int finish, int Np){
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_ComputePhaseField, hipFuncCachePreferL1);
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_ComputePhaseField, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q7_ComputePhaseField<<<NBLOCKS,NTHREADS >>>( Map, hq, Den, Phi, rhoA, rhoB, start, finish, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
@ -3432,7 +3431,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, dou
double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz,
int strideY, int strideZ, int start, int finish, int Np){
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel, hipFuncCachePreferL1);
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel<<<NBLOCKS,NTHREADS >>>(neighborList, Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad,
rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np);
hipError_t err = hipGetLastError();
@ -3445,7 +3444,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double
double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz,
int strideY, int strideZ, int start, int finish, int Np){
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel, hipFuncCachePreferL1);
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel<<<NBLOCKS,NTHREADS >>>(Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad,
rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np);
hipError_t err = hipGetLastError();
@ -3458,7 +3457,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double
extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad,
double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz,
int strideY, int strideZ, int start, int finish, int Np){
cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined, cudaFuncCachePreferL1);
//hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined<<<NBLOCKS,NTHREADS >>>(Map, dist, Den, hq, Phi, mu_phi, Vel, Pressure, ColorGrad,
rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np);
hipError_t err = hipGetLastError();
@ -3471,7 +3470,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined(int *neighborList, int
double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz,
int strideY, int strideZ, int start, int finish, int Np){
cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined, cudaFuncCachePreferL1);
//hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined<<<NBLOCKS,NTHREADS >>>(neighborList, Map, dist, hq, Den, Phi, mu_phi, Vel, Pressure, ColorGrad,
rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np);
hipError_t err = hipGetLastError();
@ -3482,7 +3481,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined(int *neighborList, int
extern "C" void ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi,
double rhoA, double rhoB, int start, int finish, int Np){
cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField, cudaFuncCachePreferL1);
//hipFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField<<<NBLOCKS,NTHREADS >>>( neighborList, Map, hq, Den, Phi, rhoA, rhoB, start, finish, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
@ -3492,7 +3491,7 @@ extern "C" void ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int
extern "C" void ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq, double *Den, double *Phi, double rhoA, double rhoB, int start, int finish, int Np){
cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField, cudaFuncCachePreferL1);
//hipFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField<<<NBLOCKS,NTHREADS >>>( Map, hq, Den, Phi, rhoA, rhoB, start, finish, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
@ -3503,7 +3502,7 @@ extern "C" void ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq,
extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double *Pressure,
double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK, hipFuncCachePreferL1);
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK<<<NBLOCKS,NTHREADS >>>(neighborList, dist, Vel, Pressure,
tau, rho0, Fx, Fy, Fz, start, finish, Np);
hipError_t err = hipGetLastError();
@ -3513,9 +3512,9 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborLis
}
extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure,
double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK, hipFuncCachePreferL1);
double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np)
{
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK<<<NBLOCKS,NTHREADS >>>(dist, Vel, Pressure,
tau, rho0, Fx, Fy, Fz, start, finish, Np);
hipError_t err = hipGetLastError();

View File

@ -1,5 +1,6 @@
#include <stdio.h>
#include <math.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 256
@ -1609,7 +1610,9 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor_CP(int *neighborList, int
Fcpy = ny;
Fcpz = nz;
double Fcp_mag=sqrt(Fcpx*Fcpx+Fcpy*Fcpy+Fcpz*Fcpz);
if (Fcp_mag==0.0); Fcpx=Fcpy=Fcpz=0.0;
if (Fcp_mag==0.0) {
Fcpx=Fcpy=Fcpz=0.0;
}
//NOTE for open node (porosity=1.0),Fcp=0.0
Fcpx *= alpha*W*(1.0-porosity)/sqrt(perm);
Fcpy *= alpha*W*(1.0-porosity)/sqrt(perm);
@ -2396,7 +2399,9 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor_CP(int *Map, double *dis
Fcpy = ny;
Fcpz = nz;
double Fcp_mag=sqrt(Fcpx*Fcpx+Fcpy*Fcpy+Fcpz*Fcpz);
if (Fcp_mag==0.0); Fcpx=Fcpy=Fcpz=0.0;
if (Fcp_mag==0.0) {
Fcpx=Fcpy=Fcpz=0.0;
}
//NOTE for open node (porosity=1.0),Fcp=0.0
Fcpx *= alpha*W*(1.0-porosity)/sqrt(perm);
Fcpy *= alpha*W*(1.0-porosity)/sqrt(perm);

View File

@ -1,422 +0,0 @@
#include <stdio.h>
#include <math.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 256
__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
int n,nread;
double fq,Ci;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
// q=0
fq = dist[n];
Ci = fq;
// q=1
nread = neighborList[n];
fq = dist[nread];
Ci += fq;
// q=2
nread = neighborList[n+Np];
fq = dist[nread];
Ci += fq;
// q=3
nread = neighborList[n+2*Np];
fq = dist[nread];
Ci += fq;
// q=4
nread = neighborList[n+3*Np];
fq = dist[nread];
Ci += fq;
// q=5
nread = neighborList[n+4*Np];
fq = dist[nread];
Ci += fq;
// q=6
nread = neighborList[n+5*Np];
fq = dist[nread];
Ci += fq;
Den[n]=Ci;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
int n;
double fq,Ci;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
// q=0
fq = dist[n];
Ci = fq;
// q=1
fq = dist[2*Np+n];
Ci += fq;
// q=2
fq = dist[1*Np+n];
Ci += fq;
// q=3
fq = dist[4*Np+n];
Ci += fq;
// q=4
fq = dist[3*Np+n];
Ci += fq;
// q=5
fq = dist[6*Np+n];
Ci += fq;
// q=6
fq = dist[5*Np+n];
Ci += fq;
Den[n]=Ci;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
int n;
double Ci;
double ux,uy,uz;
double uEPx,uEPy,uEPz;//electrochemical induced velocity
double Ex,Ey,Ez;//electrical field
double flux_diffusive_x,flux_diffusive_y,flux_diffusive_z;
double f0,f1,f2,f3,f4,f5,f6;
int nr1,nr2,nr3,nr4,nr5,nr6;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
Ci=Den[n];
Ex=ElectricField[n+0*Np];
Ey=ElectricField[n+1*Np];
Ez=ElectricField[n+2*Np];
ux=Velocity[n+0*Np];
uy=Velocity[n+1*Np];
uz=Velocity[n+2*Np];
uEPx=zi*Di/Vt*Ex;
uEPy=zi*Di/Vt*Ey;
uEPz=zi*Di/Vt*Ez;
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
// q=2
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n+2*Np]; // neighbor 4
f3 = dist[nr3];
// q=4
nr4 = neighborList[n+3*Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n+4*Np];
f5 = dist[nr5];
// q=6
nr6 = neighborList[n+5*Np];
f6 = dist[nr6];
// compute diffusive flux
flux_diffusive_x = (1.0-0.5*rlx)*((f1-f2)-ux*Ci);
flux_diffusive_y = (1.0-0.5*rlx)*((f3-f4)-uy*Ci);
flux_diffusive_z = (1.0-0.5*rlx)*((f5-f6)-uz*Ci);
FluxDiffusive[n+0*Np] = flux_diffusive_x;
FluxDiffusive[n+1*Np] = flux_diffusive_y;
FluxDiffusive[n+2*Np] = flux_diffusive_z;
FluxAdvective[n+0*Np] = ux*Ci;
FluxAdvective[n+1*Np] = uy*Ci;
FluxAdvective[n+2*Np] = uz*Ci;
FluxElectrical[n+0*Np] = uEPx*Ci;
FluxElectrical[n+1*Np] = uEPy*Ci;
FluxElectrical[n+2*Np] = uEPz*Ci;
// q=0
dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci;
//dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 1
dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));
//dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q=2
dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));
//dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 3
dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));
//dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 4
dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));
//dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 5
dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));
//dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 6
dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));
//dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
int n;
double Ci;
double ux,uy,uz;
double uEPx,uEPy,uEPz;//electrochemical induced velocity
double Ex,Ey,Ez;//electrical field
double flux_diffusive_x,flux_diffusive_y,flux_diffusive_z;
double f0,f1,f2,f3,f4,f5,f6;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
Ci=Den[n];
Ex=ElectricField[n+0*Np];
Ey=ElectricField[n+1*Np];
Ez=ElectricField[n+2*Np];
ux=Velocity[n+0*Np];
uy=Velocity[n+1*Np];
uz=Velocity[n+2*Np];
uEPx=zi*Di/Vt*Ex;
uEPy=zi*Di/Vt*Ey;
uEPz=zi*Di/Vt*Ez;
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
f6 = dist[5*Np+n];
// compute diffusive flux
flux_diffusive_x = (1.0-0.5*rlx)*((f1-f2)-ux*Ci);
flux_diffusive_y = (1.0-0.5*rlx)*((f3-f4)-uy*Ci);
flux_diffusive_z = (1.0-0.5*rlx)*((f5-f6)-uz*Ci);
FluxDiffusive[n+0*Np] = flux_diffusive_x;
FluxDiffusive[n+1*Np] = flux_diffusive_y;
FluxDiffusive[n+2*Np] = flux_diffusive_z;
FluxAdvective[n+0*Np] = ux*Ci;
FluxAdvective[n+1*Np] = uy*Ci;
FluxAdvective[n+2*Np] = uz*Ci;
FluxElectrical[n+0*Np] = uEPx*Ci;
FluxElectrical[n+1*Np] = uEPy*Ci;
FluxElectrical[n+2*Np] = uEPz*Ci;
// q=0
dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci;
//dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 1
dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));
//dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q=2
dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));
//dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 3
dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));
//dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 4
dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));
//dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 5
dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));
//dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 6
dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));
//dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
int n;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (n<Np) {
dist[0*Np+n] = 0.25*DenInit;
dist[1*Np+n] = 0.125*DenInit;
dist[2*Np+n] = 0.125*DenInit;
dist[3*Np+n] = 0.125*DenInit;
dist[4*Np+n] = 0.125*DenInit;
dist[5*Np+n] = 0.125*DenInit;
dist[6*Np+n] = 0.125*DenInit;
Den[n] = DenInit;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
int n;
double DenInit;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (n<Np) {
DenInit = Den[n];
dist[0*Np+n] = 0.25*DenInit;
dist[1*Np+n] = 0.125*DenInit;
dist[2*Np+n] = 0.125*DenInit;
dist[3*Np+n] = 0.125*DenInit;
dist[4*Np+n] = 0.125*DenInit;
dist[5*Np+n] = 0.125*DenInit;
dist[6*Np+n] = 0.125*DenInit;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
int n;
double Ci;//ion concentration of species i
double CD;//charge density
double CD_tmp;
double F = 96485.0;//Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
Ci = Den[n+ion_component*Np];
CD = ChargeDensity[n];
CD_tmp = F*IonValence*Ci;
ChargeDensity[n] = CD*(ion_component>0) + CD_tmp;
}
}
}
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_IonConcentration<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_IonConcentration: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_IonConcentration<<<NBLOCKS,NTHREADS >>>(dist,Den,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_IonConcentration: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Ion<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,FluxDiffusive,FluxAdvective,FluxElectrical,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Ion<<<NBLOCKS,NTHREADS >>>(dist,Den,FluxDiffusive,FluxAdvective,FluxElectrical,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_Init<<<NBLOCKS,NTHREADS >>>(dist,Den,DenInit,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Ion_Init: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_Init_FromFile<<<NBLOCKS,NTHREADS >>>(dist,Den,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Ion_Init_FromFile: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<<NBLOCKS,NTHREADS >>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Ion_ChargeDensity: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}

731
hip/Ion.hip Normal file
View File

@ -0,0 +1,731 @@
#include <stdio.h>
#include <math.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 256
extern "C" void Membrane_D3Q19_Unpack(int q, int *list, int *links, int start, int linkCount,
double *recvbuf, double *dist, int N) {
//....................................................................................
// Unack distribution from the recv buffer
// Distribution q matche Cqx, Cqy, Cqz
// swap rule means that the distributions in recvbuf are OPPOSITE of q
// dist may be even or odd distributions stored by stream layout
//....................................................................................
int n, idx, link;
for (link=0; link<linkCount; link++){
idx = links[start+link];
// Get the value from the list -- note that n is the index is from the send (non-local) process
n = list[start + idx];
// unpack the distribution to the proper location
if (!(n < 0))
dist[q * N + n] = recvbuf[start + idx];
}
}
extern "C" void Membrane_D3Q19_Transport(int q, int *list, int *links, double *coef, int start, int offset,
int linkCount, double *recvbuf, double *dist, int N){
//....................................................................................
// Unack distribution from the recv buffer
// Distribution q matche Cqx, Cqy, Cqz
// swap rule means that the distributions in recvbuf are OPPOSITE of q
// dist may be even or odd distributions stored by stream layout
//....................................................................................
int n, idx, link;
double alpha;
for (link=offset; link<linkCount; link++){
idx = list[start+link];
// Get the value from the list -- note that n is the index is from the send (non-local) process
n = list[start + idx];
alpha = coef[start + idx];
// unpack the distribution to the proper location
if (!(n < 0))
dist[q * N + n] = alpha*recvbuf[start + idx];
}
}
__global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int memLinks, int Nx, int Ny, int Nz, int Np){
int link,iq,ip,nq,np,nqm,npm;
double aq, ap, membranePotential;
/* Interior Links */
int S = memLinks/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (link < memLinks) {
// inside //outside
aq = MassFractionIn; ap = MassFractionOut;
iq = membrane[2*link]; ip = membrane[2*link+1];
nq = iq%Np; np = ip%Np;
nqm = Map[nq]; npm = Map[np]; // strided layout
/* membrane potential for this link */
membranePotential = Psi[nqm] - Psi[npm];
if (membranePotential > Threshold){
aq = ThresholdMassFractionIn; ap = ThresholdMassFractionOut;
}
/* Save the mass transfer coefficients */
coef[2*link] = aq; coef[2*link+1] = ap;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
const int Cqx, const int Cqy, int const Cqz,
int *Map, double *Distance, double *Psi, double Threshold,
double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
const int N, const int Nx, const int Ny, const int Nz) {
//....................................................................................
// Unack distribution from the recv buffer
// Distribution q matche Cqx, Cqy, Cqz
// swap rule means that the distributions in recvbuf are OPPOSITE of q
// dist may be even or odd distributions stored by stream layout
//....................................................................................
int n, idx, link, nqm, npm, i, j, k;
double distanceLocal, distanceNonlocal;
double psiLocal, psiNonlocal, membranePotential;
double ap,aq; // coefficient
/* second enforce custom rule for membrane links */
int S = (count-nlinks)/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + nlinks;
if (link < count) {
// get the index for the recv list (deal with reordering of links)
idx = d3q7_linkList[link]; // THINK start NEEDS TO BE HERE
// get the distribution index
n = d3q7_recvlist[start+idx];
// get the index in strided layout
nqm = Map[n];
distanceLocal = Distance[nqm];
psiLocal = Psi[nqm];
// Get the 3-D indices from the send process
k = nqm/(Nx*Ny); j = (nqm-Nx*Ny*k)/Nx; i = nqm-Nx*Ny*k-Nx*j;
// Streaming link the non-local distribution
i -= Cqx; j -= Cqy; k -= Cqz;
npm = k*Nx*Ny + j*Nx + i;
distanceNonlocal = Distance[npm];
psiNonlocal = Psi[npm];
membranePotential = psiLocal - psiNonlocal;
aq = MassFractionIn;
ap = MassFractionOut;
/* link is inside membrane */
if (distanceLocal > 0.0){
if (membranePotential < Threshold*(-1.0)){
ap = MassFractionIn;
aq = MassFractionOut;
}
else {
ap = ThresholdMassFractionIn;
aq = ThresholdMassFractionOut;
}
}
else if (membranePotential > Threshold){
aq = ThresholdMassFractionIn;
ap = ThresholdMassFractionOut;
}
// update link based on mass transfer coefficients
coef[2*(link-nlinks)] = aq;
coef[2*(link-nlinks)+1] = ap;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Membrane_Unpack(int q,
int *d3q7_recvlist, int *d3q7_linkList, int start, int nlinks, int count,
double *recvbuf, double *dist, int N, double *coef) {
//....................................................................................
// Unack distribution from the recv buffer
// Distribution q matche Cqx, Cqy, Cqz
// swap rule means that the distributions in recvbuf are OPPOSITE of q
// dist may be even or odd distributions stored by stream layout
//....................................................................................
int n, idx, link;
double fq,fp,fqq,ap,aq; // coefficient
/* second enforce custom rule for membrane links */
int S = count/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
/* First unpack the regular links */
if (link < nlinks) {
// get the index for the recv list (deal with reordering of links)
idx = d3q7_linkList[start+link];
// get the distribution index
n = d3q7_recvlist[start+idx];
if (!(n < 0)){
fp = recvbuf[start + idx];
dist[q * N + n] = fp;
}
}
// else if (link < count){
// /* second enforce custom rule for membrane links */
// // get the index for the recv list (deal with reordering of links)
// idx = d3q7_linkList[link];
// // get the distribution index
// n = d3q7_recvlist[start+idx];
// // update link based on mass transfer coefficients
// if (!(n < 0)){
// aq = coef[2*(link-nlinks)];
// ap = coef[2*(link-nlinks)+1];
// fq = dist[q * N + n];
// fp = recvbuf[start + idx];
// fqq = (1-aq)*fq+ap*fp;
// dist[q * N + n] = fqq;
// }
// }
}
}
__global__ void dvc_ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef,
double *dist, double *Den, int memLinks, int Np){
int link,iq,ip,nq,np;
double aq, ap, fq, fp, fqq, fpp, Cq, Cp;
int S = memLinks/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (link < memLinks){
// inside //outside
aq = coef[2*link]; ap = coef[2*link+1];
iq = membrane[2*link]; ip = membrane[2*link+1];
nq = iq%Np; np = ip%Np;
fq = dist[iq]; fp = dist[ip];
fqq = (1-aq)*fq+ap*fp; fpp = (1-ap)*fp+aq*fq;
Cq = Den[nq]; Cp = Den[np];
Cq += fqq - fq; Cp += fpp - fp;
Den[nq] = Cq; Den[np] = Cp;
dist[iq] = fqq; dist[ip] = fpp;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
int n,nread;
double fq,Ci;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
// q=0
fq = dist[n];
Ci = fq;
// q=1
nread = neighborList[n];
fq = dist[nread];
Ci += fq;
// q=2
nread = neighborList[n+Np];
fq = dist[nread];
Ci += fq;
// q=3
nread = neighborList[n+2*Np];
fq = dist[nread];
Ci += fq;
// q=4
nread = neighborList[n+3*Np];
fq = dist[nread];
Ci += fq;
// q=5
nread = neighborList[n+4*Np];
fq = dist[nread];
Ci += fq;
// q=6
nread = neighborList[n+5*Np];
fq = dist[nread];
Ci += fq;
Den[n]=Ci;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
int n;
double fq,Ci;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
// q=0
fq = dist[n];
Ci = fq;
// q=1
fq = dist[2*Np+n];
Ci += fq;
// q=2
fq = dist[1*Np+n];
Ci += fq;
// q=3
fq = dist[4*Np+n];
Ci += fq;
// q=4
fq = dist[3*Np+n];
Ci += fq;
// q=5
fq = dist[6*Np+n];
Ci += fq;
// q=6
fq = dist[5*Np+n];
Ci += fq;
Den[n]=Ci;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
int n;
double Ci;
double ux,uy,uz;
double uEPx,uEPy,uEPz;//electrochemical induced velocity
double Ex,Ey,Ez;//electrical field
double flux_diffusive_x,flux_diffusive_y,flux_diffusive_z;
double f0,f1,f2,f3,f4,f5,f6;
double X,Y,Z,factor_x,factor_y,factor_z;
int nr1,nr2,nr3,nr4,nr5,nr6;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
Ex = ElectricField[n + 0 * Np];
Ey = ElectricField[n + 1 * Np];
Ez = ElectricField[n + 2 * Np];
ux = Velocity[n + 0 * Np];
uy = Velocity[n + 1 * Np];
uz = Velocity[n + 2 * Np];
uEPx = zi * Di / Vt * Ex;
uEPy = zi * Di / Vt * Ey;
uEPz = zi * Di / Vt * Ez;
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
// q=2
nr2 = neighborList[n + Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n + 2 * Np]; // neighbor 4
f3 = dist[nr3];
// q=4
nr4 = neighborList[n + 3 * Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n + 4 * Np];
f5 = dist[nr5];
// q=6
nr6 = neighborList[n + 5 * Np];
f6 = dist[nr6];
// compute diffusive flux
Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
FluxDiffusive[n + 0 * Np] = flux_diffusive_x;
FluxDiffusive[n + 1 * Np] = flux_diffusive_y;
FluxDiffusive[n + 2 * Np] = flux_diffusive_z;
FluxAdvective[n + 0 * Np] = ux * Ci;
FluxAdvective[n + 1 * Np] = uy * Ci;
FluxAdvective[n + 2 * Np] = uz * Ci;
FluxElectrical[n + 0 * Np] = uEPx * Ci;
FluxElectrical[n + 1 * Np] = uEPy * Ci;
FluxElectrical[n + 2 * Np] = uEPz * Ci;
Den[n] = Ci;
/* use logistic function to prevent negative distributions*/
X = 4.0 * (ux + uEPx);
Y = 4.0 * (uy + uEPy);
Z = 4.0 * (uz + uEPz);
factor_x = X / sqrt(1 + X*X);
factor_y = Y / sqrt(1 + Y*Y);
factor_z = Z / sqrt(1 + Z*Z);
// q=0
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
// q = 1
dist[nr2] =
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
//f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
// q=2
dist[nr1] =
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
//f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
// q = 3
dist[nr4] =
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y );
//f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
// q = 4
dist[nr3] =
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
//f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
// q = 5
dist[nr6] =
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
//f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
// q = 6
dist[nr5] =
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
int n;
double Ci;
double ux,uy,uz;
double uEPx,uEPy,uEPz;//electrochemical induced velocity
double Ex,Ey,Ez;//electrical field
double flux_diffusive_x,flux_diffusive_y,flux_diffusive_z;
double f0,f1,f2,f3,f4,f5,f6;
double X,Y,Z,factor_x,factor_y,factor_z;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
//Ci = Den[n];
Ex = ElectricField[n + 0 * Np];
Ey = ElectricField[n + 1 * Np];
Ez = ElectricField[n + 2 * Np];
ux = Velocity[n + 0 * Np];
uy = Velocity[n + 1 * Np];
uz = Velocity[n + 2 * Np];
uEPx = zi * Di / Vt * Ex;
uEPy = zi * Di / Vt * Ey;
uEPz = zi * Di / Vt * Ez;
f0 = dist[n];
f1 = dist[2 * Np + n];
f2 = dist[1 * Np + n];
f3 = dist[4 * Np + n];
f4 = dist[3 * Np + n];
f5 = dist[6 * Np + n];
f6 = dist[5 * Np + n];
// compute diffusive flux
Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
FluxDiffusive[n + 0 * Np] = flux_diffusive_x;
FluxDiffusive[n + 1 * Np] = flux_diffusive_y;
FluxDiffusive[n + 2 * Np] = flux_diffusive_z;
FluxAdvective[n + 0 * Np] = ux * Ci;
FluxAdvective[n + 1 * Np] = uy * Ci;
FluxAdvective[n + 2 * Np] = uz * Ci;
FluxElectrical[n + 0 * Np] = uEPx * Ci;
FluxElectrical[n + 1 * Np] = uEPy * Ci;
FluxElectrical[n + 2 * Np] = uEPz * Ci;
Den[n] = Ci;
/* use logistic function to prevent negative distributions*/
X = 4.0 * (ux + uEPx);
Y = 4.0 * (uy + uEPy);
Z = 4.0 * (uz + uEPz);
factor_x = X / sqrt(1 + X*X);
factor_y = Y / sqrt(1 + Y*Y);
factor_z = Z / sqrt(1 + Z*Z);
// q=0
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
// q = 1
dist[1 * Np + n] =
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
//f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
// q=2
dist[2 * Np + n] =
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
//f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
// q = 3
dist[3 * Np + n] =
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y);
//f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
// q = 4
dist[4 * Np + n] =
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
//f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
// q = 5
dist[5 * Np + n] =
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
//f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
// q = 6
dist[6 * Np + n] =
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
//f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
int n;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (n<Np) {
dist[0*Np+n] = 0.25*DenInit;
dist[1*Np+n] = 0.125*DenInit;
dist[2*Np+n] = 0.125*DenInit;
dist[3*Np+n] = 0.125*DenInit;
dist[4*Np+n] = 0.125*DenInit;
dist[5*Np+n] = 0.125*DenInit;
dist[6*Np+n] = 0.125*DenInit;
Den[n] = DenInit;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
int n;
double DenInit;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (n<Np) {
DenInit = Den[n];
dist[0*Np+n] = 0.25*DenInit;
dist[1*Np+n] = 0.125*DenInit;
dist[2*Np+n] = 0.125*DenInit;
dist[3*Np+n] = 0.125*DenInit;
dist[4*Np+n] = 0.125*DenInit;
dist[5*Np+n] = 0.125*DenInit;
dist[6*Np+n] = 0.125*DenInit;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, double IonValence, int ion_component, int start, int finish, int Np){
int n;
double Ci;//ion concentration of species i
double CD;//charge density
double CD_tmp;
double F = 96485.0;//Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
Ci = Den[n+ion_component*Np];
CD = ChargeDensity[n];
if (ion_component == 0) CD=0.0;
CD_tmp = F*IonValence*Ci;
ChargeDensity[n] = CD + CD_tmp;
}
}
}
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_IonConcentration<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_IonConcentration: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_IonConcentration<<<NBLOCKS,NTHREADS >>>(dist,Den,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_IonConcentration: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Ion<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,FluxDiffusive,FluxAdvective,FluxElectrical,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Ion<<<NBLOCKS,NTHREADS >>>(dist,Den,FluxDiffusive,FluxAdvective,FluxElectrical,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_Init<<<NBLOCKS,NTHREADS >>>(dist,Den,DenInit,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Ion_Init: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_Init_FromFile<<<NBLOCKS,NTHREADS >>>(dist,Den,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Ion_Init_FromFile: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, double IonValence, int ion_component, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<<NBLOCKS,NTHREADS >>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Ion_ChargeDensity: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int memLinks, int Nx, int Ny, int Nz, int Np){
dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef<<<NBLOCKS,NTHREADS >>>(membrane, Map, Distance, Psi, coef,
Threshold, MassFractionIn, MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut,
memLinks, Nx, Ny, Nz, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
const int Cqx, const int Cqy, int const Cqz,
int *Map, double *Distance, double *Psi, double Threshold,
double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
const int N, const int Nx, const int Ny, const int Nz) {
dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo<<<NBLOCKS,NTHREADS >>>(
Cqx, Cqy, Cqz, Map, Distance, Psi, Threshold,
MassFractionIn, MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut,
d3q7_recvlist, d3q7_linkList, coef, start, nlinks, count, N, Nx, Ny, Nz);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q,
int *d3q7_recvlist, int *d3q7_linkList, int start, int nlinks, int count,
double *recvbuf, double *dist, int N, double *coef) {
int GRID = count / 1024 + 1;
dvc_ScaLBL_D3Q7_Membrane_Unpack<<<GRID,1024 >>>(q, d3q7_recvlist, d3q7_linkList, start, nlinks, count,
recvbuf, dist, N, coef) ;
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_Unpack: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef,
double *dist, double *Den, int memLinks, int Np){
dvc_ScaLBL_D3Q7_Membrane_IonTransport<<<NBLOCKS,NTHREADS >>>(membrane, coef, dist, Den, memLinks, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_IonTransport: %s \n",hipGetErrorString(err));
}
}

View File

@ -19,8 +19,8 @@
//*************************************************************************
#include "hip/hip_runtime.h"
#define NBLOCKS 560
#define NTHREADS 128
#define NBLOCKS 1024
#define NTHREADS 256
__global__ void INITIALIZE(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
{
@ -122,8 +122,7 @@ __global__ void Compute_VELOCITY(char *ID, double *disteven, double *distodd, do
//*************************************************************************
__global__ void
__launch_bounds__(512,2)
D3Q19_MRT(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz,
__launch_bounds__(256,4) D3Q19_MRT(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz,
double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz)
{

View File

@ -65,13 +65,11 @@ __global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gr
extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz)
{
hipProfilerStart();
dvc_ScaLBL_D3Q19_MixedGradient<<<NBLOCKS,NTHREADS >>>(Map, Phi, Gradient, start, finish, Np, Nx, Ny, Nz);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q19_MixedGradient: %s \n",hipGetErrorString(err));
}
hipProfilerStop();
}

View File

@ -1,332 +0,0 @@
#include <stdio.h>
#include <math.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 256
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
int n;
double psi;//electric potential
double fq;
int nread;
int idx;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
// q=0
fq = dist[n];
psi = fq;
// q=1
nread = neighborList[n];
fq = dist[nread];
psi += fq;
// q=2
nread = neighborList[n+Np];
fq = dist[nread];
psi += fq;
// q=3
nread = neighborList[n+2*Np];
fq = dist[nread];
psi += fq;
// q = 4
nread = neighborList[n+3*Np];
fq = dist[nread];
psi += fq;
// q=5
nread = neighborList[n+4*Np];
fq = dist[nread];
psi += fq;
// q = 6
nread = neighborList[n+5*Np];
fq = dist[nread];
psi += fq;
idx=Map[n];
Psi[idx] = psi;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
int n;
double psi;//electric potential
double fq;
int idx;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
// q=0
fq = dist[n];
psi = fq;
// q=1
fq = dist[2*Np+n];
psi += fq;
// q=2
fq = dist[1*Np+n];
psi += fq;
// q=3
fq = dist[4*Np+n];
psi += fq;
// q=4
fq = dist[3*Np+n];
psi += fq;
// q=5
fq = dist[6*Np+n];
psi += fq;
// q=6
fq = dist[5*Np+n];
psi += fq;
idx=Map[n];
Psi[idx] = psi;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
int n;
double psi;//electric potential
double Ex,Ey,Ez;//electric field
double rho_e;//local charge density
double f0,f1,f2,f3,f4,f5,f6;
int nr1,nr2,nr3,nr4,nr5,nr6;
double rlx=1.0/tau;
int idx;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
//and thus the net space charge density is zero.
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
idx=Map[n];
psi = Psi[idx];
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n+2*Np]; // neighbor 4
f3 = dist[nr3];
// q = 4
nr4 = neighborList[n+3*Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n+4*Np];
f5 = dist[nr5];
// q = 6
nr6 = neighborList[n+5*Np];
f6 = dist[nr6];
Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu
Ey = (f3-f4)*rlx*4.0;//factor 4.0 is D3Q7 lattice speed of sound
Ez = (f5-f6)*rlx*4.0;
ElectricField[n+0*Np] = Ex;
ElectricField[n+1*Np] = Ey;
ElectricField[n+2*Np] = Ez;
// q = 0
dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e);
// q = 1
dist[nr2] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 2
dist[nr1] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 3
dist[nr4] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 4
dist[nr3] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 5
dist[nr6] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 6
dist[nr5] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
//........................................................................
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
int n;
double psi;//electric potential
double Ex,Ey,Ez;//electric field
double rho_e;//local charge density
double f0,f1,f2,f3,f4,f5,f6;
double rlx=1.0/tau;
int idx;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
//and thus the net space charge density is zero.
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
idx=Map[n];
psi = Psi[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
f6 = dist[5*Np+n];
Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu
Ey = (f3-f4)*rlx*4.0;//factor 4.0 is D3Q7 lattice speed of sound
Ez = (f5-f6)*rlx*4.0;
ElectricField[n+0*Np] = Ex;
ElectricField[n+1*Np] = Ey;
ElectricField[n+2*Np] = Ez;
// q = 0
dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e);
// q = 1
dist[1*Np+n] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 2
dist[2*Np+n] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 3
dist[3*Np+n] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 4
dist[4*Np+n] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 5
dist[5*Np+n] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 6
dist[6*Np+n] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
//........................................................................
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
int n;
int ijk;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
ijk = Map[n];
dist[0*Np+n] = 0.25*Psi[ijk];
dist[1*Np+n] = 0.125*Psi[ijk];
dist[2*Np+n] = 0.125*Psi[ijk];
dist[3*Np+n] = 0.125*Psi[ijk];
dist[4*Np+n] = 0.125*Psi[ijk];
dist[5*Np+n] = 0.125*Psi[ijk];
dist[6*Np+n] = 0.125*Psi[ijk];
}
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Psi,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,UseSlippingVelBC,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>(Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,UseSlippingVelBC,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Poisson_Init: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}

705
hip/Poisson.hip Normal file
View File

@ -0,0 +1,705 @@
#include <stdio.h>
#include <math.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 256
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
int n;
double psi;//electric potential
double fq;
int nread;
int idx;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
// q=0
fq = dist[n];
psi = fq;
// q=1
nread = neighborList[n];
fq = dist[nread];
psi += fq;
// q=2
nread = neighborList[n+Np];
fq = dist[nread];
psi += fq;
// q=3
nread = neighborList[n+2*Np];
fq = dist[nread];
psi += fq;
// q = 4
nread = neighborList[n+3*Np];
fq = dist[nread];
psi += fq;
// q=5
nread = neighborList[n+4*Np];
fq = dist[nread];
psi += fq;
// q = 6
nread = neighborList[n+5*Np];
fq = dist[nread];
psi += fq;
idx=Map[n];
Psi[idx] = psi;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
int n;
double psi;//electric potential
double fq;
int idx;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
// q=0
fq = dist[n];
psi = fq;
// q=1
fq = dist[2*Np+n];
psi += fq;
// q=2
fq = dist[1*Np+n];
psi += fq;
// q=3
fq = dist[4*Np+n];
psi += fq;
// q=4
fq = dist[3*Np+n];
psi += fq;
// q=5
fq = dist[6*Np+n];
psi += fq;
// q=6
fq = dist[5*Np+n];
psi += fq;
idx=Map[n];
Psi[idx] = psi;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
int n;
double psi;//electric potential
double Ex,Ey,Ez;//electric field
double rho_e;//local charge density
double f0,f1,f2,f3,f4,f5,f6;
int nr1,nr2,nr3,nr4,nr5,nr6;
double rlx=1.0/tau;
int idx;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
//and thus the net space charge density is zero.
//rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
rho_e = Den_charge[n] / epsilon_LB;
idx=Map[n];
psi = Psi[idx];
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n+2*Np]; // neighbor 4
f3 = dist[nr3];
// q = 4
nr4 = neighborList[n+3*Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n+4*Np];
f5 = dist[nr5];
// q = 6
nr6 = neighborList[n+5*Np];
f6 = dist[nr6];
Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu
Ey = (f3-f4)*rlx*4.0;//factor 4.0 is D3Q7 lattice speed of sound
Ez = (f5-f6)*rlx*4.0;
ElectricField[n+0*Np] = Ex;
ElectricField[n+1*Np] = Ey;
ElectricField[n+2*Np] = Ez;
// q = 0
dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e);
// q = 1
dist[nr2] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 2
dist[nr1] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 3
dist[nr4] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 4
dist[nr3] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 5
dist[nr6] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 6
dist[nr5] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
//........................................................................
}
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
int n;
double psi;//electric potential
double Ex,Ey,Ez;//electric field
double rho_e;//local charge density
double f0,f1,f2,f3,f4,f5,f6;
double rlx=1.0/tau;
int idx;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
//and thus the net space charge density is zero.
rho_e = Den_charge[n] / epsilon_LB;
// rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
idx=Map[n];
psi = Psi[idx];
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
f6 = dist[5*Np+n];
Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu
Ey = (f3-f4)*rlx*4.0;//factor 4.0 is D3Q7 lattice speed of sound
Ez = (f5-f6)*rlx*4.0;
ElectricField[n+0*Np] = Ex;
ElectricField[n+1*Np] = Ey;
ElectricField[n+2*Np] = Ez;
// q = 0
dist[n] = f0*(1.0-rlx) + 0.25*(rlx*psi+rho_e);
// q = 1
dist[1*Np+n] = f1*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 2
dist[2*Np+n] = f2*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 3
dist[3*Np+n] = f3*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 4
dist[4*Np+n] = f4*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 5
dist[5*Np+n] = f5*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
// q = 6
dist[6*Np+n] = f6*(1.0-rlx) + 0.125*(rlx*psi+rho_e);
//........................................................................
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
int n;
int ijk;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
ijk = Map[n];
dist[0*Np+n] = 0.25*Psi[ijk];
dist[1*Np+n] = 0.125*Psi[ijk];
dist[2*Np+n] = 0.125*Psi[ijk];
dist[3*Np+n] = 0.125*Psi[ijk];
dist[4*Np+n] = 0.125*Psi[ijk];
dist[5*Np+n] = 0.125*Psi[ijk];
dist[6*Np+n] = 0.125*Psi[ijk];
}
}
}
__global__ void dvc_ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
double *dist, double *Den_charge,
double *Psi, double *ElectricField,
double tau, double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
int n;
double psi; //electric potential
double Ex, Ey, Ez; //electric field
double rho_e; //local charge density
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
f16, f17, f18;
int nr1, nr2, nr3, nr4, nr5, nr6, nr7, nr8, nr9, nr10, nr11, nr12, nr13,
nr14, nr15, nr16, nr17, nr18;
double error,sum_q;
double rlx = 1.0 / tau;
int idx;
double W0 = 0.5;
double W1 = 1.0/24.0;
double W2 = 1.0/48.0;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
//and thus the net space charge density is zero.
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
nr2 = neighborList[n + Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n + 2 * Np]; // neighbor 4
f3 = dist[nr3];
// q = 4
nr4 = neighborList[n + 3 * Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n + 4 * Np];
f5 = dist[nr5];
// q = 6
nr6 = neighborList[n + 5 * Np];
f6 = dist[nr6];
// q=7
nr7 = neighborList[n + 6 * Np];
f7 = dist[nr7];
// q = 8
nr8 = neighborList[n + 7 * Np];
f8 = dist[nr8];
// q=9
nr9 = neighborList[n + 8 * Np];
f9 = dist[nr9];
// q = 10
nr10 = neighborList[n + 9 * Np];
f10 = dist[nr10];
// q=11
nr11 = neighborList[n + 10 * Np];
f11 = dist[nr11];
// q=12
nr12 = neighborList[n + 11 * Np];
f12 = dist[nr12];
// q=13
nr13 = neighborList[n + 12 * Np];
f13 = dist[nr13];
// q=14
nr14 = neighborList[n + 13 * Np];
f14 = dist[nr14];
// q=15
nr15 = neighborList[n + 14 * Np];
f15 = dist[nr15];
// q=16
nr16 = neighborList[n + 15 * Np];
f16 = dist[nr16];
// q=17
//fq = dist[18*Np+n];
nr17 = neighborList[n + 16 * Np];
f17 = dist[nr17];
// q=18
nr18 = neighborList[n + 17 * Np];
f18 = dist[nr18];
sum_q = f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15+f16+f17+f18;
error = 8.0*(sum_q - f0) + rho_e;
psi = 2.0*(f0*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));
idx = Map[n];
Psi[idx] = psi;
Ex = (f1 - f2 + 0.5*(f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14))*4.0; //NOTE the unit of electric field here is V/lu
Ey = (f3 - f4 + 0.5*(f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18))*4.0;
Ez = (f5 - f6 + 0.5*(f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18))*4.0;
ElectricField[n + 0 * Np] = Ex;
ElectricField[n + 1 * Np] = Ey;
ElectricField[n + 2 * Np] = Ez;
// q = 0
dist[n] = W0*psi; //f0 * (1.0 - rlx) - (1.0-0.5*rlx)*W0*rho_e;
// q = 1
dist[nr2] = W1*psi; //f1 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 2
dist[nr1] = W1*psi; //f2 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 3
dist[nr4] = W1*psi; //f3 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 4
dist[nr3] = W1*psi; //f4 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 5
dist[nr6] = W1*psi; //f5 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 6
dist[nr5] = W1*psi; //f6 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
//........................................................................
// q = 7
dist[nr8] = W2*psi; //f7 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 8
dist[nr7] = W2*psi; //f8 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 9
dist[nr10] = W2*psi; //f9 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 10
dist[nr9] = W2*psi; //f10 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 11
dist[nr12] = W2*psi; //f11 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 12
dist[nr11] = W2*psi; //f12 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 13
dist[nr14] = W2*psi; //f13 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q= 14
dist[nr13] = W2*psi; //f14 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 15
dist[nr16] = W2*psi; //f15 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 16
dist[nr15] = W2*psi; //f16 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 17
dist[nr18] = W2*psi; //f17 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 18
dist[nr17] = W2*psi;
}
}
}
__global__ void dvc_ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
double *Den_charge, double *Psi,
double *ElectricField, double *Error, double tau,
double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
int n;
double psi; //electric potential
double Ex, Ey, Ez; //electric field
double rho_e; //local charge density
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
f16, f17, f18;
double error,sum_q;
double rlx = 1.0 / tau;
int idx;
double W0 = 0.5;
double W1 = 1.0/24.0;
double W2 = 1.0/48.0;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
//and thus the net space charge density is zero.
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
f0 = dist[n];
f1 = dist[2 * Np + n];
f2 = dist[1 * Np + n];
f3 = dist[4 * Np + n];
f4 = dist[3 * Np + n];
f5 = dist[6 * Np + n];
f6 = dist[5 * Np + n];
f7 = dist[8 * Np + n];
f8 = dist[7 * Np + n];
f9 = dist[10 * Np + n];
f10 = dist[9 * Np + n];
f11 = dist[12 * Np + n];
f12 = dist[11 * Np + n];
f13 = dist[14 * Np + n];
f14 = dist[13 * Np + n];
f15 = dist[16 * Np + n];
f16 = dist[15 * Np + n];
f17 = dist[18 * Np + n];
f18 = dist[17 * Np + n];
Ex = (f1 - f2 + 0.5*(f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14))*4.0; //NOTE the unit of electric field here is V/lu
Ey = (f3 - f4 + 0.5*(f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18))*4.0;
Ez = (f5 - f6 + 0.5*(f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18))*4.0;
ElectricField[n + 0 * Np] = Ex;
ElectricField[n + 1 * Np] = Ey;
ElectricField[n + 2 * Np] = Ez;
sum_q = f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15+f16+f17+f18;
error = 8.0*(sum_q - f0) + rho_e;
psi = 2.0*(f0*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));
idx = Map[n];
Psi[idx] = psi;
// q = 0
dist[n] = W0*psi;//
// q = 1
dist[1 * Np + n] = W1*psi;//f1 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 2
dist[2 * Np + n] = W1*psi;//f2 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 3
dist[3 * Np + n] = W1*psi;//f3 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 4
dist[4 * Np + n] = W1*psi;//f4 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 5
dist[5 * Np + n] = W1*psi;//f5 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 6
dist[6 * Np + n] = W1*psi;//f6 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
dist[7 * Np + n] = W2*psi;//f7 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[8 * Np + n] = W2*psi;//f8* (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[9 * Np + n] = W2*psi;//f9 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[10 * Np + n] = W2*psi;//f10 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[11 * Np + n] = W2*psi;//f11 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[12 * Np + n] = W2*psi;//f12 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[13 * Np + n] = W2*psi;//f13 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[14 * Np + n] = W2*psi;//f14 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[15 * Np + n] = W2*psi;//f15 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[16 * Np + n] = W2*psi;//f16 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[17 * Np + n] = W2*psi;//f17 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[18 * Np + n] = W2*psi;//f18 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
//........................................................................
}
}
}
__global__ void dvc_ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
int start, int finish, int Np) {
int n;
int ijk;
double W0 = 0.5;
double W1 = 1.0/24.0;
double W2 = 1.0/48.0;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
ijk = Map[n];
dist[0 * Np + n] = W0 * Psi[ijk];//3333333333333333* Psi[ijk];
dist[1 * Np + n] = W1 * Psi[ijk];
dist[2 * Np + n] = W1 * Psi[ijk];
dist[3 * Np + n] = W1 * Psi[ijk];
dist[4 * Np + n] = W1 * Psi[ijk];
dist[5 * Np + n] = W1 * Psi[ijk];
dist[6 * Np + n] = W1 * Psi[ijk];
dist[7 * Np + n] = W2* Psi[ijk];
dist[8 * Np + n] = W2* Psi[ijk];
dist[9 * Np + n] = W2* Psi[ijk];
dist[10 * Np + n] = W2* Psi[ijk];
dist[11 * Np + n] = W2* Psi[ijk];
dist[12 * Np + n] = W2* Psi[ijk];
dist[13 * Np + n] = W2* Psi[ijk];
dist[14 * Np + n] = W2* Psi[ijk];
dist[15 * Np + n] = W2* Psi[ijk];
dist[16 * Np + n] = W2* Psi[ijk];
dist[17 * Np + n] = W2* Psi[ijk];
dist[18 * Np + n] = W2* Psi[ijk];
}
}
}
extern "C" void ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
double *dist, double *Den_charge,
double *Psi, double *ElectricField,
double tau, double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
hipFuncSetCacheConfig( (void*) dvc_ScaLBL_D3Q19_AAodd_Poisson, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q19_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList, Map,
dist, Den_charge, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, start, finish, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("Hip error in dvc_ScaLBL_D3Q19_AAodd_Poisson: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
double *Den_charge, double *Psi,
double *ElectricField, double *Error, double tau,
double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
hipFuncSetCacheConfig( (void*) dvc_ScaLBL_D3Q19_AAeven_Poisson, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q19_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>( Map, dist, Den_charge, Psi,
ElectricField, Error, tau, epsilon_LB, UseSlippingVelBC, start, finish, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("Hip error in dvc_ScaLBL_D3Q19_AAeven_Poisson: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
int start, int finish, int Np){
//hipProfilerStart();
dvc_ScaLBL_D3Q19_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map, dist, Psi, start, finish, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("Hip error in ScaLBL_D3Q19_Poisson_Init: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
//hipProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Psi,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
}
//hipProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
//hipProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
}
//hipProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
//hipProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,UseSlippingVelBC,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson: %s \n",hipGetErrorString(err));
}
//hipProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
//hipProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>(Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,UseSlippingVelBC,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson: %s \n",hipGetErrorString(err));
}
//hipProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
//hipProfilerStart();
dvc_ScaLBL_D3Q7_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Poisson_Init: %s \n",hipGetErrorString(err));
}
//hipProfilerStop();
}

538
models/BGKModel.cpp Normal file
View File

@ -0,0 +1,538 @@
/*
Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University
Copyright Equnior ASA
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Multi-relaxation time LBM Model
*/
#include "models/BGKModel.h"
#include "analysis/distance.h"
#include "common/ReadMicroCT.h"
ScaLBL_BGKModel::ScaLBL_BGKModel(int RANK, int NP, const Utilities::MPI &COMM)
: rank(RANK), nprocs(NP), Restart(0), timestep(0), timestepMax(0), tau(0),
Fx(0), Fy(0), Fz(0), flux(0), din(0), dout(0), mu(0), Nx(0), Ny(0), Nz(0),
N(0), Np(0), nprocx(0), nprocy(0), nprocz(0), BoundaryCondition(0), Lx(0),
Ly(0), Lz(0), comm(COMM) {}
ScaLBL_BGKModel::~ScaLBL_BGKModel() {}
void ScaLBL_BGKModel::ReadParams(string filename) {
// read the input database
db = std::make_shared<Database>(filename);
domain_db = db->getDatabase("Domain");
mrt_db = db->getDatabase("BGK");
vis_db = db->getDatabase("Visualization");
tau = 1.0;
timestepMax = 100000;
ANALYSIS_INTERVAL = 1000;
tolerance = 1.0e-8;
Fx = Fy = 0.0;
Fz = 1.0e-5;
dout = 1.0;
din = 1.0;
// Color Model parameters
if (mrt_db->keyExists("timestepMax")) {
timestepMax = mrt_db->getScalar<int>("timestepMax");
}
if (mrt_db->keyExists("analysis_interval")) {
ANALYSIS_INTERVAL = mrt_db->getScalar<int>("analysis_interval");
}
if (mrt_db->keyExists("tolerance")) {
tolerance = mrt_db->getScalar<double>("tolerance");
}
if (mrt_db->keyExists("tau")) {
tau = mrt_db->getScalar<double>("tau");
}
if (mrt_db->keyExists("F")) {
Fx = mrt_db->getVector<double>("F")[0];
Fy = mrt_db->getVector<double>("F")[1];
Fz = mrt_db->getVector<double>("F")[2];
}
if (mrt_db->keyExists("Restart")) {
Restart = mrt_db->getScalar<bool>("Restart");
}
if (mrt_db->keyExists("din")) {
din = mrt_db->getScalar<double>("din");
}
if (mrt_db->keyExists("dout")) {
dout = mrt_db->getScalar<double>("dout");
}
if (mrt_db->keyExists("flux")) {
flux = mrt_db->getScalar<double>("flux");
}
// Read domain parameters
if (mrt_db->keyExists("BoundaryCondition")) {
BoundaryCondition = mrt_db->getScalar<int>("BC");
} else if (domain_db->keyExists("BC")) {
BoundaryCondition = domain_db->getScalar<int>("BC");
}
mu = (tau - 0.5) / 3.0;
}
void ScaLBL_BGKModel::SetDomain() {
Dm = std::shared_ptr<Domain>(
new Domain(domain_db, comm)); // full domain for analysis
Mask = std::shared_ptr<Domain>(
new Domain(domain_db, comm)); // mask domain removes immobile phases
// domain parameters
Nx = Dm->Nx;
Ny = Dm->Ny;
Nz = Dm->Nz;
Lx = Dm->Lx;
Ly = Dm->Ly;
Lz = Dm->Lz;
N = Nx * Ny * Nz;
Distance.resize(Nx, Ny, Nz);
Velocity_x.resize(Nx, Ny, Nz);
Velocity_y.resize(Nx, Ny, Nz);
Velocity_z.resize(Nx, Ny, Nz);
for (int i = 0; i < Nx * Ny * Nz; i++)
Dm->id[i] = 1; // initialize this way
//Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
comm.barrier();
Dm->CommInit();
comm.barrier();
rank = Dm->rank();
nprocx = Dm->nprocx();
nprocy = Dm->nprocy();
nprocz = Dm->nprocz();
}
void ScaLBL_BGKModel::ReadInput() {
sprintf(LocalRankString, "%05d", Dm->rank());
sprintf(LocalRankFilename, "%s%s", "ID.", LocalRankString);
sprintf(LocalRestartFile, "%s%s", "Restart.", LocalRankString);
if (domain_db->keyExists("Filename")) {
auto Filename = domain_db->getScalar<std::string>("Filename");
Mask->Decomp(Filename);
} else if (domain_db->keyExists("GridFile")) {
// Read the local domain data
auto input_id = readMicroCT(*domain_db, comm);
// Fill the halo (assuming GCW of 1)
array<int, 3> size0 = {(int)input_id.size(0), (int)input_id.size(1),
(int)input_id.size(2)};
ArraySize size1 = {(size_t)Mask->Nx, (size_t)Mask->Ny,
(size_t)Mask->Nz};
ASSERT((int)size1[0] == size0[0] + 2 && (int)size1[1] == size0[1] + 2 &&
(int)size1[2] == size0[2] + 2);
fillHalo<signed char> fill(comm, Mask->rank_info, size0, {1, 1, 1}, 0,
1);
Array<signed char> id_view;
id_view.viewRaw(size1, Mask->id.data());
fill.copy(input_id, id_view);
fill.fill(id_view);
} else {
Mask->ReadIDs();
}
// Generate the signed distance map
// Initialize the domain and communication
Array<char> id_solid(Nx, Ny, Nz);
// Solve for the position of the solid phase
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
int n = k * Nx * Ny + j * Nx + i;
// Initialize the solid phase
if (Mask->id[n] > 0)
id_solid(i, j, k) = 1;
else
id_solid(i, j, k) = 0;
}
}
}
// Initialize the signed distance function
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
// Initialize distance to +/- 1
Distance(i, j, k) = 2.0 * double(id_solid(i, j, k)) - 1.0;
}
}
}
// MeanFilter(Averages->SDs);
if (rank == 0)
printf("Initialized solid phase -- Converting to Signed Distance "
"function \n");
CalcDist(Distance, id_solid, *Dm);
if (rank == 0)
cout << "Domain set." << endl;
}
void ScaLBL_BGKModel::Create() {
/*
* This function creates the variables needed to run a LBM
*/
int rank = Mask->rank();
//.........................................................
// Initialize communication structures in averaging domain
for (int i = 0; i < Nx * Ny * Nz; i++)
Dm->id[i] = Mask->id[i];
Mask->CommInit();
Np = Mask->PoreCount();
//...........................................................................
if (rank == 0)
printf("Create ScaLBL_Communicator \n");
// Create a communicator for the device (will use optimized layout)
// ScaLBL_Communicator ScaLBL_Comm(Mask); // original
ScaLBL_Comm =
std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));
int Npad = (Np / 16 + 2) * 16;
if (rank == 0)
printf("Set up memory efficient layout \n");
Map.resize(Nx, Ny, Nz);
Map.fill(-2);
auto neighborList = new int[18 * Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map, neighborList,
Mask->id.data(), Np, 1);
comm.barrier();
//...........................................................................
// MAIN VARIABLES ALLOCATED HERE
//...........................................................................
// LBM variables
if (rank == 0)
printf("Allocating distributions \n");
//......................device distributions.................................
int dist_mem_size = Np * sizeof(double);
int neighborSize = 18 * (Np * sizeof(int));
//...........................................................................
ScaLBL_AllocateDeviceMemory((void **)&NeighborList, neighborSize);
ScaLBL_AllocateDeviceMemory((void **)&fq, 19 * dist_mem_size);
ScaLBL_AllocateDeviceMemory((void **)&Pressure, sizeof(double) * Np);
ScaLBL_AllocateDeviceMemory((void **)&Velocity, 3 * sizeof(double) * Np);
//...........................................................................
// Update GPU data structures
if (rank == 0)
printf("Setting up device map and neighbor list \n");
// copy the neighbor list
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
comm.barrier();
double MLUPS = ScaLBL_Comm->GetPerformance(NeighborList, fq, Np);
printf(" MLPUS=%f from rank %i\n", MLUPS, rank);
}
void ScaLBL_BGKModel::Initialize() {
/*
* This function initializes model
*/
if (rank == 0)
printf("Initializing distributions \n");
ScaLBL_D3Q19_Init(fq, Np);
}
void ScaLBL_BGKModel::Run() {
double rlx = 1.0 / tau;
Minkowski Morphology(Mask);
if (rank == 0) {
bool WriteHeader = false;
FILE *log_file = fopen("Permeability.csv", "r");
if (log_file != NULL)
fclose(log_file);
else
WriteHeader = true;
if (WriteHeader) {
log_file = fopen("Permeability.csv", "a+");
fprintf(log_file, "time Fx Fy Fz mu Vs As Js Xs vx vy vz k\n");
fclose(log_file);
}
}
//.......create and start timer............
ScaLBL_DeviceBarrier();
comm.barrier();
if (rank == 0)
printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax);
if (rank == 0)
printf("********************************************************\n");
timestep = 0;
double error = 1.0;
double flow_rate_previous = 0.0;
auto t1 = std::chrono::system_clock::now();
while (timestep < timestepMax && error > tolerance) {
//************************************************************************/
/* timestep++;
ScaLBL_Comm.SendD3Q19AA(dist); //READ FROM NORMAL
ScaLBL_D3Q19_AAodd_BGK(NeighborList, dist, ScaLBL_Comm.first_interior, ScaLBL_Comm.last_interior, Np, rlx, Fx, Fy, Fz);
ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAodd_BGK(NeighborList, dist, 0, ScaLBL_Comm.next, Np, rlx, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
ScaLBL_Comm.SendD3Q19AA(dist); //READ FORM NORMAL
ScaLBL_D3Q19_AAeven_BGK(dist, ScaLBL_Comm.first_interior, ScaLBL_Comm.last_interior, Np, rlx, Fx, Fy, Fz);
ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAeven_BGK(dist, 0, ScaLBL_Comm.next, Np, rlx, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); MPI_Barrie
*/
timestep++;
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_D3Q19_AAodd_BGK(NeighborList, fq, ScaLBL_Comm->FirstInterior(),
ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
// Set boundary conditions
if (BoundaryCondition == 3) {
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
} else if (BoundaryCondition == 4) {
din =
ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
} else if (BoundaryCondition == 5) {
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
}
ScaLBL_D3Q19_AAodd_BGK(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(),
Np, rlx, Fx, Fy, Fz);
ScaLBL_DeviceBarrier();
comm.barrier();
timestep++;
ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL
ScaLBL_D3Q19_AAeven_BGK(fq, ScaLBL_Comm->FirstInterior(),
ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
// Set boundary conditions
if (BoundaryCondition == 3) {
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
} else if (BoundaryCondition == 4) {
din =
ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
} else if (BoundaryCondition == 5) {
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
}
ScaLBL_D3Q19_AAeven_BGK(fq, 0, ScaLBL_Comm->LastExterior(), Np,
rlx, Fx, Fy, Fz);
ScaLBL_DeviceBarrier();
comm.barrier();
//************************************************************************/
if (timestep % ANALYSIS_INTERVAL == 0) {
ScaLBL_D3Q19_Momentum(fq, Velocity, Np);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_Comm->RegularLayout(Map, &Velocity[0], Velocity_x);
ScaLBL_Comm->RegularLayout(Map, &Velocity[Np], Velocity_y);
ScaLBL_Comm->RegularLayout(Map, &Velocity[2 * Np], Velocity_z);
double count_loc = 0;
double count;
double vax, vay, vaz;
double vax_loc, vay_loc, vaz_loc;
vax_loc = vay_loc = vaz_loc = 0.f;
for (int k = 1; k < Nz - 1; k++) {
for (int j = 1; j < Ny - 1; j++) {
for (int i = 1; i < Nx - 1; i++) {
if (Distance(i, j, k) > 0) {
vax_loc += Velocity_x(i, j, k);
vay_loc += Velocity_y(i, j, k);
vaz_loc += Velocity_z(i, j, k);
count_loc += 1.0;
}
}
}
}
vax = Dm->Comm.sumReduce(vax_loc);
vay = Dm->Comm.sumReduce(vay_loc);
vaz = Dm->Comm.sumReduce(vaz_loc);
count = Dm->Comm.sumReduce(count_loc);
vax /= count;
vay /= count;
vaz /= count;
double force_mag = sqrt(Fx * Fx + Fy * Fy + Fz * Fz);
double dir_x = Fx / force_mag;
double dir_y = Fy / force_mag;
double dir_z = Fz / force_mag;
if (force_mag == 0.0) {
// default to z direction
dir_x = 0.0;
dir_y = 0.0;
dir_z = 1.0;
force_mag = 1.0;
}
double flow_rate = (vax * dir_x + vay * dir_y + vaz * dir_z);
error = fabs(flow_rate - flow_rate_previous) / fabs(flow_rate);
flow_rate_previous = flow_rate;
//if (rank==0) printf("Computing Minkowski functionals \n");
Morphology.ComputeScalar(Distance, 0.f);
//Morphology.PrintAll();
double mu = (tau - 0.5) / 3.f;
double Vs = Morphology.V();
double As = Morphology.A();
double Hs = Morphology.H();
double Xs = Morphology.X();
Vs = Dm->Comm.sumReduce(Vs);
As = Dm->Comm.sumReduce(As);
Hs = Dm->Comm.sumReduce(Hs);
Xs = Dm->Comm.sumReduce(Xs);
double h = Dm->voxel_length;
double absperm =
h * h * mu * Mask->Porosity() * flow_rate / force_mag;
if (rank == 0) {
printf(" %f\n", absperm);
FILE *log_file = fopen("Permeability.csv", "a");
fprintf(log_file,
"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g "
"%.8g %.8g\n",
timestep, Fx, Fy, Fz, mu, h * h * h * Vs, h * h * As,
h * Hs, Xs, vax, vay, vaz, absperm);
fclose(log_file);
}
}
}
//************************************************************************/
if (rank == 0)
printf("---------------------------------------------------------------"
"----\n");
// Compute the walltime per timestep
auto t2 = std::chrono::system_clock::now();
double cputime = std::chrono::duration<double>(t2 - t1).count() / timestep;
// Performance obtained from each node
double MLUPS = double(Np) / cputime / 1000000;
if (rank == 0)
printf("********************************************************\n");
if (rank == 0)
printf("CPU time = %f \n", cputime);
if (rank == 0)
printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
MLUPS *= nprocs;
if (rank == 0)
printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
if (rank == 0)
printf("********************************************************\n");
}
void ScaLBL_BGKModel::VelocityField() {
auto format = vis_db->getWithDefault<string>("format", "silo");
/* memcpy(Morphology.SDn.data(), Distance.data(), Nx*Ny*Nz*sizeof(double));
Morphology.Initialize();
Morphology.UpdateMeshValues();
Morphology.ComputeLocal();
Morphology.Reduce();
double count_loc=0;
double count;
double vax,vay,vaz;
double vax_loc,vay_loc,vaz_loc;
vax_loc = vay_loc = vaz_loc = 0.f;
for (int n=0; n<ScaLBL_Comm->LastExterior(); n++){
vax_loc += VELOCITY[n];
vay_loc += VELOCITY[Np+n];
vaz_loc += VELOCITY[2*Np+n];
count_loc+=1.0;
}
for (int n=ScaLBL_Comm->FirstInterior(); n<ScaLBL_Comm->LastInterior(); n++){
vax_loc += VELOCITY[n];
vay_loc += VELOCITY[Np+n];
vaz_loc += VELOCITY[2*Np+n];
count_loc+=1.0;
}
MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
vax /= count;
vay /= count;
vaz /= count;
double mu = (tau-0.5)/3.f;
if (rank==0) printf("Fx Fy Fz mu Vs As Js Xs vx vy vz\n");
if (rank==0) printf("%.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",Fx, Fy, Fz, mu,
Morphology.V(),Morphology.A(),Morphology.J(),Morphology.X(),vax,vay,vaz);
*/
vis_db = db->getDatabase("Visualization");
if (vis_db->getWithDefault<bool>("write_silo", false)) {
std::vector<IO::MeshDataStruct> visData;
fillHalo<double> fillData(Dm->Comm, Dm->rank_info,
{Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2},
{1, 1, 1}, 0, 1);
auto VxVar = std::make_shared<IO::Variable>();
auto VyVar = std::make_shared<IO::Variable>();
auto VzVar = std::make_shared<IO::Variable>();
auto SignDistVar = std::make_shared<IO::Variable>();
IO::initialize("", format, "false");
// Create the MeshDataStruct
visData.resize(1);
visData[0].meshName = "domain";
visData[0].mesh = std::make_shared<IO::DomainMesh>(
Dm->rank_info, Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2, Dm->Lx, Dm->Ly,
Dm->Lz);
SignDistVar->name = "SignDist";
SignDistVar->type = IO::VariableType::VolumeVariable;
SignDistVar->dim = 1;
SignDistVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
visData[0].vars.push_back(SignDistVar);
VxVar->name = "Velocity_x";
VxVar->type = IO::VariableType::VolumeVariable;
VxVar->dim = 1;
VxVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
visData[0].vars.push_back(VxVar);
VyVar->name = "Velocity_y";
VyVar->type = IO::VariableType::VolumeVariable;
VyVar->dim = 1;
VyVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
visData[0].vars.push_back(VyVar);
VzVar->name = "Velocity_z";
VzVar->type = IO::VariableType::VolumeVariable;
VzVar->dim = 1;
VzVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
visData[0].vars.push_back(VzVar);
Array<double> &SignData = visData[0].vars[0]->data;
Array<double> &VelxData = visData[0].vars[1]->data;
Array<double> &VelyData = visData[0].vars[2]->data;
Array<double> &VelzData = visData[0].vars[3]->data;
ASSERT(visData[0].vars[0]->name == "SignDist");
ASSERT(visData[0].vars[1]->name == "Velocity_x");
ASSERT(visData[0].vars[2]->name == "Velocity_y");
ASSERT(visData[0].vars[3]->name == "Velocity_z");
fillData.copy(Distance, SignData);
fillData.copy(Velocity_x, VelxData);
fillData.copy(Velocity_y, VelyData);
fillData.copy(Velocity_z, VelzData);
IO::writeData(timestep, visData, Dm->Comm);
}
}

94
models/BGKModel.h Normal file
View File

@ -0,0 +1,94 @@
/*
Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University
Copyright Equnior ASA
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* Multi-relaxation time LBM Model
*/
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <iostream>
#include <exception>
#include <stdexcept>
#include <fstream>
#include "common/ScaLBL.h"
#include "common/Communication.h"
#include "common/MPI.h"
#include "analysis/Minkowski.h"
#include "ProfilerApp.h"
class ScaLBL_BGKModel {
public:
ScaLBL_BGKModel(int RANK, int NP, const Utilities::MPI &COMM);
~ScaLBL_BGKModel();
// functions in they should be run
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void ReadInput();
void Create();
void Initialize();
void Run();
void VelocityField();
bool Restart, pBC;
int timestep, timestepMax;
int ANALYSIS_INTERVAL;
int BoundaryCondition;
double tau, mu;
double Fx, Fy, Fz, flux;
double din, dout;
double tolerance;
int Nx, Ny, Nz, N, Np;
int rank, nprocx, nprocy, nprocz, nprocs;
double Lx, Ly, Lz;
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
// input database
std::shared_ptr<Database> db;
std::shared_ptr<Database> domain_db;
std::shared_ptr<Database> mrt_db;
std::shared_ptr<Database> vis_db;
IntArray Map;
DoubleArray Distance;
int *NeighborList;
double *fq;
double *Velocity;
double *Pressure;
//Minkowski Morphology;
DoubleArray Velocity_x;
DoubleArray Velocity_y;
DoubleArray Velocity_z;
private:
Utilities::MPI comm;
// filenames
char LocalRankString[8];
char LocalRankFilename[40];
char LocalRestartFile[40];
//int rank,nprocs;
void LoadParams(std::shared_ptr<Database> db0);
};

View File

@ -115,8 +115,7 @@ void ScaLBL_ColorModel::ReadParams(string filename) {
inletB = 0.f;
outletA = 0.f;
outletB = 1.f;
BoundaryCondition = 0;
if (color_db->keyExists("BC")) {
BoundaryCondition = color_db->getScalar<int>("BC");
@ -388,6 +387,10 @@ void ScaLBL_ColorModel::AssignComponentLabels(double *phase) {
AFFINITY, volume_fraction);
}
}
// clean up
delete [] label_count;
delete [] label_count_global;
}
void ScaLBL_ColorModel::Create() {
@ -483,12 +486,22 @@ void ScaLBL_ColorModel::Create() {
// copy the neighbor list
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
ScaLBL_Comm->Barrier();
delete[] neighborList;
// initialize phi based on PhaseLabel (include solid component labels)
double *PhaseLabel;
PhaseLabel = new double[N];
PhaseLabel = new double[Nx*Ny*Nz];
ScaLBL_Comm->Barrier();
AssignComponentLabels(PhaseLabel);
ScaLBL_CopyToDevice(Phi, PhaseLabel, N * sizeof(double));
ScaLBL_Comm->Barrier();
ScaLBL_CopyToDevice(Phi, PhaseLabel, Nx*Ny*Nz * sizeof(double));
ScaLBL_Comm->Barrier();
if (rank == 0)
printf("Model created \n");
delete[] PhaseLabel;
}

View File

@ -17,6 +17,7 @@ ScaLBL_IonModel::~ScaLBL_IonModel() {}
void ScaLBL_IonModel::ReadParams(string filename, vector<int> &num_iter) {
USE_MEMBRANE = true;
// read the input database
db = std::make_shared<Database>(filename);
domain_db = db->getDatabase("Domain");
@ -421,7 +422,25 @@ void ScaLBL_IonModel::ReadParams(string filename) {
1.0e-18); //LB ion concentration has unit [mol/lu^3]
}
}
if (ion_db->keyExists("MembraneIonConcentrationList")) {
if (rank == 0) printf(".... Read MembraneIonConcentrationList \n");
MembraneIonConcentration.clear();
MembraneIonConcentration = ion_db->getVector<double>("MembraneIonConcentrationList");
if (MembraneIonConcentration.size() != number_ion_species) {
ERROR("Error: number_ion_species and MembraneIonConcentrationList must be "
"the same length! \n");
}
else {
for (size_t i = 0; i < MembraneIonConcentration.size(); i++) {
MembraneIonConcentration[i] =
MembraneIonConcentration[i] *
(h * h * h *
1.0e-18); //LB ion concentration has unit [mol/lu^3]
}
}
}
//Read solid boundary condition specific to Ion model
BoundaryConditionSolid = 0;
if (ion_db->keyExists("BC_Solid")) {
@ -583,6 +602,73 @@ void ScaLBL_IonModel::SetDomain() {
nprocz = Dm->nprocz();
}
void ScaLBL_IonModel::SetMembrane() {
size_t NLABELS = 0;
membrane_db = db->getDatabase("Membrane");
/* set distance based on labels inside the membrane (all other labels will be outside) */
auto MembraneLabels = membrane_db->getVector<int>("MembraneLabels");
IonMembrane = std::shared_ptr<Membrane>(new Membrane(Dm, NeighborList, Np));
signed char LABEL = 0;
double *label_count;
double *label_count_global;
Array<char> membrane_id(Nx,Ny,Nz);
label_count = new double[NLABELS];
label_count_global = new double[NLABELS];
// Assign the labels
for (size_t idx = 0; idx < NLABELS; idx++)
label_count[idx] = 0;
/* set the distance to the membrane */
MembraneDistance.resize(Nx, Ny, Nz);
MembraneDistance.fill(0);
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
membrane_id(i,j,k) = 1; // default value
LABEL = Dm->id[k*Nx*Ny + j*Nx + i];
for (size_t m=0; m<MembraneLabels.size(); m++){
if (LABEL == MembraneLabels[m]) {
label_count[m] += 1.0;
membrane_id(i,j,k) = 0; // inside
m = MembraneLabels.size(); //exit loop
}
}
}
}
}
for (size_t m=0; m<MembraneLabels.size(); m++){
label_count_global[m] = Dm->Comm.sumReduce(label_count[m]);
}
if (rank == 0) {
printf(" Membrane labels: %lu \n", MembraneLabels.size());
for (size_t m=0; m<MembraneLabels.size(); m++){
LABEL = MembraneLabels[m];
double volume_fraction = double(label_count_global[m]) /
double((Nx - 2) * (Ny - 2) * (Nz - 2) * nprocs);
printf(" label=%d, volume fraction = %f\n", LABEL, volume_fraction);
}
}
/* signed distance to the membrane ( - inside / + outside) */
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
MembraneDistance(i, j, k) = 2.0 * double(membrane_id(i, j, k)) - 1.0;
}
}
}
CalcDist(MembraneDistance, membrane_id, *Dm);
/* create the membrane data structure */
if (rank==0) printf("Creating membrane data structure...\n");
MembraneCount = IonMembrane->Create(Dm, MembraneDistance, Map);
// clean up
delete [] label_count;
delete [] label_count_global;
}
void ScaLBL_IonModel::ReadInput() {
sprintf(LocalRankString, "%05d", Dm->rank());
@ -709,6 +795,33 @@ void ScaLBL_IonModel::AssignSolidBoundary(double *ion_solid) {
}
}
void ScaLBL_IonModel::AssignIonConcentrationMembrane( double *Ci, int ic) {
// double *Ci, const vector<double> MembraneIonConcentration, const vector<double> IonConcentration, int ic) {
double VALUE = 0.f;
if (rank == 0){
printf(".... Set concentration(%i): inside=%f, outside=%f \n", ic, MembraneIonConcentration[ic], IonConcentration[ic]);
}
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
int idx = Map(i, j, k);
if (!(idx < 0)) {
if (MembraneDistance(i,j,k) < 0.0) {
VALUE = MembraneIonConcentration[ic];//* (h * h * h * 1.0e-18);
} else {
VALUE = IonConcentration[ic];//* (h * h * h * 1.0e-18);
}
Ci[idx] = VALUE;
}
}
}
}
}
void ScaLBL_IonModel::AssignIonConcentration_FromFile(
double *Ci, const vector<std::string> &File_ion, int ic) {
double *Ci_host;
@ -764,7 +877,7 @@ void ScaLBL_IonModel::Create() {
Map.fill(-2);
auto neighborList = new int[18 * Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map, neighborList,
Mask->id.data(), Np, 1);
Mask->id.data(), Npad, 1);
comm.barrier();
//...........................................................................
@ -778,6 +891,7 @@ void ScaLBL_IonModel::Create() {
int neighborSize = 18 * (Np * sizeof(int));
//...........................................................................
ScaLBL_AllocateDeviceMemory((void **)&NeighborList, neighborSize);
ScaLBL_AllocateDeviceMemory((void **)&dvcMap, sizeof(int) * Np);
ScaLBL_AllocateDeviceMemory((void **)&fq,
number_ion_species * 7 * dist_mem_size);
ScaLBL_AllocateDeviceMemory((void **)&Ci,
@ -794,6 +908,37 @@ void ScaLBL_IonModel::Create() {
if (rank == 0)
printf("LB Ion Solver: Setting up device map and neighbor list \n");
// copy the neighbor list
int *TmpMap;
TmpMap = new int[Np];
for (int k = 1; k < Nz - 1; k++) {
for (int j = 1; j < Ny - 1; j++) {
for (int i = 1; i < Nx - 1; i++) {
int idx = Map(i, j, k);
if (!(idx < 0))
TmpMap[idx] = k * Nx * Ny + j * Nx + i;
}
}
}
// check that TmpMap is valid
for (int idx = 0; idx < ScaLBL_Comm->LastExterior(); idx++) {
auto n = TmpMap[idx];
if (n > Nx * Ny * Nz) {
printf("Bad value! idx=%i \n", n);
TmpMap[idx] = Nx * Ny * Nz - 1;
}
}
for (int idx = ScaLBL_Comm->FirstInterior();
idx < ScaLBL_Comm->LastInterior(); idx++) {
auto n = TmpMap[idx];
if (n > Nx * Ny * Nz) {
printf("Bad value! idx=%i \n", n);
TmpMap[idx] = Nx * Ny * Nz - 1;
}
}
ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int) * Np);
ScaLBL_Comm->Barrier();
delete[] TmpMap;
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
comm.barrier();
@ -822,10 +967,26 @@ void ScaLBL_IonModel::Initialize() {
*/
if (rank == 0)
printf("LB Ion Solver: initializing D3Q7 distributions\n");
if (ion_db->keyExists("IonConcentrationFile")) {
USE_MEMBRANE = true;
if (USE_MEMBRANE){
double *Ci_host;
if (rank == 0)
printf(" ...initializing based on membrane list \n");
Ci_host = new double[number_ion_species * Np];
for (size_t ic = 0; ic < number_ion_species; ic++) {
AssignIonConcentrationMembrane( &Ci_host[ic * Np], ic);
}
ScaLBL_CopyToDevice(Ci, Ci_host, number_ion_species * sizeof(double) * Np);
comm.barrier();
for (size_t ic = 0; ic < number_ion_species; ic++) {
ScaLBL_D3Q7_Ion_Init_FromFile(&fq[ic * Np * 7], &Ci[ic * Np], Np);
}
delete[] Ci_host;
}
else if (ion_db->keyExists("IonConcentrationFile")) {
//NOTE: "IonConcentrationFile" is a vector, including "file_name, datatype"
auto File_ion = ion_db->getVector<std::string>("IonConcentrationFile");
if (File_ion.size() == 2 * number_ion_species) {
if (File_ion.size() == 2*number_ion_species) {
double *Ci_host;
Ci_host = new double[number_ion_species * Np];
for (size_t ic = 0; ic < number_ion_species; ic++) {
@ -844,7 +1005,8 @@ void ScaLBL_IonModel::Initialize() {
ERROR("Error: Number of user-input ion concentration files should "
"be equal to number of ion species!\n");
}
} else {
}
else {
for (size_t ic = 0; ic < number_ion_species; ic++) {
ScaLBL_D3Q7_Ion_Init(&fq[ic * Np * 7], &Ci[ic * Np],
IonConcentration[ic], Np);
@ -1181,6 +1343,152 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField) {
//if (rank==0) printf("********************************************************\n");
}
void ScaLBL_IonModel::RunMembrane(double *Velocity, double *ElectricField, double *Psi) {
//Input parameter:
//1. Velocity is from StokesModel
//2. ElectricField is from Poisson model
//LB-related parameter
vector<double> rlx;
for (size_t ic = 0; ic < tau.size(); ic++) {
rlx.push_back(1.0 / tau[ic]);
}
//.......create and start timer............
//double starttime,stoptime,cputime;
//ScaLBL_Comm->Barrier(); comm.barrier();
//auto t1 = std::chrono::system_clock::now();
for (size_t ic = 0; ic < number_ion_species; ic++) {
/* set the mass transfer coefficients for the membrane */
if (ic == 0)
IonMembrane->AssignCoefficients(dvcMap, Psi, "Na+");
else {
IonMembrane->AssignCoefficients(dvcMap, Psi, "impermeable");
}
timestep = 0;
while (timestep < timestepMax[ic]) {
//************************************************************************/
// *************ODD TIMESTEP*************//
timestep++;
//LB-Ion collison
IonMembrane->SendD3Q7AA(&fq[ic * Np * 7]); //READ FORM NORMAL
ScaLBL_D3Q7_AAodd_Ion(
IonMembrane->NeighborList, &fq[ic * Np * 7], &Ci[ic * Np],
&FluxDiffusive[3 * ic * Np], &FluxAdvective[3 * ic * Np],
&FluxElectrical[3 * ic * Np], Velocity, ElectricField,
IonDiffusivity[ic], IonValence[ic], rlx[ic], Vt,
ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
IonMembrane->RecvD3Q7AA(&fq[ic * Np * 7]); //WRITE INTO OPPOSITE
ScaLBL_D3Q7_AAodd_Ion(
IonMembrane->NeighborList, &fq[ic * Np * 7], &Ci[ic * Np],
&FluxDiffusive[3 * ic * Np], &FluxAdvective[3 * ic * Np],
&FluxElectrical[3 * ic * Np], Velocity, ElectricField,
IonDiffusivity[ic], IonValence[ic], rlx[ic], Vt, 0,
ScaLBL_Comm->LastExterior(), Np);
IonMembrane->IonTransport(&fq[ic * Np * 7],&Ci[ic * Np]);
/* if (BoundaryConditionSolid == 1) {
//TODO IonSolid may also be species-dependent
ScaLBL_Comm->SolidDirichletD3Q7(&fq[ic * Np * 7], IonSolid);
}
ScaLBL_Comm->Barrier();
comm.barrier();
*/
// *************EVEN TIMESTEP*************//
timestep++;
//LB-Ion collison
IonMembrane->SendD3Q7AA(&fq[ic * Np * 7]); //READ FORM NORMAL
ScaLBL_D3Q7_AAeven_Ion(
&fq[ic * Np * 7], &Ci[ic * Np], &FluxDiffusive[3 * ic * Np],
&FluxAdvective[3 * ic * Np], &FluxElectrical[3 * ic * Np],
Velocity, ElectricField, IonDiffusivity[ic], IonValence[ic],
rlx[ic], Vt, ScaLBL_Comm->FirstInterior(),
ScaLBL_Comm->LastInterior(), Np);
IonMembrane->RecvD3Q7AA(&fq[ic * Np * 7]); //WRITE INTO OPPOSITE
ScaLBL_D3Q7_AAeven_Ion(
&fq[ic * Np * 7], &Ci[ic * Np], &FluxDiffusive[3 * ic * Np],
&FluxAdvective[3 * ic * Np], &FluxElectrical[3 * ic * Np],
Velocity, ElectricField, IonDiffusivity[ic], IonValence[ic],
rlx[ic], Vt, 0, ScaLBL_Comm->LastExterior(), Np);
IonMembrane->IonTransport(&fq[ic * Np * 7],&Ci[ic * Np]);
ScaLBL_Comm->Barrier();
comm.barrier();
/*
if (BoundaryConditionSolid == 1) {
//TODO IonSolid may also be species-dependent
ScaLBL_Comm->SolidDirichletD3Q7(&fq[ic * Np * 7], IonSolid);
}
ScaLBL_Comm->Barrier();
comm.barrier();
*/
}
}
//Compute charge density for Poisson equation
for (size_t ic = 0; ic < number_ion_species; ic++) {
int Valence = IonValence[ic];
if (rank==0) printf("compute charge density for ion %i, Valence =%i \n", ic,Valence);
ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, Valence, ic,
ScaLBL_Comm->FirstInterior(),
ScaLBL_Comm->LastInterior(), Np);
ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, Valence, ic, 0,
ScaLBL_Comm->LastExterior(), Np);
}
/* DoubleArray Charge(Nx,Ny,Nz);
ScaLBL_Comm->RegularLayout(Map, ChargeDensity, Charge);
double charge_sum=0.0;
double charge_sum_total=0.0;
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
charge_sum += Charge(i,j,k);
}
}
}
printf(" Local charge value = %.8g (rank=%i)\n",charge_sum, rank);
ScaLBL_Comm->Barrier();
comm.barrier();
*/
ScaLBL_Comm->Barrier();
comm.barrier();
//if (rank==0) printf(" IonMembrane: completeted full step \n");
//fflush(stdout);
//************************************************************************/
//if (rank==0) printf("-------------------------------------------------------------------\n");
//// Compute the walltime per timestep
//auto t2 = std::chrono::system_clock::now();
//double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
//// Performance obtained from each node
//double MLUPS = double(Np)/cputime/1000000;
//if (rank==0) printf("********************************************************\n");
//if (rank==0) printf("CPU time = %f \n", cputime);
//if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
//MLUPS *= nprocs;
//if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
//if (rank==0) printf("********************************************************\n");
}
void ScaLBL_IonModel::getIonConcentration(DoubleArray &IonConcentration,
const size_t ic) {
//This function wirte out the data in a normal layout (by aggregating all decomposed domains)

View File

@ -1,7 +1,6 @@
/*
* Ion transporte LB Model
*/
#ifndef ScaLBL_IonModel_INC
#define ScaLBL_IonModel_INC
@ -16,6 +15,7 @@
#include "common/ScaLBL.h"
#include "common/Communication.h"
#include "common/Membrane.h"
#include "common/MPI.h"
#include "analysis/Minkowski.h"
#include "ProfilerApp.h"
@ -30,10 +30,12 @@ public:
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void SetMembrane();
void ReadInput();
void Create();
void Initialize();
void Run(double *Velocity, double *ElectricField);
void RunMembrane(double *Velocity, double *ElectricField, double *Psi);
void getIonConcentration(DoubleArray &IonConcentration, const size_t ic);
void getIonConcentration_debug(int timestep);
void getIonFluxDiffusive(DoubleArray &IonFlux_x, DoubleArray &IonFlux_y,
@ -66,6 +68,7 @@ public:
vector<double> IonDiffusivity; //User input unit [m^2/sec]
vector<int> IonValence;
vector<double> IonConcentration; //unit [mol/m^3]
vector<double> MembraneIonConcentration; //unit [mol/m^3]
vector<double>
Cin; //inlet boundary value, can be either concentration [mol/m^3] or flux [mol/m^2/sec]
vector<double>
@ -88,6 +91,7 @@ public:
IntArray Map;
DoubleArray Distance;
int *NeighborList;
int *dvcMap;
double *fq;
double *Ci;
double *ChargeDensity;
@ -97,7 +101,14 @@ public:
double *FluxDiffusive;
double *FluxAdvective;
double *FluxElectrical;
/* these support membrane capabilities */
bool USE_MEMBRANE;
std::shared_ptr<Database> membrane_db;
std::shared_ptr<Membrane> IonMembrane;
DoubleArray MembraneDistance;
int MembraneCount; // number of links the cross the membrane
private:
Utilities::MPI comm;
@ -113,6 +124,8 @@ private:
void AssignIonConcentration_FromFile(double *Ci,
const vector<std::string> &File_ion,
int ic);
void AssignIonConcentrationMembrane( double *Ci, int ic);
void IonConcentration_LB_to_Phys(DoubleArray &Den_reg);
void IonFlux_LB_to_Phys(DoubleArray &Den_reg, const size_t ic);
};

View File

@ -36,6 +36,7 @@ void ScaLBL_MRTModel::ReadParams(string filename) {
tau = 1.0;
timestepMax = 100000;
ANALYSIS_INTERVAL = 1000;
tolerance = 1.0e-8;
Fx = Fy = 0.0;
Fz = 1.0e-5;
@ -46,6 +47,9 @@ void ScaLBL_MRTModel::ReadParams(string filename) {
if (mrt_db->keyExists("timestepMax")) {
timestepMax = mrt_db->getScalar<int>("timestepMax");
}
if (mrt_db->keyExists("analysis_interval")) {
ANALYSIS_INTERVAL = mrt_db->getScalar<int>("analysis_interval");
}
if (mrt_db->keyExists("tolerance")) {
tolerance = mrt_db->getScalar<double>("tolerance");
}
@ -318,7 +322,7 @@ void ScaLBL_MRTModel::Run() {
comm.barrier();
//************************************************************************/
if (timestep % 1000 == 0) {
if (timestep % ANALYSIS_INTERVAL == 0) {
ScaLBL_D3Q19_Momentum(fq, Velocity, Np);
ScaLBL_DeviceBarrier();
comm.barrier();

View File

@ -48,6 +48,7 @@ public:
bool Restart, pBC;
int timestep, timestepMax;
int ANALYSIS_INTERVAL;
int BoundaryCondition;
double tau, mu;
double Fx, Fy, Fz, flux;

View File

@ -42,7 +42,7 @@ void ScaLBL_Poisson::ReadParams(string filename){
domain_db = db->getDatabase( "Domain" );
electric_db = db->getDatabase( "Poisson" );
k2_inv = 4.0;//speed of sound for D3Q7 lattice
k2_inv = 3.0;//speed of sound for D3Q19 lattice
tau = 0.5+k2_inv;
timestepMax = 100000;
tolerance = 1.0e-6;//stopping criterion for obtaining steady-state electricla potential
@ -63,6 +63,9 @@ void ScaLBL_Poisson::ReadParams(string filename){
if (electric_db->keyExists( "timestepMax" )){
timestepMax = electric_db->getScalar<int>( "timestepMax" );
}
if (electric_db->keyExists( "tau" )){
tau = electric_db->getScalar<double>( "tau" );
}
if (electric_db->keyExists( "analysis_interval" )){
analysis_interval = electric_db->getScalar<int>( "analysis_interval" );
}
@ -330,7 +333,7 @@ void ScaLBL_Poisson::Create(){
if (rank==0) printf ("LB-Poisson Solver: Set up memory efficient layout \n");
Map.resize(Nx,Ny,Nz); Map.fill(-2);
auto neighborList= new int[18*Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np,1);
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Npad,1);
comm.barrier();
//...........................................................................
@ -345,7 +348,7 @@ void ScaLBL_Poisson::Create(){
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np);
//ScaLBL_AllocateDeviceMemory((void **) &dvcID, sizeof(signed char)*Nx*Ny*Nz);
ScaLBL_AllocateDeviceMemory((void **) &fq, 7*dist_mem_size);
ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size);
ScaLBL_AllocateDeviceMemory((void **) &Psi, sizeof(double)*Nx*Ny*Nz);
ScaLBL_AllocateDeviceMemory((void **) &Psi_BCLabel, sizeof(int)*Nx*Ny*Nz);
ScaLBL_AllocateDeviceMemory((void **) &ElectricField, 3*sizeof(double)*Np);
@ -366,6 +369,8 @@ void ScaLBL_Poisson::Create(){
}
}
}
comm.barrier();
if (rank==0) printf (" .... LB-Poisson Solver: check neighbor list \n");
// check that TmpMap is valid
for (int idx=0; idx<ScaLBL_Comm->LastExterior(); idx++){
auto n = TmpMap[idx];
@ -381,6 +386,8 @@ void ScaLBL_Poisson::Create(){
TmpMap[idx] = Nx*Ny*Nz-1;
}
}
comm.barrier();
if (rank==0) printf (" .... LB-Poisson Solver: copy neighbor list to GPU \n");
ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np);
ScaLBL_Comm->Barrier();
delete [] TmpMap;
@ -547,7 +554,7 @@ void ScaLBL_Poisson::Initialize(double time_conv_from_Study){
* "time_conv_from_Study" is the phys to LB time conversion factor, unit=[sec/lt]
* which is used for periodic voltage input for inlet and outlet boundaries
*/
if (rank==0) printf ("LB-Poisson Solver: initializing D3Q7 distributions\n");
if (rank==0) printf ("LB-Poisson Solver: initializing D3Q19 distributions\n");
//NOTE the initialization involves two steps:
//1. assign solid boundary value (surface potential or surface change density)
//2. Initialize electric potential for pore nodes
@ -561,8 +568,9 @@ void ScaLBL_Poisson::Initialize(double time_conv_from_Study){
ScaLBL_CopyToDevice(Psi, psi_host, Nx*Ny*Nz*sizeof(double));
ScaLBL_CopyToDevice(Psi_BCLabel, psi_BCLabel_host, Nx*Ny*Nz*sizeof(int));
ScaLBL_Comm->Barrier();
ScaLBL_D3Q7_Poisson_Init(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_D3Q7_Poisson_Init(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
/* switch to d3Q19 model */
ScaLBL_D3Q19_Poisson_Init(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_D3Q19_Poisson_Init(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
delete [] psi_host;
delete [] psi_BCLabel_host;
@ -583,6 +591,8 @@ void ScaLBL_Poisson::Run(double *ChargeDensity, bool UseSlippingVelBC, int times
//double starttime,stoptime,cputime;
//comm.barrier();
//auto t1 = std::chrono::system_clock::now();
double *host_Error;
host_Error = new double [Np];
timestep=0;
double error = 1.0;
@ -590,98 +600,40 @@ void ScaLBL_Poisson::Run(double *ChargeDensity, bool UseSlippingVelBC, int times
//************************************************************************/
// *************ODD TIMESTEP*************//
timestep++;
SolveElectricPotentialAAodd(timestep_from_Study);//update electric potential
//SolveElectricPotentialAAodd(timestep_from_Study,ChargeDensity, UseSlippingVelBC);//update electric potential
SolvePoissonAAodd(ChargeDensity, UseSlippingVelBC);//perform collision
ScaLBL_Comm->Barrier(); comm.barrier();
// *************EVEN TIMESTEP*************//
timestep++;
SolveElectricPotentialAAeven(timestep_from_Study);//update electric potential
//SolveElectricPotentialAAeven(timestep_from_Study,ChargeDensity, UseSlippingVelBC);//update electric potential
SolvePoissonAAeven(ChargeDensity, UseSlippingVelBC);//perform collision
ScaLBL_Comm->Barrier(); comm.barrier();
//************************************************************************/
// Check convergence of steady-state solution
if (timestep==2){
//save electric potential for convergence check
ScaLBL_CopyToHost(Psi_previous.data(),Psi,sizeof(double)*Nx*Ny*Nz);
}
if (timestep%analysis_interval==0){
if (tolerance_method.compare("MSE")==0){
double count_loc=0;
double count;
double MSE_loc=0.0;
ScaLBL_CopyToHost(Psi_host.data(),Psi,sizeof(double)*Nx*Ny*Nz);
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
if (Distance(i,j,k) > 0){
MSE_loc += (Psi_host(i,j,k) - Psi_previous(i,j,k))*(Psi_host(i,j,k) - Psi_previous(i,j,k));
count_loc+=1.0;
}
}
}
}
error=Dm->Comm.sumReduce(MSE_loc);
count=Dm->Comm.sumReduce(count_loc);
error /= count;
}
else if (tolerance_method.compare("MSE_max")==0){
vector<double>MSE_loc;
double MSE_loc_max;
ScaLBL_CopyToHost(Psi_host.data(),Psi,sizeof(double)*Nx*Ny*Nz);
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
if (Distance(i,j,k) > 0){
MSE_loc.push_back((Psi_host(i,j,k) - Psi_previous(i,j,k))*(Psi_host(i,j,k) - Psi_previous(i,j,k)));
}
}
}
}
vector<double>::iterator it_max = max_element(MSE_loc.begin(),MSE_loc.end());
unsigned int idx_max=distance(MSE_loc.begin(),it_max);
MSE_loc_max=MSE_loc[idx_max];
error=Dm->Comm.maxReduce(MSE_loc_max);
}
else{
ERROR("Error: user-specified tolerance_method cannot be identified; check you input database! \n");
}
ScaLBL_CopyToHost(Psi_previous.data(),Psi,sizeof(double)*Nx*Ny*Nz);
/* get the elecric potential */
ScaLBL_CopyToHost(Psi_host.data(),Psi,sizeof(double)*Nx*Ny*Nz);
if (rank==0) printf(" ... getting Poisson solver error \n");
double err = 0.0;
double max_error = 0.0;
ScaLBL_CopyToHost(host_Error,ResidualError,sizeof(double)*Np);
for (int idx=0; idx<Np; idx++){
err = host_Error[idx]*host_Error[idx];
if (err > max_error ){
max_error = err;
}
}
error=Dm->Comm.maxReduce(max_error);
/* compute the eletric field */
//ScaLBL_D3Q19_Poisson_getElectricField(fq, ElectricField, tau, Np);
//legacy code that tried to use residual to check convergence
//ScaLBL_D3Q7_PoissonResidualError(NeighborList,dvcMap,ResidualError,Psi,ChargeDensity,epsilon_LB,Nx,Nx*Ny,ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior());
//ScaLBL_D3Q7_PoissonResidualError(NeighborList,dvcMap,ResidualError,Psi,ChargeDensity,epsilon_LB,Nx,Nx*Ny,0, ScaLBL_Comm->LastExterior());
//ScaLBL_Comm->Barrier(); comm.barrier();
//vector<double> ResidualError_host(Np);
//double error_loc_max;
////calculate the maximum residual error
//ScaLBL_CopyToHost(&ResidualError_host[0],ResidualError,sizeof(double)*Np);
//vector<double>::iterator it_temp1,it_temp2;
//it_temp1=ResidualError_host.begin();
//advance(it_temp1,ScaLBL_Comm->LastExterior());
//vector<double>::iterator it_max = max_element(ResidualError_host.begin(),it_temp1);
//unsigned int idx_max1 = distance(ResidualError_host.begin(),it_max);
//it_temp1=ResidualError_host.begin();
//it_temp2=ResidualError_host.begin();
//advance(it_temp1,ScaLBL_Comm->FirstInterior());
//advance(it_temp2,ScaLBL_Comm->LastInterior());
//it_max = max_element(it_temp1,it_temp2);
//unsigned int idx_max2 = distance(ResidualError_host.begin(),it_max);
//if (ResidualError_host[idx_max1]>ResidualError_host[idx_max2]){
// error_loc_max=ResidualError_host[idx_max1];
//}
//else{
// error_loc_max=ResidualError_host[idx_max2];
//}
//error = Dm->Comm.maxReduce(error_loc_max);
}
}
if(WriteLog==true){
@ -714,11 +666,12 @@ void ScaLBL_Poisson::getConvergenceLog(int timestep,double error){
}
}
void ScaLBL_Poisson::SolveElectricPotentialAAodd(int timestep_from_Study){
ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL
ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE
void ScaLBL_Poisson::SolveElectricPotentialAAodd(int timestep_from_Study, double *ChargeDensity, bool UseSlippingVelBC){
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
//ScaLBL_D3Q19_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, ChargeDensity, Psi, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_Comm->Barrier();
/*
// Set boundary conditions
if (BoundaryConditionInlet > 0){
switch (BoundaryConditionInlet){
@ -743,15 +696,20 @@ void ScaLBL_Poisson::SolveElectricPotentialAAodd(int timestep_from_Study){
}
}
//-------------------------//
ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
* */
//ScaLBL_D3Q19_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, ChargeDensity, Psi, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
}
void ScaLBL_Poisson::SolveElectricPotentialAAeven(int timestep_from_Study){
ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL
ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE
void ScaLBL_Poisson::SolveElectricPotentialAAeven(int timestep_from_Study, double *ChargeDensity, bool UseSlippingVelBC){
ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL
//ScaLBL_D3Q19_AAeven_Poisson_ElectricPotential(dvcMap, fq, ChargeDensity, Psi, epsilon_LB, UseSlippingVelBC,
// ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_Comm->Barrier();
// Set boundary conditions
/*
if (BoundaryConditionInlet > 0){
switch (BoundaryConditionInlet){
case 1:
@ -774,35 +732,35 @@ void ScaLBL_Poisson::SolveElectricPotentialAAeven(int timestep_from_Study){
break;
}
}
*/
//-------------------------//
ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
//ScaLBL_D3Q19_AAeven_Poisson_ElectricPotential(dvcMap, fq, ChargeDensity, Psi, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
}
void ScaLBL_Poisson::SolvePoissonAAodd(double *ChargeDensity, bool UseSlippingVelBC){
ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_D3Q19_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
//ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_Comm->Barrier();
//TODO: perhaps add another ScaLBL_Comm routine to update Psi values on solid boundary nodes.
//something like:
//ScaLBL_Comm->SolidDirichletBoundaryUpdates(Psi, Psi_BCLabel, timestep);
ScaLBL_Comm->SolidDirichletAndNeumannD3Q7(fq, Psi, Psi_BCLabel);
//if (BoundaryConditionSolid==1){
// ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi);
//}
//else if (BoundaryConditionSolid==2){
// ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi);
//}
//ScaLBL_Comm->SolidDirichletAndNeumannD3Q7(fq, Psi, Psi_BCLabel);
}
void ScaLBL_Poisson::SolvePoissonAAeven(double *ChargeDensity, bool UseSlippingVelBC){
ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_Comm->SolidDirichletAndNeumannD3Q7(fq, Psi, Psi_BCLabel);
//if (BoundaryConditionSolid==1){
// ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi);
//}
//else if (BoundaryConditionSolid==2){
// ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi);
//}
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_D3Q19_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, ResidualError, tau, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
// ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, ResidualError, tau, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_Comm->Barrier();
//ScaLBL_Comm->SolidDirichletAndNeumannD3Q7(fq, Psi, Psi_BCLabel);
}
void ScaLBL_Poisson::DummyChargeDensity(){

View File

@ -108,8 +108,8 @@ private:
void AssignSolidBoundary(double *poisson_solid, int *poisson_solid_label);
void Potential_Init(double *psi_init);
void ElectricField_LB_to_Phys(DoubleArray &Efield_reg);
void SolveElectricPotentialAAodd(int timestep_from_Study);
void SolveElectricPotentialAAeven(int timestep_from_Study);
void SolveElectricPotentialAAeven(int timestep_from_Study, double *ChargeDensity, bool UseSlippingVelBC);
void SolveElectricPotentialAAodd(int timestep_from_Study, double *ChargeDensity, bool UseSlippingVelBC);
//void SolveElectricField();
void SolvePoissonAAodd(double *ChargeDensity, bool UseSlippingVelBC);
void SolvePoissonAAeven(double *ChargeDensity, bool UseSlippingVelBC);

View File

@ -0,0 +1,47 @@
#module load cmake/3.21.3
#module load PrgEnv-gnu
module load PrgEnv-amd
module load rocm/4.5.0
module load cray-mpich
module load cray-hdf5-parallel
#module load craype-accel-amd-gfx908
## These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
# Need a new version of cmake
export CMAKE_DIR=/gpfs/alpine/csc380/proj-shared/LBPM/cmake-3.21.0/bin
#-I${MPICH_DIR}/include
#-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa
HIPFLAGS = --amdgpu-target=gfx90a
# configure
rm -rf CMake*
${CMAKE_DIR}/cmake \
-D CMAKE_BUILD_TYPE:STRING=Release \
-D CMAKE_C_COMPILER:PATH=cc \
-D CMAKE_CXX_COMPILER:PATH=CC \
-D CMAKE_CXX_STANDARD=14 \
-D DISABLE_GOLD:BOOL=TRUE \
-D DISABLE_LTO:BOOL=TRUE \
-D CMAKE_C_FLAGS="-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -I${HDF5_DIR}/include" \
-D CMAKE_CXX_FLAGS="-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -I${HDF5_DIR}/include" \
-D LINK_LIBRARIES="${ROCM_PATH}/lib/libamdhip64.so;${CRAY_MPICH_ROOTDIR}/gtl/lib/libmpi_gtl_hsa.so" \
-D USE_HIP=0 \
-D CMAKE_HIP_COMPILER_TOOLKIT_ROOT=$ROCM_PATH/hip \
-D USE_MPI=1 \
-D MPI_SKIP_SEARCH=1 \
-D MPIEXEC="srun" \
-D USE_HDF5=1 \
-D HDF5_DIRECTORY="${HDF5_DIR}" \
-D USE_SILO=0 \
-D USE_TIMER=0 \
-D USE_DOXYGEN:BOOL=false \
~/LBPM-WIA

View File

@ -0,0 +1,52 @@
#module load cmake/3.21.3
#module load PrgEnv-gnu
module load PrgEnv-amd
module load rocm/4.5.0
module load cray-mpich
module load cray-hdf5-parallel
module load craype-accel-amd-gfx908
## These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
# Need a new version of cmake
export CMAKE_DIR=/gpfs/alpine/csc380/proj-shared/LBPM/cmake-3.21.0/bin
#-I${MPICH_DIR}/include
#-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa
#export HIPFLAGS="--amdgpu-target=gfx90a --save-temps"
#--amdgpu-spill-vgpr-to-agpr=0
#THIS IS HOW TO CHECK FOR SPILLS (example)
# hipcc -c -g -ggdb --save-temps Color.hip
# -munsafe-fp-atomics
# configure
rm -rf CMake*
${CMAKE_DIR}/cmake \
-D CMAKE_BUILD_TYPE:STRING=Release \
-D CMAKE_C_COMPILER:PATH=cc \
-D CMAKE_CXX_COMPILER:PATH=CC \
-D CMAKE_CXX_STANDARD=14 \
-D DISABLE_GOLD:BOOL=TRUE \
-D DISABLE_LTO:BOOL=TRUE \
-D CMAKE_C_FLAGS="-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -I${HDF5_DIR}/include" \
-D CMAKE_CXX_FLAGS="-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -I${HDF5_DIR}/include" \
-D LINK_LIBRARIES="${ROCM_PATH}/lib/libamdhip64.so;${CRAY_MPICH_ROOTDIR}/gtl/lib/libmpi_gtl_hsa.so" \
-D USE_HIP=1 \
-D CMAKE_HIP_COMPILER_TOOLKIT_ROOT=$ROCM_PATH/hip \
-D USE_MPI=1 \
-D MPI_SKIP_SEARCH=1 \
-D MPIEXEC="srun" \
-D USE_HDF5=1 \
-D HDF5_DIRECTORY="${HDF5_DIR}" \
-D USE_SILO=0 \
-D USE_TIMER=0 \
-D USE_DOXYGEN:BOOL=false \
~/LBPM-WIA

View File

@ -1,14 +1,13 @@
export TPL_ROOT=/ccs/home/mbt/repos
export TPL_BUILDER=/ccs/home/mbt/repos/TPL-builder
export TPL_ROOT=/ccs/proj/csc380/mcclurej/spock
export TPL_BUILDER=/ccs/home/mcclurej/tpl-builder
export TPL_WEBPAGE=http://bitbucket.org/AdvancedMultiPhysics/tpl-builder/downloads
export INSTALL_DIR=/ccs/home/mbt/spock/install
export INSTALL_DIR=/ccs/proj/csc380/mcclurej/spock/install
module load cmake
module load llvm-amdgpu
module load hip
cmake \
-D CMAKE_BUILD_TYPE=Release \
-D CXX_STD=14 \
@ -24,7 +23,7 @@ cmake \
-D ENABLE_SHARED:BOOL=OFF \
-D PROCS_INSTALL=8 \
-D TPL_LIST:STRING="TIMER;ZLIB;HDF5;SILO" \
-D TIMER_URL="${TPL_ROOT}/TimerUtility" \
-D TIMER_URL="${TPL_ROOT}/timerutility" \
-D ZLIB_URL="http://zlib.net/zlib-1.2.11.tar.gz" \
-D HDF5_URL="${TPL_ROOT}/hdf5-1.8.12.tar.gz" \
-D BUILD_TYPE=x86_64 \

View File

@ -1,30 +1,40 @@
module load cmake
module load llvm-amdgpu
module load hip
module load PrgEnv-gnu
module load rocm/4.2.0
module load cray-mpich
module load cray-hdf5-parallel
#module load craype-accel-amd-gfx908
export TPL_DIR=/gpfs/alpine/stf006/proj-shared/mbt/spock/install
## These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx908="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx908="-lmpi_gtl_hsa"
# Need a new version of cmake
export CMAKE_DIR=/gpfs/alpine/csc380/proj-shared/LBPM/cmake-3.21.0/bin
# configure
rm -rf CMake*
cmake \
${CMAKE_DIR}/cmake \
-D CMAKE_BUILD_TYPE:STRING=Release \
-D CMAKE_C_COMPILER:PATH=cc \
-D CMAKE_C_COMPILER:PATH=cc \
-D CMAKE_CXX_COMPILER:PATH=CC \
-D CMAKE_CXX_STANDARD=14 \
-D DISABLE_GOLD:BOOL=TRUE \
-D DISABLE_LTO:BOOL=TRUE \
-D LINK_LIBRARIES="${ROCM_PATH}/lib/libamdhip64.so;${CRAY_MPICH_ROOTDIR}/gtl/lib/libmpi_gtl_hsa.so" \
-D USE_HIP=1 \
-D LINK_LIBRARIES=${HIP_PATH}/lib/libamdhip64.so \
-D USE_CUDA=0 \
-D CMAKE_CUDA_FLAGS="-arch sm_70 -Xptxas=-v -Xptxas -dlcm=cg -lineinfo" \
-D CMAKE_CUDA_HOST_COMPILER="gcc" \
-D USE_MPI=0 \
-D CMAKE_HIP_COMPILER_TOOLKIT_ROOT=$ROCM_PATH/hip \
-D USE_MPI=1 \
-D MPI_SKIP_SEARCH=1 \
-D MPIEXEC="srun" \
-D USE_HDF5=1 \
-D HDF5_DIRECTORY="${TPL_DIR}/hdf5" \
-D USE_SILO=0 \
-D SILO_DIRECTORY="${TPL_DIR}/silo" \
-D USE_DOXYGEN:BOOL=false \
-D HDF5_DIRECTORY="${HDF5_DIR}" \
-D USE_SILO=0 \
-D USE_TIMER=0 \
~/repos/LBPM-WIA
-D USE_DOXYGEN:BOOL=false \
~/LBPM-WIA

View File

@ -0,0 +1,39 @@
## Load the desired modules
module load PrgEnv-gcc
module load rocm/4.3.0
module load cray-mpich
module load cray-hdf5-parallel
## These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx908="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx908="-lmpi_gtl_hsa"
## These must be set before running
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_SMP_SINGLE_COPY_MODE=CMA
#export CMAKE_DIR=/gpfs/alpine/csc380/proj-shared/LBPM/cmake-3.21.3/bin
export CMAKE_DIR=/ccs/home/mbt/spock/cmake-3.21.3/bin
# configure
rm -rf CMake*
${CMAKE_DIR}/cmake \
-D CMAKE_BUILD_TYPE:STRING=Release \
-D CMAKE_CXX_COMPILER:PATH=CC \
-D CMAKE_CXX_STANDARD=14 \
-D DISABLE_GOLD:BOOL=TRUE \
-D DISABLE_LTO:BOOL=TRUE \
-D USE_HIP=1 \
-D LINK_LIBRARIES="${ROCM_PATH}/lib/libamdhip64.so;${CRAY_MPICH_ROOTDIR}/gtl/lib/libmpi_gtl_hsa.so" \
-D USE_MPI=1 \
-D MPI_SKIP_SEARCH=1 \
-D MPIEXEC="srun" \
-D USE_HDF5=1 \
-D HDF5_DIRECTORY="${HDF5_DIR}" \
-D USE_SILO=0 \
-D USE_TIMER=0 \
-D USE_DOXYGEN:BOOL=false \
~/repos/LBPM-WIA

View File

@ -1,19 +1,28 @@
# load the module for cmake
#module load cmake
module load cmake
# gcc/7.5.0
module load gcc/7.5.0
module load cuda/10.2.89
module load hdf5/1.10.7
#source /gpfs/gpfs_stage1/b6p315aa/setup/setup-mpi.sh
module load cmake gcc/7.5.0
module load cuda
module load hdf5
#/ccs/proj/csc380/mcclurej
#export HDF5_DIR=/ccs/proj/csc380/mcclurej/install/hdf5/1.8.12/
#export SILO_DIR=/ccs/proj/csc380/mcclurej/install/silo/4.10.2/
#export NETCDF_DIR=/ccs/proj/geo136/install/netcdf/4.6.1
export HDF5_DIR="$OLCF_HDF5_ROOT"
# configure
rm -rf CMake*
cmake \
-D CMAKE_BUILD_TYPE:STRING=Release \
-D CMAKE_C_COMPILER:PATH=mpicc \
@ -22,7 +31,7 @@ cmake \
-D CMAKE_CXX_STANDARD=14 \
-D USE_CUDA=1 \
-D CMAKE_CUDA_FLAGS="-arch sm_70 -Xptxas=-v -Xptxas -dlcm=cg -lineinfo" \
-D CMAKE_CUDA_HOST_COMPILER="/sw/summit/gcc/6.4.0/bin/gcc" \
-D CMAKE_CUDA_HOST_COMPILER="/sw/summit/gcc/7.5.0-2/bin/gcc" \
-D USE_MPI=1 \
-D MPIEXEC=mpirun \
-D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \
@ -38,4 +47,6 @@ cmake \
-D USE_TIMER=0 \
~/LBPM-WIA
make VERBOSE=1 -j1 && make install
make VERBOSE=1 -j8 && make install

View File

@ -7,9 +7,10 @@ ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator )
ADD_LBPM_EXECUTABLE( lbpm_greyscaleColor_simulator )
ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator )
ADD_LBPM_EXECUTABLE( lbpm_nernst_planck_simulator )
ADD_LBPM_EXECUTABLE( lbpm_cell_simulator )
ADD_LBPM_EXECUTABLE( lbpm_freelee_simulator )
ADD_LBPM_EXECUTABLE( lbpm_freelee_SingleFluidBGK_simulator )
#ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator )
ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator )
#ADD_LBPM_EXECUTABLE( lbpm_color_macro_simulator )
ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator )
#ADD_LBPM_EXECUTABLE( lbpm_sphere_pp )
@ -48,7 +49,6 @@ ADD_LBPM_EXECUTABLE( TestPNP_Stokes )
ADD_LBPM_EXECUTABLE( TestMixedGrad )
CONFIGURE_FILE( ${CMAKE_CURRENT_SOURCE_DIR}/cylindertest ${CMAKE_CURRENT_BINARY_DIR}/cylindertest COPYONLY )
# Add the tests
@ -60,6 +60,7 @@ ADD_LBPM_TEST( TestTopo3D )
ADD_LBPM_TEST( TestFluxBC )
ADD_LBPM_TEST( TestFlowAdaptor )
ADD_LBPM_TEST( TestMap )
ADD_LBPM_TEST( TestMembrane )
#ADD_LBPM_TEST( TestMRT )
#ADD_LBPM_TEST( TestColorGrad )
ADD_LBPM_TEST( TestWideHalo )

View File

@ -183,11 +183,12 @@ int main(int argc, char **argv)
int i,j,k;
// Load inputs
auto db = loadInputs( nprocs );
/* auto filename = argv[1];
auto input_db = std::make_shared<Database>( filename );
auto db = input_db->getDatabase( "Domain" );
*/
auto filename = argv[1];
auto input_db = std::make_shared<Database>( filename );
auto db = input_db->getDatabase( "Domain" );
//else {
// auto db = loadInputs( nprocs );
//}
int Nx = db->getVector<int>( "n" )[0];
int Ny = db->getVector<int>( "n" )[1];
int Nz = db->getVector<int>( "n" )[2];
@ -269,17 +270,19 @@ int main(int argc, char **argv)
//.......................................................................
//...........................................................................
comm.barrier();
//comm.barrier();
if (rank == 0) cout << "Domain set." << endl;
//...........................................................................
cout << flush;
//...........................................................................
if (rank==0) printf ("Create ScaLBL_Communicator \n");
cout << flush;
// Create a communicator for the device (will use optimized layout)
ScaLBL_Communicator ScaLBL_Comm(Dm);
int Npad=(Np/16 + 2)*16;
if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N);
cout << flush;
auto neighborList= new int[18*Npad];
IntArray Map(Nx,Ny,Nz);
Map.fill(-2);
@ -290,7 +293,8 @@ int main(int argc, char **argv)
//......................device distributions.................................
dist_mem_size = Np*sizeof(double);
if (rank==0) printf ("Allocating distributions \n");
cout << flush;
int *NeighborList;
int *dvcMap;
double *fq;
@ -320,6 +324,9 @@ int main(int argc, char **argv)
ScaLBL_DeviceBarrier();
delete [] TmpMap;
if (rank==0) printf("Map is copied to GPU \n");
cout << flush;
//...........................................................................
/* // Write the communcation structure into a file for debugging
@ -351,11 +358,13 @@ int main(int argc, char **argv)
fclose(CommFile);
*/
if (rank==0) printf("Setting the distributions, size = : %i\n", Np);
cout << flush;
//...........................................................................
GlobalFlipScaLBL_D3Q19_Init(fq_host, Map, Np, Nx-2, Ny-2, Nz-2, iproc,jproc,kproc,nprocx,nprocy,nprocz);
ScaLBL_CopyToDevice(fq, fq_host, 19*dist_mem_size);
ScaLBL_DeviceBarrier();
comm.barrier();
//comm.barrier();
//*************************************************************************
// First timestep
ScaLBL_Comm.SendD3Q19AA(fq); //READ FROM NORMAL
@ -375,6 +384,7 @@ int main(int argc, char **argv)
int timestep = 0;
if (rank==0) printf("********************************************************\n");
if (rank==0) printf("No. of timesteps for timing: %i \n", 100);
cout << flush;
//.......create and start timer............
double starttime,stoptime,cputime;
@ -420,13 +430,16 @@ int main(int argc, char **argv)
// 18 reads and 18 writes for each lattice site
double MemoryRefs = double(Np)*36;
// number of memory references for the swap algorithm - GigaBytes / second
if (rank==0) printf("DRAM bandwidth (per process)= %f GB/sec \n",MemoryRefs*8*double(timestep)*1e-9);
if (rank==0) printf("DRAM bandwidth (per process)= %f GB/sec \n",MemoryRefs*8*double(timestep)/cputime*1e-9);
// Report bandwidth in Gigabits per second
// communication bandwidth includes both send and recieve
if (rank==0) printf("Communication bandwidth (per process)= %f Gbit/sec \n",ScaLBL_Comm.CommunicationCount*64*timestep/1e9);
if (rank==0) printf("Aggregated communication bandwidth = %f Gbit/sec \n",nprocs*ScaLBL_Comm.CommunicationCount*64*timestep/1e9);
if (rank==0) printf("Communication bandwidth (per process)= %f Gbit/sec \n",ScaLBL_Comm.CommunicationCount*64*timestep/cputime*1e-9);
if (rank==0) printf("Aggregated communication bandwidth = %f Gbit/sec \n",nprocs*ScaLBL_Comm.CommunicationCount*64*timestep/cputime*1e-9);
cout << flush;
}
// ****************************************************
cout << fflush;
comm.barrier();
Utilities::shutdown();
// ****************************************************

348
tests/TestMembrane.cpp Normal file
View File

@ -0,0 +1,348 @@
//*************************************************************************
// Lattice Boltzmann Simulator for Single Phase Flow in Porous Media
// James E. McCLure
//*************************************************************************
#include <stdio.h>
#include <iostream>
#include <fstream>
#include "common/MPI.h"
#include "common/Membrane.h"
#include "common/ScaLBL.h"
using namespace std;
std::shared_ptr<Database> loadInputs( int nprocs )
{
//auto db = std::make_shared<Database>( "Domain.in" );
auto db = std::make_shared<Database>();
db->putScalar<int>( "BC", 0 );
db->putVector<int>( "nproc", { 1, 1, 1 } );
db->putVector<int>( "n", { 32, 32, 32 } );
db->putScalar<int>( "nspheres", 1 );
db->putVector<double>( "L", { 1, 1, 1 } );
return db;
}
//***************************************************************************************
int main(int argc, char **argv)
{
// Initialize MPI
Utilities::startup( argc, argv );
Utilities::MPI comm( MPI_COMM_WORLD );
int check=0;
{
int i,j,k,n;
int rank = comm.getRank();
if (rank == 0){
printf("********************************************************\n");
printf("Running unit test: TestMembrane \n");
printf("********************************************************\n");
}
// Load inputs
auto db = loadInputs( comm.getSize() );
int Nx = db->getVector<int>( "n" )[0];
int Ny = db->getVector<int>( "n" )[1];
int Nz = db->getVector<int>( "n" )[2];
auto Dm = std::make_shared<Domain>(db,comm);
Nx += 2;
Ny += 2;
Nz += 2;
int N = Nx*Ny*Nz;
//.......................................................................
int Np = 0;
double distance,radius;
DoubleArray Distance(Nx,Ny,Nz);
for (k=0;k<Nz;k++){
for (j=0;j<Ny;j++){
for (i=0;i<Nx;i++){
n = k*Nx*Ny+j*Nx+i;
Dm->id[n] = 1;
radius = double(Nx)/4;
distance = sqrt(double((i-0.5*Nx)*(i-0.5*Nx)+ (j-0.5*Ny)*(j-0.5*Ny)+ (k-0.5*Nz)*(k-0.5*Nz)))-radius;
if (distance < 0.0 ){
Dm->id[n] = 1;
}
Distance(i,j,k) = distance;
Np++;
}
}
}
Dm->CommInit();
// Create a communicator for the device (will use optimized layout)
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm(new ScaLBL_Communicator(Dm));
//Create a second communicator based on the regular data layout
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm_Regular(new ScaLBL_Communicator(Dm));
if (rank==0){
printf("Total domain size = %i \n",N);
printf("Reduced domain size = %i \n",Np);
}
// LBM variables
if (rank==0) printf ("Set up the neighborlist \n");
int Npad=Np+32;
int neighborSize=18*Npad*sizeof(int);
int *neighborList;
IntArray Map(Nx,Ny,Nz);
neighborList= new int[18*Npad];
//......................device distributions.................................
int *NeighborList;
int *dvcMap;
//...........................................................................
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Npad);
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id.data(),Np,1);
comm.barrier();
ScaLBL_CopyToDevice(NeighborList, neighborList, 18*Np*sizeof(int));
double *dist;
dist = new double [19*Np];
// Check the neighborlist
printf("Check neighborlist: exterior %i, first interior %i last interior %i \n",ScaLBL_Comm->LastExterior(),ScaLBL_Comm->FirstInterior(),ScaLBL_Comm->LastInterior());
for (int idx=0; idx<ScaLBL_Comm->LastExterior(); idx++){
for (int q=0; q<18; q++){
int nn = neighborList[q*Np+idx]%Np;
if (nn>Np) printf("neighborlist error (exterior) at q=%i, idx=%i \n",q,idx);
dist[q*Np + idx] = 0.0;
}
}
for (int idx=ScaLBL_Comm->FirstInterior(); idx<ScaLBL_Comm->LastInterior(); idx++){
for (int q=0; q<18; q++){
int nn = neighborList[q*Np+idx]%Np;
if (nn>Np) printf("neighborlist error (exterior) at q=%i, idx=%i \n",q,idx);
dist[q*Np + idx] = 0.0;
}
}
/* create a membrane data structure */
Membrane M(Dm, NeighborList, Np);
int MembraneCount = M.Create(Dm, Distance, Map);
if (rank==0) printf (" Number of membrane links: %i \n", MembraneCount);
/* create a tagged array to show where the mebrane is*/
double *MembraneLinks;
MembraneLinks = new double [Nx*Ny*Nz];
for (int n=0; n<Nx*Ny*Nz; n++) {
MembraneLinks[n] = 0.0;
}
for (int mlink=0; mlink<MembraneCount; mlink++){
int iq = M.membraneLinks[2*mlink];
int jq = M.membraneLinks[2*mlink+1];
dist[iq] = -1.0; // set these distributions to non-zero
dist[jq] = 1.0;
}
for (k=1;k<Nz-1;k++){
for (j=1;j<Ny-1;j++){
for (i=1;i<Nx-1;i++){
int idx = Map(i,j,k);
double sum = 0.0;
for (int q=0; q<19; q++){
sum += dist[q*Np + idx];
}
int n = k*Nx*Ny + j*Nx + i;
MembraneLinks[n] = sum;
if (sum > 0.f){
Dm->id[n] = 127;
}
if (sum < 0.f){
Dm->id[n] = 64;
}
}
}
}
if (argc > 1)
Dm->AggregateLabels("membrane.raw");
/* create a pair of distributions to test membrane mass transport routine */
double *fq, *gq, *Ci, *Cj, *Psi, *Ci_host;
Ci_host = new double [Np];
ScaLBL_AllocateDeviceMemory((void **)&fq, 19 * sizeof(double) * Np);
ScaLBL_AllocateDeviceMemory((void **)&gq, 19 * sizeof(double) * Np);
ScaLBL_AllocateDeviceMemory((void **)&Ci, sizeof(double) * Np);
ScaLBL_AllocateDeviceMemory((void **)&Cj, sizeof(double) * Np);
ScaLBL_AllocateDeviceMemory((void **)&Psi, sizeof(double) * Np);
/* initialize concentration inside membrane */
for (k=1;k<Nz-1;k++){
for (j=1;j<Ny-1;j++){
for (i=1;i<Nx-1;i++){
n = k*Nx*Ny+j*Nx+i;
int idx = Map(i,j,k);
if (Distance(i,j,k) > 0.0)
Ci_host[idx] = 1.0;
else
Ci_host[idx] = 0.0;
}
}
}
ScaLBL_CopyToDevice(Ci, Ci_host, sizeof(double) * Np);
/* initialize the distributions */
ScaLBL_D3Q7_Ion_Init_FromFile(fq, Ci, Np);
ScaLBL_D3Q7_Ion_Init_FromFile(gq, Ci, Np);
/* Streaming with the usual neighborlist */
ScaLBL_D3Q19_AAodd_Compact(NeighborList, fq, Np);
/* Streaming with the membrane neighborlist*/
ScaLBL_D3Q19_AAodd_Compact(M.NeighborList, gq, Np);
/* explicit mass transfer step with the membrane*/
M.AssignCoefficients(dvcMap, Psi, "ones");
M.IonTransport(gq, Cj);
ScaLBL_CopyToHost(Ci_host, Cj, sizeof(double) * Np);
double ionError = 0.0;
for (int n=0; n<Np; n++){
ionError += Ci_host[n];
}
if (fabs(ionError) > 1e-12) {
printf(" Failed error tolerance in membrane ion transport routine! \n");
check = 2;
}
DoubleArray Ions(Nx,Ny,Nz);
ScaLBL_Comm->RegularLayout(Map, Cj, Ions);
if (argc > 1)
Dm->AggregateLabels("membrane2.raw",Ions);
/* now compare streaming */
ScaLBL_D3Q7_Ion_Init_FromFile(gq, Ci, Np);
M.IonTransport(gq, Cj);
ScaLBL_D3Q19_AAodd_Compact(M.NeighborList, gq, Np);
M.IonTransport(gq, Cj);
/* now check that the two results agree*/
double *fq_h, *gq_h;
fq_h = new double [7*Np];
gq_h = new double [7*Np];
ScaLBL_CopyToHost(fq_h, fq, 7*sizeof(double) * Np);
ScaLBL_CopyToHost(gq_h, gq, 7*sizeof(double) * Np);
for (int n = 0; n<Np; n++){
for (int q=0; q<7; q++){
double gval = gq_h[q*Np + n];
double fval = fq_h[q*Np + n];
if (gval != fval ){
printf(" Membrane streaming mismatch at q=%i, n=%i \n",q,n);
printf(" .... gq = %f, fq = %f \n",gval, fval);
printf(" (unit test will fail) \n");
check = 3;
}
}
}
DoubleArray MembraneErrors(Nx,Ny,Nz);
for (k=1;k<Nz-1;k++){
for (j=1;j<Ny-1;j++){
for (i=1;i<Nx-1;i++){
n = k*Nx*Ny+j*Nx+i;
int idx = Map(i,j,k);
MembraneErrors(i,j,k) = 0.0;
for (int q=0; q<7; q++){
double gval = gq_h[q*Np + idx];
double fval = fq_h[q*Np + idx];
MembraneErrors(i,j,k) += gval - fval;
}
}
}
}
Dm->AggregateLabels("membrane3.raw",MembraneErrors);
//...........................................................................
// Update GPU data structures
if (rank==0) printf ("Setting up device map and neighbor list \n");
int *TmpMap;
TmpMap=new int[Np*sizeof(int)];
for (k=1; k<Nz-1; k++){
for (j=1; j<Ny-1; j++){
for (i=1; i<Nx-1; i++){
int idx=Map(i,j,k);
if (!(idx < 0))
TmpMap[idx] = k*Nx*Ny+j*Nx+i;
}
}
}
ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np);
ScaLBL_DeviceBarrier();
// Create a dummy distribution data structure
double *fq_host;
fq_host = new double[19*Np];
if (rank==0) printf ("Setting up Np=%i distributions \n",Np);
for (k=1; k<Nz-1; k++){
for (j=1; j<Ny-1; j++){
for (i=1; i<Nx-1; i++){
int idx=Map(i,j,k);
if (!(idx<0)){
for (int q=0; q<19; q++){
fq_host[q*Np+idx]=(k*Nx*Ny+j*Nx+i)+0.01*q;
}
}
}
}
}
/* Run dummy communications */
/*initialize fq from host data */
ScaLBL_CopyToDevice(fq, fq_host, sizeof(double)*7*Np);
M.SendD3Q7AA(&fq[0]);
M.RecvD3Q7AA(&gq[0]);
// this has only the communicated values
//ScaLBL_CopyToHost(fq_host, gq, sizeof(double)*7*Np);
if (rank==0) printf ("Sum result \n");
ScaLBL_D3Q7_AAeven_IonConcentration(&gq[0 * Np * 7], &Ci[0 * Np],
0, ScaLBL_Comm->LastExterior(),
Np);
DoubleArray Result(Nx,Ny,Nz);
ScaLBL_Comm->RegularLayout(Map, Ci, Result);
/* for (k=1; k<Nz-1; k++){
for (j=1; j<Ny-1; j++){
for (i=1; i<Nx-1; i++){
int idx=Map(i,j,k);
double sum = 0.0;
if (!(idx<0)){
for (int q=1; q<3; q++){
sum += fq_host[q*Np+idx];
}
Result[k*Nx*Ny+j*Nx+i] = sum;
}
}
}
}
*/
FILE *OUTFILE;
OUTFILE = fopen("D3Q7.raw","wb");
fwrite(Result.data(),8,Nx*Ny*Nz,OUTFILE);
fclose(OUTFILE);
FILE *MAPFILE;
MAPFILE = fopen("Map.raw","wb");
fwrite(Map.data(),4,Nx*Ny*Nz,MAPFILE);
fclose(MAPFILE);
delete [] TmpMap;
delete [] fq_host;
}
Utilities::shutdown();
return check;
}

View File

@ -9,8 +9,8 @@
#include "common/ScaLBL.h"
#include "common/Communication.h"
#include "analysis/TwoPhase.h"
#include "common/MPI_Helpers.h"
#include "common/MPI.h"
#include "models/BGKModel.h"
//#define WRITE_SURFACES
/*
@ -23,414 +23,33 @@ using namespace std;
int main(int argc, char **argv)
{
//*****************************************
// ***** MPI STUFF ****************
//*****************************************
// Initialize MPI
int rank,nprocs;
Utilities::startup( argc, argv );
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
Utilities::startup( argc, argv );
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
{
// parallel domain size (# of sub-domains)
int nprocx,nprocy,nprocz;
if (rank == 0){
printf("********************************************************\n");
printf("Running Single Phase Permeability Calculation \n");
printf("********************************************************\n");
}
// Variables that specify the computational domain
string FILENAME;
int Nx,Ny,Nz; // local sub-domain size
int nspheres; // number of spheres in the packing
double Lx,Ly,Lz; // Domain length
double D = 1.0; // reference length for non-dimensionalization
// Color Model parameters
int timestepMax, interval;
double tau,Fx,Fy,Fz,tol,err;
double din,dout;
bool pBC,Restart;
int i,j,k,n;
int RESTART_INTERVAL=20000;
if (rank==0){
//.............................................................
// READ SIMULATION PARMAETERS FROM INPUT FILE
//.............................................................
ifstream input("Permeability.in");
// Line 1: model parameters (tau, alpha, beta, das, dbs)
input >> tau; // Viscosity parameter
// Line 2: External force components (Fx,Fy, Fz)
input >> Fx;
input >> Fy;
input >> Fz;
// Line 3: Pressure Boundary conditions
input >> Restart;
input >> pBC;
input >> din;
input >> dout;
// Line 4: time-stepping criteria
input >> timestepMax; // max no. of timesteps
input >> interval; // restart interval
input >> tol; // error tolerance
//.............................................................
//.......................................................................
// Reading the domain information file
//.......................................................................
ifstream domain("Domain.in");
domain >> nprocx;
domain >> nprocy;
domain >> nprocz;
domain >> Nx;
domain >> Ny;
domain >> Nz;
//domain >> nspheres;
domain >> Lx;
domain >> Ly;
domain >> Lz;
//.......................................................................
}
// **************************************************************
// Broadcast simulation parameters from rank 0 to all other procs
MPI_Barrier(comm);
//.................................................
MPI_Bcast(&tau,1,MPI_DOUBLE,0,comm);
//MPI_Bcast(&pBC,1,MPI_LOGICAL,0,comm);
// MPI_Bcast(&Restart,1,MPI_LOGICAL,0,comm);
MPI_Bcast(&din,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&dout,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fx,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fy,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Fz,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&timestepMax,1,MPI_INT,0,comm);
MPI_Bcast(&interval,1,MPI_INT,0,comm);
MPI_Bcast(&tol,1,MPI_DOUBLE,0,comm);
// Computational domain
MPI_Bcast(&Nx,1,MPI_INT,0,comm);
MPI_Bcast(&Ny,1,MPI_INT,0,comm);
MPI_Bcast(&Nz,1,MPI_INT,0,comm);
MPI_Bcast(&nprocx,1,MPI_INT,0,comm);
MPI_Bcast(&nprocy,1,MPI_INT,0,comm);
MPI_Bcast(&nprocz,1,MPI_INT,0,comm);
//MPI_Bcast(&nspheres,1,MPI_INT,0,comm);
MPI_Bcast(&Lx,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Ly,1,MPI_DOUBLE,0,comm);
MPI_Bcast(&Lz,1,MPI_DOUBLE,0,comm);
//.................................................
MPI_Barrier(comm);
RESTART_INTERVAL=interval;
// **************************************************************
// **************************************************************
double rlx = 1.f/tau;
if (nprocs != nprocx*nprocy*nprocz){
printf("nprocx = %i \n",nprocx);
printf("nprocy = %i \n",nprocy);
printf("nprocz = %i \n",nprocz);
INSIST(nprocs == nprocx*nprocy*nprocz,"Fatal error in processor count!");
}
if (rank==0){
printf("********************************************************\n");
printf("tau = %f \n", tau);
printf("Force(x) = %.5g \n", Fx);
printf("Force(y) = %.5g \n", Fy);
printf("Force(z) = %.5g \n", Fz);
printf("Sub-domain size = %i x %i x %i\n",Nx,Ny,Nz);
printf("Process grid = %i x %i x %i\n",nprocx,nprocy,nprocz);
printf("********************************************************\n");
}
double viscosity=(tau-0.5)/3.0;
// Initialized domain and averaging framework for Two-Phase Flow
int BC=pBC;
Domain Dm(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BC);
for (i=0; i<Dm.Nx*Dm.Ny*Dm.Nz; i++) Dm.id[i] = 1;
Dm.CommInit();
TwoPhase Averages(Dm);
// Mask that excludes the solid phase
Domain Mask(Nx,Ny,Nz,rank,nprocx,nprocy,nprocz,Lx,Ly,Lz,BC);
MPI_Barrier(comm);
Nx += 2; Ny += 2; Nz += 2;
int N = Nx*Ny*Nz;
//.......................................................................
if (rank == 0) printf("Read input media... \n");
//.......................................................................
//.......................................................................
// Filenames used
char LocalRankString[8];
char LocalRankFilename[40];
char LocalRestartFile[40];
char tmpstr[10];
sprintf(LocalRankString,"%05d",rank);
sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString);
sprintf(LocalRestartFile,"%s%s","Restart.",LocalRankString);
// printf("Local File Name = %s \n",LocalRankFilename);
// .......... READ THE INPUT FILE .......................................
// char value;
char *id;
id = new char[N];
double sum, sum_local;
double iVol_global = 1.0/(1.0*(Nx-2)*(Ny-2)*(Nz-2)*nprocs);
//if (BoundaryCondition > 0) iVol_global = 1.0/(1.0*(Nx-2)*nprocx*(Ny-2)*nprocy*((Nz-2)*nprocz-6));
double porosity, pore_vol;
//...........................................................................
if (rank == 0) cout << "Reading in domain from signed distance function..." << endl;
//.......................................................................
// Read the signed distance
sprintf(LocalRankString,"%05d",rank);
sprintf(LocalRankFilename,"%s%s","SignDist.",LocalRankString);
ReadBinaryFile(LocalRankFilename, Averages.SDs.data(), N);
MPI_Barrier(comm);
if (rank == 0) cout << "Domain set." << endl;
//.......................................................................
// Assign the phase ID field based on the signed distance
//.......................................................................
for (k=0;k<Nz;k++){
for (j=0;j<Ny;j++){
for (i=0;i<Nx;i++){
int n = k*Nx*Ny+j*Nx+i;
id[n] = 0;
}
}
}
sum=0.f;
pore_vol = 0.0;
for ( k=0;k<Nz;k++){
for ( j=0;j<Ny;j++){
for ( i=0;i<Nx;i++){
int n = k*Nx*Ny+j*Nx+i;
if (Averages.SDs(n) > 0.0){
id[n] = 2;
}
// compute the porosity (actual interface location used)
if (Averages.SDs(n) > 0.0){
sum++;
}
}
}
}
if (rank==0) printf("Initialize from segmented data: solid=0, NWP=1, WP=2 \n");
sprintf(LocalRankFilename,"ID.%05i",rank);
size_t readID;
FILE *IDFILE = fopen(LocalRankFilename,"rb");
if (IDFILE==NULL) ERROR("lbpm_permeability_simulator: Error opening file: ID.xxxxx");
readID=fread(id,1,N,IDFILE);
if (readID != size_t(N)) printf("lbpm_permeability_simulator: Error reading ID (rank=%i) \n",rank);
fclose(IDFILE);
//.......................................................................
// Compute the media porosity, assign phase labels and solid composition
//.......................................................................
sum_local=0.0;
int Np=0; // number of local pore nodes
//.......................................................................
for (k=1;k<Nz-1;k++){
for (j=1;j<Ny-1;j++){
for (i=1;i<Nx-1;i++){
n = k*Nx*Ny+j*Nx+i;
if (id[n] > 0){
sum_local+=1.0;
Np++;
}
}
}
}
MPI_Allreduce(&sum_local,&sum,1,MPI_DOUBLE,MPI_SUM,comm);
porosity = sum*iVol_global;
if (rank==0) printf("Media porosity = %f \n",porosity);
//.........................................................
// don't perform computations at the eight corners
id[0] = id[Nx-1] = id[(Ny-1)*Nx] = id[(Ny-1)*Nx + Nx-1] = 0;
id[(Nz-1)*Nx*Ny] = id[(Nz-1)*Nx*Ny+Nx-1] = id[(Nz-1)*Nx*Ny+(Ny-1)*Nx] = id[(Nz-1)*Nx*Ny+(Ny-1)*Nx + Nx-1] = 0;
//.........................................................
MPI_Barrier(comm);
// Initialize communication structures in averaging domain
for (i=0; i<Mask.Nx*Mask.Ny*Mask.Nz; i++) Mask.id[i] = id[i];
Mask.CommInit(comm);
//...........................................................................
if (rank==0) printf ("Create ScaLBL_Communicator \n");
// Create a communicator for the device
int Npad=(Np/16 + 2)*16;
ScaLBL_Communicator ScaLBL_Comm(Mask);
int *neighborList;
IntArray Map(Nx,Ny,Nz);
neighborList= new int[18*Npad];
Np = ScaLBL_Comm.MemoryOptimizedLayoutAA(Map,neighborList,Mask.id,Np);
MPI_Barrier(comm);
// LBM variables
if (rank==0) printf ("Allocating distributions \n");
//......................device distributions.................................
int dist_mem_size = Np*sizeof(double);
int neighborSize=18*(Np*sizeof(int));
int *NeighborList;
// double *f_even,*f_odd;
double * dist;
double * Velocity;
double * Pressure;
//...........................................................................
ScaLBL_AllocateDeviceMemory((void **) &dist, 19*dist_mem_size);
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
ScaLBL_AllocateDeviceMemory((void **) &Velocity, 3*sizeof(double)*Np);
ScaLBL_AllocateDeviceMemory((void **) &Pressure, 3*sizeof(double)*Np);
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
//...........................................................................
//...........................................................................
if (rank==0) printf("Setting the distributions, size = %i\n", N);
//...........................................................................
// Finalize setup for averaging domain
//Averages.SetupCubes(Dm);
Averages.UpdateSolid();
// Initialize two phase flow variables (all wetting phase)
for (k=0;k<Nz;k++){
for (j=0;j<Ny;j++){
for (i=0;i<Nx;i++){
n=k*Nx*Ny+j*Nx+i;
Averages.Phase(i,j,k) = -1.0;
Averages.SDn(i,j,k) = Averages.Phase(i,j,k);
Averages.Phase_tplus(i,j,k) = Averages.SDn(i,j,k);
Averages.Phase_tminus(i,j,k) = Averages.SDn(i,j,k);
Averages.DelPhi(i,j,k) = 0.0;
Averages.Press(i,j,k) = 0.0;
Averages.Vel_x(i,j,k) = 0.0;
Averages.Vel_y(i,j,k) = 0.0;
Averages.Vel_z(i,j,k) = 0.0;
}
}
}
//.......................................................................
ScaLBL_D3Q19_Init(dist, Np);
int timestep = 0;
if (rank==0) printf("********************************************************\n");
if (rank==0) printf("No. of timesteps: %i \n", timestepMax);
//.......create and start timer............
double starttime,stoptime,cputime;
MPI_Barrier(comm);
starttime = MPI_Wtime();
//.........................................
double D32,Fo,Re,velocity,err1D,mag_force,vel_prev;
err = vel_prev = 1.0;
if (rank==0) printf("Begin timesteps: error tolerance is %f \n", tol);
//************ MAIN ITERATION LOOP ***************************************/
while (timestep < timestepMax && err > tol ){
timestep++;
ScaLBL_Comm.SendD3Q19AA(dist); //READ FROM NORMAL
ScaLBL_D3Q19_AAodd_BGK(NeighborList, dist, ScaLBL_Comm.first_interior, ScaLBL_Comm.last_interior, Np, rlx, Fx, Fy, Fz);
ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAodd_BGK(NeighborList, dist, 0, ScaLBL_Comm.next, Np, rlx, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
ScaLBL_Comm.SendD3Q19AA(dist); //READ FORM NORMAL
ScaLBL_D3Q19_AAeven_BGK(dist, ScaLBL_Comm.first_interior, ScaLBL_Comm.last_interior, Np, rlx, Fx, Fy, Fz);
ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAeven_BGK(dist, 0, ScaLBL_Comm.next, Np, rlx, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
//************************************************************************/
if (timestep%500 == 0){
//...........................................................................
// Copy the data for for the analysis timestep
//...........................................................................
// Copy the phase from the GPU -> CPU
//...........................................................................
ScaLBL_DeviceBarrier();
ScaLBL_D3Q19_Pressure(dist,Pressure,Np);
ScaLBL_D3Q19_Momentum(dist,Velocity,Np);
ScaLBL_Comm.RegularLayout(Map,Pressure,Averages.Press);
ScaLBL_Comm.RegularLayout(Map,&Velocity[0],Averages.Vel_x);
ScaLBL_Comm.RegularLayout(Map,&Velocity[Np],Averages.Vel_y);
ScaLBL_Comm.RegularLayout(Map,&Velocity[2*Np],Averages.Vel_z);
// Way more work than necessary -- this is just to get the solid interfacial area!!
Averages.Initialize();
Averages.UpdateMeshValues();
Averages.ComputeLocal();
Averages.Reduce();
double vawx = Averages.vaw_global(0);
double vawy = Averages.vaw_global(1);
double vawz = Averages.vaw_global(2);
if (rank==0){
// ************* DIMENSIONLESS FORCHEIMER EQUATION *************************
// Dye, A.L., McClure, J.E., Gray, W.G. and C.T. Miller
// Description of Non-Darcy Flows in Porous Medium Systems
// Physical Review E 87 (3), 033012
// Fo := density*D32^3*(density*force) / (viscosity^2)
// Re := density*D32*velocity / viscosity
// Fo = a*Re + b*Re^2
// *************************************************************************
//viscosity = (tau-0.5)*0.333333333333333333;
D32 = 6.0*(Dm.Volume-Averages.vol_w_global)/Averages.As_global;
printf("Sauter Mean Diameter = %f \n",D32);
mag_force = sqrt(Fx*Fx+Fy*Fy+Fz*Fz);
Fo = D32*D32*D32*mag_force/viscosity/viscosity;
// .... 1-D flow should be aligned with force ...
velocity = vawx*Fx/mag_force + vawy*Fy/mag_force + vawz*Fz/mag_force;
err1D = fabs(velocity-sqrt(vawx*vawx+vawy*vawy+vawz*vawz))/velocity;
//.......... Computation of the Reynolds number Re ..............
Re = D32*velocity/viscosity;
printf("Force: %.5g,%.5g,%.5g \n",Fx,Fy,Fz);
printf("Velocity: %.5g,%.5g,%.5g \n",vawx,vawy,vawz);
printf("Relative error for 1D representation: %.5g \n",err1D);
printf("Dimensionless force: %5g \n", Fo);
printf("Reynolds number: %.5g \n", Re);
printf("Dimensionless Permeability (k/D^2): %.5g \n", Re/Fo);
}
}
}
//************************************************************************/
// Initialize compute device
int device=ScaLBL_SetDevice(rank);
NULL_USE( device );
ScaLBL_DeviceBarrier();
MPI_Barrier(comm);
stoptime = MPI_Wtime();
if (rank==0) printf("-------------------------------------------------------------------\n");
// Compute the walltime per timestep
cputime = (stoptime - starttime)/timestep;
// Performance obtained from each node
double MLUPS = double(Np)/cputime/1000000;
if (rank==0) printf("********************************************************\n");
if (rank==0) printf("CPU time = %f \n", cputime);
if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
MLUPS *= nprocs;
if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
if (rank==0) printf("********************************************************\n");
NULL_USE(RESTART_INTERVAL);
comm.barrier();
ScaLBL_BGKModel BGK(rank,nprocs,comm);
auto filename = argv[1];
BGK.ReadParams(filename);
BGK.SetDomain(); // this reads in the domain
BGK.ReadInput();
BGK.Create(); // creating the model will create data structure to match the pore structure and allocate variables
BGK.Initialize(); // initializing the model will set initial conditions for variables
BGK.Run();
BGK.VelocityField();
cout << flush;
}
// ****************************************************
comm.barrier();
Utilities::shutdown();
// ****************************************************
Utilities::shutdown();
}

View File

@ -0,0 +1,156 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <iostream>
#include <exception>
#include <stdexcept>
#include <fstream>
#include <math.h>
#include "models/IonModel.h"
#include "models/StokesModel.h"
#include "models/PoissonSolver.h"
#include "models/MultiPhysController.h"
#include "common/Utilities.h"
#include "analysis/ElectroChemistry.h"
using namespace std;
//***************************************************************************
// Test lattice-Boltzmann Ion Model coupled with Poisson equation
//***************************************************************************
int main(int argc, char **argv)
{
// Initialize MPI and error handlers
Utilities::startup( argc, argv );
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
{ // Limit scope so variables that contain communicators will free before MPI_Finialize
if (rank == 0){
printf("********************************************************\n");
printf("Running LBPM electrokinetic single-fluid solver \n");
printf("********************************************************\n");
}
// Initialize compute device
int device=ScaLBL_SetDevice(rank);
NULL_USE( device );
ScaLBL_DeviceBarrier();
comm.barrier();
PROFILE_ENABLE(1);
//PROFILE_ENABLE_TRACE();
//PROFILE_ENABLE_MEMORY();
PROFILE_SYNCHRONIZE();
PROFILE_START("Main");
Utilities::setErrorHandlers();
auto filename = argv[1];
ScaLBL_StokesModel StokesModel(rank,nprocs,comm);
ScaLBL_IonModel IonModel(rank,nprocs,comm);
ScaLBL_Poisson PoissonSolver(rank,nprocs,comm);
ScaLBL_Multiphys_Controller Study(rank,nprocs,comm);//multiphysics controller coordinating multi-model coupling
bool SlipBC = false;
// Load controller information
Study.ReadParams(filename);
// Load user input database files for Navier-Stokes and Ion solvers
StokesModel.ReadParams(filename);
// Setup other model specific structures
StokesModel.SetDomain();
StokesModel.ReadInput();
StokesModel.Create(); // creating the model will create data structure to match the pore structure and allocate variables
comm.barrier();
if (rank == 0) printf("Stokes model setup complete\n");
IonModel.ReadParams(filename);
IonModel.SetDomain();
IonModel.ReadInput();
IonModel.Create();
IonModel.SetMembrane();
comm.barrier();
if (rank == 0) printf("Ion model setup complete\n");
fflush(stdout);
// Create analysis object
ElectroChemistryAnalyzer Analysis(IonModel.Dm);
// Get internal iteration number
StokesModel.timestepMax = Study.getStokesNumIter_PNP_coupling(StokesModel.time_conv,IonModel.time_conv);
StokesModel.Initialize(); // initializing the model will set initial conditions for variables
comm.barrier();
if (rank == 0) printf("Stokes model initialized \n");
fflush(stdout);
IonModel.timestepMax = Study.getIonNumIter_PNP_coupling(StokesModel.time_conv,IonModel.time_conv);
IonModel.Initialize();
comm.barrier();
if (rank == 0) printf("Ion model initialized \n");
// Get maximal time converting factor based on Sotkes and Ion solvers
Study.getTimeConvMax_PNP_coupling(StokesModel.time_conv,IonModel.time_conv);
// Initialize LB-Poisson model
PoissonSolver.ReadParams(filename);
PoissonSolver.SetDomain();
PoissonSolver.ReadInput();
PoissonSolver.Create();
comm.barrier();
if (rank == 0) printf("Poisson solver created \n");
fflush(stdout);
PoissonSolver.Initialize(Study.time_conv_max);
comm.barrier();
if (rank == 0) printf("Poisson solver initialized \n");
fflush(stdout);
int timestep=0;
while (timestep < Study.timestepMax){
timestep++;
PoissonSolver.Run(IonModel.ChargeDensity,SlipBC,timestep);//solve Poisson equtaion to get steady-state electrical potental
comm.barrier();
//if (rank == 0) printf(" Poisson step %i \n",timestep);
StokesModel.Run_Lite(IonModel.ChargeDensity, PoissonSolver.ElectricField);// Solve the N-S equations to get velocity
//fflush(stdout);
IonModel.RunMembrane(StokesModel.Velocity,PoissonSolver.ElectricField,PoissonSolver.Psi); //solve for ion transport with membrane
comm.barrier();
//if (rank == 0) printf(" Membrane step %i \n",timestep);
//fflush(stdout);
timestep++;//AA operations
if (timestep%Study.analysis_interval==0){
Analysis.Basic(IonModel,PoissonSolver,StokesModel,timestep);
}
if (timestep%Study.visualization_interval==0){
Analysis.WriteVis(IonModel,PoissonSolver,StokesModel,Study.db,timestep);
// PoissonSolver.getElectricPotential(timestep);
//PoissonSolver.getElectricField(timestep);
//IonModel.getIonConcentration(timestep);
//StokesModel.getVelocity(timestep);
}
}
if (rank==0) printf("Save simulation raw data at maximum timestep\n");
Analysis.WriteVis(IonModel,PoissonSolver,StokesModel,Study.db,timestep);
if (rank==0) printf("Maximum timestep is reached and the simulation is completed\n");
if (rank==0) printf("*************************************************************\n");
PROFILE_STOP("Main");
PROFILE_SAVE("lbpm_electrokinetic_SingleFluid_simulator",1);
// ****************************************************
} // Limit scope so variables that contain communicators will free before MPI_Finialize
Utilities::shutdown();
}

View File

@ -23,7 +23,7 @@ int main( int argc, char **argv )
{
// Initialize
Utilities::startup( argc, argv );
Utilities::startup( argc, argv, true );
{ // Limit scope so variables that contain communicators will free before MPI_Finialize
@ -198,7 +198,7 @@ int main( int argc, char **argv )
} // Limit scope so variables that contain communicators will free before MPI_Finialize
cout << flush;
Utilities::shutdown();
return 0;
}

View File

@ -24,7 +24,7 @@ using namespace std;
int main(int argc, char **argv)
{
// Initialize MPI
Utilities::startup( argc, argv );
Utilities::startup( argc, argv, false );
Utilities::MPI comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int nprocs = comm.getSize();
@ -49,6 +49,7 @@ int main(int argc, char **argv)
MRT.Initialize(); // initializing the model will set initial conditions for variables
MRT.Run();
MRT.VelocityField();
cout << flush;
}
Utilities::shutdown();
}

View File

@ -10,6 +10,7 @@
#include "common/UnitTest.h"
#include "common/Utilities.h"
#include "common/Utilities.hpp"
#include "common/ScaLBL.h"
#include "ProfilerApp.h"
@ -20,7 +21,6 @@
#include <sched.h>
#endif
#undef MPI_CLASS
#define MPI_CLASS Utilities::MPI
#define MPI_ASSERT ASSERT
@ -1195,6 +1195,144 @@ void testCommDup( UnitTest *ut )
#endif
}
class gpuWrapper{
public:
gpuWrapper(MPI_CLASS MPI_COMM, int MSG_SIZE);
~gpuWrapper();
void Send(double *values);
void Recv(double *values);
double *sendbuf, *recvbuf;
private:
MPI_Request req1[1],req2[1];
MPI_CLASS comm;
int sendCount;
int recvCount;
int rank, rank_x, rank_X, nprocs;
int sendtag, recvtag;
};
gpuWrapper::gpuWrapper(MPI_CLASS MPI_COMM, int MSG_SIZE){
comm = MPI_COMM.dup();
rank = comm.getRank();
nprocs = comm.getSize();
sendCount=MSG_SIZE;
recvCount=MSG_SIZE;
ScaLBL_AllocateZeroCopy((void **) &sendbuf, sendCount*sizeof(double)); // Allocate device memory
ScaLBL_AllocateZeroCopy((void **) &recvbuf, sendCount*sizeof(double)); // Allocate device memory
rank_X = rank+1;
rank_x = rank-1;
if (rank_x < 0) rank_x = nprocs-1;
if (!(rank_X < nprocs)) rank_X = 0;
}
gpuWrapper::~gpuWrapper(){
ScaLBL_FreeDeviceMemory(sendbuf);
ScaLBL_FreeDeviceMemory(recvbuf);
}
void gpuWrapper::Send(double *values){
sendtag = recvtag = 130;
ScaLBL_CopyToDevice(sendbuf,values,sendCount*sizeof(double));
req1[0] = comm.Isend(sendbuf,sendCount,rank_x,sendtag+0);
req2[0] = comm.Irecv(recvbuf,recvCount,rank_X,recvtag+0);
}
void gpuWrapper::Recv(double *values){
comm.waitAll(1,req1);
comm.waitAll(1,req2);
ScaLBL_DeviceBarrier();
ScaLBL_CopyToHost(values,recvbuf,recvCount*sizeof(double));
}
// Test GPU aware MPI
void test_GPU_aware( UnitTest *ut )
{
constexpr size_t N = 1024*1024;
constexpr size_t N_msg = 64;
bool test = true;
// Get the comm to use
MPI_CLASS comm( MPI_COMM_WORLD );
int rank = comm.getRank();
int size = comm.getSize();
try {
// Initialize the device
int device = ScaLBL_SetDevice(rank);
NULL_USE( device );
// create wrapper for communications
gpuWrapper gpuComm(comm, N);
// Allocate and initialize the buffers
size_t bytes = N*sizeof(double);
double *device_send[N_msg] = { nullptr };
double *device_recv[N_msg] = { nullptr };
double *host_send[N_msg] = { nullptr };
double *host_recv[N_msg] = { nullptr };
for ( size_t k=0; k<N_msg; k++ ) {
ScaLBL_AllocateDeviceMemory((void**)&device_send[k],bytes);
ScaLBL_AllocateDeviceMemory((void**)&device_recv[k],bytes);
host_send[k] = new double[N];
host_recv[k] = new double[N];
// Initialize the data
for ( size_t i=0; i<N; i++ ) {
host_send[k][i] = 1000 * k * rank + i;
host_recv[k][i] = 0;
}
ScaLBL_CopyToDevice(device_send[k],host_send[k],bytes);
ScaLBL_CopyToDevice(device_recv[k],host_recv[k],bytes);
}
ScaLBL_DeviceBarrier();
// Send/recieve the data
int rank_send = ( rank + 1 ) % size;
int rank_recv = ( rank - 1 + size ) % size;
MPI_Request req1[N_msg];
MPI_Request req2[N_msg];
for ( size_t k=0; k<N_msg; k++ ) {
req1[k] = comm.Isend( device_send[k], N, rank_send, k );
req2[k] = comm.Irecv( device_recv[k], N, rank_recv, k );
}
comm.waitAll(N_msg,req1);
comm.waitAll(N_msg,req2);
// Copy
for ( size_t k=0; k<N_msg; k++ ) {
ScaLBL_CopyToHost(host_send[k],device_send[k],bytes);
ScaLBL_CopyToHost(host_recv[k],device_recv[k],bytes);
}
ScaLBL_DeviceBarrier();
// Check the data
for ( size_t k=0; k<N_msg; k++ ) {
for ( size_t i=0; i<N; i++ )
test = test && host_recv[k][i] == 1000 * k * rank_recv + i;
}
// Check the gpu wrapper communications the same way
for ( size_t k=0; k<N_msg; k++ ) {
gpuComm.Send(host_send[k]);
gpuComm.Recv(host_recv[k]);
}
// Check the data
for ( size_t k=0; k<N_msg; k++ ) {
for ( size_t i=0; i<N; i++ )
test = test && host_recv[k][i] == 1000 * k * rank_recv + i;
}
// Free buffers
for ( size_t k=0; k<N_msg; k++ ) {
ScaLBL_FreeDeviceMemory(device_send[k]);
ScaLBL_FreeDeviceMemory(device_recv[k]);
delete [] host_send[k];
delete [] host_recv[k];
}
} catch ( ... ) {
test = false;
}
comm.barrier();
if ( test ) {
std::cout << "MPI is GPU aware" << std::endl;
ut->passes("GPU aware MPI" );
} else {
std::cout << "MPI is NOT GPU aware" << std::endl;
ut->failure("GPU aware MPI" );
}
}
// This test will test the MPI class
int main( int argc, char *argv[] )
@ -1513,6 +1651,9 @@ int main( int argc, char *argv[] )
std::cout << std::endl;
}
// Test GPU aware MPI
test_GPU_aware( &ut );
} // Limit the scope so objects are destroyed
// Finished testing, report the results