fix minor merge conflict

This commit is contained in:
James McClure 2022-08-15 05:17:47 -04:00
commit d69bff263c
108 changed files with 13083 additions and 4364 deletions

View File

@ -16,8 +16,7 @@ jobs:
LBPM_SILO_DIR: /home/runner/extlib/silo
MPI_DIR: /home/runner/.openmpi
steps:
steps:
- name: download dependencies
run: |
echo $LBPM_ZLIB_DIR

View File

@ -124,21 +124,7 @@ IF ( USE_CUDA )
ADD_DEFINITIONS( -DUSE_CUDA )
ENABLE_LANGUAGE( CUDA )
ELSEIF ( USE_HIP )
IF ( NOT DEFINED HIP_PATH )
IF ( NOT DEFINED ENV{HIP_PATH} )
SET( HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed" )
ELSE()
SET( HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed" )
ENDIF()
ENDIF()
SET( CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH} )
FIND_PACKAGE( HIP REQUIRED )
FIND_PACKAGE( CUDA QUIET )
MESSAGE( "HIP Found")
MESSAGE( " HIP version: ${HIP_VERSION_STRING}")
MESSAGE( " HIP platform: ${HIP_PLATFORM}")
MESSAGE( " HIP Include Path: ${HIP_INCLUDE_DIRS}")
MESSAGE( " HIP Libraries: ${HIP_LIBRARIES}")
ENABLE_LANGUAGE( HIP )
ADD_DEFINITIONS( -DUSE_HIP )
ENDIF()
@ -180,8 +166,7 @@ IF ( NOT ONLY_BUILD_DOCS )
IF ( USE_CUDA )
ADD_PACKAGE_SUBDIRECTORY( cuda )
ELSEIF ( USE_HIP )
ADD_SUBDIRECTORY( hip )
SET( LBPM_LIBRARIES lbpm-hip lbpm-wia )
ADD_PACKAGE_SUBDIRECTORY( hip )
ELSE()
ADD_PACKAGE_SUBDIRECTORY( cpu )
ENDIF()
@ -190,5 +175,6 @@ IF ( NOT ONLY_BUILD_DOCS )
ADD_SUBDIRECTORY( example )
#ADD_SUBDIRECTORY( workflows )
INSTALL_PROJ_LIB()
CONFIGURE_FILE( ${CMAKE_CURRENT_SOURCE_DIR}/ValgrindSuppresionFile ${CMAKE_CURRENT_BINARY_DIR}/test/ValgrindSuppresionFile COPYONLY )
ENDIF()

View File

@ -1,5 +1,4 @@
#include "IO/PackData.h"
#include <string>

View File

@ -5,7 +5,7 @@
#include <map>
#include <set>
#include <vector>
#include <cstddef>
//! Template function to return the buffer size required to pack a class
template<class TYPE>

View File

@ -1263,7 +1263,7 @@ static int backtrace_thread(
if ( tid == pthread_self() ) {
count = ::backtrace( buffer, size );
} else {
// Note: this will get the backtrace, but terminates the thread in the process!!!
// Send a signal to the desired thread to get the call stack
StackTrace_mutex.lock();
struct sigaction sa;
sigfillset( &sa.sa_mask );

View File

@ -1,52 +1,225 @@
# ACML suppressions
# To run valgrind:
# mpirun -np 2 valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes --verbose --suppressions=ValgrindSuppresionFile --log-file=valgrind-out.txt ./lbpm_nernst_planck_cell_simulator test.db
# MPI suppressions
{
IdentifyCPUCond
MPI_init_cond
Memcheck:Cond
...
fun:acmlcpuid2
fun:PMPI_Init
...
}
{
IdentifyCPUValue
MPI_init_value
Memcheck:Value8
...
fun:acmlcpuid_once
fun:acmlcpuid2
fun:PMPI_Init
...
}
# MPI suppressions
{
HYD_pmci_wait_for_completion
Memcheck:Leak
...
fun:HYD_pmci_wait_for_completion
fun:main
}
{
HYDT_dmxu_poll_wait_for_event
Memcheck:Leak
...
fun:HYDT_dmxu_poll_wait_for_event
fun:main
}
{
PMPI_Init
Memcheck:Leak
MPI_init_addr16
Memcheck:Addr16
...
fun:PMPI_Init
fun:main
...
}
{
MPI_init_addr8
Memcheck:Addr8
...
fun:PMPI_Init
...
}
{
MPI_init_addr4
Memcheck:Addr4
...
fun:PMPI_Init
...
}
{
MPI_init_addr1
Memcheck:Addr1
...
fun:PMPI_Init
...
}
{
gethostname_cond
Memcheck:Cond
...
fun:gethostbyname_r
fun:gethostbyname
...
}
{
gethostname_value
Memcheck:Value8
...
fun:gethostbyname_r
fun:gethostbyname
...
}
# System suppressions
# System errors
{
map_doit_memory
Memcheck:Cond
fun:index
fun:expand_dynamic_string_token
fun:_dl_map_object
fun:map_doit
fun:_dl_catch_error
...
}
{
expand_dynamic_string_token
Memcheck:Cond
fun:index
fun:expand_dynamic_string_token
...
fun:dl_main
fun:_dl_sysdep_start
fun:_dl_start
...
}
{
call_init
Memcheck:Leak
match-leak-kinds: reachable
...
fun:call_init
fun:_dl_init
...
}
# pthread errors
{
pthread_initialize_param
Memcheck:Param
set_robust_list(head)
fun:__pthread_initialize_minimal
fun:(below main)
}
{
pthread_initialize_cond
Memcheck:Cond
fun:__register_atfork
fun:__libc_pthread_init
fun:__pthread_initialize_minimal
fun:(below main)
}
# gfortran
{
gfortran_leak
Memcheck:Leak
match-leak-kinds: reachable
fun:malloc
obj:/usr/lib/x86_64-linux-gnu/libgfortran.so.3.0.0
...
}
# std
{
libc_cond
Memcheck:Cond
...
fun:_dl_init_paths
fun:_dl_non_dynamic_init
fun:__libc_init_first
fun:(below main)
}
{
libc_val8
Memcheck:Value8
...
fun:_dl_init_paths
fun:_dl_non_dynamic_init
fun:__libc_init_first
fun:(below main)
}
{
mallinfo_cond
Memcheck:Cond
fun:int_mallinfo
fun:mallinfo
...
}
{
mallinfo_value
Memcheck:Value8
fun:int_mallinfo
fun:mallinfo
...
}
{
int_free_cond
Memcheck:Cond
fun:_int_free
...
}
{
string_len_cond
Memcheck:Cond
fun:strlen
...
}
{
int_malloc_cond
Memcheck:Cond
fun:_int_malloc
fun:malloc
...
}
{
malloc_consolidate_malloc
Memcheck:Cond
fun:malloc_consolidate
fun:_int_malloc
...
}
{
malloc_consolidate_free
Memcheck:Cond
fun:malloc_consolidate
fun:_int_free
...
}
{
catch_cond
Memcheck:Cond
fun:__cxa_begin_catch
...
}
{
popen
Memcheck:Param
set_robust_list(head)
fun:__nptl_set_robust
fun:__libc_fork
fun:_IO_proc_open
fun:popen
...
}
{
exit
Memcheck:Value8
fun:__run_exit_handlers
...
fun:exit
...
}
{
sse42
Memcheck:Cond
fun:__strstr_sse42
...
}

View File

@ -49,7 +49,7 @@ ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr<Domain> dm)
IonFluxElectrical_y.fill(0);
IonFluxElectrical_z.resize(Nx, Ny, Nz);
IonFluxElectrical_z.fill(0);
if (Dm->rank() == 0) {
bool WriteHeader = false;
TIMELOG = fopen("electrokinetic.csv", "r");
@ -67,6 +67,87 @@ ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(std::shared_ptr<Domain> dm)
}
}
/**
 * Construct the analyzer from an ion model.
 *
 * Takes the domain pointer and the USE_MEMBRANE flag from IonModel, sizes every
 * analysis field to the local sub-domain (Nx x Ny x Nz) and zero-fills it.
 * On rank 0 it also opens "electrokinetic.csv" in append mode and, when the
 * file did not exist beforehand, writes the column header that matches the
 * rows written by Membrane().
 *
 * @param IonModel  ion model providing Dm, USE_MEMBRANE and number_ion_species
 */
ElectroChemistryAnalyzer::ElectroChemistryAnalyzer(ScaLBL_IonModel &IonModel)
    : Dm(IonModel.Dm) {

    Nx = Dm->Nx;
    Ny = Dm->Ny;
    Nz = Dm->Nz;
    // The leading 1.0 makes the whole product evaluate in double precision;
    // multiplying the integers first can overflow int for large domains.
    Volume = 1.0 * (Nx - 2) * (Ny - 2) * (Nz - 2) * Dm->nprocx() *
             Dm->nprocy() * Dm->nprocz();

    if (Dm->rank() == 0)
        printf("Analyze system with sub-domain size = %i x %i x %i \n", Nx, Ny, Nz);

    USE_MEMBRANE = IonModel.USE_MEMBRANE;

    // Potentials and electric field
    ChemicalPotential.resize(Nx, Ny, Nz);
    ChemicalPotential.fill(0);
    ElectricalPotential.resize(Nx, Ny, Nz);
    ElectricalPotential.fill(0);
    ElectricalField_x.resize(Nx, Ny, Nz);
    ElectricalField_x.fill(0);
    ElectricalField_y.resize(Nx, Ny, Nz);
    ElectricalField_y.fill(0);
    ElectricalField_z.resize(Nx, Ny, Nz);
    ElectricalField_z.fill(0);

    // Pressure, concentration and velocity components
    Pressure.resize(Nx, Ny, Nz);
    Pressure.fill(0);
    Rho.resize(Nx, Ny, Nz);
    Rho.fill(0);
    Vel_x.resize(Nx, Ny, Nz);
    Vel_x.fill(0);
    Vel_y.resize(Nx, Ny, Nz);
    Vel_y.fill(0);
    Vel_z.resize(Nx, Ny, Nz);
    Vel_z.fill(0);
    SDs.resize(Nx, Ny, Nz);
    SDs.fill(0);

    // Ion flux components: diffusive, advective, electrical
    IonFluxDiffusive_x.resize(Nx, Ny, Nz);
    IonFluxDiffusive_x.fill(0);
    IonFluxDiffusive_y.resize(Nx, Ny, Nz);
    IonFluxDiffusive_y.fill(0);
    IonFluxDiffusive_z.resize(Nx, Ny, Nz);
    IonFluxDiffusive_z.fill(0);
    IonFluxAdvective_x.resize(Nx, Ny, Nz);
    IonFluxAdvective_x.fill(0);
    IonFluxAdvective_y.resize(Nx, Ny, Nz);
    IonFluxAdvective_y.fill(0);
    IonFluxAdvective_z.resize(Nx, Ny, Nz);
    IonFluxAdvective_z.fill(0);
    IonFluxElectrical_x.resize(Nx, Ny, Nz);
    IonFluxElectrical_x.fill(0);
    IonFluxElectrical_y.resize(Nx, Ny, Nz);
    IonFluxElectrical_y.fill(0);
    IonFluxElectrical_z.resize(Nx, Ny, Nz);
    IonFluxElectrical_z.fill(0);

    if (Dm->rank() == 0) {
        // Cast so the argument matches %lu whatever integer type the model uses
        printf("Set up analysis routines for %lu ions \n",
               static_cast<unsigned long>(IonModel.number_ion_species));

        // Probe for an existing log: the header is written only for a new file
        bool WriteHeader = false;
        TIMELOG = fopen("electrokinetic.csv", "r");
        if (TIMELOG != nullptr)
            fclose(TIMELOG);
        else
            WriteHeader = true;

        TIMELOG = fopen("electrokinetic.csv", "a+");
        if (WriteHeader) {
            // Column names, in the order Membrane() writes the values
            fprintf(TIMELOG, "timestep voltage_out voltage_in ");
            fprintf(TIMELOG, "voltage_out_membrane voltage_in_membrane ");
            for (size_t i = 0; i < IonModel.number_ion_species; i++) {
                // %zu is the matching conversion for a size_t index
                fprintf(TIMELOG, "rho_%zu_out rho_%zu_in ", i, i);
                fprintf(TIMELOG, "rho_%zu_out_membrane rho_%zu_in_membrane ", i, i);
            }
            fprintf(TIMELOG, "count_out count_in ");
            fprintf(TIMELOG, "count_out_membrane count_in_membrane\n");
        }
    }
}
ElectroChemistryAnalyzer::~ElectroChemistryAnalyzer() {
if (Dm->rank() == 0) {
fclose(TIMELOG);
@ -75,6 +156,163 @@ ElectroChemistryAnalyzer::~ElectroChemistryAnalyzer() {
// Currently a no-op: the body is empty and no analyzer parameters are set here.
void ElectroChemistryAnalyzer::SetParams() {}
/**
 * Append one row of inside/outside-membrane averages to the time log.
 *
 * Each voxel is classified by the sign of Ion.MembraneDistance(i,j,k):
 * negative is treated as inside, otherwise outside. Voxels with
 * |distance| < 1 are additionally accumulated into the "membrane" sums.
 * The electric potential is averaged first, then the concentration of every
 * ion species; all sums are reduced across ranks and divided by the global
 * voxel counts obtained in the first pass. Rank 0 writes: timestep, the four
 * potential averages, four averages per ion, and finally the four counts.
 *
 * @param Ion       ion model (membrane distance and ion concentrations)
 * @param Poisson   Poisson solver providing the electric potential
 * @param timestep  time step written in the first column
 */
void ElectroChemistryAnalyzer::Membrane(ScaLBL_IonModel &Ion,
                                        ScaLBL_Poisson &Poisson,
                                        int timestep) {

    Poisson.getElectricPotential(ElectricalPotential);

    if (Dm->rank() == 0)
        fprintf(TIMELOG, "%i ", timestep);

    // Local voxel counts per region (filled during the potential pass only)
    unsigned long int in_local_count = 0;
    unsigned long int out_local_count = 0;
    unsigned long int membrane_in_local_count = 0;
    unsigned long int membrane_out_local_count = 0;

    // Local sums per region for the field currently being processed
    double value_in_local = 0.0;
    double value_out_local = 0.0;
    double value_membrane_in_local = 0.0;
    double value_membrane_out_local = 0.0;

    // Sum `field` over the local sub-domain, split by region.
    // NOTE(review): the loops run up to index N-1 inclusive, exactly as
    // before; confirm whether the last layer is a halo layer that should be
    // excluded.
    auto accumulate = [&](DoubleArray &field, bool count_voxels) {
        value_in_local = 0.0;
        value_out_local = 0.0;
        value_membrane_in_local = 0.0;
        value_membrane_out_local = 0.0;
        for (int k = 1; k < Nz; k++) {
            for (int j = 1; j < Ny; j++) {
                for (int i = 1; i < Nx; i++) {
                    const double memdist = Ion.MembraneDistance(i, j, k);
                    const double value = field(i, j, k);
                    const bool near_membrane = fabs(memdist) < 1.0;
                    if (memdist < 0.0) {
                        // inside the membrane
                        if (near_membrane) {
                            value_membrane_in_local += value;
                            if (count_voxels)
                                membrane_in_local_count++;
                        }
                        value_in_local += value;
                        if (count_voxels)
                            in_local_count++;
                    } else {
                        // outside the membrane
                        if (near_membrane) {
                            value_membrane_out_local += value;
                            if (count_voxels)
                                membrane_out_local_count++;
                        }
                        value_out_local += value;
                        if (count_voxels)
                            out_local_count++;
                    }
                }
            }
        }
    };

    /* electric potential: this pass also determines the voxel counts */
    accumulate(ElectricalPotential, true);

    /* the counts are reduced once and reused for every ion species below */
    const unsigned long int out_global_count =
        Dm->Comm.sumReduce(out_local_count);
    const unsigned long int in_global_count =
        Dm->Comm.sumReduce(in_local_count);
    const unsigned long int membrane_out_global_count =
        Dm->Comm.sumReduce(membrane_out_local_count);
    const unsigned long int membrane_in_global_count =
        Dm->Comm.sumReduce(membrane_in_local_count);

    // Reduce the current local sums, convert them to averages and log them.
    // A zero count is not special-cased: the division then yields inf/nan.
    auto write_averages = [&]() {
        double value_out_global = Dm->Comm.sumReduce(value_out_local);
        double value_in_global = Dm->Comm.sumReduce(value_in_local);
        double value_membrane_out_global =
            Dm->Comm.sumReduce(value_membrane_out_local);
        double value_membrane_in_global =
            Dm->Comm.sumReduce(value_membrane_in_local);

        value_out_global /= out_global_count;
        value_in_global /= in_global_count;
        value_membrane_out_global /= membrane_out_global_count;
        value_membrane_in_global /= membrane_in_global_count;

        if (Dm->rank() == 0) {
            fprintf(TIMELOG, "%.8g ", value_out_global);
            fprintf(TIMELOG, "%.8g ", value_in_global);
            fprintf(TIMELOG, "%.8g ", value_membrane_out_global);
            fprintf(TIMELOG, "%.8g ", value_membrane_in_global);
        }
    };

    write_averages();

    /* ion concentrations, one group of four columns per species */
    for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
        Ion.getIonConcentration(Rho, ion);
        accumulate(Rho, false);
        write_averages();
    }

    if (Dm->rank() == 0) {
        fprintf(TIMELOG, "%lu ", out_global_count);
        fprintf(TIMELOG, "%lu ", in_global_count);
        fprintf(TIMELOG, "%lu ", membrane_out_global_count);
        fprintf(TIMELOG, "%lu\n", membrane_in_global_count);
        fflush(TIMELOG);
    }
}
void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion,
ScaLBL_Poisson &Poisson,
ScaLBL_StokesModel &Stokes, int timestep) {
@ -595,3 +833,408 @@ void ElectroChemistryAnalyzer::WriteVis(ScaLBL_IonModel &Ion,
}
*/
}
/**
 * Write per-ion averages to the time log (overload without a Stokes model).
 *
 * For every ion species this computes, over the local sub-domain and then
 * reduced across ranks:
 *   - the average concentration (sum / Volume),
 *   - the averages of rho*rho and rho*psi, each divided by the average
 *     concentration when that is positive,
 *   - the summed deviations of rho*rho and rho*psi from those averages.
 * Rank 0 appends one row: timestep, five values per ion, then Vin and Vout.
 *
 * @param Ion       ion model providing the ion concentrations
 * @param Poisson   Poisson solver providing the electric potential (psi)
 * @param timestep  time step written in the first column
 */
void ElectroChemistryAnalyzer::Basic(ScaLBL_IonModel &Ion,
                                     ScaLBL_Poisson &Poisson,
                                     int timestep) {

    // Vin / Vout are never computed in this overload; they are logged as 0
    const double Vin = 0.0;
    const double Vout = 0.0;

    Poisson.getElectricPotential(ElectricalPotential);

    const auto num_ions = Ion.number_ion_species;

    /* global averages; vectors release their storage automatically, the
       previous raw new[] arrays were never deleted */
    std::vector<double> rho_avg_global(num_ions, 0.0);
    std::vector<double> rho_mu_avg_global(num_ions, 0.0);
    std::vector<double> rho_mu_fluctuation_global(num_ions, 0.0);
    std::vector<double> rho_psi_avg_global(num_ions, 0.0);
    std::vector<double> rho_psi_fluctuation_global(num_ions, 0.0);

    /* Pass 1: averages for each ion */
    for (size_t ion = 0; ion < num_ions; ion++) {
        double rho_avg_local = 0.0;
        double rho_mu_avg_local = 0.0;
        double rho_psi_avg_local = 0.0;

        Ion.getIonConcentration(Rho, ion);
        for (int k = 1; k < Nz; k++) {
            for (int j = 1; j < Ny; j++) {
                for (int i = 1; i < Nx; i++) {
                    const double rho = Rho(i, j, k);
                    rho_avg_local += rho;
                    rho_mu_avg_local += rho * rho;
                    rho_psi_avg_local += rho * ElectricalPotential(i, j, k);
                }
            }
        }

        rho_avg_global[ion] = Dm->Comm.sumReduce(rho_avg_local) / Volume;
        rho_mu_avg_global[ion] = Dm->Comm.sumReduce(rho_mu_avg_local) / Volume;
        rho_psi_avg_global[ion] =
            Dm->Comm.sumReduce(rho_psi_avg_local) / Volume;

        // Normalize by the mean concentration only when it is positive
        if (rho_avg_global[ion] > 0.0) {
            rho_mu_avg_global[ion] /= rho_avg_global[ion];
            rho_psi_avg_global[ion] /= rho_avg_global[ion];
        }
    }

    /* Pass 2: deviations from the averages for each ion */
    for (size_t ion = 0; ion < num_ions; ion++) {
        double rho_mu_fluctuation_local = 0.0;
        double rho_psi_fluctuation_local = 0.0;

        // Reload this species' concentration: Rho is a single shared buffer
        // and would otherwise still hold the last species from pass 1.
        Ion.getIonConcentration(Rho, ion);
        for (int k = 1; k < Nz; k++) {
            for (int j = 1; j < Ny; j++) {
                for (int i = 1; i < Nx; i++) {
                    const double rho = Rho(i, j, k);
                    rho_mu_fluctuation_local +=
                        (rho * rho - rho_mu_avg_global[ion]);
                    rho_psi_fluctuation_local +=
                        (rho * ElectricalPotential(i, j, k) -
                         rho_psi_avg_global[ion]);
                }
            }
        }

        rho_mu_fluctuation_global[ion] =
            Dm->Comm.sumReduce(rho_mu_fluctuation_local);
        rho_psi_fluctuation_global[ion] =
            Dm->Comm.sumReduce(rho_psi_fluctuation_local);
    }

    if (Dm->rank() == 0) {
        fprintf(TIMELOG, "%i ", timestep);
        for (size_t ion = 0; ion < num_ions; ion++) {
            fprintf(TIMELOG, "%.8g ", rho_avg_global[ion]);
            fprintf(TIMELOG, "%.8g ", rho_mu_avg_global[ion]);
            fprintf(TIMELOG, "%.8g ", rho_psi_avg_global[ion]);
            fprintf(TIMELOG, "%.8g ", rho_mu_fluctuation_global[ion]);
            fprintf(TIMELOG, "%.8g ", rho_psi_fluctuation_global[ion]);
        }
        fprintf(TIMELOG, "%.8g %.8g\n", Vin, Vout);
        fflush(TIMELOG);
    }
}
/**
 * Write visualization data for the ion / Poisson system (overload without a
 * Stokes model).
 *
 * Reads the "Visualization" sub-database of input_db, registers one volume
 * variable per enabled quantity on a single "domain" mesh, copies the
 * corresponding fields into those variables through fillHalo::copy, and calls
 * IO::writeData unless "write_silo" is false.
 *
 * Options read (with defaults): format ("hdf5"), save_electric_potential
 * (true), save_concentration (true), save_ion_flux_diffusive (false),
 * save_ion_flux_electrical (false), save_electric_field (false),
 * write_silo (true).
 *
 * Each field is written through the same shared_ptr that was registered in
 * visData, so the result does not depend on which other outputs are enabled
 * or on their position in the variable list.
 *
 * @param Ion       ion model providing concentrations and ion fluxes
 * @param Poisson   Poisson solver providing electric potential and field
 * @param input_db  input database containing the "Visualization" section
 * @param timestep  time step passed to IO::writeData
 */
void ElectroChemistryAnalyzer::WriteVis(ScaLBL_IonModel &Ion,
                                        ScaLBL_Poisson &Poisson,
                                        std::shared_ptr<Database> input_db,
                                        int timestep) {

    auto vis_db = input_db->getDatabase("Visualization");
    char VisName[40];
    auto format = vis_db->getWithDefault<std::string>("format", "hdf5");

    std::vector<IO::MeshDataStruct> visData;
    fillHalo<double> fillData(Dm->Comm, Dm->rank_info,
                              {Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2}, {1, 1, 1},
                              0, 1);

    IO::initialize("", format, "false");

    // Read every output switch once so registration and filling always agree
    const bool save_potential =
        vis_db->getWithDefault<bool>("save_electric_potential", true);
    const bool save_concentration =
        vis_db->getWithDefault<bool>("save_concentration", true);
    const bool save_flux_diffusive =
        vis_db->getWithDefault<bool>("save_ion_flux_diffusive", false);
    const bool save_flux_electrical =
        vis_db->getWithDefault<bool>("save_ion_flux_electrical", false);
    const bool save_field =
        vis_db->getWithDefault<bool>("save_electric_field", false);

    // Create the MeshDataStruct
    visData.resize(1);
    visData[0].meshName = "domain";
    visData[0].mesh =
        std::make_shared<IO::DomainMesh>(Dm->rank_info, Dm->Nx - 2, Dm->Ny - 2,
                                         Dm->Nz - 2, Dm->Lx, Dm->Ly, Dm->Lz);

    // electric potential
    auto ElectricPotentialVar = std::make_shared<IO::Variable>();
    // electric field
    auto ElectricFieldVar_x = std::make_shared<IO::Variable>();
    auto ElectricFieldVar_y = std::make_shared<IO::Variable>();
    auto ElectricFieldVar_z = std::make_shared<IO::Variable>();
    // ion concentration: one variable per species
    std::vector<std::shared_ptr<IO::Variable>> IonConcentration;
    // diffusive / electro-migrational ion flux: x, y, z per species
    std::vector<std::shared_ptr<IO::Variable>> IonFluxDiffusive;
    std::vector<std::shared_ptr<IO::Variable>> IonFluxElectrical;
    for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
        IonConcentration.push_back(std::make_shared<IO::Variable>());
        for (int c = 0; c < 3; c++) {
            IonFluxDiffusive.push_back(std::make_shared<IO::Variable>());
            IonFluxElectrical.push_back(std::make_shared<IO::Variable>());
        }
    }

    // Name a variable, size it to the interior of the sub-domain and register it
    auto addVariable = [&](const std::shared_ptr<IO::Variable> &var,
                           const char *name) {
        var->name = name;
        var->type = IO::VariableType::VolumeVariable;
        var->dim = 1;
        var->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
        visData[0].vars.push_back(var);
    };

    static const char *const diffusive_names[3] = {"Ion%zu_FluxDiffusive_x",
                                                   "Ion%zu_FluxDiffusive_y",
                                                   "Ion%zu_FluxDiffusive_z"};
    static const char *const electrical_names[3] = {"Ion%zu_FluxElectrical_x",
                                                    "Ion%zu_FluxElectrical_y",
                                                    "Ion%zu_FluxElectrical_z"};

    //------------------------------ Register variables ------------------------------
    if (save_potential) {
        addVariable(ElectricPotentialVar, "ElectricPotential");
    }

    if (save_concentration) {
        for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
            snprintf(VisName, sizeof(VisName), "IonConcentration_%zu", ion + 1);
            addVariable(IonConcentration[ion], VisName);
        }
    }

    if (save_flux_diffusive) {
        for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
            for (int c = 0; c < 3; c++) {
                snprintf(VisName, sizeof(VisName), diffusive_names[c], ion + 1);
                addVariable(IonFluxDiffusive[3 * ion + c], VisName);
            }
        }
    }

    if (save_flux_electrical) {
        for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
            for (int c = 0; c < 3; c++) {
                snprintf(VisName, sizeof(VisName), electrical_names[c], ion + 1);
                addVariable(IonFluxElectrical[3 * ion + c], VisName);
            }
        }
    }

    if (save_field) {
        addVariable(ElectricFieldVar_x, "ElectricField_x");
        addVariable(ElectricFieldVar_y, "ElectricField_y");
        addVariable(ElectricFieldVar_z, "ElectricField_z");
    }

    //------------------------------ Fill variables ----------------------------------
    if (save_potential) {
        Poisson.getElectricPotential(ElectricalPotential);
        fillData.copy(ElectricalPotential, ElectricPotentialVar->data);
    }

    if (save_concentration) {
        for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
            Ion.getIonConcentration(Rho, ion);
            fillData.copy(Rho, IonConcentration[ion]->data);
        }
    }

    if (save_flux_diffusive) {
        for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
            Ion.getIonFluxDiffusive(IonFluxDiffusive_x, IonFluxDiffusive_y,
                                    IonFluxDiffusive_z, ion);
            fillData.copy(IonFluxDiffusive_x, IonFluxDiffusive[3 * ion + 0]->data);
            fillData.copy(IonFluxDiffusive_y, IonFluxDiffusive[3 * ion + 1]->data);
            fillData.copy(IonFluxDiffusive_z, IonFluxDiffusive[3 * ion + 2]->data);
        }
    }

    if (save_flux_electrical) {
        for (size_t ion = 0; ion < Ion.number_ion_species; ion++) {
            Ion.getIonFluxElectrical(IonFluxElectrical_x, IonFluxElectrical_y,
                                     IonFluxElectrical_z, ion);
            fillData.copy(IonFluxElectrical_x, IonFluxElectrical[3 * ion + 0]->data);
            fillData.copy(IonFluxElectrical_y, IonFluxElectrical[3 * ion + 1]->data);
            fillData.copy(IonFluxElectrical_z, IonFluxElectrical[3 * ion + 2]->data);
        }
    }

    if (save_field) {
        Poisson.getElectricField(ElectricalField_x, ElectricalField_y,
                                 ElectricalField_z);
        fillData.copy(ElectricalField_x, ElectricFieldVar_x->data);
        fillData.copy(ElectricalField_y, ElectricFieldVar_y->data);
        fillData.copy(ElectricalField_z, ElectricFieldVar_z->data);
    }

    if (vis_db->getWithDefault<bool>("write_silo", true))
        IO::writeData(timestep, visData, Dm->Comm);
}

View File

@ -29,6 +29,8 @@ public:
double nu_n, nu_w;
double gamma_wn, beta;
double Fx, Fy, Fz;
bool USE_MEMBRANE;
//...........................................................................
int Nx, Ny, Nz;
@ -54,13 +56,16 @@ public:
DoubleArray IonFluxElectrical_z;
ElectroChemistryAnalyzer(std::shared_ptr<Domain> Dm);
ElectroChemistryAnalyzer( ScaLBL_IonModel &IonModel);
~ElectroChemistryAnalyzer();
void SetParams();
void Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson,
ScaLBL_StokesModel &Stokes, int timestep);
void Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, ScaLBL_StokesModel &Stokes, int timestep);
void Membrane(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, int timestep);
void WriteVis(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson,
ScaLBL_StokesModel &Stokes,std::shared_ptr<Database> input_db, int timestep);
void Basic(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson, int timestep);
void WriteVis(ScaLBL_IonModel &Ion, ScaLBL_Poisson &Poisson,
ScaLBL_StokesModel &Stokes,
std::shared_ptr<Database> input_db, int timestep);
private:

View File

@ -67,6 +67,7 @@ Minkowski::~Minkowski() {
void Minkowski::ComputeScalar(const DoubleArray &Field, const double isovalue) {
PROFILE_START("ComputeScalar");
Xi = Ji = Ai = 0.0;
DCEL object;
int e1, e2, e3;
@ -160,6 +161,7 @@ void Minkowski::MeasureObject() {
* 1 - labels the rest
*/
//DoubleArray smooth_distance(Nx,Ny,Nz);
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
@ -168,6 +170,8 @@ void Minkowski::MeasureObject() {
}
}
CalcDist(distance, id, *Dm);
Dm->CommunicateMeshHalo(distance);
//Mean3D(distance,smooth_distance);
//Eikonal(distance, id, *Dm, 20, {true, true, true});
ComputeScalar(distance, 0.0);
@ -179,7 +183,7 @@ void Minkowski::MeasureObject(double factor, const DoubleArray &Phi) {
*
* THIS ALGORITHM ASSUMES THAT id() is populated with phase id to distinguish objects
* 0 - labels the object
* 1 - labels the rest of the
* 1 - labels the rest
*/
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {

View File

@ -411,6 +411,7 @@ void SubPhase::Basic() {
dir_z = 1.0;
force_mag = 1.0;
}
double Porosity = (gwb.V + gnb.V)/Dm->Volume;
double saturation = gwb.V / (gwb.V + gnb.V);
double water_flow_rate =
gwb.V * (gwb.Px * dir_x + gwb.Py * dir_y + gwb.Pz * dir_z) / gwb.M /
@ -429,11 +430,11 @@ void SubPhase::Basic() {
//double total_flow_rate = water_flow_rate + not_water_flow_rate;
//double fractional_flow = water_flow_rate / total_flow_rate;
double h = Dm->voxel_length;
double krn = h * h * nu_n * not_water_flow_rate / force_mag;
double krw = h * h * nu_w * water_flow_rate / force_mag;
double krn = h * h * nu_n * Porosity * not_water_flow_rate / force_mag;
double krw = h * h * nu_w * Porosity* water_flow_rate / force_mag;
/* not counting films */
double krnf = krn - h * h * nu_n * not_water_film_flow_rate / force_mag;
double krwf = krw - h * h * nu_w * water_film_flow_rate / force_mag;
double krnf = krn - h * h * nu_n * Porosity * not_water_film_flow_rate / force_mag;
double krwf = krw - h * h * nu_w * Porosity * water_film_flow_rate / force_mag;
double eff_pressure = 1.0 / (krn + krw); // effective pressure drop
fprintf(TIMELOG,
@ -595,7 +596,7 @@ void SubPhase::Full() {
for (j = 0; j < Ny; j++) {
for (i = 0; i < Nx; i++) {
n = k * Nx * Ny + j * Nx + i;
if (!(Dm->id[n] > 0)) {
if (SDs(n) <= 0.0) {
// Solid phase
morph_n->id(i, j, k) = 1;
@ -642,7 +643,7 @@ void SubPhase::Full() {
for (j = 0; j < Ny; j++) {
for (i = 0; i < Nx; i++) {
n = k * Nx * Ny + j * Nx + i;
if (!(Dm->id[n] > 0)) {
if (SDs(n) <= 0.0) {
// Solid phase
morph_w->id(i, j, k) = 1;
@ -688,7 +689,7 @@ void SubPhase::Full() {
for (j = 0; j < Ny; j++) {
for (i = 0; i < Nx; i++) {
n = k * Nx * Ny + j * Nx + i;
if (!(Dm->id[n] > 0)) {
if (SDs(n) <= 0.0) {
// Solid phase
morph_i->id(i, j, k) = 1;
} else if (DelPhi(n) > 1e-4) {
@ -731,7 +732,7 @@ void SubPhase::Full() {
for (i = imin; i < Nx - 1; i++) {
n = k * Nx * Ny + j * Nx + i;
// Compute volume averages
if (Dm->id[n] > 0) {
if (SDs(n) > 0.0) {
// compute density
double nA = Rho_n(n);
double nB = Rho_w(n);

View File

@ -193,6 +193,9 @@ MACRO( FIND_FILES )
# Find the CUDA sources
SET( T_CUDASOURCES "" )
FILE( GLOB T_CUDASOURCES "*.cu" )
# Find the HIP sources
SET( T_HIPSOURCES "" )
FILE( GLOB T_HIPSOURCES "*.hip" )
# Find the C sources
SET( T_CSOURCES "" )
FILE( GLOB T_CSOURCES "*.c" )
@ -212,10 +215,11 @@ MACRO( FIND_FILES )
SET( HEADERS ${HEADERS} ${T_HEADERS} )
SET( CXXSOURCES ${CXXSOURCES} ${T_CXXSOURCES} )
SET( CUDASOURCES ${CUDASOURCES} ${T_CUDASOURCES} )
SET( HIPSOURCES ${HIPSOURCES} ${T_HIPSOURCES} )
SET( CSOURCES ${CSOURCES} ${T_CSOURCES} )
SET( FSOURCES ${FSOURCES} ${T_FSOURCES} )
SET( M4FSOURCES ${M4FSOURCES} ${T_M4FSOURCES} )
SET( SOURCES ${SOURCES} ${T_CXXSOURCES} ${T_CSOURCES} ${T_FSOURCES} ${T_M4FSOURCES} ${CUDASOURCES} )
SET( SOURCES ${SOURCES} ${T_CXXSOURCES} ${T_CSOURCES} ${T_FSOURCES} ${T_M4FSOURCES} ${CUDASOURCES} ${HIPSOURCES} )
ENDMACRO()
@ -227,6 +231,9 @@ MACRO( FIND_FILES_PATH IN_PATH )
# Find the CUDA sources
SET( T_CUDASOURCES "" )
FILE( GLOB T_CUDASOURCES "${IN_PATH}/*.cu" )
# Find the HIP sources
SET( T_HIPSOURCES "" )
FILE( GLOB T_HIPSOURCES "${IN_PATH}/*.hip" )
# Find the C sources
SET( T_CSOURCES "" )
FILE( GLOB T_CSOURCES "${IN_PATH}/*.c" )
@ -246,9 +253,10 @@ MACRO( FIND_FILES_PATH IN_PATH )
SET( HEADERS ${HEADERS} ${T_HEADERS} )
SET( CXXSOURCES ${CXXSOURCES} ${T_CXXSOURCES} )
SET( CUDASOURCES ${CUDASOURCES} ${T_CUDASOURCES} )
SET( HIPSOURCES ${HIPSOURCES} ${T_HIPSOURCES} )
SET( CSOURCES ${CSOURCES} ${T_CSOURCES} )
SET( FSOURCES ${FSOURCES} ${T_FSOURCES} )
SET( SOURCES ${SOURCES} ${T_CXXSOURCES} ${T_CSOURCES} ${T_FSOURCES} ${CUDASOURCES} )
SET( SOURCES ${SOURCES} ${T_CXXSOURCES} ${T_CSOURCES} ${T_FSOURCES} ${CUDASOURCES} ${HIPSOURCES} )
ENDMACRO()

View File

@ -20,10 +20,12 @@
#include "common/ArraySize.h"
#include <array>
#include <cstdint>
#include <functional>
#include <initializer_list>
#include <iostream>
#include <memory>
#include <stdint.h>
#include <string>
#include <vector>

View File

@ -4,11 +4,13 @@
#include "common/Utilities.h"
#include <array>
#include <cstdint>
#include <cmath>
#include <complex>
#include <cstdlib>
#include <cstring>
#include <initializer_list>
#include <stdexcept>
#include <vector>
#if defined(__CUDA_ARCH__)

View File

@ -208,72 +208,68 @@ inline void CommunicateSendRecvCounts(
}
//***************************************************************************************
// CommunicateRecvLists: exchange the per-direction index lists with the
// neighboring ranks using nonblocking sends and receives. There are 18
// directions: 6 single-axis (x,y,z,X,Y,Z) and 12 two-axis (xy,XY,...,Yz).
// A send toward rank_<d> is paired with a receive from the rank named with
// the opposite letter case (e.g. send to rank_x, receive from rank_X).
// NOTE(review): this span comes from a rendered diff, so the previous and the
// updated text of the function appear interleaved below.
inline void CommunicateRecvLists(
const Utilities::MPI &comm, int sendtag, int recvtag, int *sendList_x,
int *sendList_y, int *sendList_z, int *sendList_X, int *sendList_Y,
int *sendList_Z, int *sendList_xy, int *sendList_XY, int *sendList_xY,
int *sendList_Xy, int *sendList_xz, int *sendList_XZ, int *sendList_xZ,
int *sendList_Xz, int *sendList_yz, int *sendList_YZ, int *sendList_yZ,
int *sendList_Yz, int sendCount_x, int sendCount_y, int sendCount_z,
int sendCount_X, int sendCount_Y, int sendCount_Z, int sendCount_xy,
int sendCount_XY, int sendCount_xY, int sendCount_Xy, int sendCount_xz,
int sendCount_XZ, int sendCount_xZ, int sendCount_Xz, int sendCount_yz,
int sendCount_YZ, int sendCount_yZ, int sendCount_Yz, int *recvList_x,
int *recvList_y, int *recvList_z, int *recvList_X, int *recvList_Y,
int *recvList_Z, int *recvList_xy, int *recvList_XY, int *recvList_xY,
int *recvList_Xy, int *recvList_xz, int *recvList_XZ, int *recvList_xZ,
int *recvList_Xz, int *recvList_yz, int *recvList_YZ, int *recvList_yZ,
int *recvList_Yz, int recvCount_x, int recvCount_y, int recvCount_z,
int recvCount_X, int recvCount_Y, int recvCount_Z, int recvCount_xy,
int recvCount_XY, int recvCount_xY, int recvCount_Xy, int recvCount_xz,
int recvCount_XZ, int recvCount_xZ, int recvCount_Xz, int recvCount_yz,
int recvCount_YZ, int recvCount_yZ, int recvCount_Yz, int rank_x,
int rank_y, int rank_z, int rank_X, int rank_Y, int rank_Z, int rank_xy,
int rank_XY, int rank_xY, int rank_Xy, int rank_xz, int rank_XZ,
int rank_xZ, int rank_Xz, int rank_yz, int rank_YZ, int rank_yZ,
int rank_Yz) {
MPI_Request req1[18], req2[18];
// single-axis directions; every request here reuses the same sendtag/recvtag
req1[0] = comm.Isend(sendList_x, sendCount_x, rank_x, sendtag);
req2[0] = comm.Irecv(recvList_X, recvCount_X, rank_X, recvtag);
req1[1] = comm.Isend(sendList_X, sendCount_X, rank_X, sendtag);
req2[1] = comm.Irecv(recvList_x, recvCount_x, rank_x, recvtag);
req1[2] = comm.Isend(sendList_y, sendCount_y, rank_y, sendtag);
req2[2] = comm.Irecv(recvList_Y, recvCount_Y, rank_Y, recvtag);
req1[3] = comm.Isend(sendList_Y, sendCount_Y, rank_Y, sendtag);
req2[3] = comm.Irecv(recvList_y, recvCount_y, rank_y, recvtag);
req1[4] = comm.Isend(sendList_z, sendCount_z, rank_z, sendtag);
req2[4] = comm.Irecv(recvList_Z, recvCount_Z, rank_Z, recvtag);
req1[5] = comm.Isend(sendList_Z, sendCount_Z, rank_Z, sendtag);
req2[5] = comm.Irecv(recvList_z, recvCount_z, rank_z, recvtag);
// Same parameter list and request layout, but request k is posted with tag
// offset +k (sendtag+k / recvtag+k) so each direction has its own message tag.
inline void CommunicateRecvLists( const Utilities::MPI& comm, int sendtag, int recvtag,
int *sendList_x, int *sendList_y, int *sendList_z, int *sendList_X, int *sendList_Y, int *sendList_Z,
int *sendList_xy, int *sendList_XY, int *sendList_xY, int *sendList_Xy,
int *sendList_xz, int *sendList_XZ, int *sendList_xZ, int *sendList_Xz,
int *sendList_yz, int *sendList_YZ, int *sendList_yZ, int *sendList_Yz,
int sendCount_x, int sendCount_y, int sendCount_z, int sendCount_X, int sendCount_Y, int sendCount_Z,
int sendCount_xy, int sendCount_XY, int sendCount_xY, int sendCount_Xy,
int sendCount_xz, int sendCount_XZ, int sendCount_xZ, int sendCount_Xz,
int sendCount_yz, int sendCount_YZ, int sendCount_yZ, int sendCount_Yz,
int *recvList_x, int *recvList_y, int *recvList_z, int *recvList_X, int *recvList_Y, int *recvList_Z,
int *recvList_xy, int *recvList_XY, int *recvList_xY, int *recvList_Xy,
int *recvList_xz, int *recvList_XZ, int *recvList_xZ, int *recvList_Xz,
int *recvList_yz, int *recvList_YZ, int *recvList_yZ, int *recvList_Yz,
int recvCount_x, int recvCount_y, int recvCount_z, int recvCount_X, int recvCount_Y, int recvCount_Z,
int recvCount_xy, int recvCount_XY, int recvCount_xY, int recvCount_Xy,
int recvCount_xz, int recvCount_XZ, int recvCount_xZ, int recvCount_Xz,
int recvCount_yz, int recvCount_YZ, int recvCount_yZ, int recvCount_Yz,
int rank_x, int rank_y, int rank_z, int rank_X, int rank_Y, int rank_Z, int rank_xy, int rank_XY, int rank_xY,
int rank_Xy, int rank_xz, int rank_XZ, int rank_xZ, int rank_Xz, int rank_yz, int rank_YZ, int rank_yZ, int rank_Yz)
{
MPI_Request req1[18], req2[18];
req1[0] = comm.Isend(sendList_x,sendCount_x,rank_x,sendtag+0);
req2[0] = comm.Irecv(recvList_X,recvCount_X,rank_X,recvtag+0);
req1[1] = comm.Isend(sendList_X,sendCount_X,rank_X,sendtag+1);
req2[1] = comm.Irecv(recvList_x,recvCount_x,rank_x,recvtag+1);
req1[2] = comm.Isend(sendList_y,sendCount_y,rank_y,sendtag+2);
req2[2] = comm.Irecv(recvList_Y,recvCount_Y,rank_Y,recvtag+2);
req1[3] = comm.Isend(sendList_Y,sendCount_Y,rank_Y,sendtag+3);
req2[3] = comm.Irecv(recvList_y,recvCount_y,rank_y,recvtag+3);
req1[4] = comm.Isend(sendList_z,sendCount_z,rank_z,sendtag+4);
req2[4] = comm.Irecv(recvList_Z,recvCount_Z,rank_Z,recvtag+4);
req1[5] = comm.Isend(sendList_Z,sendCount_Z,rank_Z,sendtag+5);
req2[5] = comm.Irecv(recvList_z,recvCount_z,rank_z,recvtag+5);
// two-axis directions in the x-y plane (requests 6-9)
req1[6] = comm.Isend(sendList_xy, sendCount_xy, rank_xy, sendtag);
req2[6] = comm.Irecv(recvList_XY, recvCount_XY, rank_XY, recvtag);
req1[7] = comm.Isend(sendList_XY, sendCount_XY, rank_XY, sendtag);
req2[7] = comm.Irecv(recvList_xy, recvCount_xy, rank_xy, recvtag);
req1[8] = comm.Isend(sendList_Xy, sendCount_Xy, rank_Xy, sendtag);
req2[8] = comm.Irecv(recvList_xY, recvCount_xY, rank_xY, recvtag);
req1[9] = comm.Isend(sendList_xY, sendCount_xY, rank_xY, sendtag);
req2[9] = comm.Irecv(recvList_Xy, recvCount_Xy, rank_Xy, recvtag);
req1[6] = comm.Isend(sendList_xy,sendCount_xy,rank_xy,sendtag+6);
req2[6] = comm.Irecv(recvList_XY,recvCount_XY,rank_XY,recvtag+6);
req1[7] = comm.Isend(sendList_XY,sendCount_XY,rank_XY,sendtag+7);
req2[7] = comm.Irecv(recvList_xy,recvCount_xy,rank_xy,recvtag+7);
req1[8] = comm.Isend(sendList_Xy,sendCount_Xy,rank_Xy,sendtag+8);
req2[8] = comm.Irecv(recvList_xY,recvCount_xY,rank_xY,recvtag+8);
req1[9] = comm.Isend(sendList_xY,sendCount_xY,rank_xY,sendtag+9);
req2[9] = comm.Irecv(recvList_Xy,recvCount_Xy,rank_Xy,recvtag+9);
// two-axis directions in the x-z plane (requests 10-13)
req1[10] = comm.Isend(sendList_xz, sendCount_xz, rank_xz, sendtag);
req2[10] = comm.Irecv(recvList_XZ, recvCount_XZ, rank_XZ, recvtag);
req1[11] = comm.Isend(sendList_XZ, sendCount_XZ, rank_XZ, sendtag);
req2[11] = comm.Irecv(recvList_xz, recvCount_xz, rank_xz, recvtag);
req1[12] = comm.Isend(sendList_Xz, sendCount_Xz, rank_Xz, sendtag);
req2[12] = comm.Irecv(recvList_xZ, recvCount_xZ, rank_xZ, recvtag);
req1[13] = comm.Isend(sendList_xZ, sendCount_xZ, rank_xZ, sendtag);
req2[13] = comm.Irecv(recvList_Xz, recvCount_Xz, rank_Xz, recvtag);
req1[10] = comm.Isend(sendList_xz,sendCount_xz,rank_xz,sendtag+10);
req2[10] = comm.Irecv(recvList_XZ,recvCount_XZ,rank_XZ,recvtag+10);
req1[11] = comm.Isend(sendList_XZ,sendCount_XZ,rank_XZ,sendtag+11);
req2[11] = comm.Irecv(recvList_xz,recvCount_xz,rank_xz,recvtag+11);
req1[12] = comm.Isend(sendList_Xz,sendCount_Xz,rank_Xz,sendtag+12);
req2[12] = comm.Irecv(recvList_xZ,recvCount_xZ,rank_xZ,recvtag+12);
req1[13] = comm.Isend(sendList_xZ,sendCount_xZ,rank_xZ,sendtag+13);
req2[13] = comm.Irecv(recvList_Xz,recvCount_Xz,rank_Xz,recvtag+13);
// two-axis directions in the y-z plane (requests 14-17)
req1[14] = comm.Isend(sendList_yz, sendCount_yz, rank_yz, sendtag);
req2[14] = comm.Irecv(recvList_YZ, recvCount_YZ, rank_YZ, recvtag);
req1[15] = comm.Isend(sendList_YZ, sendCount_YZ, rank_YZ, sendtag);
req2[15] = comm.Irecv(recvList_yz, recvCount_yz, rank_yz, recvtag);
req1[16] = comm.Isend(sendList_Yz, sendCount_Yz, rank_Yz, sendtag);
req2[16] = comm.Irecv(recvList_yZ, recvCount_yZ, rank_yZ, recvtag);
req1[17] = comm.Isend(sendList_yZ, sendCount_yZ, rank_yZ, sendtag);
req2[17] = comm.Irecv(recvList_Yz, recvCount_Yz, rank_Yz, recvtag);
// wait on the 18 send requests, then on the 18 receive requests
comm.waitAll(18, req1);
comm.waitAll(18, req2);
req1[14] = comm.Isend(sendList_yz,sendCount_yz,rank_yz,sendtag+14);
req2[14] = comm.Irecv(recvList_YZ,recvCount_YZ,rank_YZ,recvtag+14);
req1[15] = comm.Isend(sendList_YZ,sendCount_YZ,rank_YZ,sendtag+15);
req2[15] = comm.Irecv(recvList_yz,recvCount_yz,rank_yz,recvtag+15);
req1[16] = comm.Isend(sendList_Yz,sendCount_Yz,rank_Yz,sendtag+16);
req2[16] = comm.Irecv(recvList_yZ,recvCount_yZ,rank_yZ,recvtag+16);
req1[17] = comm.Isend(sendList_yZ,sendCount_yZ,rank_yZ,sendtag+17);
req2[17] = comm.Irecv(recvList_Yz,recvCount_Yz,rank_Yz,recvtag+17);
comm.waitAll( 18, req1 );
comm.waitAll( 18, req2 );
}
//***************************************************************************************

View File

@ -40,6 +40,214 @@ static inline void fgetl(char *str, int num, FILE *stream) {
}
}
/**
 * Build the local domain ID array from an SWC file.
 *
 * Rank 0 counts and parses the file (one header line followed by one record
 * per line: index, type, x, y, z, radius, parent), shifts the coordinates by
 * offset_{x,y,z} * voxel_length and broadcasts the records to every rank.
 * Each rank then sets all of its voxels to 1 and relabels as 2 every voxel
 * that lies inside the segment joining a record to its parent record. A
 * segment is treated as a capsule: a sphere at each end and a linearly
 * varying radius in between.
 *
 * The parent field is used as a 1-based position in the record list
 * (parent - 1 is the array index); a value < 1 means "no parent".
 *
 * @param Filename  path of the SWC file (only opened on rank 0)
 */
void Domain::read_swc(const std::string &Filename) {
    //...... READ IN SWC FILE...................................
    int number_of_lines = 0;
    if (rank() == 0) {
        cout << "Reading SWC file..." << endl;
        {
            std::string line;
            std::ifstream myfile(Filename);
            while (std::getline(myfile, line))
                ++number_of_lines;
            // the first line is a header, not a record
            number_of_lines -= 1;
        }
        std::cout << " Number of lines in SWC file: " << number_of_lines << endl;
    }
    // only rank 0 contributes a nonzero value, so the sum is rank 0's count
    number_of_lines = Comm.sumReduce(number_of_lines);
    // Every rank sees the same count, so every rank fails together here
    // instead of allocating a negative/zero-sized buffer and indexing it.
    INSIST(number_of_lines > 0, "SWC file is missing or contains no records");

    // Storage for the records (released automatically, also on INSIST failure)
    const size_t entries = static_cast<size_t>(number_of_lines);
    std::vector<double> List_cx(entries);
    std::vector<double> List_cy(entries);
    std::vector<double> List_cz(entries);
    std::vector<double> List_rad(entries);
    std::vector<int> List_index(entries);
    std::vector<int> List_parent(entries);
    std::vector<int> List_type(entries);

    if (rank() == 0) {
        FILE *fid = fopen(Filename.c_str(), "rb");
        INSIST(fid != NULL, "Error opening SWC file");
        //.........Trash the header lines (x 1)..........
        char line[100];
        fgetl(line, 100, fid);
        //........read the records until end-of-file..................
        int count = 0;
        while (fgets(line, 100, fid) != NULL) {
            // Never write past the buffers: an over-long line is split by
            // fgets into several reads, which would otherwise overflow the
            // arrays before the consistency check below can fire.
            if (count < number_of_lines) {
                char *cursor = line;
                List_index[count] = int(strtod(cursor, &cursor));
                List_type[count] = int(strtod(cursor, &cursor));
                List_cx[count] = strtod(cursor, &cursor);
                List_cy[count] = strtod(cursor, &cursor);
                List_cz[count] = strtod(cursor, &cursor);
                List_rad[count] = strtod(cursor, &cursor);
                List_parent[count] = int(strtod(cursor, &cursor));
            }
            count++;
        }
        fclose(fid);
        cout << " Number of lines extracted is: " << count << endl;
        INSIST(count == number_of_lines, "Problem reading swc file!");

        // lower corner of the bounding box of all spheres
        double min_cx = List_cx[0] - List_rad[0];
        double min_cy = List_cy[0] - List_rad[0];
        double min_cz = List_cz[0] - List_rad[0];
        for (int n = 1; n < number_of_lines; n++) {
            double value_x = List_cx[n] - List_rad[n];
            double value_y = List_cy[n] - List_rad[n];
            double value_z = List_cz[n] - List_rad[n];
            if (value_x < min_cx) min_cx = value_x;
            if (value_y < min_cy) min_cy = value_y;
            if (value_z < min_cz) min_cz = value_z;
        }
        /* shift the swc data */
        // NOTE(review): the minimum extent is only reported; the shift that is
        // actually applied is offset_{x,y,z} * voxel_length. Confirm intent.
        printf(" shift swc data by %f, %f, %f \n", min_cx, min_cy, min_cz);
        for (int n = 0; n < number_of_lines; n++) {
            List_cx[n] -= offset_x * voxel_length;
            List_cy[n] -= offset_y * voxel_length;
            List_cz[n] -= offset_z * voxel_length;
        }
    }

    /* everybody gets the swc file */
    Comm.bcast(List_cx.data(), number_of_lines, 0);
    Comm.bcast(List_cy.data(), number_of_lines, 0);
    Comm.bcast(List_cz.data(), number_of_lines, 0);
    Comm.bcast(List_rad.data(), number_of_lines, 0);
    Comm.bcast(List_index.data(), number_of_lines, 0);
    Comm.bcast(List_parent.data(), number_of_lines, 0);
    Comm.bcast(List_type.data(), number_of_lines, 0);

    /* units of swc file are in micron */
    /* lower corner of the box owned by this rank */
    const double start_x = rank_info.ix * (Nx - 2) * voxel_length;
    const double start_y = rank_info.jy * (Ny - 2) * voxel_length;
    const double start_z = rank_info.kz * (Nz - 2) * voxel_length;

    // start with every voxel labelled 1
    for (int k = 0; k < Nz; k++) {
        for (int j = 0; j < Ny; j++) {
            for (int i = 0; i < Nx; i++) {
                id[k * Nx * Ny + j * Nx + i] = 1;
            }
        }
    }

    /* Loop over SWC input and populate domain ID */
    for (int idx = 0; idx < number_of_lines; idx++) {
        /* get the object information */
        int parent = List_parent[idx] - 1;
        if (parent < 0)
            parent = idx; // no parent: the segment collapses to a sphere
        INSIST(parent < number_of_lines, "SWC parent index is out of range");

        const double xi = List_cx[idx];
        const double yi = List_cy[idx];
        const double zi = List_cz[idx];
        const double ri = List_rad[idx];
        const double xp = List_cx[parent];
        const double yp = List_cy[parent];
        const double zp = List_cz[parent];
        const double rp = List_rad[parent];

        // bounding box of the two end spheres, in local voxel coordinates
        double xmin = min(((xi - start_x - ri) / voxel_length), ((xp - start_x - rp) / voxel_length));
        double ymin = min(((yi - start_y - ri) / voxel_length), ((yp - start_y - rp) / voxel_length));
        double zmin = min(((zi - start_z - ri) / voxel_length), ((zp - start_z - rp) / voxel_length));
        double xmax = max(((xi - start_x + ri) / voxel_length), ((xp - start_x + rp) / voxel_length));
        double ymax = max(((yi - start_y + ri) / voxel_length), ((yp - start_y + rp) / voxel_length));
        double zmax = max(((zi - start_z + ri) / voxel_length), ((zp - start_z + rp) / voxel_length));

        // unit vector from the parent to this record
        double length = sqrt((xi - xp) * (xi - xp) + (yi - yp) * (yi - yp) + (zi - zp) * (zi - zp));
        // A zero-length segment has no direction; projecting onto it would
        // evaluate 0/0 and the resulting NaN would reject every voxel.
        const bool degenerate = (length == 0.0);
        if (degenerate)
            length = 1.0;
        const double alpha = (xi - xp) / length;
        const double beta = (yi - yp) / length;
        const double gamma = (zi - zp) / length;

        int start_idx = int(xmin);
        int start_idy = int(ymin);
        int start_idz = int(zmin);
        int finish_idx = int(xmax);
        int finish_idy = int(ymax);
        int finish_idz = int(zmax);

        // clip the box to the local array
        if (start_idx < 0) start_idx = 0;
        if (start_idy < 0) start_idy = 0;
        if (start_idz < 0) start_idz = 0;
        if (start_idx > Nx - 1) start_idx = Nx;
        if (start_idy > Ny - 1) start_idy = Ny;
        if (start_idz > Nz - 1) start_idz = Nz;

        if (finish_idx < 0) finish_idx = 0;
        if (finish_idy < 0) finish_idy = 0;
        if (finish_idz < 0) finish_idz = 0;
        if (finish_idx > Nx - 1) finish_idx = Nx;
        if (finish_idy > Ny - 1) finish_idy = Ny;
        if (finish_idz > Nz - 1) finish_idz = Nz;

        for (int k = start_idz; k < finish_idz; k++) {
            const double z = k * voxel_length + start_z;
            for (int j = start_idy; j < finish_idy; j++) {
                const double y = j * voxel_length + start_y;
                for (int i = start_idx; i < finish_idx; i++) {
                    const double x = i * voxel_length + start_x;

                    // distance > 0 means the voxel is inside the object
                    double distance;
                    if (degenerate) {
                        // both ends coincide: plain sphere with the larger radius
                        distance = max(ri, rp) - sqrt((x - xi) * (x - xi) + (y - yi) * (y - yi) + (z - zi) * (z - zi));
                    } else {
                        // position of the projection along the segment
                        double s = ((x - xp) * alpha + (y - yp) * beta + (z - zp) * gamma) /
                                   (alpha * alpha + beta * beta + gamma * gamma);
                        if (s > length) {
                            // beyond this record: end-cap sphere
                            distance = ri - sqrt((x - xi) * (x - xi) + (y - yi) * (y - yi) + (z - zi) * (z - zi));
                        } else if (s < 0.0) {
                            // before the parent: end-cap sphere
                            distance = rp - sqrt((x - xp) * (x - xp) + (y - yp) * (y - yp) + (z - zp) * (z - zp));
                        } else {
                            // linear variation for radius
                            double radius = rp + (ri - rp) * s / length;
                            distance = radius - sqrt((x - xp - alpha * s) * (x - xp - alpha * s) +
                                                     (y - yp - beta * s) * (y - yp - beta * s) +
                                                     (z - zp - gamma * s) * (z - zp - gamma * s));
                        }
                    }
                    if (distance > 0.0) {
                        /* label the voxel */
                        // The SWC type field (List_type) is not applied as the
                        // label; every interior voxel is marked 2.
                        id[k * Nx * Ny + j * Nx + i] = 2;
                    }
                }
            }
        }
    }
}
/********************************************************
* Constructors *
********************************************************/
@ -101,6 +309,7 @@ void Domain::initialize(std::shared_ptr<Database> db) {
int nx = n[0];
int ny = n[1];
int nz = n[2];
offset_x = offset_y = offset_z = 0;
if (d_db->keyExists("InletLayers")) {
auto InletCount = d_db->getVector<int>("InletLayers");
@ -302,6 +511,9 @@ void Domain::Decomp(const std::string &Filename) {
xStart = offset[0];
yStart = offset[1];
zStart = offset[2];
offset_x = xStart;
offset_y = yStart;
offset_z = zStart;
}
if (database->keyExists("InletLayers")) {
auto InletCount = database->getVector<int>("InletLayers");
@ -333,380 +545,391 @@ void Domain::Decomp(const std::string &Filename) {
if (ReadType == "8bit") {
} else if (ReadType == "16bit") {
} else if (ReadType == "swc") {
} else {
//printf("INPUT ERROR: Valid ReadType are 8bit, 16bit \n");
ReadType = "8bit";
}
nx = size[0];
ny = size[1];
nz = size[2];
nprocx = nproc[0];
nprocy = nproc[1];
nprocz = nproc[2];
global_Nx = SIZE[0];
global_Ny = SIZE[1];
global_Nz = SIZE[2];
nprocs = nprocx * nprocy * nprocz;
char *SegData = NULL;
if (RANK == 0) {
printf("Input media: %s\n", Filename.c_str());
printf("Relabeling %lu values\n", ReadValues.size());
for (size_t idx = 0; idx < ReadValues.size(); idx++) {
int oldvalue = ReadValues[idx];
int newvalue = WriteValues[idx];
printf("oldvalue=%d, newvalue =%d \n", oldvalue, newvalue);
}
// Rank=0 reads the entire segmented data and distributes to worker processes
printf("Dimensions of segmented image: %ld x %ld x %ld \n", global_Nx,
global_Ny, global_Nz);
int64_t SIZE = global_Nx * global_Ny * global_Nz;
SegData = new char[SIZE];
if (ReadType == "8bit") {
printf("Reading 8-bit input data \n");
FILE *SEGDAT = fopen(Filename.c_str(), "rb");
if (SEGDAT == NULL)
ERROR("Domain.cpp: Error reading segmented data");
size_t ReadSeg;
ReadSeg = fread(SegData, 1, SIZE, SEGDAT);
if (ReadSeg != size_t(SIZE))
printf("Domain.cpp: Error reading segmented data \n");
fclose(SEGDAT);
} else if (ReadType == "16bit") {
printf("Reading 16-bit input data \n");
short int *InputData;
InputData = new short int[SIZE];
FILE *SEGDAT = fopen(Filename.c_str(), "rb");
if (SEGDAT == NULL)
ERROR("Domain.cpp: Error reading segmented data");
size_t ReadSeg;
ReadSeg = fread(InputData, 2, SIZE, SEGDAT);
if (ReadSeg != size_t(SIZE))
printf("Domain.cpp: Error reading segmented data \n");
fclose(SEGDAT);
for (int n = 0; n < SIZE; n++) {
SegData[n] = char(InputData[n]);
}
}
printf("Read segmented data from %s \n", Filename.c_str());
// relabel the data
std::vector<long int> LabelCount(ReadValues.size(), 0);
for (int k = 0; k < global_Nz; k++) {
for (int j = 0; j < global_Ny; j++) {
for (int i = 0; i < global_Nx; i++) {
n = k * global_Nx * global_Ny + j * global_Nx + i;
//char locval = loc_id[n];
signed char locval = SegData[n];
for (size_t idx = 0; idx < ReadValues.size(); idx++) {
signed char oldvalue = ReadValues[idx];
signed char newvalue = WriteValues[idx];
if (locval == oldvalue) {
SegData[n] = newvalue;
LabelCount[idx]++;
idx = ReadValues.size();
}
}
}
}
}
for (size_t idx = 0; idx < ReadValues.size(); idx++) {
long int label = ReadValues[idx];
long int count = LabelCount[idx];
printf("Label=%ld, Count=%ld \n", label, count);
}
if (USE_CHECKER) {
if (inlet_layers_x > 0) {
// use checkerboard pattern
printf("Checkerboard pattern at x inlet for %i layers \n",
inlet_layers_x);
for (int k = 0; k < global_Nz; k++) {
for (int j = 0; j < global_Ny; j++) {
for (int i = xStart; i < xStart + inlet_layers_x; i++) {
if ((j / checkerSize + k / checkerSize) % 2 == 0) {
// void checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 2;
} else {
// solid checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 0;
}
}
}
}
}
if (inlet_layers_y > 0) {
printf("Checkerboard pattern at y inlet for %i layers \n",
inlet_layers_y);
// use checkerboard pattern
for (int k = 0; k < global_Nz; k++) {
for (int j = yStart; j < yStart + inlet_layers_y; j++) {
for (int i = 0; i < global_Nx; i++) {
if ((i / checkerSize + k / checkerSize) % 2 == 0) {
// void checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 2;
} else {
// solid checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 0;
}
}
}
}
}
if (inlet_layers_z > 0) {
printf("Checkerboard pattern at z inlet for %i layers, "
"saturated with phase label=%i \n",
inlet_layers_z, inlet_layers_phase);
// use checkerboard pattern
for (int k = zStart; k < zStart + inlet_layers_z; k++) {
for (int j = 0; j < global_Ny; j++) {
for (int i = 0; i < global_Nx; i++) {
if ((i / checkerSize + j / checkerSize) % 2 == 0) {
// void checkers
//SegData[k*global_Nx*global_Ny+j*global_Nx+i] = 2;
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = inlet_layers_phase;
} else {
// solid checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 0;
}
}
}
}
}
if (outlet_layers_x > 0) {
// use checkerboard pattern
printf("Checkerboard pattern at x outlet for %i layers \n",
outlet_layers_x);
for (int k = 0; k < global_Nz; k++) {
for (int j = 0; j < global_Ny; j++) {
for (int i = xStart + nx * nprocx - outlet_layers_x;
i < xStart + nx * nprocx; i++) {
if ((j / checkerSize + k / checkerSize) % 2 == 0) {
// void checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 2;
} else {
// solid checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 0;
}
}
}
}
}
if (outlet_layers_y > 0) {
printf("Checkerboard pattern at y outlet for %i layers \n",
outlet_layers_y);
// use checkerboard pattern
for (int k = 0; k < global_Nz; k++) {
for (int j = yStart + ny * nprocy - outlet_layers_y;
j < yStart + ny * nprocy; j++) {
for (int i = 0; i < global_Nx; i++) {
if ((i / checkerSize + k / checkerSize) % 2 == 0) {
// void checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 2;
} else {
// solid checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 0;
}
}
}
}
}
if (outlet_layers_z > 0) {
printf("Checkerboard pattern at z outlet for %i layers, "
"saturated with phase label=%i \n",
outlet_layers_z, outlet_layers_phase);
// use checkerboard pattern
for (int k = zStart + nz * nprocz - outlet_layers_z;
k < zStart + nz * nprocz; k++) {
for (int j = 0; j < global_Ny; j++) {
for (int i = 0; i < global_Nx; i++) {
if ((i / checkerSize + j / checkerSize) % 2 == 0) {
// void checkers
//SegData[k*global_Nx*global_Ny+j*global_Nx+i] = 2;
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] =
outlet_layers_phase;
} else {
// solid checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 0;
}
}
}
}
}
} else {
if (inlet_layers_z > 0) {
printf("Mixed reflection pattern at z inlet for %i layers, "
"saturated with phase label=%i \n",
inlet_layers_z, inlet_layers_phase);
for (int k = zStart; k < zStart + inlet_layers_z; k++) {
for (int j = 0; j < global_Ny; j++) {
for (int i = 0; i < global_Nx; i++) {
signed char local_id =
SegData[k * global_Nx * global_Ny +
j * global_Nx + i];
signed char reflection_id =
SegData[(zStart + nz * nprocz - 1) * global_Nx *
global_Ny +
j * global_Nx + i];
if (local_id < 1 && reflection_id > 0) {
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = reflection_id;
}
}
}
}
}
if (outlet_layers_z > 0) {
printf("Mixed reflection pattern at z outlet for %i layers, "
"saturated with phase label=%i \n",
outlet_layers_z, outlet_layers_phase);
for (int k = zStart + nz * nprocz - outlet_layers_z;
k < zStart + nz * nprocz; k++) {
for (int j = 0; j < global_Ny; j++) {
for (int i = 0; i < global_Nx; i++) {
signed char local_id =
SegData[k * global_Nx * global_Ny +
j * global_Nx + i];
signed char reflection_id =
SegData[zStart * global_Nx * global_Ny +
j * global_Nx + i];
if (local_id < 1 && reflection_id > 0) {
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = reflection_id;
}
}
}
}
}
}
/* swc format for neurons */
if (ReadType == "swc") {
read_swc(Filename);
}
else {
nx = size[0];
ny = size[1];
nz = size[2];
nprocx = nproc[0];
nprocy = nproc[1];
nprocz = nproc[2];
global_Nx = SIZE[0];
global_Ny = SIZE[1];
global_Nz = SIZE[2];
nprocs = nprocx * nprocy * nprocz;
char *SegData = NULL;
// Get the rank info
int64_t N = (nx + 2) * (ny + 2) * (nz + 2);
if (RANK == 0) {
printf("Input media: %s\n", Filename.c_str());
printf("Relabeling %lu values\n", ReadValues.size());
for (size_t idx = 0; idx < ReadValues.size(); idx++) {
int oldvalue = ReadValues[idx];
int newvalue = WriteValues[idx];
printf("oldvalue=%d, newvalue =%d \n", oldvalue, newvalue);
}
// number of sites to use for periodic boundary condition transition zone
int64_t z_transition_size = (nprocz * nz - (global_Nz - zStart)) / 2;
if (z_transition_size < 0)
z_transition_size = 0;
// Rank=0 reads the entire segmented data and distributes to worker processes
printf("Dimensions of segmented image: %ld x %ld x %ld \n", global_Nx,
global_Ny, global_Nz);
int64_t SIZE = global_Nx * global_Ny * global_Nz;
SegData = new char[SIZE];
if (ReadType == "8bit") {
printf("Reading 8-bit input data \n");
FILE *SEGDAT = fopen(Filename.c_str(), "rb");
if (SEGDAT == NULL)
ERROR("Domain.cpp: Error reading segmented data");
size_t ReadSeg;
ReadSeg = fread(SegData, 1, SIZE, SEGDAT);
if (ReadSeg != size_t(SIZE))
printf("Domain.cpp: Error reading segmented data \n");
fclose(SEGDAT);
} else if (ReadType == "16bit") {
printf("Reading 16-bit input data \n");
short int *InputData;
InputData = new short int[SIZE];
FILE *SEGDAT = fopen(Filename.c_str(), "rb");
if (SEGDAT == NULL)
ERROR("Domain.cpp: Error reading segmented data");
size_t ReadSeg;
ReadSeg = fread(InputData, 2, SIZE, SEGDAT);
if (ReadSeg != size_t(SIZE))
printf("Domain.cpp: Error reading segmented data \n");
fclose(SEGDAT);
for (int n = 0; n < SIZE; n++) {
SegData[n] = char(InputData[n]);
}
}
else if (ReadType == "SWC"){
// Set up the sub-domains
if (RANK == 0) {
printf("Distributing subdomains across %i processors \n", nprocs);
printf("Process grid: %i x %i x %i \n", nprocx, nprocy, nprocz);
printf("Subdomain size: %i x %i x %i \n", nx, ny, nz);
printf("Size of transition region: %ld \n", z_transition_size);
auto loc_id = new char[(nx + 2) * (ny + 2) * (nz + 2)];
for (int kp = 0; kp < nprocz; kp++) {
for (int jp = 0; jp < nprocy; jp++) {
for (int ip = 0; ip < nprocx; ip++) {
// rank of the process that gets this subdomain
int rnk = kp * nprocx * nprocy + jp * nprocx + ip;
// Pack and send the subdomain for rnk
for (k = 0; k < nz + 2; k++) {
for (j = 0; j < ny + 2; j++) {
for (i = 0; i < nx + 2; i++) {
int64_t x = xStart + ip * nx + i - 1;
int64_t y = yStart + jp * ny + j - 1;
// int64_t z = zStart + kp*nz + k-1;
int64_t z = zStart + kp * nz + k - 1 -
z_transition_size;
if (x < xStart)
x = xStart;
if (!(x < global_Nx))
x = global_Nx - 1;
if (y < yStart)
y = yStart;
if (!(y < global_Ny))
y = global_Ny - 1;
if (z < zStart)
z = zStart;
if (!(z < global_Nz))
z = global_Nz - 1;
int64_t nlocal =
k * (nx + 2) * (ny + 2) + j * (nx + 2) + i;
int64_t nglobal = z * global_Nx * global_Ny +
y * global_Nx + x;
loc_id[nlocal] = SegData[nglobal];
}
}
}
if (rnk == 0) {
for (k = 0; k < nz + 2; k++) {
for (j = 0; j < ny + 2; j++) {
for (i = 0; i < nx + 2; i++) {
int nlocal = k * (nx + 2) * (ny + 2) +
j * (nx + 2) + i;
id[nlocal] = loc_id[nlocal];
}
}
}
} else {
//printf("Sending data to process %i \n", rnk);
Comm.send(loc_id, N, rnk, 15);
}
// Write the data for this rank data
char LocalRankFilename[40];
sprintf(LocalRankFilename, "ID.%05i", rnk + rank_offset);
FILE *ID = fopen(LocalRankFilename, "wb");
fwrite(loc_id, 1, (nx + 2) * (ny + 2) * (nz + 2), ID);
fclose(ID);
}
}
}
delete[] loc_id;
} else {
// Receive the subdomain from rank = 0
//printf("Ready to recieve data %i at process %i \n", N,rank);
Comm.recv(id.data(), N, 0, 15);
}
printf("Read segmented data from %s \n", Filename.c_str());
// relabel the data
std::vector<long int> LabelCount(ReadValues.size(), 0);
for (int k = 0; k < global_Nz; k++) {
for (int j = 0; j < global_Ny; j++) {
for (int i = 0; i < global_Nx; i++) {
n = k * global_Nx * global_Ny + j * global_Nx + i;
//char locval = loc_id[n];
signed char locval = SegData[n];
for (size_t idx = 0; idx < ReadValues.size(); idx++) {
signed char oldvalue = ReadValues[idx];
signed char newvalue = WriteValues[idx];
if (locval == oldvalue) {
SegData[n] = newvalue;
LabelCount[idx]++;
idx = ReadValues.size();
}
}
}
}
}
for (size_t idx = 0; idx < ReadValues.size(); idx++) {
long int label = ReadValues[idx];
long int count = LabelCount[idx];
printf("Label=%ld, Count=%ld \n", label, count);
}
if (USE_CHECKER) {
if (inlet_layers_x > 0) {
// use checkerboard pattern
printf("Checkerboard pattern at x inlet for %i layers \n",
inlet_layers_x);
for (int k = 0; k < global_Nz; k++) {
for (int j = 0; j < global_Ny; j++) {
for (int i = xStart; i < xStart + inlet_layers_x; i++) {
if ((j / checkerSize + k / checkerSize) % 2 == 0) {
// void checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 2;
} else {
// solid checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 0;
}
}
}
}
}
if (inlet_layers_y > 0) {
printf("Checkerboard pattern at y inlet for %i layers \n",
inlet_layers_y);
// use checkerboard pattern
for (int k = 0; k < global_Nz; k++) {
for (int j = yStart; j < yStart + inlet_layers_y; j++) {
for (int i = 0; i < global_Nx; i++) {
if ((i / checkerSize + k / checkerSize) % 2 == 0) {
// void checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 2;
} else {
// solid checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 0;
}
}
}
}
}
if (inlet_layers_z > 0) {
printf("Checkerboard pattern at z inlet for %i layers, "
"saturated with phase label=%i \n",
inlet_layers_z, inlet_layers_phase);
// use checkerboard pattern
for (int k = zStart; k < zStart + inlet_layers_z; k++) {
for (int j = 0; j < global_Ny; j++) {
for (int i = 0; i < global_Nx; i++) {
if ((i / checkerSize + j / checkerSize) % 2 == 0) {
// void checkers
//SegData[k*global_Nx*global_Ny+j*global_Nx+i] = 2;
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = inlet_layers_phase;
} else {
// solid checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 0;
}
}
}
}
}
if (outlet_layers_x > 0) {
// use checkerboard pattern
printf("Checkerboard pattern at x outlet for %i layers \n",
outlet_layers_x);
for (int k = 0; k < global_Nz; k++) {
for (int j = 0; j < global_Ny; j++) {
for (int i = xStart + nx * nprocx - outlet_layers_x;
i < xStart + nx * nprocx; i++) {
if ((j / checkerSize + k / checkerSize) % 2 == 0) {
// void checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 2;
} else {
// solid checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 0;
}
}
}
}
}
if (outlet_layers_y > 0) {
printf("Checkerboard pattern at y outlet for %i layers \n",
outlet_layers_y);
// use checkerboard pattern
for (int k = 0; k < global_Nz; k++) {
for (int j = yStart + ny * nprocy - outlet_layers_y;
j < yStart + ny * nprocy; j++) {
for (int i = 0; i < global_Nx; i++) {
if ((i / checkerSize + k / checkerSize) % 2 == 0) {
// void checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 2;
} else {
// solid checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 0;
}
}
}
}
}
if (outlet_layers_z > 0) {
printf("Checkerboard pattern at z outlet for %i layers, "
"saturated with phase label=%i \n",
outlet_layers_z, outlet_layers_phase);
// use checkerboard pattern
for (int k = zStart + nz * nprocz - outlet_layers_z;
k < zStart + nz * nprocz; k++) {
for (int j = 0; j < global_Ny; j++) {
for (int i = 0; i < global_Nx; i++) {
if ((i / checkerSize + j / checkerSize) % 2 == 0) {
// void checkers
//SegData[k*global_Nx*global_Ny+j*global_Nx+i] = 2;
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] =
outlet_layers_phase;
} else {
// solid checkers
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = 0;
}
}
}
}
}
} else {
if (inlet_layers_z > 0) {
printf("Mixed reflection pattern at z inlet for %i layers, "
"saturated with phase label=%i \n",
inlet_layers_z, inlet_layers_phase);
for (int k = zStart; k < zStart + inlet_layers_z; k++) {
for (int j = 0; j < global_Ny; j++) {
for (int i = 0; i < global_Nx; i++) {
signed char local_id =
SegData[k * global_Nx * global_Ny +
j * global_Nx + i];
signed char reflection_id =
SegData[(zStart + nz * nprocz - 1) * global_Nx *
global_Ny +
j * global_Nx + i];
if (local_id < 1 && reflection_id > 0) {
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = reflection_id;
}
}
}
}
}
if (outlet_layers_z > 0) {
printf("Mixed reflection pattern at z outlet for %i layers, "
"saturated with phase label=%i \n",
outlet_layers_z, outlet_layers_phase);
for (int k = zStart + nz * nprocz - outlet_layers_z;
k < zStart + nz * nprocz; k++) {
for (int j = 0; j < global_Ny; j++) {
for (int i = 0; i < global_Nx; i++) {
signed char local_id =
SegData[k * global_Nx * global_Ny +
j * global_Nx + i];
signed char reflection_id =
SegData[zStart * global_Nx * global_Ny +
j * global_Nx + i];
if (local_id < 1 && reflection_id > 0) {
SegData[k * global_Nx * global_Ny +
j * global_Nx + i] = reflection_id;
}
}
}
}
}
}
}
// Get the rank info
int64_t N = (nx + 2) * (ny + 2) * (nz + 2);
// number of sites to use for periodic boundary condition transition zone
int64_t z_transition_size = (nprocz * nz - (global_Nz - zStart)) / 2;
if (z_transition_size < 0)
z_transition_size = 0;
// Set up the sub-domains
if (RANK == 0) {
printf("Distributing subdomains across %i processors \n", nprocs);
printf("Process grid: %i x %i x %i \n", nprocx, nprocy, nprocz);
printf("Subdomain size: %i x %i x %i \n", nx, ny, nz);
printf("Size of transition region: %ld \n", z_transition_size);
auto loc_id = new char[(nx + 2) * (ny + 2) * (nz + 2)];
for (int kp = 0; kp < nprocz; kp++) {
for (int jp = 0; jp < nprocy; jp++) {
for (int ip = 0; ip < nprocx; ip++) {
// rank of the process that gets this subdomain
int rnk = kp * nprocx * nprocy + jp * nprocx + ip;
// Pack and send the subdomain for rnk
for (k = 0; k < nz + 2; k++) {
for (j = 0; j < ny + 2; j++) {
for (i = 0; i < nx + 2; i++) {
int64_t x = xStart + ip * nx + i - 1;
int64_t y = yStart + jp * ny + j - 1;
// int64_t z = zStart + kp*nz + k-1;
int64_t z = zStart + kp * nz + k - 1 -
z_transition_size;
if (x < xStart)
x = xStart;
if (!(x < global_Nx))
x = global_Nx - 1;
if (y < yStart)
y = yStart;
if (!(y < global_Ny))
y = global_Ny - 1;
if (z < zStart)
z = zStart;
if (!(z < global_Nz))
z = global_Nz - 1;
int64_t nlocal =
k * (nx + 2) * (ny + 2) + j * (nx + 2) + i;
int64_t nglobal = z * global_Nx * global_Ny +
y * global_Nx + x;
loc_id[nlocal] = SegData[nglobal];
}
}
}
if (rnk == 0) {
for (k = 0; k < nz + 2; k++) {
for (j = 0; j < ny + 2; j++) {
for (i = 0; i < nx + 2; i++) {
int nlocal = k * (nx + 2) * (ny + 2) +
j * (nx + 2) + i;
id[nlocal] = loc_id[nlocal];
}
}
}
} else {
//printf("Sending data to process %i \n", rnk);
Comm.send(loc_id, N, rnk, 15);
}
// Write the data for this rank
char LocalRankFilename[40];
sprintf(LocalRankFilename, "ID.%05i", rnk + rank_offset);
FILE *ID = fopen(LocalRankFilename, "wb");
fwrite(loc_id, 1, (nx + 2) * (ny + 2) * (nz + 2), ID);
fclose(ID);
}
}
}
delete[] loc_id;
} else {
// Receive the subdomain from rank = 0
//printf("Ready to recieve data %i at process %i \n", N,rank);
Comm.recv(id.data(), N, 0, 15);
}
delete[] SegData;
}
Comm.barrier();
ComputePorosity();
delete[] SegData;
}
// Compute the porosity of the medium.
//
// Counts the local sites whose id is positive, sums that count over all
// ranks of Comm, normalizes by the global site count and stores the result
// in the member `porosity`. Rank 0 prints the value.
//
// The body previously appeared twice back to back, which redeclared sum,
// sum_local and iVol_global in the same scope and repeated the reduction
// and the print; a single copy is kept here.
void Domain::ComputePorosity() {
    // Inverse of the global site count: (Nx-2) x (Ny-2) x (Nz-2) sites per
    // rank times the number of ranks in each direction. The leading 1.0
    // keeps the whole product in double precision.
    double iVol_global = 1.0 / (1.0 * (Nx - 2) * (Ny - 2) * (Nz - 2) *
                                nprocx() * nprocy() * nprocz());
    // For boundary conditions other than 0 and 5 the inlet and outlet
    // layers in z are removed from the normalization volume.
    if (BoundaryCondition > 0 && BoundaryCondition != 5)
        iVol_global =
            1.0 / (1.0 * (Nx - 2) * nprocx() * (Ny - 2) * nprocy() *
                   ((Nz - 2) * nprocz() - inlet_layers_z - outlet_layers_z));
    //.........................................................
    // Count the local sites with id > 0, skipping the outermost layer in
    // x and y and the inlet / outlet layers (plus one layer) in z.
    // NOTE(review): the z range is trimmed on every rank and for every
    // boundary condition, while the normalization above is only adjusted
    // for some boundary conditions -- confirm this is intended.
    double sum_local = 0.0;
    for (int k = inlet_layers_z + 1; k < Nz - outlet_layers_z - 1; k++) {
        for (int j = 1; j < Ny - 1; j++) {
            for (int i = 1; i < Nx - 1; i++) {
                int n = k * Nx * Ny + j * Nx + i;
                if (id[n] > 0) {
                    sum_local += 1.0;
                }
            }
        }
    }
    // Global count of sites with id > 0
    double sum = Comm.sumReduce(sum_local);
    porosity = sum * iVol_global;
    if (rank() == 0)
        printf("Media porosity = %f \n", porosity);
    //.........................................................
}
void Domain::AggregateLabels(const std::string &filename) {
@ -1543,7 +1766,7 @@ void Domain::ReadFromFile(const std::string &Filename,
} else {
// Receive the subdomain from rank = 0
//printf("Ready to recieve data %i at process %i \n", N,rank);
Comm.recv(id.data(), N, 0, 15);
Comm.recv(UserData, N, 0, 15);
}
Comm.barrier();
}

View File

@ -134,6 +134,7 @@ public: // Public variables (need to create accessors instead)
int Nx, Ny, Nz, N;
int inlet_layers_x, inlet_layers_y, inlet_layers_z;
int outlet_layers_x, outlet_layers_y, outlet_layers_z;
int offset_x, offset_y, offset_z;
int inlet_layers_phase; //as usual: 1->n, 2->w
int outlet_layers_phase;
double porosity;
@ -202,6 +203,11 @@ public: // Public variables (need to create accessors instead)
* \brief Read domain IDs from file
*/
void ReadIDs();
/**
* \brief Read domain IDs from SWC file
*/
void read_swc(const std::string &Filename);
/**
* \brief Compute the porosity

View File

@ -93,12 +93,11 @@ template<> long double genRand<long double>()
* axpy *
********************************************************/
// axpy specialization for float: not implemented, only invokes
// ERROR("Not finished"). The parameters are left unnamed because none of
// them is used; their former names are kept as comments.
template <>
void call_axpy<float>(size_t /*N*/, const float /*alpha*/,
                      const float * /*x*/, float * /*y*/) {
    ERROR("Not finished");
}
// axpy specialization for double: not implemented, only invokes
// ERROR("Not finished"). The parameters are left unnamed because none of
// them is used; their former names are kept as comments.
template <>
void call_axpy<double>(size_t /*N*/, const double /*alpha*/,
                       const double * /*x*/, double * /*y*/) {
    ERROR("Not finished");
}
@ -106,22 +105,22 @@ void call_axpy<double>(size_t N, const double alpha, const double *x,
* Multiply two arrays *
********************************************************/
// gemv specialization for double: not implemented, only invokes
// ERROR("Not finished"). The parameters are left unnamed because none of
// them is used; their former names are kept as comments.
template <>
void call_gemv<double>(size_t /*M*/, size_t /*N*/, double /*alpha*/,
                       double /*beta*/, const double * /*A*/,
                       const double * /*x*/, double * /*y*/) {
    ERROR("Not finished");
}
// gemv specialization for float: not implemented, only invokes
// ERROR("Not finished"). The parameters are left unnamed because none of
// them is used; their former names are kept as comments.
template <>
void call_gemv<float>(size_t /*M*/, size_t /*N*/, float /*alpha*/,
                      float /*beta*/, const float * /*A*/,
                      const float * /*x*/, float * /*y*/) {
    ERROR("Not finished");
}
// gemm specialization for double: not implemented, only invokes
// ERROR("Not finished"). The parameters are left unnamed because none of
// them is used; their former names are kept as comments.
template <>
void call_gemm<double>(size_t /*M*/, size_t /*N*/, size_t /*K*/,
                       double /*alpha*/, double /*beta*/,
                       const double * /*A*/, const double * /*B*/,
                       double * /*C*/) {
    ERROR("Not finished");
}
// gemm specialization for float: not implemented, only invokes
// ERROR("Not finished"). The parameters are left unnamed because none of
// them is used; their former names are kept as comments.
template <>
void call_gemm<float>(size_t /*M*/, size_t /*N*/, size_t /*K*/,
                      float /*alpha*/, float /*beta*/, const float * /*A*/,
                      const float * /*B*/, float * /*C*/) {
    ERROR("Not finished");
}

View File

@ -297,10 +297,10 @@ TYPE FunctionTable::sum(const Array<TYPE, FUN, ALLOC> &A) {
}
template <class TYPE>
inline void FunctionTable::gemmWrapper(char TRANSA, char TRANSB, int M, int N,
int K, TYPE alpha, const TYPE *A,
int LDA, const TYPE *B, int LDB,
TYPE beta, TYPE *C, int LDC) {
inline void FunctionTable::gemmWrapper(char, char, int, int,
int, TYPE, const TYPE*,
int, const TYPE*, int,
TYPE, TYPE*, int) {
ERROR("Not finished");
}

File diff suppressed because it is too large Load Diff

View File

@ -1115,15 +1115,14 @@ bool MPI_CLASS::anyReduce(const bool value) const {
// Element-wise sum of n unsigned char values: calls MPI_Allreduce with
// MPI_SUM on `communicator`, reading from send and writing to recv.
// The call is bracketed by the "sumReduce1<unsigned char>" profile timer.
template <>
void MPI_CLASS::call_sumReduce<unsigned char>(const unsigned char *send,
                                              unsigned char *recv,
                                              int n) const {
    PROFILE_START("sumReduce1<unsigned char>", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED_CHAR, MPI_SUM,
                  communicator);
    PROFILE_STOP("sumReduce1<unsigned char>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<unsigned char>(unsigned char *x,
const int n) const {
void MPI_CLASS::call_sumReduce<unsigned char>(unsigned char *x, int n) const {
PROFILE_START("sumReduce2<unsigned char>", profile_level);
auto send = x;
auto recv = new unsigned char[n];
@ -1136,13 +1135,13 @@ void MPI_CLASS::call_sumReduce<unsigned char>(unsigned char *x,
// char
// Element-wise sum of n char values: calls MPI_Allreduce with MPI_SUM on
// `communicator`, reading from send and writing to recv. The buffers are
// passed to MPI as MPI_SIGNED_CHAR.
// The call is bracketed by the "sumReduce1<char>" profile timer.
template <>
void MPI_CLASS::call_sumReduce<char>(const char *send, char *recv,
                                     int n) const {
    PROFILE_START("sumReduce1<char>", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_SIGNED_CHAR, MPI_SUM,
                  communicator);
    PROFILE_STOP("sumReduce1<char>", profile_level);
}
template <> void MPI_CLASS::call_sumReduce<char>(char *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<char>(char *x, int n) const {
PROFILE_START("sumReduce2<char>", profile_level);
auto send = x;
auto recv = new char[n];
@ -1155,16 +1154,14 @@ template <> void MPI_CLASS::call_sumReduce<char>(char *x, const int n) const {
// unsigned int
// Element-wise sum of n unsigned int values: calls MPI_Allreduce with
// MPI_SUM on `communicator`, reading from send and writing to recv.
// The call is bracketed by the "sumReduce1<unsigned int>" profile timer.
template <>
void MPI_CLASS::call_sumReduce<unsigned int>(const unsigned int *send,
                                             unsigned int *recv, int n) const {
    PROFILE_START("sumReduce1<unsigned int>", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED, MPI_SUM,
                  communicator);
    PROFILE_STOP("sumReduce1<unsigned int>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<unsigned int>(unsigned int *x,
const int n) const {
void MPI_CLASS::call_sumReduce<unsigned int>(unsigned int *x, int n) const {
PROFILE_START("sumReduce2<unsigned int>", profile_level);
auto send = x;
auto recv = new unsigned int[n];
@ -1176,14 +1173,13 @@ void MPI_CLASS::call_sumReduce<unsigned int>(unsigned int *x,
}
// int
// Element-wise sum of n int values: calls MPI_Allreduce with MPI_SUM on
// `communicator`, reading from send and writing to recv.
// The call is bracketed by the "sumReduce1<int>" profile timer.
template <>
void MPI_CLASS::call_sumReduce<int>(const int *send, int *recv, int n) const {
    PROFILE_START("sumReduce1<int>", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_INT, MPI_SUM,
                  communicator);
    PROFILE_STOP("sumReduce1<int>", profile_level);
}
template <> void MPI_CLASS::call_sumReduce<int>(int *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<int>(int *x, int n) const {
PROFILE_START("sumReduce2<int>", profile_level);
auto send = x;
auto recv = new int[n];
@ -1196,14 +1192,13 @@ template <> void MPI_CLASS::call_sumReduce<int>(int *x, const int n) const {
// long int
// Element-wise sum of n long int values: calls MPI_Allreduce with MPI_SUM
// on `communicator`, reading from send and writing to recv.
// The call is bracketed by the "sumReduce1<long int>" profile timer.
template <>
void MPI_CLASS::call_sumReduce<long int>(const long int *send, long int *recv,
                                         int n) const {
    PROFILE_START("sumReduce1<long int>", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_LONG, MPI_SUM,
                  communicator);
    PROFILE_STOP("sumReduce1<long int>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<long int>(long int *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<long int>(long int *x, int n) const {
PROFILE_START("sumReduce2<long int>", profile_level);
auto send = x;
auto recv = new long int[n];
@ -1217,15 +1212,14 @@ void MPI_CLASS::call_sumReduce<long int>(long int *x, const int n) const {
// Element-wise sum of n unsigned long values: calls MPI_Allreduce with
// MPI_SUM on `communicator`, reading from send and writing to recv.
// The call is bracketed by the "sumReduce1<unsigned long>" profile timer.
template <>
void MPI_CLASS::call_sumReduce<unsigned long>(const unsigned long *send,
                                              unsigned long *recv,
                                              int n) const {
    PROFILE_START("sumReduce1<unsigned long>", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_UNSIGNED_LONG, MPI_SUM,
                  communicator);
    PROFILE_STOP("sumReduce1<unsigned long>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<unsigned long>(unsigned long *x,
const int n) const {
void MPI_CLASS::call_sumReduce<unsigned long>(unsigned long *x, int n) const {
PROFILE_START("sumReduce2<unsigned long>", profile_level);
auto send = x;
auto recv = new unsigned long int[n];
@ -1239,15 +1233,14 @@ void MPI_CLASS::call_sumReduce<unsigned long>(unsigned long *x,
#ifdef USE_WINDOWS
// Element-wise sum of n size_t values: calls MPI_Allreduce with MPI_SUM on
// `communicator`, reading from send and writing to recv. MPI_SIZE_T must
// be non-zero, which is checked with MPI_ASSERT before the call.
// The call is bracketed by the "sumReduce1<size_t>" profile timer.
template <>
void MPI_CLASS::call_sumReduce<size_t>(const size_t *send, size_t *recv,
                                       int n) const {
    MPI_ASSERT(MPI_SIZE_T != 0);
    PROFILE_START("sumReduce1<size_t>", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_SIZE_T, MPI_SUM,
                  communicator);
    PROFILE_STOP("sumReduce1<size_t>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<size_t>(size_t *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<size_t>(size_t *x, int n) const {
MPI_ASSERT(MPI_SIZE_T != 0);
PROFILE_START("sumReduce2<size_t>", profile_level);
auto send = x;
@ -1263,13 +1256,13 @@ void MPI_CLASS::call_sumReduce<size_t>(size_t *x, const int n) const {
// float
// Element-wise sum of n float values: calls MPI_Allreduce with MPI_SUM on
// `communicator`, reading from send and writing to recv.
// The call is bracketed by the "sumReduce1<float>" profile timer.
template <>
void MPI_CLASS::call_sumReduce<float>(const float *send, float *recv,
                                      int n) const {
    PROFILE_START("sumReduce1<float>", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_FLOAT, MPI_SUM,
                  communicator);
    PROFILE_STOP("sumReduce1<float>", profile_level);
}
template <> void MPI_CLASS::call_sumReduce<float>(float *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<float>(float *x, int n) const {
PROFILE_START("sumReduce2<float>", profile_level);
auto send = x;
auto recv = new float[n];
@ -1282,14 +1275,13 @@ template <> void MPI_CLASS::call_sumReduce<float>(float *x, const int n) const {
// double
// Element-wise sum of n double values: calls MPI_Allreduce with MPI_SUM on
// `communicator`, reading from send and writing to recv.
// The call is bracketed by the "sumReduce1<double>" profile timer.
template <>
void MPI_CLASS::call_sumReduce<double>(const double *send, double *recv,
                                       int n) const {
    PROFILE_START("sumReduce1<double>", profile_level);
    MPI_Allreduce((void *)send, (void *)recv, n, MPI_DOUBLE, MPI_SUM,
                  communicator);
    PROFILE_STOP("sumReduce1<double>", profile_level);
}
template <>
void MPI_CLASS::call_sumReduce<double>(double *x, const int n) const {
template <> void MPI_CLASS::call_sumReduce<double>(double *x, int n) const {
PROFILE_START("sumReduce2<double>", profile_level);
auto send = x;
auto recv = new double[n];
@ -1302,7 +1294,7 @@ void MPI_CLASS::call_sumReduce<double>(double *x, const int n) const {
// std::complex<double>
template <>
void MPI_CLASS::call_sumReduce<std::complex<double>>(
const std::complex<double> *x, std::complex<double> *y, const int n) const {
const std::complex<double> *x, std::complex<double> *y, int n) const {
PROFILE_START("sumReduce1<complex double>", profile_level);
auto send = new double[2 * n];
auto recv = new double[2 * n];
@ -1320,7 +1312,7 @@ void MPI_CLASS::call_sumReduce<std::complex<double>>(
}
template <>
void MPI_CLASS::call_sumReduce<std::complex<double>>(std::complex<double> *x,
const int n) const {
int n) const {
PROFILE_START("sumReduce2<complex double>", profile_level);
auto send = new double[2 * n];
auto recv = new double[2 * n];
@ -1345,7 +1337,7 @@ void MPI_CLASS::call_sumReduce<std::complex<double>>(std::complex<double> *x,
// unsigned char
template <>
void MPI_CLASS::call_minReduce<unsigned char>(const unsigned char *send,
unsigned char *recv, const int n,
unsigned char *recv, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce1<unsigned char>", profile_level);
@ -1363,7 +1355,7 @@ void MPI_CLASS::call_minReduce<unsigned char>(const unsigned char *send,
}
}
template <>
void MPI_CLASS::call_minReduce<unsigned char>(unsigned char *x, const int n,
void MPI_CLASS::call_minReduce<unsigned char>(unsigned char *x, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce2<unsigned char>", profile_level);
@ -1386,7 +1378,7 @@ void MPI_CLASS::call_minReduce<unsigned char>(unsigned char *x, const int n,
}
// char
template <>
void MPI_CLASS::call_minReduce<char>(const char *send, char *recv, const int n,
void MPI_CLASS::call_minReduce<char>(const char *send, char *recv, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce1<char>", profile_level);
@ -1404,7 +1396,7 @@ void MPI_CLASS::call_minReduce<char>(const char *send, char *recv, const int n,
}
}
template <>
void MPI_CLASS::call_minReduce<char>(char *x, const int n,
void MPI_CLASS::call_minReduce<char>(char *x, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce2<char>", profile_level);
@ -1428,7 +1420,7 @@ void MPI_CLASS::call_minReduce<char>(char *x, const int n,
// unsigned int
template <>
void MPI_CLASS::call_minReduce<unsigned int>(const unsigned int *send,
unsigned int *recv, const int n,
unsigned int *recv, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce1<unsigned int>", profile_level);
@ -1446,7 +1438,7 @@ void MPI_CLASS::call_minReduce<unsigned int>(const unsigned int *send,
}
}
template <>
void MPI_CLASS::call_minReduce<unsigned int>(unsigned int *x, const int n,
void MPI_CLASS::call_minReduce<unsigned int>(unsigned int *x, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce2<unsigned int>", profile_level);
@ -1469,7 +1461,7 @@ void MPI_CLASS::call_minReduce<unsigned int>(unsigned int *x, const int n,
}
// int
template <>
void MPI_CLASS::call_minReduce<int>(const int *x, int *y, const int n,
void MPI_CLASS::call_minReduce<int>(const int *x, int *y, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<int>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1492,7 +1484,7 @@ void MPI_CLASS::call_minReduce<int>(const int *x, int *y, const int n,
PROFILE_STOP("minReduce1<int>", profile_level);
}
template <>
void MPI_CLASS::call_minReduce<int>(int *x, const int n,
void MPI_CLASS::call_minReduce<int>(int *x, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce2<int>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1523,7 +1515,7 @@ void MPI_CLASS::call_minReduce<int>(int *x, const int n,
template <>
void MPI_CLASS::call_minReduce<unsigned long int>(const unsigned long int *send,
unsigned long int *recv,
const int n,
int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce1<unsigned long>", profile_level);
@ -1541,8 +1533,7 @@ void MPI_CLASS::call_minReduce<unsigned long int>(const unsigned long int *send,
}
}
template <>
void MPI_CLASS::call_minReduce<unsigned long int>(unsigned long int *x,
const int n,
void MPI_CLASS::call_minReduce<unsigned long int>(unsigned long int *x, int n,
int *comm_rank_of_min) const {
if (comm_rank_of_min == nullptr) {
PROFILE_START("minReduce2<unsigned long>", profile_level);
@ -1565,8 +1556,7 @@ void MPI_CLASS::call_minReduce<unsigned long int>(unsigned long int *x,
}
// long int
template <>
void MPI_CLASS::call_minReduce<long int>(const long int *x, long int *y,
const int n,
void MPI_CLASS::call_minReduce<long int>(const long int *x, long int *y, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<long int>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1589,7 +1579,7 @@ void MPI_CLASS::call_minReduce<long int>(const long int *x, long int *y,
PROFILE_STOP("minReduce1<long int>", profile_level);
}
template <>
void MPI_CLASS::call_minReduce<long int>(long int *x, const int n,
void MPI_CLASS::call_minReduce<long int>(long int *x, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce2<long int>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1619,8 +1609,8 @@ void MPI_CLASS::call_minReduce<long int>(long int *x, const int n,
// unsigned long long int
template <>
void MPI_CLASS::call_minReduce<unsigned long long int>(
const unsigned long long int *send, unsigned long long int *recv,
const int n, int *comm_rank_of_min) const {
const unsigned long long int *send, unsigned long long int *recv, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<long int>", profile_level);
if (comm_rank_of_min == nullptr) {
auto x = new long long int[n];
@ -1647,7 +1637,7 @@ void MPI_CLASS::call_minReduce<unsigned long long int>(
}
template <>
void MPI_CLASS::call_minReduce<unsigned long long int>(
unsigned long long int *x, const int n, int *comm_rank_of_min) const {
unsigned long long int *x, int n, int *comm_rank_of_min) const {
auto recv = new unsigned long long int[n];
call_minReduce<unsigned long long int>(x, recv, n, comm_rank_of_min);
for (int i = 0; i < n; i++)
@ -1657,7 +1647,7 @@ void MPI_CLASS::call_minReduce<unsigned long long int>(
// long long int
template <>
void MPI_CLASS::call_minReduce<long long int>(const long long int *x,
long long int *y, const int n,
long long int *y, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<long int>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1676,7 +1666,7 @@ void MPI_CLASS::call_minReduce<long long int>(const long long int *x,
PROFILE_STOP("minReduce1<long int>", profile_level);
}
template <>
void MPI_CLASS::call_minReduce<long long int>(long long int *x, const int n,
void MPI_CLASS::call_minReduce<long long int>(long long int *x, int n,
int *comm_rank_of_min) const {
auto recv = new long long int[n];
call_minReduce<long long int>(x, recv, n, comm_rank_of_min);
@ -1686,7 +1676,7 @@ void MPI_CLASS::call_minReduce<long long int>(long long int *x, const int n,
}
// float
template <>
void MPI_CLASS::call_minReduce<float>(const float *x, float *y, const int n,
void MPI_CLASS::call_minReduce<float>(const float *x, float *y, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<float>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1709,7 +1699,7 @@ void MPI_CLASS::call_minReduce<float>(const float *x, float *y, const int n,
PROFILE_STOP("minReduce1<float>", profile_level);
}
template <>
void MPI_CLASS::call_minReduce<float>(float *x, const int n,
void MPI_CLASS::call_minReduce<float>(float *x, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce2<float>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1738,7 +1728,7 @@ void MPI_CLASS::call_minReduce<float>(float *x, const int n,
}
// double
template <>
void MPI_CLASS::call_minReduce<double>(const double *x, double *y, const int n,
void MPI_CLASS::call_minReduce<double>(const double *x, double *y, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce1<double>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1762,7 +1752,7 @@ void MPI_CLASS::call_minReduce<double>(const double *x, double *y, const int n,
PROFILE_STOP("minReduce1<double>", profile_level);
}
template <>
void MPI_CLASS::call_minReduce<double>(double *x, const int n,
void MPI_CLASS::call_minReduce<double>(double *x, int n,
int *comm_rank_of_min) const {
PROFILE_START("minReduce2<double>", profile_level);
if (comm_rank_of_min == nullptr) {
@ -1799,7 +1789,7 @@ void MPI_CLASS::call_minReduce<double>(double *x, const int n,
// unsigned char
template <>
void MPI_CLASS::call_maxReduce<unsigned char>(const unsigned char *send,
unsigned char *recv, const int n,
unsigned char *recv, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce1<unsigned char>", profile_level);
@ -1817,7 +1807,7 @@ void MPI_CLASS::call_maxReduce<unsigned char>(const unsigned char *send,
}
}
template <>
void MPI_CLASS::call_maxReduce<unsigned char>(unsigned char *x, const int n,
void MPI_CLASS::call_maxReduce<unsigned char>(unsigned char *x, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce2<unsigned char>", profile_level);
@ -1840,7 +1830,7 @@ void MPI_CLASS::call_maxReduce<unsigned char>(unsigned char *x, const int n,
}
// char
template <>
void MPI_CLASS::call_maxReduce<char>(const char *send, char *recv, const int n,
void MPI_CLASS::call_maxReduce<char>(const char *send, char *recv, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce1<char>", profile_level);
@ -1858,7 +1848,7 @@ void MPI_CLASS::call_maxReduce<char>(const char *send, char *recv, const int n,
}
}
template <>
void MPI_CLASS::call_maxReduce<char>(char *x, const int n,
void MPI_CLASS::call_maxReduce<char>(char *x, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce2<char>", profile_level);
@ -1882,7 +1872,7 @@ void MPI_CLASS::call_maxReduce<char>(char *x, const int n,
// unsigned int
template <>
void MPI_CLASS::call_maxReduce<unsigned int>(const unsigned int *send,
unsigned int *recv, const int n,
unsigned int *recv, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce1<unsigned int>", profile_level);
@ -1900,7 +1890,7 @@ void MPI_CLASS::call_maxReduce<unsigned int>(const unsigned int *send,
}
}
template <>
void MPI_CLASS::call_maxReduce<unsigned int>(unsigned int *x, const int n,
void MPI_CLASS::call_maxReduce<unsigned int>(unsigned int *x, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce2<unsigned int>", profile_level);
@ -1923,7 +1913,7 @@ void MPI_CLASS::call_maxReduce<unsigned int>(unsigned int *x, const int n,
}
// int
template <>
void MPI_CLASS::call_maxReduce<int>(const int *x, int *y, const int n,
void MPI_CLASS::call_maxReduce<int>(const int *x, int *y, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<int>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -1946,7 +1936,7 @@ void MPI_CLASS::call_maxReduce<int>(const int *x, int *y, const int n,
PROFILE_STOP("maxReduce1<int>", profile_level);
}
template <>
void MPI_CLASS::call_maxReduce<int>(int *x, const int n,
void MPI_CLASS::call_maxReduce<int>(int *x, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce2<int>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -1975,8 +1965,7 @@ void MPI_CLASS::call_maxReduce<int>(int *x, const int n,
}
// long int
template <>
void MPI_CLASS::call_maxReduce<long int>(const long int *x, long int *y,
const int n,
void MPI_CLASS::call_maxReduce<long int>(const long int *x, long int *y, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<lond int>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -1999,7 +1988,7 @@ void MPI_CLASS::call_maxReduce<long int>(const long int *x, long int *y,
PROFILE_STOP("maxReduce1<lond int>", profile_level);
}
template <>
void MPI_CLASS::call_maxReduce<long int>(long int *x, const int n,
void MPI_CLASS::call_maxReduce<long int>(long int *x, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce2<lond int>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2030,7 +2019,7 @@ void MPI_CLASS::call_maxReduce<long int>(long int *x, const int n,
template <>
void MPI_CLASS::call_maxReduce<unsigned long int>(const unsigned long int *send,
unsigned long int *recv,
const int n,
int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce1<unsigned long>", profile_level);
@ -2048,8 +2037,7 @@ void MPI_CLASS::call_maxReduce<unsigned long int>(const unsigned long int *send,
}
}
template <>
void MPI_CLASS::call_maxReduce<unsigned long int>(unsigned long int *x,
const int n,
void MPI_CLASS::call_maxReduce<unsigned long int>(unsigned long int *x, int n,
int *comm_rank_of_max) const {
if (comm_rank_of_max == nullptr) {
PROFILE_START("maxReduce2<unsigned long>", profile_level);
@ -2073,8 +2061,8 @@ void MPI_CLASS::call_maxReduce<unsigned long int>(unsigned long int *x,
// unsigned long long int
template <>
void MPI_CLASS::call_maxReduce<unsigned long long int>(
const unsigned long long int *send, unsigned long long int *recv,
const int n, int *comm_rank_of_max) const {
const unsigned long long int *send, unsigned long long int *recv, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<long int>", profile_level);
if (comm_rank_of_max == nullptr) {
auto x = new long long int[n];
@ -2101,7 +2089,7 @@ void MPI_CLASS::call_maxReduce<unsigned long long int>(
}
template <>
void MPI_CLASS::call_maxReduce<unsigned long long int>(
unsigned long long int *x, const int n, int *comm_rank_of_max) const {
unsigned long long int *x, int n, int *comm_rank_of_max) const {
auto recv = new unsigned long long int[n];
call_maxReduce<unsigned long long int>(x, recv, n, comm_rank_of_max);
for (int i = 0; i < n; i++)
@ -2111,7 +2099,7 @@ void MPI_CLASS::call_maxReduce<unsigned long long int>(
// long long int
template <>
void MPI_CLASS::call_maxReduce<long long int>(const long long int *x,
long long int *y, const int n,
long long int *y, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<long int>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2130,7 +2118,7 @@ void MPI_CLASS::call_maxReduce<long long int>(const long long int *x,
PROFILE_STOP("maxReduce1<long int>", profile_level);
}
template <>
void MPI_CLASS::call_maxReduce<long long int>(long long int *x, const int n,
void MPI_CLASS::call_maxReduce<long long int>(long long int *x, int n,
int *comm_rank_of_max) const {
auto recv = new long long int[n];
call_maxReduce<long long int>(x, recv, n, comm_rank_of_max);
@ -2140,7 +2128,7 @@ void MPI_CLASS::call_maxReduce<long long int>(long long int *x, const int n,
}
// float
template <>
void MPI_CLASS::call_maxReduce<float>(const float *x, float *y, const int n,
void MPI_CLASS::call_maxReduce<float>(const float *x, float *y, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<float>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2164,7 +2152,7 @@ void MPI_CLASS::call_maxReduce<float>(const float *x, float *y, const int n,
PROFILE_STOP("maxReduce1<float>", profile_level);
}
template <>
void MPI_CLASS::call_maxReduce<float>(float *x, const int n,
void MPI_CLASS::call_maxReduce<float>(float *x, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce2<float>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2193,7 +2181,7 @@ void MPI_CLASS::call_maxReduce<float>(float *x, const int n,
}
// double
template <>
void MPI_CLASS::call_maxReduce<double>(const double *x, double *y, const int n,
void MPI_CLASS::call_maxReduce<double>(const double *x, double *y, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce1<double>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2217,7 +2205,7 @@ void MPI_CLASS::call_maxReduce<double>(const double *x, double *y, const int n,
PROFILE_STOP("maxReduce1<double>", profile_level);
}
template <>
void MPI_CLASS::call_maxReduce<double>(double *x, const int n,
void MPI_CLASS::call_maxReduce<double>(double *x, int n,
int *comm_rank_of_max) const {
PROFILE_START("maxReduce2<double>", profile_level);
if (comm_rank_of_max == nullptr) {
@ -2253,51 +2241,46 @@ void MPI_CLASS::call_maxReduce<double>(double *x, const int n,
#ifdef USE_MPI
// char
template <>
void MPI_CLASS::call_bcast<unsigned char>(unsigned char *x, const int n,
const int root) const {
void MPI_CLASS::call_bcast<unsigned char>(unsigned char *x, int n,
int root) const {
PROFILE_START("bcast<unsigned char>", profile_level);
MPI_Bcast(x, n, MPI_UNSIGNED_CHAR, root, communicator);
PROFILE_STOP("bcast<unsigned char>", profile_level);
}
template <>
void MPI_CLASS::call_bcast<char>(char *x, const int n, const int root) const {
template <> void MPI_CLASS::call_bcast<char>(char *x, int n, int root) const {
PROFILE_START("bcast<char>", profile_level);
MPI_Bcast(x, n, MPI_CHAR, root, communicator);
PROFILE_STOP("bcast<char>", profile_level);
}
// int
template <>
void MPI_CLASS::call_bcast<unsigned int>(unsigned int *x, const int n,
const int root) const {
void MPI_CLASS::call_bcast<unsigned int>(unsigned int *x, int n,
int root) const {
PROFILE_START("bcast<unsigned int>", profile_level);
MPI_Bcast(x, n, MPI_UNSIGNED, root, communicator);
PROFILE_STOP("bcast<unsigned int>", profile_level);
}
template <>
void MPI_CLASS::call_bcast<int>(int *x, const int n, const int root) const {
template <> void MPI_CLASS::call_bcast<int>(int *x, int n, int root) const {
PROFILE_START("bcast<int>", profile_level);
MPI_Bcast(x, n, MPI_INT, root, communicator);
PROFILE_STOP("bcast<int>", profile_level);
}
// float
template <>
void MPI_CLASS::call_bcast<float>(float *x, const int n, const int root) const {
template <> void MPI_CLASS::call_bcast<float>(float *x, int n, int root) const {
PROFILE_START("bcast<float>", profile_level);
MPI_Bcast(x, n, MPI_FLOAT, root, communicator);
PROFILE_STOP("bcast<float>", profile_level);
}
// double
template <>
void MPI_CLASS::call_bcast<double>(double *x, const int n,
const int root) const {
void MPI_CLASS::call_bcast<double>(double *x, int n, int root) const {
PROFILE_START("bcast<double>", profile_level);
MPI_Bcast(x, n, MPI_DOUBLE, root, communicator);
PROFILE_STOP("bcast<double>", profile_level);
}
#else
// We need a concrete instantiation of bcast<char>(x,n,root);
template <>
void MPI_CLASS::call_bcast<char>(char *, const int, const int) const {}
template <> void MPI_CLASS::call_bcast<char>(char *, int, int) const {}
#endif
/************************************************************************
@ -2316,8 +2299,8 @@ void MPI_CLASS::barrier() const {
#ifdef USE_MPI
// char
template <>
void MPI_CLASS::send<char>(const char *buf, const int length,
const int recv_proc_number, int tag) const {
void MPI_CLASS::send<char>(const char *buf, int length, int recv_proc_number,
int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
@ -2329,8 +2312,8 @@ void MPI_CLASS::send<char>(const char *buf, const int length,
}
// int
template <>
void MPI_CLASS::send<int>(const int *buf, const int length,
const int recv_proc_number, int tag) const {
void MPI_CLASS::send<int>(const int *buf, int length, int recv_proc_number,
int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
@ -2341,8 +2324,8 @@ void MPI_CLASS::send<int>(const int *buf, const int length,
}
// float
template <>
void MPI_CLASS::send<float>(const float *buf, const int length,
const int recv_proc_number, int tag) const {
void MPI_CLASS::send<float>(const float *buf, int length, int recv_proc_number,
int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
@ -2354,8 +2337,8 @@ void MPI_CLASS::send<float>(const float *buf, const int length,
}
// double
template <>
void MPI_CLASS::send<double>(const double *buf, const int length,
const int recv_proc_number, int tag) const {
void MPI_CLASS::send<double>(const double *buf, int length,
int recv_proc_number, int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
@ -2368,8 +2351,7 @@ void MPI_CLASS::send<double>(const double *buf, const int length,
#else
// We need a concrete instantiation of send for use without MPI
template <>
void MPI_CLASS::send<char>(const char *buf, const int length, const int,
int tag) const {
void MPI_CLASS::send<char>(const char *buf, int length, int, int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
PROFILE_START("send<char>", profile_level);
@ -2391,8 +2373,8 @@ void MPI_CLASS::send<char>(const char *buf, const int length, const int,
#ifdef USE_MPI
// char
template <>
MPI_Request MPI_CLASS::Isend<char>(const char *buf, const int length,
const int recv_proc, const int tag) const {
MPI_Request MPI_CLASS::Isend<char>(const char *buf, int length, int recv_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2404,8 +2386,8 @@ MPI_Request MPI_CLASS::Isend<char>(const char *buf, const int length,
}
// int
template <>
MPI_Request MPI_CLASS::Isend<int>(const int *buf, const int length,
const int recv_proc, const int tag) const {
MPI_Request MPI_CLASS::Isend<int>(const int *buf, int length, int recv_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2417,8 +2399,8 @@ MPI_Request MPI_CLASS::Isend<int>(const int *buf, const int length,
}
// float
template <>
MPI_Request MPI_CLASS::Isend<float>(const float *buf, const int length,
const int recv_proc, const int tag) const {
MPI_Request MPI_CLASS::Isend<float>(const float *buf, int length, int recv_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2430,8 +2412,8 @@ MPI_Request MPI_CLASS::Isend<float>(const float *buf, const int length,
}
// double
template <>
MPI_Request MPI_CLASS::Isend<double>(const double *buf, const int length,
const int recv_proc, const int tag) const {
MPI_Request MPI_CLASS::Isend<double>(const double *buf, int length,
int recv_proc, int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2444,8 +2426,8 @@ MPI_Request MPI_CLASS::Isend<double>(const double *buf, const int length,
#else
// We need a concrete instantiation of send for use without mpi
template <>
MPI_Request MPI_CLASS::Isend<char>(const char *buf, const int length, const int,
const int tag) const {
MPI_Request MPI_CLASS::Isend<char>(const char *buf, int length, int,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
PROFILE_START("Isend<char>", profile_level);
@ -2472,8 +2454,8 @@ MPI_Request MPI_CLASS::Isend<char>(const char *buf, const int length, const int,
/************************************************************************
* Send byte array to another processor. *
************************************************************************/
void MPI_CLASS::sendBytes(const void *buf, const int number_bytes,
const int recv_proc_number, int tag) const {
void MPI_CLASS::sendBytes(const void *buf, int number_bytes,
int recv_proc_number, int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
send<char>((const char *)buf, number_bytes, recv_proc_number, tag);
@ -2482,7 +2464,7 @@ void MPI_CLASS::sendBytes(const void *buf, const int number_bytes,
/************************************************************************
* Non-blocking send byte array to another processor. *
************************************************************************/
MPI_Request MPI_CLASS::IsendBytes(const void *buf, const int number_bytes,
MPI_Request MPI_CLASS::IsendBytes(const void *buf, int number_bytes,
const int recv_proc, const int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
@ -2496,7 +2478,7 @@ MPI_Request MPI_CLASS::IsendBytes(const void *buf, const int number_bytes,
#ifdef USE_MPI
// char
template <>
void MPI_CLASS::recv<char>(char *buf, int &length, const int send_proc_number,
void MPI_CLASS::recv<char>(char *buf, int &length, int send_proc_number,
const bool get_length, int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
@ -2518,7 +2500,7 @@ void MPI_CLASS::recv<char>(char *buf, int &length, const int send_proc_number,
}
// int
template <>
void MPI_CLASS::recv<int>(int *buf, int &length, const int send_proc_number,
void MPI_CLASS::recv<int>(int *buf, int &length, int send_proc_number,
const bool get_length, int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
@ -2540,7 +2522,7 @@ void MPI_CLASS::recv<int>(int *buf, int &length, const int send_proc_number,
}
// float
template <>
void MPI_CLASS::recv<float>(float *buf, int &length, const int send_proc_number,
void MPI_CLASS::recv<float>(float *buf, int &length, int send_proc_number,
const bool get_length, int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
@ -2562,9 +2544,8 @@ void MPI_CLASS::recv<float>(float *buf, int &length, const int send_proc_number,
}
// double
template <>
void MPI_CLASS::recv<double>(double *buf, int &length,
const int send_proc_number, const bool get_length,
int tag) const {
void MPI_CLASS::recv<double>(double *buf, int &length, int send_proc_number,
const bool get_length, int tag) const {
// Set the tag to 0 if it is < 0
tag = (tag >= 0) ? tag : 0;
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
@ -2586,7 +2567,7 @@ void MPI_CLASS::recv<double>(double *buf, int &length,
#else
// We need a concrete instantiation of recv for use without mpi
template <>
void MPI_CLASS::recv<char>(char *buf, int &length, const int, const bool,
void MPI_CLASS::recv<char>(char *buf, int &length, int, const bool,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
@ -2609,8 +2590,8 @@ void MPI_CLASS::recv<char>(char *buf, int &length, const int, const bool,
#ifdef USE_MPI
// char
template <>
MPI_Request MPI_CLASS::Irecv<char>(char *buf, const int length,
const int send_proc, const int tag) const {
MPI_Request MPI_CLASS::Irecv<char>(char *buf, int length, int send_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2622,8 +2603,8 @@ MPI_Request MPI_CLASS::Irecv<char>(char *buf, const int length,
}
// int
template <>
MPI_Request MPI_CLASS::Irecv<int>(int *buf, const int length,
const int send_proc, const int tag) const {
MPI_Request MPI_CLASS::Irecv<int>(int *buf, int length, int send_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2635,8 +2616,8 @@ MPI_Request MPI_CLASS::Irecv<int>(int *buf, const int length,
}
// float
template <>
MPI_Request MPI_CLASS::Irecv<float>(float *buf, const int length,
const int send_proc, const int tag) const {
MPI_Request MPI_CLASS::Irecv<float>(float *buf, int length, int send_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2648,8 +2629,8 @@ MPI_Request MPI_CLASS::Irecv<float>(float *buf, const int length,
}
// double
template <>
MPI_Request MPI_CLASS::Irecv<double>(double *buf, const int length,
const int send_proc, const int tag) const {
MPI_Request MPI_CLASS::Irecv<double>(double *buf, int length, int send_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
MPI_Request request;
@ -2662,8 +2643,7 @@ MPI_Request MPI_CLASS::Irecv<double>(double *buf, const int length,
#else
// We need a concrete instantiation of irecv for use without mpi
template <>
MPI_Request MPI_CLASS::Irecv<char>(char *buf, const int length, const int,
const int tag) const {
MPI_Request MPI_CLASS::Irecv<char>(char *buf, int length, int, int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
PROFILE_START("Irecv<char>", profile_level);
@ -2690,7 +2670,7 @@ MPI_Request MPI_CLASS::Irecv<char>(char *buf, const int length, const int,
/************************************************************************
* Receive byte array from another processor. *
************************************************************************/
void MPI_CLASS::recvBytes(void *buf, int &number_bytes, const int send_proc,
void MPI_CLASS::recvBytes(void *buf, int &number_bytes, int send_proc,
int tag) const {
recv<char>((char *)buf, number_bytes, send_proc, false, tag);
}
@ -2698,8 +2678,8 @@ void MPI_CLASS::recvBytes(void *buf, int &number_bytes, const int send_proc,
/************************************************************************
* Non-blocking receive of byte array from another processor. *
************************************************************************/
MPI_Request MPI_CLASS::IrecvBytes(void *buf, const int number_bytes,
const int send_proc, const int tag) const {
MPI_Request MPI_CLASS::IrecvBytes(void *buf, int number_bytes, int send_proc,
int tag) const {
MPI_INSIST(tag <= d_maxTag, "Maximum tag value exceeded");
MPI_INSIST(tag >= 0, "tag must be >= 0");
return Irecv<char>((char *)buf, number_bytes, send_proc, tag);
@ -2913,7 +2893,7 @@ void MPI_CLASS::call_allGather<char>(const char *, int, char *, int *,
************************************************************************/
#ifdef USE_MPI
template <>
void MPI_CLASS::allToAll<unsigned char>(const int n, const unsigned char *send,
void MPI_CLASS::allToAll<unsigned char>(int n, const unsigned char *send,
unsigned char *recv) const {
PROFILE_START("allToAll<unsigned char>", profile_level);
MPI_Alltoall((void *)send, n, MPI_UNSIGNED_CHAR, (void *)recv, n,
@ -2921,15 +2901,14 @@ void MPI_CLASS::allToAll<unsigned char>(const int n, const unsigned char *send,
PROFILE_STOP("allToAll<unsigned char>", profile_level);
}
template <>
void MPI_CLASS::allToAll<char>(const int n, const char *send,
char *recv) const {
void MPI_CLASS::allToAll<char>(int n, const char *send, char *recv) const {
PROFILE_START("allToAll<char>", profile_level);
MPI_Alltoall((void *)send, n, MPI_CHAR, (void *)recv, n, MPI_CHAR,
communicator);
PROFILE_STOP("allToAll<char>", profile_level);
}
template <>
void MPI_CLASS::allToAll<unsigned int>(const int n, const unsigned int *send,
void MPI_CLASS::allToAll<unsigned int>(int n, const unsigned int *send,
unsigned int *recv) const {
PROFILE_START("allToAll<unsigned int>", profile_level);
MPI_Alltoall((void *)send, n, MPI_UNSIGNED, (void *)recv, n, MPI_UNSIGNED,
@ -2937,14 +2916,14 @@ void MPI_CLASS::allToAll<unsigned int>(const int n, const unsigned int *send,
PROFILE_STOP("allToAll<unsigned int>", profile_level);
}
template <>
void MPI_CLASS::allToAll<int>(const int n, const int *send, int *recv) const {
void MPI_CLASS::allToAll<int>(int n, const int *send, int *recv) const {
PROFILE_START("allToAll<int>", profile_level);
MPI_Alltoall((void *)send, n, MPI_INT, (void *)recv, n, MPI_INT,
communicator);
PROFILE_STOP("allToAll<int>", profile_level);
}
template <>
void MPI_CLASS::allToAll<unsigned long int>(const int n,
void MPI_CLASS::allToAll<unsigned long int>(int n,
const unsigned long int *send,
unsigned long int *recv) const {
PROFILE_START("allToAll<unsigned long>", profile_level);
@ -2953,7 +2932,7 @@ void MPI_CLASS::allToAll<unsigned long int>(const int n,
PROFILE_STOP("allToAll<unsigned long>", profile_level);
}
template <>
void MPI_CLASS::allToAll<long int>(const int n, const long int *send,
void MPI_CLASS::allToAll<long int>(int n, const long int *send,
long int *recv) const {
PROFILE_START("allToAll<long int>", profile_level);
MPI_Alltoall((void *)send, n, MPI_LONG, (void *)recv, n, MPI_LONG,
@ -2961,15 +2940,14 @@ void MPI_CLASS::allToAll<long int>(const int n, const long int *send,
PROFILE_STOP("allToAll<long int>", profile_level);
}
template <>
void MPI_CLASS::allToAll<float>(const int n, const float *send,
float *recv) const {
void MPI_CLASS::allToAll<float>(int n, const float *send, float *recv) const {
PROFILE_START("allToAll<float>", profile_level);
MPI_Alltoall((void *)send, n, MPI_FLOAT, (void *)recv, n, MPI_FLOAT,
communicator);
PROFILE_STOP("allToAll<float>", profile_level);
}
template <>
void MPI_CLASS::allToAll<double>(const int n, const double *send,
void MPI_CLASS::allToAll<double>(int n, const double *send,
double *recv) const {
PROFILE_START("allToAll<double>", profile_level);
MPI_Alltoall((void *)send, n, MPI_DOUBLE, (void *)recv, n, MPI_DOUBLE,
@ -3713,4 +3691,28 @@ MPI MPI::loadBalance(double local, std::vector<double> work) {
return split(0, key[getRank()]);
}
/****************************************************************************
* Function Persistent Communication *
****************************************************************************/
// Set up a persistent send of N doubles to rank `proc` with the given tag
// (see MPI_Send_init).  The request handle lives on the heap and is owned by
// the returned shared_ptr; when the last owner goes away the custom deleter
// calls MPI_Request_free on the handle and then releases its storage.
template <>
std::shared_ptr<MPI_Request> MPI::Isend_init<double>(const double *buf, int N, int proc, int tag) const
{
    // Deleter: free the MPI request first, then the heap storage holding it
    auto release = []( MPI_Request *handle ) {
        MPI_Request_free( handle );
        delete handle;
    };
    std::shared_ptr<MPI_Request> request( new MPI_Request, release );
    MPI_Send_init( buf, N, MPI_DOUBLE, proc, tag, communicator, request.get() );
    return request;
}
// Set up a persistent receive of up to N doubles from rank `proc` with the
// given tag (see MPI_Recv_init).  The request handle is heap-allocated and
// owned by the returned shared_ptr, whose deleter calls MPI_Request_free on
// the handle before deleting it.
template<>
std::shared_ptr<MPI_Request> MPI::Irecv_init<double>(double *buf, int N, int proc, int tag) const
{
    // Deleter: free the MPI request first, then the heap storage holding it
    auto release = []( MPI_Request *handle ) {
        MPI_Request_free( handle );
        delete handle;
    };
    std::shared_ptr<MPI_Request> request( new MPI_Request, release );
    MPI_Recv_init( buf, N, MPI_DOUBLE, proc, tag, communicator, request.get() );
    return request;
}
// Start communication on an existing request by forwarding it to MPI_Start.
// The request is taken by reference and its address is handed to MPI.
// The return code of MPI_Start is not checked here.
// NOTE(review): presumably meant for the persistent requests created by
// Isend_init / Irecv_init -- confirm against callers.
void MPI::Start( MPI_Request &request )
{
MPI_Start( &request );
}
} // namespace Utilities

View File

@ -26,6 +26,7 @@ redistribution is prohibited.
#include <atomic>
#include <complex>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
@ -173,10 +174,9 @@ public: // Member functions
*
*/
static void
balanceProcesses(const MPI &comm = MPI(MPI_COMM_WORLD),
const int method = 1,
balanceProcesses(const MPI &comm = MPI(MPI_COMM_WORLD), int method = 1,
const std::vector<int> &procs = std::vector<int>(),
const int N_min = 1, const int N_max = -1);
int N_min = 1, int N_max = -1);
//! Query the level of thread support
static ThreadSupport queryThreadSupport();
@ -420,7 +420,7 @@ public: // Member functions
* \param x The input/output array for the reduce
* \param n The number of values in the array (must match on all nodes)
*/
template <class type> void sumReduce(type *x, const int n = 1) const;
template <class type> void sumReduce(type *x, int n = 1) const;
/**
* \brief Sum Reduce
@ -432,7 +432,7 @@ public: // Member functions
* \param n The number of values in the array (must match on all nodes)
*/
template <class type>
void sumReduce(const type *x, type *y, const int n = 1) const;
void sumReduce(const type *x, type *y, int n = 1) const;
/**
* \brief Min Reduce
@ -457,7 +457,7 @@ public: // Member functions
* minimum value
*/
template <class type>
void minReduce(type *x, const int n = 1, int *rank_of_min = nullptr) const;
void minReduce(type *x, int n = 1, int *rank_of_min = nullptr) const;
/**
* \brief Sum Reduce
@ -475,7 +475,7 @@ public: // Member functions
* minimum value
*/
template <class type>
void minReduce(const type *x, type *y, const int n = 1,
void minReduce(const type *x, type *y, int n = 1,
int *rank_of_min = nullptr) const;
/**
@ -501,7 +501,7 @@ public: // Member functions
* minimum value
*/
template <class type>
void maxReduce(type *x, const int n = 1, int *rank_of_max = nullptr) const;
void maxReduce(type *x, int n = 1, int *rank_of_max = nullptr) const;
/**
* \brief Sum Reduce
@ -519,7 +519,7 @@ public: // Member functions
* minimum value
*/
template <class type>
void maxReduce(const type *x, type *y, const int n = 1,
void maxReduce(const type *x, type *y, int n = 1,
int *rank_of_max = nullptr) const;
/**
@ -530,8 +530,7 @@ public: // Member functions
* \param y The output array for the scan
* \param n The number of values in the array (must match on all nodes)
*/
template <class type>
void sumScan(const type *x, type *y, const int n = 1) const;
template <class type> void sumScan(const type *x, type *y, int n = 1) const;
/**
* \brief Scan Min Reduce
@ -541,8 +540,7 @@ public: // Member functions
* \param y The output array for the scan
* \param n The number of values in the array (must match on all nodes)
*/
template <class type>
void minScan(const type *x, type *y, const int n = 1) const;
template <class type> void minScan(const type *x, type *y, int n = 1) const;
/**
* \brief Scan Max Reduce
@ -552,8 +550,7 @@ public: // Member functions
* \param y The output array for the scan
* \param n The number of values in the array (must match on all nodes)
*/
template <class type>
void maxScan(const type *x, type *y, const int n = 1) const;
template <class type> void maxScan(const type *x, type *y, int n = 1) const;
/**
* \brief Broadcast
@ -561,7 +558,7 @@ public: // Member functions
* \param value The input value for the broadcast.
* \param root The processor performing the broadcast
*/
template <class type> type bcast(const type &value, const int root) const;
template <class type> type bcast(const type &value, int root) const;
/**
* \brief Broadcast
@ -570,8 +567,7 @@ public: // Member functions
* \param n The number of values in the array (must match on all nodes)
* \param root The processor performing the broadcast
*/
template <class type>
void bcast(type *value, const int n, const int root) const;
template <class type> void bcast(type *value, int n, int root) const;
/**
* Perform a global barrier across all processors.
@ -595,8 +591,7 @@ public: // Member functions
* The matching recv must share this tag.
*/
template <class type>
void send(const type *buf, const int length, const int recv,
int tag = 0) const;
void send(const type *buf, int length, int recv, int tag = 0) const;
/*!
* @brief This function sends an MPI message with an array of bytes
@ -611,8 +606,7 @@ public: // Member functions
* to be sent with this message. Default tag is 0.
* The matching recv must share this tag.
*/
void sendBytes(const void *buf, const int N_bytes, const int recv,
int tag = 0) const;
void sendBytes(const void *buf, int N_bytes, int recv, int tag = 0) const;
/*!
* @brief This function sends an MPI message with an array
@ -627,8 +621,8 @@ public: // Member functions
* to be sent with this message.
*/
template <class type>
MPI_Request Isend(const type *buf, const int length, const int recv_proc,
const int tag) const;
MPI_Request Isend(const type *buf, int length, int recv_proc,
int tag) const;
/*!
* @brief This function sends an MPI message with an array of bytes
@ -642,8 +636,8 @@ public: // Member functions
* @param tag Integer argument specifying an integer tag
* to be sent with this message.
*/
MPI_Request IsendBytes(const void *buf, const int N_bytes,
const int recv_proc, const int tag) const;
MPI_Request IsendBytes(const void *buf, int N_bytes, int recv_proc,
int tag) const;
/*!
* @brief This function receives an MPI message with a data
@ -662,7 +656,7 @@ public: // Member functions
* by the tag of the incoming message. Default tag is 0.
*/
template <class type>
inline void recv(type *buf, int length, const int send, int tag) const {
inline void recv(type *buf, int length, int send, int tag) const {
int length2 = length;
recv(buf, length2, send, false, tag);
}
@ -687,7 +681,7 @@ public: // Member functions
* by the tag of the incoming message. Default tag is 0.
*/
template <class type>
void recv(type *buf, int &length, const int send, const bool get_length,
void recv(type *buf, int &length, int send, const bool get_length,
int tag) const;
/*!
@ -703,7 +697,7 @@ public: // Member functions
* must be matched by the tag of the incoming message. Default
* tag is 0.
*/
void recvBytes(void *buf, int &N_bytes, const int send, int tag = 0) const;
void recvBytes(void *buf, int &N_bytes, int send, int tag = 0) const;
/*!
* @brief This function receives an MPI message with a data
@ -716,8 +710,7 @@ public: // Member functions
* be matched by the tag of the incoming message.
*/
template <class type>
MPI_Request Irecv(type *buf, const int length, const int send_proc,
const int tag) const;
MPI_Request Irecv(type *buf, int length, int send_proc, int tag) const;
/*!
* @brief This function receives an MPI message with an array of
@ -731,8 +724,8 @@ public: // Member functions
* @param tag Integer argument specifying a tag which must
* be matched by the tag of the incoming message.
*/
MPI_Request IrecvBytes(void *buf, const int N_bytes, const int send_proc,
const int tag) const;
MPI_Request IrecvBytes(void *buf, int N_bytes, int send_proc,
int tag) const;
/*!
* @brief This function sends and receives data using a blocking call
@ -741,6 +734,39 @@ public: // Member functions
void sendrecv(const type *sendbuf, int sendcount, int dest, int sendtag,
type *recvbuf, int recvcount, int source, int recvtag) const;
/*!
* @brief This function sets up an Isend call (see MPI_Send_init)
* @param buf Pointer to the array buffer holding length values of the templated type.
* @param length Number of values in buf that we want to send.
* @param recv_proc Receiving processor number.
* @param tag Tag to send
* @return Returns a shared pointer to the MPI_Request.
* Note the shared pointer owns the request so the user does not
* need to manually free the request
*/
template <class type>
std::shared_ptr<MPI_Request> Isend_init(const type *buf, int length, int recv_proc,
int tag) const;
/*!
* @brief This function sets up an Irecv call (see MPI_Recv_init)
* @param buf Pointer to the array buffer with capacity for length values of the templated type.
* @param length Maximum number of values that can be stored in buf.
* @param send_proc Processor number of sender.
* @param tag Tag to match
* @return Returns a shared pointer to the MPI_Request.
* Note the shared pointer owns the request so the user does not
* need to manually free the request
*/
template <class type>
std::shared_ptr<MPI_Request> Irecv_init(type *buf, int length, int send_proc, int tag) const;
/*!
* @brief Start the MPI communication
* @param request Request to start
*/
void Start( MPI_Request &request );
/*!
* Each processor sends every other processor a single value.
* @param[in] x Input value for allGather
@ -792,7 +818,7 @@ public: // Member functions
* and the sizes and displacements will be returned (if desired).
*/
template <class type>
int allGather(const type *send_data, const int send_cnt, type *recv_data,
int allGather(const type *send_data, int send_cnt, type *recv_data,
int *recv_cnt = nullptr, int *recv_disp = nullptr,
bool known_recv = false) const;
@ -822,7 +848,7 @@ public: // Member functions
* @param recv_data Output array of received values (nxN)
*/
template <class type>
void allToAll(const int n, const type *send_data, type *recv_data) const;
void allToAll(int n, const type *send_data, type *recv_data) const;
/*!
* Each processor sends an array of data to the different processors.
@ -995,23 +1021,20 @@ public: // Member functions
MPI loadBalance(double localPerformance, std::vector<double> work);
private: // Private helper functions for templated MPI operations;
template <class type> void call_sumReduce(type *x, const int n = 1) const;
template <class type> void call_sumReduce(type *x, int n = 1) const;
template <class type>
void call_sumReduce(const type *x, type *y, const int n = 1) const;
void call_sumReduce(const type *x, type *y, int n = 1) const;
template <class type>
void call_minReduce(type *x, const int n = 1,
void call_minReduce(type *x, int n = 1, int *rank_of_min = nullptr) const;
template <class type>
void call_minReduce(const type *x, type *y, int n = 1,
int *rank_of_min = nullptr) const;
template <class type>
void call_minReduce(const type *x, type *y, const int n = 1,
int *rank_of_min = nullptr) const;
void call_maxReduce(type *x, int n = 1, int *rank_of_max = nullptr) const;
template <class type>
void call_maxReduce(type *x, const int n = 1,
void call_maxReduce(const type *x, type *y, int n = 1,
int *rank_of_max = nullptr) const;
template <class type>
void call_maxReduce(const type *x, type *y, const int n = 1,
int *rank_of_max = nullptr) const;
template <class type>
void call_bcast(type *x, const int n, const int root) const;
template <class type> void call_bcast(type *x, int n, int root) const;
template <class type>
void call_allGather(const type &x_in, type *x_out) const;
template <class type>

850
common/Membrane.cpp Normal file
View File

@ -0,0 +1,850 @@
/* Membrane class for lattice Boltzmann models */
#include "common/Membrane.h"
#include "analysis/distance.h"
/**
 * \brief Build the membrane communication structure from an existing ScaLBL communicator
 * @param sComm - communicator that supplies the domain size, neighbor ranks and send/recv lists
 * @param dvcNeighborList - device neighbor list (18 entries per site) copied as the initial list
 * @param Nsites - number of active lattice sites (stored as Np)
 */
Membrane::Membrane(std::shared_ptr <ScaLBL_Communicator> sComm, int *dvcNeighborList, int Nsites) {

    Np = Nsites;

    /* Everything below is only allocated by Create(). Start from a null / empty
     * state so that the destructor is well defined when Create() is never called
     * (delete[] and the device free routines are then handed null pointers
     * instead of indeterminate values). */
    membraneLinkCount = 0;
    membraneLinks = nullptr;
    membraneTag = nullptr;
    membraneDist = nullptr;
    membraneOrientation = nullptr;
    MembraneLinks = nullptr;
    MembraneCoef = nullptr;
    MembraneDistance = nullptr;
    MembraneOrientation = nullptr;
    coefficient_x = nullptr;
    coefficient_X = nullptr;
    coefficient_y = nullptr;
    coefficient_Y = nullptr;
    coefficient_z = nullptr;
    coefficient_Z = nullptr;

    /* Neighbor list: 18 entries per site. Sizes are computed in size_t so the
     * product cannot overflow int before it is widened. */
    const size_t neighborCount = 18 * static_cast<size_t>(Np);
    const size_t neighborBytes = neighborCount * sizeof(int);
    initialNeighborList = new int[neighborCount];
    ScaLBL_AllocateDeviceMemory((void **)&NeighborList, neighborBytes);

    Lock=false; // unlock the communicator
    //......................................................................................
    // Create a separate copy of the communicator for the device
    MPI_COMM_SCALBL = sComm->MPI_COMM_SCALBL.dup();

    /* Keep a host copy of the original neighbor list and seed the device copy with it */
    ScaLBL_CopyToHost(initialNeighborList, dvcNeighborList, neighborBytes);
    sComm->MPI_COMM_SCALBL.barrier();
    ScaLBL_CopyToDevice(NeighborList, initialNeighborList, neighborBytes);

    //......................................................................................
    // Copy the domain size and communication information directly from sComm
    Nx = sComm->Nx;
    Ny = sComm->Ny;
    Nz = sComm->Nz;
    N = Nx*Ny*Nz;

    rank=sComm->rank;
    rank_x=sComm->rank_x;
    rank_y=sComm->rank_y;
    rank_z=sComm->rank_z;
    rank_X=sComm->rank_X;
    rank_Y=sComm->rank_Y;
    rank_Z=sComm->rank_Z;

    if (rank == 0){
        printf("**** Creating membrane data structure ****** \n");
        printf(" Number of active lattice sites (rank = %i): %i \n",rank, Np);
    }

    sendCount_x=sComm->sendCount_x;
    sendCount_y=sComm->sendCount_y;
    sendCount_z=sComm->sendCount_z;
    sendCount_X=sComm->sendCount_X;
    sendCount_Y=sComm->sendCount_Y;
    sendCount_Z=sComm->sendCount_Z;

    recvCount_x=sComm->recvCount_x;
    recvCount_y=sComm->recvCount_y;
    recvCount_z=sComm->recvCount_z;
    recvCount_X=sComm->recvCount_X;
    recvCount_Y=sComm->recvCount_Y;
    recvCount_Z=sComm->recvCount_Z;

    /* Send lists are filled by copySendList() and consumed by the pack routine
     * with sendCount_* entries, so they are sized by the send counts (they were
     * previously sized by the recv counts). */
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_x, sendCount_x*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_y, sendCount_y*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_z, sendCount_z*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_X, sendCount_X*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Y, sendCount_Y*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcSendList_Z, sendCount_Z*sizeof(int));

    /* Per-entry membrane labels for the recv lists (written by D3Q7_MapRecv) */
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvLinks_x, recvCount_x*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvLinks_y, recvCount_y*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvLinks_z, recvCount_z*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvLinks_X, recvCount_X*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvLinks_Y, recvCount_Y*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvLinks_Z, recvCount_Z*sizeof(int));

    /* Recv lists copied from the communicator */
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_x, recvCount_x*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_y, recvCount_y*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_z, recvCount_z*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_X, recvCount_X*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_Y, recvCount_Y*sizeof(int));
    ScaLBL_AllocateZeroCopy((void **) &dvcRecvDist_Z, recvCount_Z*sizeof(int));

    /* Message buffers: one double per communicated value */
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_x, sendCount_x*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_y, sendCount_y*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_z, sendCount_z*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_X, sendCount_X*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_Y, sendCount_Y*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &sendbuf_Z, sendCount_Z*sizeof(double));

    ScaLBL_AllocateZeroCopy((void **) &recvbuf_x, recvCount_x*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_y, recvCount_y*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_z, recvCount_z*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_X, recvCount_X*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_Y, recvCount_Y*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &recvbuf_Z, recvCount_Z*sizeof(double));

    /* Copy the communication lists; the counts are refreshed from the return values */
    sendCount_x=sComm->copySendList("x", dvcSendList_x);
    sendCount_y=sComm->copySendList("y", dvcSendList_y);
    sendCount_z=sComm->copySendList("z", dvcSendList_z);
    sendCount_X=sComm->copySendList("X", dvcSendList_X);
    sendCount_Y=sComm->copySendList("Y", dvcSendList_Y);
    sendCount_Z=sComm->copySendList("Z", dvcSendList_Z);

    recvCount_x=sComm->copyRecvList("x", dvcRecvDist_x);
    recvCount_y=sComm->copyRecvList("y", dvcRecvDist_y);
    recvCount_z=sComm->copyRecvList("z", dvcRecvDist_z);
    recvCount_X=sComm->copyRecvList("X", dvcRecvDist_X);
    recvCount_Y=sComm->copyRecvList("Y", dvcRecvDist_Y);
    recvCount_Z=sComm->copyRecvList("Z", dvcRecvDist_Z);
}
/* Release the host arrays and the device / zero-copy buffers owned by the membrane */
Membrane::~Membrane() {

    /* Host arrays created with new[] */
    delete [] initialNeighborList;
    delete [] membraneLinks;
    delete [] membraneTag;
    delete [] membraneDist;

    /* Device-side buffers, released in the same order as before.
     * Only the six face directions (x,X,y,Y,z,Z) are allocated by this class,
     * so there are no edge (xy, xz, yz, ...) buffers to release. */
    void *const deviceBuffers[] = {
        // membrane coefficient buffers for the recv lists
        coefficient_x, coefficient_X,
        coefficient_y, coefficient_Y,
        coefficient_z, coefficient_Z,
        // neighbor list and membrane link data
        NeighborList,
        MembraneLinks,
        MembraneCoef,
        MembraneDistance,
        // send buffers
        sendbuf_x, sendbuf_X,
        sendbuf_y, sendbuf_Y,
        sendbuf_z, sendbuf_Z,
        // recv buffers
        recvbuf_x, recvbuf_X,
        recvbuf_y, recvbuf_Y,
        recvbuf_z, recvbuf_Z,
        // send lists
        dvcSendList_x, dvcSendList_X,
        dvcSendList_y, dvcSendList_Y,
        dvcSendList_z, dvcSendList_Z,
        // membrane labels for the recv lists
        dvcRecvLinks_x, dvcRecvLinks_X,
        dvcRecvLinks_y, dvcRecvLinks_Y,
        dvcRecvLinks_z, dvcRecvLinks_Z,
        // recv lists
        dvcRecvDist_x, dvcRecvDist_X,
        dvcRecvDist_y, dvcRecvDist_Y,
        dvcRecvDist_z, dvcRecvDist_Z
    };

    for (void *buffer : deviceBuffers) {
        ScaLBL_FreeDeviceMemory( buffer );
    }
}
/**
 * \brief Create the membrane structure from a signed distance function
 * \details A link between two neighboring active sites crosses the membrane when the
 *   signed distance changes sign along the link. Crossing links are cut from the
 *   neighbor list and stored as membrane links; the recv lists are then labelled
 *   according to the membrane location.
 * @param Distance - signed distance to the membrane on the regular (Nx,Ny,Nz) layout
 * @param Map - mapping from the regular layout to the compact layout (negative = inactive site)
 * @return number of membrane links stored
 *
 * NOTE(review): the host arrays and device buffers allocated here are not released
 * before being re-allocated, so calling Create() more than once on the same object
 * appears to leak the previous allocations.
 */
int Membrane::Create(DoubleArray &Distance, IntArray &Map){

    /* Lattice offsets for the 18 neighbor-list slots.
     * Slots come in opposite pairs: an even slot s holds the "negative" direction and
     * slot s+1 the opposite "positive" direction. Each link is counted and stored once,
     * from the positive (odd) slot. */
    static const int linkOffset[18][3] = {
        {-1, 0, 0}, { 1, 0, 0},
        { 0,-1, 0}, { 0, 1, 0},
        { 0, 0,-1}, { 0, 0, 1},
        {-1,-1, 0}, { 1, 1, 0},
        {-1, 1, 0}, { 1,-1, 0},
        {-1, 0,-1}, { 1, 0, 1},
        {-1, 0, 1}, { 1, 0,-1},
        { 0,-1,-1}, { 0, 1, 1},
        { 0,-1, 1}, { 0, 1,-1}
    };

    const int Q = 7; // for D3Q7 model
    /* D3Q7 only uses the six face links; a larger Q enables all 18 slots */
    const int slotCount = (Q > 7) ? 18 : 6;

    if (rank == 0) printf(" Copy initial neighborlist... \n");
    int * neighborList = new int[18*Np];
    /* Copy neighborList */
    for (int n = 0; n < 18*Np; n++){
        neighborList[n] = initialNeighborList[n];
    }

    /* go through the neighborlist structure */
    /* count & cut the links */
    if (rank == 0) printf(" Cut membrane links... \n");
    int mlink = 0;
    for (int k=1;k<Nz-1;k++){
        for (int j=1;j<Ny-1;j++){
            for (int i=1;i<Nx-1;i++){
                const int idx = Map(i,j,k);
                if (idx < 0) continue; // inactive site
                const double locdist = Distance(i,j,k);

                for (int slot = 0; slot < slotCount; slot++){
                    const int ni = i + linkOffset[slot][0];
                    const int nj = j + linkOffset[slot][1];
                    const int nk = k + linkOffset[slot][2];
                    const int neighbor = Map(ni,nj,nk);
                    const double dist = Distance(ni,nj,nk);

                    /* The same test is applied to every direction (the (+1,+1,0)
                     * link previously skipped the active-neighbor check, which
                     * would let the two passes disagree on the link count). */
                    if (dist*locdist < 0.0 && !(neighbor<0)){
                        if (slot % 2 == 0){
                            neighborList[slot*Np+idx] = idx + (slot+2)*Np;
                        }
                        else {
                            neighborList[slot*Np+idx] = idx + slot*Np;
                            mlink++;
                        }
                    }
                }
            }
        }
    }

    /* allocate memory */
    membraneTag = new int [mlink];
    membraneLinks = new int [2*mlink];
    membraneDist = new double [2*mlink];
    membraneLinkCount = mlink;

    if (rank == 0) printf(" (cut %i links crossing membrane) \n",mlink);

    /* construct the membrane*/
    /* *
     * Sites inside the membrane (negative distance) -- store at 2*mlink
     * Sites outside the membrane (positive distance) -- store at 2*mlink+1
     */
    if (rank == 0) printf(" Construct membrane data structures... \n");
    mlink = 0;
    for (int k=1;k<Nz-1;k++){
        for (int j=1;j<Ny-1;j++){
            for (int i=1;i<Nx-1;i++){
                const int idx = Map(i,j,k);
                if (idx < 0) continue; // inactive site
                const double locdist = Distance(i,j,k);

                /* positive (odd) slots only, so each link is stored once */
                for (int slot = 1; slot < slotCount; slot += 2){
                    const int ni = i + linkOffset[slot][0];
                    const int nj = j + linkOffset[slot][1];
                    const int nk = k + linkOffset[slot][2];
                    const int neighbor = Map(ni,nj,nk);
                    const double dist = Distance(ni,nj,nk);

                    if (dist*locdist < 0.0 && !(neighbor<0)){
                        const bool localInside = (locdist < 0.0);
                        const int localSite = localInside ? 2*mlink : 2*mlink+1;
                        const int neighborSite = localInside ? 2*mlink+1 : 2*mlink;

                        membraneLinks[localSite] = idx + slot*Np;
                        membraneLinks[neighborSite] = neighbor + (slot+1)*Np;
                        membraneDist[localSite] = locdist;
                        membraneDist[neighborSite] = dist;
                        mlink++;
                    }
                }
            }
        }
    }

    if (rank == 0) printf(" Create device data structures... \n");
    /* Create device copies of data structures */
    ScaLBL_AllocateDeviceMemory((void **)&MembraneLinks, 2*mlink*sizeof(int));
    ScaLBL_AllocateDeviceMemory((void **)&MembraneCoef, 2*mlink*sizeof(double));
    /* the device distance holds the full field rather than the per-link values */
    ScaLBL_AllocateDeviceMemory((void **)&MembraneDistance, Nx*Ny*Nz*sizeof(double));

    ScaLBL_CopyToDevice(NeighborList, neighborList, 18*Np*sizeof(int));
    ScaLBL_CopyToDevice(MembraneLinks, membraneLinks, 2*mlink*sizeof(int));
    ScaLBL_CopyToDevice(MembraneDistance, Distance.data(), Nx*Ny*Nz*sizeof(double));

    /* Temporary map from the compact site index back to the regular layout index */
    int *dvcTmpMap;
    ScaLBL_AllocateDeviceMemory((void **)&dvcTmpMap, sizeof(int) * Np);
    int *TmpMap;
    TmpMap = new int[Np];
    for (int k = 1; k < Nz - 1; k++) {
        for (int j = 1; j < Ny - 1; j++) {
            for (int i = 1; i < Nx - 1; i++) {
                const int idx = Map(i, j, k);
                if (!(idx < 0))
                    TmpMap[idx] = k * Nx * Ny + j * Nx + i;
            }
        }
    }
    ScaLBL_CopyToDevice(dvcTmpMap, TmpMap, sizeof(int) * Np);

    if (rank == 0) printf(" Construct communication data structures... \n");
    /* Re-organize communication based on membrane structure */
    //...label the recv list for the X face, direction (-1,0,0)
    linkCount_X[0] = D3Q7_MapRecv(-1,0,0, dvcRecvDist_X,recvCount_X,dvcRecvLinks_X,Distance,dvcTmpMap);
    //...label the recv list for the x face, direction (+1,0,0)
    linkCount_x[0] = D3Q7_MapRecv(1,0,0, dvcRecvDist_x,recvCount_x,dvcRecvLinks_x,Distance,dvcTmpMap);
    //...label the recv list for the Y face, direction (0,-1,0)
    linkCount_Y[0] = D3Q7_MapRecv(0,-1,0, dvcRecvDist_Y,recvCount_Y,dvcRecvLinks_Y,Distance,dvcTmpMap);
    //...label the recv list for the y face, direction (0,+1,0)
    linkCount_y[0] = D3Q7_MapRecv(0,1,0, dvcRecvDist_y,recvCount_y,dvcRecvLinks_y,Distance,dvcTmpMap);
    //...label the recv list for the Z face, direction (0,0,-1)
    linkCount_Z[0] = D3Q7_MapRecv(0,0,-1, dvcRecvDist_Z,recvCount_Z,dvcRecvLinks_Z,Distance,dvcTmpMap);
    //...label the recv list for the z face, direction (0,0,+1)
    linkCount_z[0] = D3Q7_MapRecv(0,0,1, dvcRecvDist_z,recvCount_z,dvcRecvLinks_z,Distance,dvcTmpMap);
    //......................................................................................
    MPI_COMM_SCALBL.barrier();
    ScaLBL_DeviceBarrier();
    //.......................................................................
    SendCount = sendCount_x+sendCount_X+sendCount_y+sendCount_Y+sendCount_z+sendCount_Z;
    RecvCount = recvCount_x+recvCount_X+recvCount_y+recvCount_Y+recvCount_z+recvCount_Z;
    CommunicationCount = SendCount+RecvCount;
    //......................................................................................
    // Allocate membrane coefficient buffers (for d3q7 recv)
    ScaLBL_AllocateZeroCopy((void **) &coefficient_x, 2*(recvCount_x )*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &coefficient_X, 2*(recvCount_X)*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &coefficient_y, 2*(recvCount_y)*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &coefficient_Y, 2*(recvCount_Y)*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &coefficient_z, 2*(recvCount_z)*sizeof(double));
    ScaLBL_AllocateZeroCopy((void **) &coefficient_Z, 2*(recvCount_Z)*sizeof(double));
    //......................................................................................

    /* clean up the work arrays */
    ScaLBL_FreeDeviceMemory (dvcTmpMap);
    delete [] neighborList;
    delete [] TmpMap;
    return mlink;
}
/* Label every entry of a recv list according to the membrane location.
 * For each entry the signed distance at the receiving site is compared with the
 * distance at the site offset by -(Cqx,Cqy,Cqz). Labels written to membraneRecvLabels:
 *   0 - the two distances do not have opposite sign
 *   1 - opposite sign, receiving site has positive distance
 *   2 - opposite sign, receiving site does not have positive distance
 * Returns the number of entries labelled 1 or 2. */
int Membrane::D3Q7_MapRecv(int Cqx, int Cqy, int Cqz, int *d3q19_recvlist,
        int count, int *membraneRecvLabels, DoubleArray &Distance, int *dvcMap){

    /* Host copies of the recv list and of the compact -> regular index map */
    int *recvSites = new int [count];
    ScaLBL_CopyToHost(recvSites, d3q19_recvlist, count*sizeof(int));

    int *regularIndex = new int [Np];
    ScaLBL_CopyToHost(regularIndex, dvcMap, Np*sizeof(int));

    int *labels = new int [count];
    int crossingCount = 0;

    for (int entry = 0; entry < count; ++entry){
        // compact index taken from the list, then its index on the regular layout
        const int site = recvSites[entry];
        const int n = regularIndex[site];

        // recover the 3-D indices from the regular layout index
        const int k = n/(Nx*Ny);
        const int j = (n-Nx*Ny*k)/Nx;
        const int i = n-Nx*Ny*k-Nx*j;

        const double distanceLocal = Distance(i,j,k);
        // step back along the streaming direction for the non-local distribution
        const double distanceNonLocal = Distance(i-Cqx,j-Cqy,k-Cqz);

        int label = 0;
        if (distanceLocal*distanceNonLocal < 0.0){
            label = (distanceLocal > 0.0) ? 1 : 2;
            ++crossingCount;
        }
        labels[entry] = label;
    }

    // Return updated version to the device
    ScaLBL_CopyToDevice(membraneRecvLabels, labels, count*sizeof(int));

    // clean up the work arrays
    delete [] labels;
    delete [] regularIndex;
    delete [] recvSites;
    return crossingCount;
}
void Membrane::SendD3Q7AA(double *dist){
if (Lock==true){
ERROR("Membrane Error (SendD3Q7): Membrane communicator is locked -- did you forget to match Send/Recv calls?");
}
else{
Lock=true;
}
// assign tag of 37 to D3Q7 communication
sendtag = recvtag = 37;
ScaLBL_DeviceBarrier();
// Pack the distributions
//...Packing for x face(q=2)................................
ScaLBL_D3Q19_Pack(2,dvcSendList_x,0,sendCount_x,sendbuf_x,dist,Np);
req2[0] = MPI_COMM_SCALBL.Irecv(recvbuf_X, recvCount_X,rank_X,recvtag);
req1[0] = MPI_COMM_SCALBL.Isend(sendbuf_x, sendCount_x,rank_x,sendtag);
//...Packing for X face(q=1)................................
ScaLBL_D3Q19_Pack(1,dvcSendList_X,0,sendCount_X,sendbuf_X,dist,Np);
req2[1] = MPI_COMM_SCALBL.Irecv(recvbuf_x, recvCount_x,rank_x,recvtag);
req1[1] = MPI_COMM_SCALBL.Isend(sendbuf_X, sendCount_X,rank_X,sendtag);
//for (int idx=0; idx<sendCount_X; idx++) printf(" SendX(%i)=%e \n",idx,sendbuf_X[idx]);
//...Packing for y face(q=4).................................
ScaLBL_D3Q19_Pack(4,dvcSendList_y,0,sendCount_y,sendbuf_y,dist,Np);
req2[2] = MPI_COMM_SCALBL.Irecv(recvbuf_Y, recvCount_Y,rank_Y,recvtag);
req1[2] = MPI_COMM_SCALBL.Isend(sendbuf_y, sendCount_y,rank_y,sendtag);
//...Packing for Y face(q=3).................................
ScaLBL_D3Q19_Pack(3,dvcSendList_Y,0,sendCount_Y,sendbuf_Y,dist,Np);
req2[3] = MPI_COMM_SCALBL.Irecv(recvbuf_y, recvCount_y,rank_y,recvtag);
req1[3] = MPI_COMM_SCALBL.Isend(sendbuf_Y, sendCount_Y,rank_Y,sendtag);
//for (int idx=0; idx<sendCount_Y; idx++) printf(" SendY(%i)=%e \n",idx,sendbuf_Y[idx]);
//...Packing for z face(q=6)................................
ScaLBL_D3Q19_Pack(6,dvcSendList_z,0,sendCount_z,sendbuf_z,dist,Np);
req2[4] = MPI_COMM_SCALBL.Irecv(recvbuf_Z, recvCount_Z,rank_Z,recvtag);
req1[4] = MPI_COMM_SCALBL.Isend(sendbuf_z, sendCount_z,rank_z,sendtag);
//...Packing for Z face(q=5)................................
ScaLBL_D3Q19_Pack(5,dvcSendList_Z,0,sendCount_Z,sendbuf_Z,dist,Np);
req2[5] = MPI_COMM_SCALBL.Irecv(recvbuf_z, recvCount_z,rank_z,recvtag);
req1[5] = MPI_COMM_SCALBL.Isend(sendbuf_Z, sendCount_Z,rank_Z,sendtag);
}
/* Complete the exchange started by SendD3Q7AA: wait for the six sends and
 * receives, unpack the received values into dist with the membrane
 * coefficients, then unlock the communicator. */
void Membrane::RecvD3Q7AA(double *dist){

    // Wait for completion of the D3Q7 communication
    MPI_COMM_SCALBL.waitAll(6,req1);
    MPI_COMM_SCALBL.waitAll(6,req2);
    ScaLBL_DeviceBarrier();

    // NOTE: AA Routine writes to opposite
    /* Unpack one face on the device */
    auto unpackFace = [&](int q, int *recvList, double *recvBuf, int nRecv, double *coef) {
        ScaLBL_D3Q7_Membrane_Unpack(q,recvList, recvBuf,nRecv,dist,Np,coef);
    };

    unpackFace(2, dvcRecvDist_x, recvbuf_x, recvCount_x, coefficient_x); // x face (q=2)
    unpackFace(1, dvcRecvDist_X, recvbuf_X, recvCount_X, coefficient_X); // X face (q=1)
    unpackFace(4, dvcRecvDist_y, recvbuf_y, recvCount_y, coefficient_y); // y face (q=4)
    unpackFace(3, dvcRecvDist_Y, recvbuf_Y, recvCount_Y, coefficient_Y); // Y face (q=3)
    unpackFace(6, dvcRecvDist_z, recvbuf_z, recvCount_z, coefficient_z); // z face (q=6)
    unpackFace(5, dvcRecvDist_Z, recvbuf_Z, recvCount_Z, coefficient_Z); // Z face (q=5)

    MPI_COMM_SCALBL.barrier();

    Lock=false; // unlock the communicator after communications complete
}
/* Forward the stored membrane links and coefficients, together with dist and den,
 * to the D3Q7 membrane ion transport routine. */
void Membrane::IonTransport(double *dist, double *den){
    ScaLBL_D3Q7_Membrane_IonTransport(
            MembraneLinks,
            MembraneCoef,
            dist,
            den,
            membraneLinkCount,
            Np);
}
// std::shared_ptr<Database> db){
void Membrane::AssignCoefficients(int *Map, double *Psi, double Threshold,
double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn,
double ThresholdMassFractionOut){
/* Assign mass transfer coefficients to the membrane data structure */
if (membraneLinkCount > 0)
ScaLBL_D3Q7_Membrane_AssignLinkCoef(MembraneLinks, Map, MembraneDistance, Psi, MembraneCoef,
Threshold, MassFractionIn, MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut,
membraneLinkCount, Nx, Ny, Nz, Np);
if (linkCount_X[0] < recvCount_X)
ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(-1,0,0,Map,MembraneDistance,Psi,Threshold,
MassFractionIn,MassFractionOut,ThresholdMassFractionIn,ThresholdMassFractionOut,
dvcRecvDist_X,dvcRecvLinks_X,coefficient_X,0,linkCount_X[0],recvCount_X,
Np,Nx,Ny,Nz);
if (linkCount_x[0] < recvCount_x)
ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(1,0,0,Map,MembraneDistance,Psi,Threshold,
MassFractionIn,MassFractionOut,ThresholdMassFractionIn,ThresholdMassFractionOut,
dvcRecvDist_x,dvcRecvLinks_x,coefficient_x,0,linkCount_x[0],recvCount_x,
Np,Nx,Ny,Nz);
if (linkCount_Y[0] < recvCount_Y)
ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(0,-1,0,Map,MembraneDistance,Psi,Threshold,
MassFractionIn,MassFractionOut,ThresholdMassFractionIn,ThresholdMassFractionOut,
dvcRecvDist_Y,dvcRecvLinks_Y,coefficient_Y,0,linkCount_Y[0],recvCount_Y,
Np,Nx,Ny,Nz);
if (linkCount_y[0]<recvCount_y)
ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(0,1,0,Map,MembraneDistance,Psi,Threshold,
MassFractionIn,MassFractionOut,ThresholdMassFractionIn,ThresholdMassFractionOut,
dvcRecvDist_y,dvcRecvLinks_y,coefficient_y,0,linkCount_y[0],recvCount_y,
Np,Nx,Ny,Nz);
if (linkCount_Z[0]<recvCount_Z)
ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(0,0,-1,Map,MembraneDistance,Psi,Threshold,
MassFractionIn,MassFractionOut,ThresholdMassFractionIn,ThresholdMassFractionOut,
dvcRecvDist_Z,dvcRecvLinks_Z,coefficient_Z,0,linkCount_Z[0],recvCount_Z,
Np,Nx,Ny,Nz);
if (linkCount_z[0]<recvCount_z)
ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(0,0,1,Map,MembraneDistance,Psi,Threshold,
MassFractionIn,MassFractionOut,ThresholdMassFractionIn,ThresholdMassFractionOut,
dvcRecvDist_z,dvcRecvLinks_z,coefficient_z,0,linkCount_z[0],recvCount_z,
Np,Nx,Ny,Nz);
}

178
common/Membrane.h Normal file
View File

@ -0,0 +1,178 @@
/* Membrane class for lattice Boltzmann models */
#ifndef ScaLBL_Membrane_INC
#define ScaLBL_Membrane_INC
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <iostream>
#include <exception>
#include <stdexcept>
#include <fstream>
#include "common/ScaLBL.h"
/**
* \brief Unpack D3Q7 distributions after communication using links determined based on membrane location
* @param q - index of the distribution to unpack (the Membrane class passes 1 through 6)
* @param d3q7_recvlist - list of entries to unpack, one per received value
* @param recvbuf - memory buffer where received values have been stored
* @param count - number of values to unpack
* @param dist - memory buffer to hold the distributions
* @param N - size of the distributions (the Membrane class passes its number of active sites, Np)
* @param coef - coefficient buffer for the received values; presumably the membrane
*               mass transport coefficients -- TODO confirm against the implementation
*/
extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q,
int *d3q7_recvlist, double *recvbuf, int count,
double *dist, int N, double *coef);
/**
* \brief Set custom link rules for D3Q19 distribution based on membrane location
* @param q - index for distribution based on D3Q19 discrete velocity structure
* @param list - list of distributions to communicate
* @param links - list of active links based on the membrane location
* @param coef - coefficient to determine the local mass transport for each membrane link
* @param start - index to start parsing the list
* @param offset - offset to start reading membrane links
* @param linkCount - number of values to unpack
* @param recvbuf - memory buffer where received values have been stored
* @param dist - memory buffer to hold the distributions
* @param N - size of the distributions (derived from Domain structure)
*/
extern "C" void Membrane_D3Q19_Transport(int q, int *list, int *links, double *coef, int start, int offset,
int linkCount, double *recvbuf, double *dist, int N);
/**
* \class Membrane
* @brief
* The Membrane class operates on ScaLBL data structures to insert membrane
*
*/
class Membrane {
public:
int Np;
int Nx,Ny,Nz,N;
int membraneLinkCount;
int *initialNeighborList; // original neighborlist
int *NeighborList; // modified neighborlist
/* host data structures */
int *membraneLinks; // D3Q7 links that cross membrane
int *membraneTag; // label each link in the membrane
double *membraneDist; // distance to membrane for each linked site
double *membraneOrientation; // distance to membrane for each linked site
/*
* Device data structures
*/
int *MembraneLinks;
double *MembraneCoef; // mass transport coefficient for the membrane
double *MembraneDistance;
double *MembraneOrientation;
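/**
* \brief Create a membrane object from an existing ScaLBL communicator
* @param sComm - ScaLBL communicator that supplies the domain size and communication lists
* @param dvcNeighborList - device copy of the neighbor list for each site
* @param Nsites - number of active lattice sites
*/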
//Membrane(std::shared_ptr <Domain> Dm, int *initialNeighborList, int Nsites);
Membrane(std::shared_ptr <ScaLBL_Communicator> sComm, int *dvcNeighborList, int Nsites);
/**
* \brief Destructor
*/
~Membrane();
/**
* \brief Create membrane
* \details Create membrane structure from signed distance function
* @param Dm - domain structure
* @param Distance - signed distance to membrane
* @param Map - mapping between regular layout and compact layout
*/
int Create(DoubleArray &Distance, IntArray &Map);
void SendD3Q7AA(double *dist);
void RecvD3Q7AA(double *dist);
void AssignCoefficients(int *Map, double *Psi, double Threshold,
double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn,
double ThresholdMassFractionOut);
void IonTransport(double *dist, double *den);
//......................................................................................
// Buffers to store data sent and recieved by this MPI process
double *sendbuf_x, *sendbuf_y, *sendbuf_z, *sendbuf_X, *sendbuf_Y, *sendbuf_Z;
double *recvbuf_x, *recvbuf_y, *recvbuf_z, *recvbuf_X, *recvbuf_Y, *recvbuf_Z;
//......................................................................................
private:
bool Lock; // use Lock to make sure only one call at a time to protect data in transit
int sendtag, recvtag;
int iproc,jproc,kproc;
int nprocx,nprocy,nprocz;
// Give the object it's own MPI communicator
RankInfoStruct rank_info;
Utilities::MPI MPI_COMM_SCALBL; // MPI Communicator for this domain
MPI_Request req1[18],req2[18];
/**
* \brief Set up membrane communication
* \details associate p2p communication links to membrane where necessary
* returns the number of membrane links
* regular communications are stored in the first part of the list
* membrane communications are stored in the last part of the list
* @param Cqx - discrete velocity (x)
* @param Cqy - discrete velocity (y)
* @param Cqz - discrete velocity (z)
* @param d3q19_recvlist - device array with the saved list
* @param count - number recieved values
* @param membraneRecvLabels - sorted list with regular and membrane links
* @param Distance - signed distance to membrane
* @param dvcMap - data structure used to define mapping between dense and sparse representation
* @param Np - number of sites in dense representation
* */
int D3Q7_MapRecv(int Cqx, int Cqy, int Cqz, int *d3q19_recvlist,
int count, int *membraneRecvLabels, DoubleArray &Distance, int *dvcMap);
//......................................................................................
// MPI ranks for all 18 neighbors
//......................................................................................
// These variables are all private to prevent external things from modifying them!!
//......................................................................................
int rank;
int rank_x,rank_y,rank_z,rank_X,rank_Y,rank_Z;
int rank_xy,rank_XY,rank_xY,rank_Xy;
int rank_xz,rank_XZ,rank_xZ,rank_Xz;
int rank_yz,rank_YZ,rank_yZ,rank_Yz;
//......................................................................................
int SendCount, RecvCount, CommunicationCount;
//......................................................................................
int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z;
//......................................................................................
int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z;
//......................................................................................
int linkCount_x[5], linkCount_y[5], linkCount_z[5], linkCount_X[5], linkCount_Y[5], linkCount_Z[5];
int linkCount_xy, linkCount_yz, linkCount_xz, linkCount_Xy, linkCount_Yz, linkCount_xZ;
int linkCount_xY, linkCount_yZ, linkCount_Xz, linkCount_XY, linkCount_YZ, linkCount_XZ;
//......................................................................................
// Send buffers that reside on the compute device
int *dvcSendList_x, *dvcSendList_y, *dvcSendList_z, *dvcSendList_X, *dvcSendList_Y, *dvcSendList_Z;
//int *dvcSendList_xy, *dvcSendList_yz, *dvcSendList_xz, *dvcSendList_Xy, *dvcSendList_Yz, *dvcSendList_xZ;
//int *dvcSendList_xY, *dvcSendList_yZ, *dvcSendList_Xz, *dvcSendList_XY, *dvcSendList_YZ, *dvcSendList_XZ;
// Recieve buffers that reside on the compute device
int *dvcRecvList_x, *dvcRecvList_y, *dvcRecvList_z, *dvcRecvList_X, *dvcRecvList_Y, *dvcRecvList_Z;
//int *dvcRecvList_xy, *dvcRecvList_yz, *dvcRecvList_xz, *dvcRecvList_Xy, *dvcRecvList_Yz, *dvcRecvList_xZ;
//int *dvcRecvList_xY, *dvcRecvList_yZ, *dvcRecvList_Xz, *dvcRecvList_XY, *dvcRecvList_YZ, *dvcRecvList_XZ;
// Link lists that reside on the compute device
int *dvcRecvLinks_x, *dvcRecvLinks_y, *dvcRecvLinks_z, *dvcRecvLinks_X, *dvcRecvLinks_Y, *dvcRecvLinks_Z;
//int *dvcRecvLinks_xy, *dvcRecvLinks_yz, *dvcRecvLinks_xz, *dvcRecvLinks_Xy, *dvcRecvLinks_Yz, *dvcRecvLinks_xZ;
//int *dvcRecvLinks_xY, *dvcRecvLinks_yZ, *dvcRecvLinks_Xz, *dvcRecvLinks_XY, *dvcRecvLinks_YZ, *dvcRecvLinks_XZ;
// Recieve buffers for the distributions
int *dvcRecvDist_x, *dvcRecvDist_y, *dvcRecvDist_z, *dvcRecvDist_X, *dvcRecvDist_Y, *dvcRecvDist_Z;
//int *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ;
//int *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, *dvcRecvDist_YZ, *dvcRecvDist_XZ;
//......................................................................................
// mass transfer coefficient arrays
double *coefficient_x, *coefficient_X, *coefficient_y, *coefficient_Y, *coefficient_z, *coefficient_Z;
//......................................................................................
};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -217,6 +217,25 @@ extern "C" void ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int
*/
extern "C" void ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz);
// MEMBRANE MODEL
extern "C" void ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef, double *dist, double *Den, int memLinks, int Np);
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int memLinks, int Nx, int Ny, int Nz, int Np);
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
const int Cqx, const int Cqy, int const Cqz,
int *Map, double *Distance, double *Psi, double Threshold,
double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
const int N, const int Nx, const int Ny, const int Nz);
extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q,
int *d3q7_recvlist, double *recvbuf, int count,
double *dist, int N, double *coef);
// GREYSCALE MODEL (Single-component)
extern "C" void ScaLBL_D3Q19_GreyIMRT_Init(double *Dist, int Np, double Den);
@ -262,12 +281,21 @@ extern "C" void ScaLBL_D3Q19_AAodd_GreyscaleColor_CP(int *d_neighborList, int *M
//extern "C" void ScaLBL_Update_GreyscalePotential(int *Map, double *Phi, double *Psi, double *Poro, double *Perm, double alpha, double W,
// int start, int finish, int Np);
extern "C" void ScaLBL_D3Q19_AAeven_Compact( double *d_dist, int Np);
extern "C" void ScaLBL_D3Q19_AAodd_Compact( int *d_neighborList, double *d_dist, int Np);
// ION TRANSPORT MODEL
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q7_AAodd_Ion_v0(int *neighborList, double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q7_AAeven_Ion_v0(double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np);
@ -278,7 +306,8 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDi
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np);
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np);
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, double IonValence, int ion_component, int start, int finish, int Np);
// LBM Poisson solver
@ -350,6 +379,22 @@ extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *d
* @param Np - size of local sub-domain (derived from Domain structure)
*/
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np);
extern "C" void ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
double *dist, double *Den_charge,
double *Psi, double *ElectricField,
double tau, double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np);
extern "C" void ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
double *Den_charge, double *Psi,
double *ElectricField, double *Error, double tau,
double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np);
extern "C" void ScaLBL_D3Q19_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np);
// LBM Stokes Model (adapted from MRT model)
extern "C" void ScaLBL_D3Q19_AAeven_StokesMRT(double *dist, double *Velocity, double *ChargeDensity, double *ElectricField, double rlx_setA, double rlx_setB,
@ -675,10 +720,26 @@ public:
*/
~ScaLBL_Communicator();
//......................................................................................
Utilities::MPI MPI_COMM_SCALBL; // MPI Communicator for this domain
int rank;
int rank_x,rank_y,rank_z,rank_X,rank_Y,rank_Z;
int rank_xy,rank_XY,rank_xY,rank_Xy;
int rank_xz,rank_XZ,rank_xZ,rank_Xz;
int rank_yz,rank_YZ,rank_yZ,rank_Yz;
//......................................................................................
unsigned long int CommunicationCount,SendCount,RecvCount;
int Nx,Ny,Nz,N;
int n_bb_d3q7, n_bb_d3q19;
int BoundaryCondition;
//......................................................................................
int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z;
int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ;
int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ;
//......................................................................................
int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z;
int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ;
int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ;
//......................................................................................
int next;
int first_interior,last_interior;
@ -699,9 +760,19 @@ public:
int LastExterior();
int FirstInterior();
int LastInterior();
int copySendList(const char *dir, int *buffer);
int copyRecvList(const char *dir, int *buffer);
double GetPerformance(int *NeighborList, double *fq, int Np);
int MemoryOptimizedLayoutAA(IntArray &Map, int *neighborList, signed char *id, int Np, int width);
/**
* \brief Create membrane data structure
* - cut lattice links based on distance map
* @param Distance - signed distance to membrane
* @param neighborList - data structure that retains lattice links
* @param Np - number of lattice sites
* @param width - halo width for the model
*/
void Barrier(){
ScaLBL_DeviceBarrier();
MPI_COMM_SCALBL.barrier();
@ -764,29 +835,12 @@ private:
int sendtag,recvtag;
// Give the object its own MPI communicator
RankInfoStruct rank_info;
Utilities::MPI MPI_COMM_SCALBL; // MPI Communicator for this domain
MPI_Request req1[18],req2[18];
//......................................................................................
// MPI ranks for all 18 neighbors
//......................................................................................
// These variables are all private to prevent external things from modifying them!!
//......................................................................................
int rank;
int rank_x,rank_y,rank_z,rank_X,rank_Y,rank_Z;
int rank_xy,rank_XY,rank_xY,rank_Xy;
int rank_xz,rank_XZ,rank_xZ,rank_Xz;
int rank_yz,rank_YZ,rank_yZ,rank_Yz;
//......................................................................................
//......................................................................................
int sendCount_x, sendCount_y, sendCount_z, sendCount_X, sendCount_Y, sendCount_Z;
int sendCount_xy, sendCount_yz, sendCount_xz, sendCount_Xy, sendCount_Yz, sendCount_xZ;
int sendCount_xY, sendCount_yZ, sendCount_Xz, sendCount_XY, sendCount_YZ, sendCount_XZ;
//......................................................................................
int recvCount_x, recvCount_y, recvCount_z, recvCount_X, recvCount_Y, recvCount_Z;
int recvCount_xy, recvCount_yz, recvCount_xz, recvCount_Xy, recvCount_Yz, recvCount_xZ;
int recvCount_xY, recvCount_yZ, recvCount_Xz, recvCount_XY, recvCount_YZ, recvCount_XZ;
//......................................................................................
//......................................................................................
// Send buffers that reside on the compute device
int *dvcSendList_x, *dvcSendList_y, *dvcSendList_z, *dvcSendList_X, *dvcSendList_Y, *dvcSendList_Z;
int *dvcSendList_xy, *dvcSendList_yz, *dvcSendList_xz, *dvcSendList_Xy, *dvcSendList_Yz, *dvcSendList_xZ;
@ -800,6 +854,14 @@ private:
int *dvcRecvDist_xy, *dvcRecvDist_yz, *dvcRecvDist_xz, *dvcRecvDist_Xy, *dvcRecvDist_Yz, *dvcRecvDist_xZ;
int *dvcRecvDist_xY, *dvcRecvDist_yZ, *dvcRecvDist_Xz, *dvcRecvDist_XY, *dvcRecvDist_YZ, *dvcRecvDist_XZ;
//......................................................................................
// MPI requests for persistent communications
std::vector<std::shared_ptr<MPI_Request>> req_D3Q19AA;
std::vector<std::shared_ptr<MPI_Request>> req_BiD3Q19AA;
std::vector<std::shared_ptr<MPI_Request>> req_TriD3Q19AA;
void start( std::vector<std::shared_ptr<MPI_Request>>& requests );
void wait( std::vector<std::shared_ptr<MPI_Request>>& requests );
//......................................................................................
int *bb_dist;
int *bb_interactions;
int *fluid_boundary;

View File

@ -69,7 +69,7 @@ void Utilities::startup(int argc, char **argv, bool multiple) {
"thread support, thread support will be disabled"
<< std::endl;
}
StackTrace::globalCallStackInitialize(MPI_COMM_WORLD);
//StackTrace::globalCallStackInitialize(MPI_COMM_WORLD);
} else {
MPI_Init(&argc, &argv);
}
@ -86,7 +86,7 @@ void Utilities::shutdown() {
int rank = 0;
#ifdef USE_MPI
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
StackTrace::globalCallStackFinalize();
//StackTrace::globalCallStackFinalize();
MPI_Barrier(MPI_COMM_WORLD);
MPI_Finalize();
#endif

View File

@ -173,8 +173,7 @@
_Pragma( "GCC diagnostic ignored \"-Wunused-local-typedefs\"" ) \
_Pragma( "GCC diagnostic ignored \"-Woverloaded-virtual\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wunused-parameter\"" ) \
_Pragma( "GCC diagnostic ignored \"-Warray-bounds\"" ) \
_Pragma( "GCC diagnostic ignored \"-Wterminate\"" )
_Pragma( "GCC diagnostic ignored \"-Warray-bounds\"" )
#define ENABLE_WARNINGS _Pragma( "GCC diagnostic pop" )
#else
#define DISABLE_WARNINGS

View File

@ -48,6 +48,7 @@ extern "C" void ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count,
}
}
extern "C" void ScaLBL_D3Q19_AA_Init(double *f_even, double *f_odd, int Np) {
int n;
for (n = 0; n < Np; n++) {
@ -1883,7 +1884,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_MRT(int *neighborList, double *dist,
}
}
extern "C" void ScaLBL_D3Q19_AAeven_Compact(char *ID, double *dist, int Np) {
extern "C" void ScaLBL_D3Q19_AAeven_Compact(double *dist, int Np) {
for (int n = 0; n < Np; n++) {
@ -1941,7 +1942,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_Compact(char *ID, double *dist, int Np) {
}
}
extern "C" void ScaLBL_D3Q19_AAodd_Compact(char *ID, int *neighborList,
extern "C" void ScaLBL_D3Q19_AAodd_Compact(int *neighborList,
double *dist, int Np) {
int nread;

View File

@ -1,4 +1,144 @@
#include <stdio.h>
#include <math.h>
/**
 * \brief Assign mass transfer coefficients to the interior membrane links.
 * \details Each link stores two distribution indices in `membrane`
 * (entries 2*link and 2*link+1). Both are reduced modulo Np to a site index
 * and mapped through `Map` to look up the potential `Psi`. When the potential
 * difference (first site minus second site) exceeds `Threshold`, the
 * threshold coefficients are stored; otherwise the regular ones are.
 * @param membrane - pairs of distribution indices, two per link
 * @param Map - maps a site index to the index used to read Psi
 * @param Distance - not read by this routine
 * @param Psi - potential
 * @param coef - output, two coefficients per link (written)
 * @param Threshold - potential difference above which the threshold coefficients apply
 * @param MassFractionIn, MassFractionOut - coefficients at or below the threshold
 * @param ThresholdMassFractionIn, ThresholdMassFractionOut - coefficients above the threshold
 * @param memLinks - number of links to process
 * @param Nx, Ny, Nz - not read by this routine
 * @param Np - modulus used to turn a distribution index into a site index
 */
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
        double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
        int memLinks, int Nx, int Ny, int Nz, int Np) {
    for (int m = 0; m < memLinks; ++m) {
        // site indices on the two ends of the link
        const int siteIn = membrane[2 * m] % Np;
        const int siteOut = membrane[2 * m + 1] % Np;

        // membrane potential for this link (Map gives the strided layout index)
        const double potentialDrop = Psi[Map[siteIn]] - Psi[Map[siteOut]];
        const bool aboveThreshold = potentialDrop > Threshold;

        // save the mass transfer coefficients
        coef[2 * m] = aboveThreshold ? ThresholdMassFractionIn : MassFractionIn;
        coef[2 * m + 1] = aboveThreshold ? ThresholdMassFractionOut : MassFractionOut;
    }
}
/**
 * \brief Assign mass transfer coefficients to links that cross the halo.
 * \details For each of the `count` entries, a coefficient pair is written to
 * coef[2*idx] and coef[2*idx+1]. Entries whose label is not positive, or whose
 * site index is negative, get (1.0, 1.0). For every other entry the local site
 * is mapped through `Map`, decomposed into (i,j,k) using Nx and Ny, and shifted
 * by (-Cqx,-Cqy,-Cqz) to find the index of the opposite end of the link. The
 * pair is then chosen from the sign of `Distance` at the local site and from
 * the potential difference (local minus shifted) compared with `Threshold`.
 * @param Cqx, Cqy, Cqz - offsets subtracted from (i,j,k) to reach the linked site
 * @param Map - maps a received site index to the index used for Distance and Psi
 * @param Distance - distance value at each mapped index
 * @param Psi - potential at each mapped index
 * @param Threshold - potential difference threshold
 * @param MassFractionIn, MassFractionOut - regular coefficients
 * @param ThresholdMassFractionIn, ThresholdMassFractionOut - threshold coefficients
 * @param d3q7_recvlist - site index for each entry (negative entries get 1.0, 1.0)
 * @param d3q7_linkList - label for each entry (non-positive labels get 1.0, 1.0)
 * @param coef - output, two coefficients per entry (written)
 * @param start, nlinks - not read by this routine
 * @param count - number of entries to process
 * @param N, Nz - not read by this routine
 * @param Nx, Ny - dimensions used to convert between linear and (i,j,k) indices
 */
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
        const int Cqx, const int Cqy, int const Cqz,
        int *Map, double *Distance, double *Psi, double Threshold,
        double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
        int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
        const int N, const int Nx, const int Ny, const int Nz) {
    for (int slot = 0; slot < count; ++slot) {
        const int site = d3q7_recvlist[slot];

        // regular streaming rule: unlabelled link or no valid site
        if (d3q7_linkList[slot] <= 0 || site < 0) {
            coef[2 * slot] = 1.0;
            coef[2 * slot + 1] = 1.0;
            continue;
        }

        // local index and its 3-D coordinates
        const int here = Map[site];
        const int kz = here / (Nx * Ny);
        const int jy = (here - Nx * Ny * kz) / Nx;
        const int ix = here - Nx * Ny * kz - Nx * jy;

        // index at the other end of the streaming link
        const int there = (kz - Cqz) * Nx * Ny + (jy - Cqy) * Nx + (ix - Cqx);

        const double potentialDrop = Psi[here] - Psi[there];

        // default: regular coefficients
        double coefQ = MassFractionIn;
        double coefP = MassFractionOut;

        if (Distance[here] > 0.0) {
            /* local site has positive distance */
            if (potentialDrop < -Threshold) {
                coefP = MassFractionIn;
                coefQ = MassFractionOut;
            } else {
                coefP = ThresholdMassFractionIn;
                coefQ = ThresholdMassFractionOut;
            }
        } else if (potentialDrop > Threshold) {
            coefQ = ThresholdMassFractionIn;
            coefP = ThresholdMassFractionOut;
        }

        coef[2 * slot] = coefQ;
        coef[2 * slot + 1] = coefP;
    }
}
/**
 * \brief Unpack received distributions, blending them with the local values.
 * \details For each entry with a non-negative site index n, the value stored
 * at dist[q*N + n] is replaced by (1-aq)*f_local + ap*f_received, where
 * aq = coef[2*idx] and ap = coef[2*idx+1]. Entries with a negative site index
 * are skipped.
 * @param q - distribution index selecting the slab of `dist` that is updated
 * @param d3q7_recvlist - site index for each received value
 * @param recvbuf - received values, one per entry
 * @param count - number of entries
 * @param dist - distributions (updated in place)
 * @param N - stride between consecutive distributions in `dist`
 * @param coef - two mass transfer coefficients per entry
 */
extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q,
        int *d3q7_recvlist, double *recvbuf, int count,
        double *dist, int N, double *coef) {
    for (int slot = 0; slot < count; ++slot) {
        const int site = d3q7_recvlist[slot];
        if (site < 0)
            continue; // nothing to unpack for this entry

        // update link based on mass transfer coefficients
        const double keepCoef = coef[2 * slot];
        const double recvCoef = coef[2 * slot + 1];
        const double local = dist[q * N + site];
        const double incoming = recvbuf[slot];
        dist[q * N + site] = (1 - keepCoef) * local + recvCoef * incoming;
    }
}
/**
 * \brief Exchange distributions across each membrane link and update the density.
 * \details Each link holds two distribution indices (iq, ip) in `membrane`
 * and two coefficients (aq, ap) in `coef`. The two distributions are replaced
 * by
 *   fq' = (1-aq)*fq + ap*fp,   fp' = (1-ap)*fp + aq*fq,
 * and the change in each is added to `Den` at the corresponding site index
 * (distribution index modulo Np).
 * @param membrane - pairs of distribution indices, two per link
 * @param coef - pairs of mass transfer coefficients, two per link
 * @param dist - distributions (updated in place)
 * @param Den - density per site (updated in place)
 * @param memLinks - number of links
 * @param Np - modulus used to turn a distribution index into a site index
 */
extern "C" void ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef,
        double *dist, double *Den, int memLinks, int Np) {
    for (int m = 0; m < memLinks; ++m) {
        const double coefIn = coef[2 * m];
        const double coefOut = coef[2 * m + 1];

        const int distIn = membrane[2 * m];
        const int distOut = membrane[2 * m + 1];
        const int siteIn = distIn % Np;
        const int siteOut = distOut % Np;

        const double oldIn = dist[distIn];
        const double oldOut = dist[distOut];
        const double newIn = (1 - coefIn) * oldIn + coefOut * oldOut;
        const double newOut = (1 - coefOut) * oldOut + coefIn * oldIn;

        // read both densities before either one is written back
        const double denIn = Den[siteIn] + (newIn - oldIn);
        const double denOut = Den[siteOut] + (newOut - oldOut);
        Den[siteIn] = denIn;
        Den[siteOut] = denOut;

        dist[distIn] = newIn;
        dist[distOut] = newOut;
    }
}
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList,
double *dist, double *Den,
@ -85,7 +225,7 @@ extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den,
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist,
extern "C" void ScaLBL_D3Q7_AAodd_Ion_v0(int *neighborList, double *dist,
double *Den, double *FluxDiffusive,
double *FluxAdvective,
double *FluxElectrical, double *Velocity,
@ -99,6 +239,7 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist,
double Ex, Ey, Ez; //electrical field
double flux_diffusive_x, flux_diffusive_y, flux_diffusive_z;
double f0, f1, f2, f3, f4, f5, f6;
//double X,Y,Z,factor_x, factor_y, factor_z;
int nr1, nr2, nr3, nr4, nr5, nr6;
for (n = start; n < finish; n++) {
@ -137,6 +278,7 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist,
f6 = dist[nr6];
// compute diffusive flux
//Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
@ -149,37 +291,55 @@ extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist,
FluxElectrical[n + 0 * Np] = uEPx * Ci;
FluxElectrical[n + 1 * Np] = uEPy * Ci;
FluxElectrical[n + 2 * Np] = uEPz * Ci;
//Den[n] = Ci;
/* use logistic function to prevent negative distributions*/
//X = 4.0 * (ux + uEPx);
//Y = 4.0 * (uy + uEPy);
//Z = 4.0 * (uz + uEPz);
//factor_x = X / sqrt(1 + X*X);
//factor_y = Y / sqrt(1 + Y*Y);
//factor_z = Z / sqrt(1 + Z*Z);
// q=0
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
// q = 1
dist[nr2] =
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
// f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
// q=2
dist[nr1] =
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
// f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
// q = 3
dist[nr4] =
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
// f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y );
// q = 4
dist[nr3] =
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
// f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
// q = 5
dist[nr6] =
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
// f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
// q = 6
dist[nr5] =
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
// f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion(
extern "C" void ScaLBL_D3Q7_AAeven_Ion_v0(
double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective,
double *FluxElectrical, double *Velocity, double *ElectricField, double Di,
int zi, double rlx, double Vt, int start, int finish, int Np) {
@ -190,6 +350,7 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion(
double Ex, Ey, Ez; //electrical field
double flux_diffusive_x, flux_diffusive_y, flux_diffusive_z;
double f0, f1, f2, f3, f4, f5, f6;
//double X,Y,Z, factor_x, factor_y, factor_z;
for (n = start; n < finish; n++) {
@ -214,6 +375,7 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion(
f6 = dist[5 * Np + n];
// compute diffusive flux
//Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
@ -226,33 +388,258 @@ extern "C" void ScaLBL_D3Q7_AAeven_Ion(
FluxElectrical[n + 0 * Np] = uEPx * Ci;
FluxElectrical[n + 1 * Np] = uEPy * Ci;
FluxElectrical[n + 2 * Np] = uEPz * Ci;
//Den[n] = Ci;
/* use logistic function to prevent negative distributions*/
//X = 4.0 * (ux + uEPx);
//Y = 4.0 * (uy + uEPy);
//Z = 4.0 * (uz + uEPz);
//factor_x = X / sqrt(1 + X*X);
//factor_y = Y / sqrt(1 + Y*Y);
//factor_z = Z / sqrt(1 + Z*Z);
// q=0
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
// q = 1
dist[1 * Np + n] =
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
// f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
// q=2
dist[2 * Np + n] =
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
// f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
// q = 3
dist[3 * Np + n] =
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
// f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y);
// q = 4
dist[4 * Np + n] =
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
// f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
// q = 5
dist[5 * Np + n] =
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
// f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
// q = 6
dist[6 * Np + n] =
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
// f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
}
}
/**
 * \brief D3Q7 ion update that gathers its distributions through neighborList.
 * \details For every site n in [start, finish):
 *   - the six moving distributions are read from dist at the indices stored in
 *     neighborList[n + k*Np], k = 0..5, and the rest distribution from dist[n];
 *   - the concentration Ci is the sum of the seven distributions and is
 *     stored in Den[n];
 *   - diffusive, advective and electrical fluxes are stored with components
 *     at n, n+Np and n+2*Np;
 *   - relaxed distributions are written back with each direction pair swapped
 *     (the value built from f1 goes to the index f2 was read from, and so on).
 * @param neighborList  - gather indices into dist
 * @param dist          - D3Q7 distributions (updated in place)
 * @param Den           - concentration per site (written)
 * @param FluxDiffusive - diffusive flux (written)
 * @param FluxAdvective - advective flux, velocity times Ci (written)
 * @param FluxElectrical - electrical flux, uEP times Ci (written)
 * @param Velocity      - velocity components at n, n+Np, n+2*Np (read)
 * @param ElectricField - electric field components at n, n+Np, n+2*Np (read)
 * @param Di, zi, Vt    - combined as zi*Di/Vt to scale the electric field into
 *                        the electrochemically induced velocity uEP
 * @param rlx           - relaxation rate
 * @param start, finish - half-open range of sites to update
 * @param Np            - stride between components / distributions
 */
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist,
                                      double *Den, double *FluxDiffusive,
                                      double *FluxAdvective,
                                      double *FluxElectrical, double *Velocity,
                                      double *ElectricField, double Di, int zi,
                                      double rlx, double Vt, int start,
                                      int finish, int Np) {
    const double keep = 1.0 - rlx;            // weight of the old distribution
    const double fluxScale = 1.0 - 0.5 * rlx; // prefactor of the diffusive flux

    for (int site = start; site < finish; ++site) {
        // component offsets for the vector fields
        const int cx = site + 0 * Np;
        const int cy = site + 1 * Np;
        const int cz = site + 2 * Np;

        // fluid velocity
        const double ux = Velocity[cx];
        const double uy = Velocity[cy];
        const double uz = Velocity[cz];

        // electrochemical induced velocity
        const double uEPx = zi * Di / Vt * ElectricField[cx];
        const double uEPy = zi * Di / Vt * ElectricField[cy];
        const double uEPz = zi * Di / Vt * ElectricField[cz];

        // gather indices for q = 1..6
        const int nr1 = neighborList[site];
        const int nr2 = neighborList[site + Np];
        const int nr3 = neighborList[site + 2 * Np];
        const int nr4 = neighborList[site + 3 * Np];
        const int nr5 = neighborList[site + 4 * Np];
        const int nr6 = neighborList[site + 5 * Np];

        // load all seven distributions before anything is written back
        const double f0 = dist[site];
        const double f1 = dist[nr1];
        const double f2 = dist[nr2];
        const double f3 = dist[nr3];
        const double f4 = dist[nr4];
        const double f5 = dist[nr5];
        const double f6 = dist[nr6];

        // concentration is the zeroth moment
        const double Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;

        // fluxes
        FluxDiffusive[cx] = fluxScale * ((f1 - f2) - ux * Ci);
        FluxDiffusive[cy] = fluxScale * ((f3 - f4) - uy * Ci);
        FluxDiffusive[cz] = fluxScale * ((f5 - f6) - uz * Ci);
        FluxAdvective[cx] = ux * Ci;
        FluxAdvective[cy] = uy * Ci;
        FluxAdvective[cz] = uz * Ci;
        FluxElectrical[cx] = uEPx * Ci;
        FluxElectrical[cy] = uEPy * Ci;
        FluxElectrical[cz] = uEPz * Ci;

        Den[site] = Ci;

        // total drift seen by the equilibrium
        const double driftX = ux + uEPx;
        const double driftY = uy + uEPy;
        const double driftZ = uz + uEPz;

        // relax and write back; the store order q = 0..6 is kept
        dist[site] = f0 * keep + rlx * 0.25 * Ci;
        dist[nr2] = f1 * keep + rlx * 0.125 * Ci * (1.0 + 4.0 * driftX);
        dist[nr1] = f2 * keep + rlx * 0.125 * Ci * (1.0 - 4.0 * driftX);
        dist[nr4] = f3 * keep + rlx * 0.125 * Ci * (1.0 + 4.0 * driftY);
        dist[nr3] = f4 * keep + rlx * 0.125 * Ci * (1.0 - 4.0 * driftY);
        dist[nr6] = f5 * keep + rlx * 0.125 * Ci * (1.0 + 4.0 * driftZ);
        dist[nr5] = f6 * keep + rlx * 0.125 * Ci * (1.0 - 4.0 * driftZ);
    }
}
/**
 * \brief D3Q7 ion update that reads and writes dist at fixed strides.
 * \details For every site n in [start, finish):
 *   - f0 is read from dist[n]; each direction q = 1..6 is read from the slot
 *     of its opposite partner (f1 from 2*Np+n, f2 from 1*Np+n, f3 from 4*Np+n,
 *     f4 from 3*Np+n, f5 from 6*Np+n, f6 from 5*Np+n);
 *   - the concentration Ci is the sum of the seven distributions and is
 *     stored in Den[n];
 *   - diffusive, advective and electrical fluxes are stored with components
 *     at n, n+Np and n+2*Np;
 *   - the relaxed distribution for direction q is written to dist[q*Np + n].
 * @param dist          - D3Q7 distributions (updated in place)
 * @param Den           - concentration per site (written)
 * @param FluxDiffusive - diffusive flux (written)
 * @param FluxAdvective - advective flux, velocity times Ci (written)
 * @param FluxElectrical - electrical flux, uEP times Ci (written)
 * @param Velocity      - velocity components at n, n+Np, n+2*Np (read)
 * @param ElectricField - electric field components at n, n+Np, n+2*Np (read)
 * @param Di, zi, Vt    - combined as zi*Di/Vt to scale the electric field into
 *                        the electrochemically induced velocity uEP
 * @param rlx           - relaxation rate
 * @param start, finish - half-open range of sites to update
 * @param Np            - stride between components / distributions
 */
extern "C" void ScaLBL_D3Q7_AAeven_Ion(
    double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective,
    double *FluxElectrical, double *Velocity, double *ElectricField, double Di,
    int zi, double rlx, double Vt, int start, int finish, int Np) {
    const double keep = 1.0 - rlx;            // weight of the old distribution
    const double fluxScale = 1.0 - 0.5 * rlx; // prefactor of the diffusive flux

    for (int site = start; site < finish; ++site) {
        // component offsets for the vector fields
        const int cx = site + 0 * Np;
        const int cy = site + 1 * Np;
        const int cz = site + 2 * Np;

        // fluid velocity
        const double ux = Velocity[cx];
        const double uy = Velocity[cy];
        const double uz = Velocity[cz];

        // electrochemical induced velocity
        const double uEPx = zi * Di / Vt * ElectricField[cx];
        const double uEPy = zi * Di / Vt * ElectricField[cy];
        const double uEPz = zi * Di / Vt * ElectricField[cz];

        // load: each direction is read from its partner's slot
        const double f0 = dist[site];
        const double f1 = dist[2 * Np + site];
        const double f2 = dist[1 * Np + site];
        const double f3 = dist[4 * Np + site];
        const double f4 = dist[3 * Np + site];
        const double f5 = dist[6 * Np + site];
        const double f6 = dist[5 * Np + site];

        // concentration is the zeroth moment
        const double Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;

        // fluxes
        FluxDiffusive[cx] = fluxScale * ((f1 - f2) - ux * Ci);
        FluxDiffusive[cy] = fluxScale * ((f3 - f4) - uy * Ci);
        FluxDiffusive[cz] = fluxScale * ((f5 - f6) - uz * Ci);
        FluxAdvective[cx] = ux * Ci;
        FluxAdvective[cy] = uy * Ci;
        FluxAdvective[cz] = uz * Ci;
        FluxElectrical[cx] = uEPx * Ci;
        FluxElectrical[cy] = uEPy * Ci;
        FluxElectrical[cz] = uEPz * Ci;

        Den[site] = Ci;

        // total drift seen by the equilibrium
        const double driftX = ux + uEPx;
        const double driftY = uy + uEPy;
        const double driftZ = uz + uEPz;

        // relax and store direction q at q*Np + site
        dist[site] = f0 * keep + rlx * 0.25 * Ci;
        dist[1 * Np + site] = f1 * keep + rlx * 0.125 * Ci * (1.0 + 4.0 * driftX);
        dist[2 * Np + site] = f2 * keep + rlx * 0.125 * Ci * (1.0 - 4.0 * driftX);
        dist[3 * Np + site] = f3 * keep + rlx * 0.125 * Ci * (1.0 + 4.0 * driftY);
        dist[4 * Np + site] = f4 * keep + rlx * 0.125 * Ci * (1.0 - 4.0 * driftY);
        dist[5 * Np + site] = f5 * keep + rlx * 0.125 * Ci * (1.0 + 4.0 * driftZ);
        dist[6 * Np + site] = f6 * keep + rlx * 0.125 * Ci * (1.0 - 4.0 * driftZ);
    }
}
@ -289,7 +676,7 @@ extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den,
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den,
double *ChargeDensity,
int IonValence, int ion_component,
double IonValence, int ion_component,
int start, int finish, int Np) {
int n;

42
cpu/MembraneHelper.cpp Normal file
View File

@ -0,0 +1,42 @@
extern "C" void Membrane_D3Q19_Unpack(int q, int *list, int *links, int start, int linkCount,
                                      double *recvbuf, double *dist, int N) {
    //....................................................................................
    // Unpack distribution q from the receive buffer.
    //   links[start + k] : buffer slot used by the k-th link (k = 0 .. linkCount-1)
    //   list[start + slot] : destination site for that slot; negative means "skip"
    //   recvbuf[start + slot] : value copied to dist[q * N + site]
    // Original author's notes: distribution q matches Cqx, Cqy, Cqz; by the swap rule
    // the distributions in recvbuf are OPPOSITE of q; dist may hold even or odd
    // distributions stored by stream layout. The site index comes from the send
    // (non-local) process.
    //....................................................................................
    const int distBase = q * N;
    for (int k = 0; k < linkCount; ++k) {
        const int slot = start + links[start + k];
        const int site = list[slot];
        if (site < 0)
            continue; // no local destination for this slot
        dist[distBase + site] = recvbuf[slot];
    }
}
extern "C" void Membrane_D3Q19_Transport(int q, int *list, int *links, double *coef, int start, int offset,
                                         int linkCount, double *recvbuf, double *dist, int N){
    //....................................................................................
    // Unpack distribution q from the receive buffer, scaling each value by a per-slot
    // coefficient.
    //   link runs over [offset, linkCount)
    //   links[start + link] : buffer slot used by that link
    //   list[start + slot]  : destination site for the slot; negative means "skip"
    //   coef[start + slot]  : multiplier applied to recvbuf[start + slot]
    // Result is stored in dist[q * N + site].
    // Original author's notes: distribution q matches Cqx, Cqy, Cqz; by the swap rule
    // the distributions in recvbuf are OPPOSITE of q; dist may hold even or odd
    // distributions stored by stream layout.
    //....................................................................................
    for (int link = offset; link < linkCount; link++){
        // The slot must come from the link table (as in Membrane_D3Q19_Unpack).
        // Reading it from `list` used a destination site index as a buffer slot
        // and left the `links` argument unused.
        const int idx = links[start + link];
        // Get the site from the list -- n is the index from the send (non-local) process
        const int n = list[start + idx];
        if (n < 0)
            continue;
        const double alpha = coef[start + idx];
        // unpack the scaled distribution to the proper location
        dist[q * N + n] = alpha * recvbuf[start + idx];
    }
}

View File

@ -1,3 +1,4 @@
#include <math.h>
extern "C" void
ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList, int *Map,
@ -444,34 +445,503 @@ extern "C" void ScaLBL_D3Q7_PoissonResidualError(
// }
//}
//extern "C" void ScaLBL_D3Q7_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np){
// int n;
// // distributions
// double f1,f2,f3,f4,f5,f6;
// double Ex,Ey,Ez;
// double rlx=1.0/tau;
//
// for (n=0; n<Np; n++){
// //........................................................................
// // Registers to store the distributions
// //........................................................................
// f1 = dist[Np+n];
// f2 = dist[2*Np+n];
// f3 = dist[3*Np+n];
// f4 = dist[4*Np+n];
// f5 = dist[5*Np+n];
// f6 = dist[6*Np+n];
// //.................Compute the Electric Field...................................
// //Ex = (f1-f2)*rlx*4.5;//NOTE the unit of electric field here is V/lu
// //Ey = (f3-f4)*rlx*4.5;
// //Ez = (f5-f6)*rlx*4.5;
// Ex = (f1-f2)*rlx*4.0;//NOTE the unit of electric field here is V/lu
// Ey = (f3-f4)*rlx*4.0;
// Ez = (f5-f6)*rlx*4.0;
// //..................Write the Electric Field.....................................
// ElectricField[0*Np+n] = Ex;
// ElectricField[1*Np+n] = Ey;
// ElectricField[2*Np+n] = Ez;
// //........................................................................
// }
//}
extern "C" void ScaLBL_D3Q19_Poisson_getElectricField(double *dist, double *ElectricField, double tau, int Np){
    // For every site in [0, Np) combine the 18 moving D3Q19 distributions into the
    // three electric-field components and store them component-major:
    //   ElectricField[c * Np + site], c = 0 (x), 1 (y), 2 (z).
    // Distribution f_q is read from slot q+1 when q is odd and from slot q-1 when q
    // is even (slot s lives at dist[s * Np + site]).
    // NOTE: the unit of electric field here is V/lu.
    const double rlx = 1.0 / tau;
    double f[19]; // f[0] is not needed for the field and is left unused

    for (int site = 0; site < Np; ++site) {
        // Load the distributions for this site
        for (int q = 1; q < 19; ++q) {
            const int slot = (q & 1) ? q + 1 : q - 1;
            f[q] = dist[slot * Np + site];
        }

        // Compute the electric field
        const double Ex = (f[1] - f[2] + f[7] - f[8] + f[9] - f[10] + f[11] - f[12] + f[13] - f[14])*rlx*3.0;
        const double Ey = (f[3] - f[4] + f[7] - f[8] - f[9] + f[10] + f[15] - f[16] + f[17] - f[18])*rlx*3.0;
        const double Ez = (f[5] - f[6] + f[11] - f[12] - f[13] + f[14] + f[15] - f[16] - f[17] + f[18])*rlx*3.0;

        // Write the electric field
        ElectricField[0 * Np + site] = Ex;
        ElectricField[1 * Np + site] = Ey;
        ElectricField[2 * Np + site] = Ez;
    }
}
extern "C" void
ScaLBL_D3Q19_AAodd_Poisson_ElectricPotential(int *neighborList, int *Map,
                                             double *dist, double *Den_charge, double *Psi,
                                             double epsilon_LB, bool UseSlippingVelBC,
                                             int start, int finish, int Np) {
    // For each site in [start, finish): sum the rest distribution dist[site] and the
    // 18 distributions addressed through neighborList, subtract half of the scaled
    // charge density, and store the result in Psi[Map[site]].
    //   neighborList[site + k * Np] (k = 0..17) is the index in dist of f_{k+1}.
    //   The charge term is Den_charge[site] / epsilon_LB, or 0 when
    //   UseSlippingVelBC is set.
    for (int site = start; site < finish; ++site) {
        double rho_e = 0.0; // local charge density term
        if (!UseSlippingVelBC)
            rho_e = Den_charge[site] / epsilon_LB;

        // Accumulate in the order f0, f2, f1, f4, f3, ..., f18, f17
        double psi = dist[site];
        for (int pair = 0; pair < 9; ++pair) {
            const int kOdd = 2 * pair;      // neighbor slot holding f_{2*pair+1}
            const int kEven = 2 * pair + 1; // neighbor slot holding f_{2*pair+2}
            psi += dist[neighborList[site + kEven * Np]];
            psi += dist[neighborList[site + kOdd * Np]];
        }

        Psi[Map[site]] = psi - 0.5*rho_e;
    }
}
extern "C" void ScaLBL_D3Q19_AAeven_Poisson_ElectricPotential(
    int *Map, double *dist, double *Den_charge, double *Psi, double epsilon_LB, bool UseSlippingVelBC, int start, int finish, int Np) {
    // For each site in [start, finish): sum all 19 distribution slots stored at
    // dist[s * Np + site] (s = 0..18), subtract half of the scaled charge density,
    // and store the result in Psi[Map[site]].
    // The charge term is Den_charge[site] / epsilon_LB, or 0 when UseSlippingVelBC
    // is set.
    for (int site = start; site < finish; ++site) {
        double rho_e = 0.0; // local charge density term
        if (!UseSlippingVelBC)
            rho_e = Den_charge[site] / epsilon_LB;

        // Slots are accumulated in ascending order (f0, f2, f1, f4, f3, ...)
        double psi = dist[site];
        for (int slot = 1; slot < 19; ++slot)
            psi += dist[slot * Np + site];

        Psi[Map[site]] = psi - 0.5*rho_e;
    }
}
extern "C" void ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
                                           double *dist, double *Den_charge,
                                           double *Psi, double *ElectricField,
                                           double tau, double epsilon_LB, bool UseSlippingVelBC,
                                           int start, int finish, int Np) {
    // For each site in [start, finish):
    //   1. read f0 from dist[site] and f1..f18 through neighborList
    //      (neighborList[site + (q-1) * Np] is the index in dist of f_q),
    //   2. compute the potential psi and store it in Psi[Map[site]],
    //   3. compute the electric field and store it in ElectricField[site + c * Np],
    //   4. overwrite the 19 distributions that were read with W * psi.
    const double rlx = 1.0 / tau;
    // Weights applied to psi: rest direction, q = 1..6, q = 7..18
    const double W0 = 0.5;
    const double W1 = 1.0/24.0;
    const double W2 = 1.0/48.0;

    int src[19];   // src[q]: index in dist that f_q was read from (q = 1..18)
    double f[19];  // f[q]: distribution values for the current site

    for (int site = start; site < finish; ++site) {
        // When the Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid
        // is considered electroneutral, so the net space charge density is zero.
        const double rho_e = UseSlippingVelBC ? 0.0 : Den_charge[site] / epsilon_LB;

        // Gather all distributions before anything is overwritten
        f[0] = dist[site];
        for (int q = 1; q < 19; ++q) {
            src[q] = neighborList[site + (q - 1) * Np];
            f[q] = dist[src[q]];
        }

        // Sum of the moving distributions, accumulated as f1 + f2 + ... + f18
        double sum_q = f[1];
        for (int q = 2; q < 19; ++q)
            sum_q += f[q];

        const double psi = 2.0*(f[0]*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));
        Psi[Map[site]] = psi;

        // NOTE the unit of electric field here is V/lu
        const double Ex = (f[1] - f[2] + 0.5*(f[7] - f[8] + f[9] - f[10] + f[11] - f[12] + f[13] - f[14]))*4.0;
        const double Ey = (f[3] - f[4] + 0.5*(f[7] - f[8] - f[9] + f[10] + f[15] - f[16] + f[17] - f[18]))*4.0;
        const double Ez = (f[5] - f[6] + 0.5*(f[11] - f[12] - f[13] + f[14] + f[15] - f[16] - f[17] + f[18]))*4.0;
        ElectricField[site + 0 * Np] = Ex;
        ElectricField[site + 1 * Np] = Ey;
        ElectricField[site + 2 * Np] = Ez;

        // Write back: rest direction first, then the W1 group, then the W2 group.
        // Every location in a group receives the same value.
        dist[site] = W0*psi;
        for (int q = 1; q <= 6; ++q)
            dist[src[q]] = W1*psi;
        for (int q = 7; q <= 18; ++q)
            dist[src[q]] = W2*psi;
    }
}
extern "C" void ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
                                            double *Den_charge, double *Psi,
                                            double *ElectricField, double *Error, double tau,
                                            double epsilon_LB, bool UseSlippingVelBC,
                                            int start, int finish, int Np) {
    // For each site in [start, finish):
    //   1. read f0..f18 directly from dist (f_q sits in slot q+1 for odd q and in
    //      slot q-1 for even q; slot s lives at dist[s * Np + site]),
    //   2. compute the electric field and store it in ElectricField[site + c * Np],
    //   3. store the residual 8*(sum_q - f0) + rho_e in Error[site],
    //   4. compute the potential psi and store it in Psi[Map[site]],
    //   5. overwrite every slot of the site with W * psi.
    const double rlx = 1.0 / tau;
    // Weights applied to psi: rest direction, slots 1..6, slots 7..18
    const double W0 = 0.5;
    const double W1 = 1.0/24.0;
    const double W2 = 1.0/48.0;

    double f[19]; // f[q]: distribution values for the current site

    for (int site = start; site < finish; ++site) {
        // When the Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid
        // is considered electroneutral, so the net space charge density is zero.
        const double rho_e = UseSlippingVelBC ? 0.0 : Den_charge[site] / epsilon_LB;

        f[0] = dist[site];
        for (int q = 1; q < 19; ++q) {
            const int slot = (q & 1) ? q + 1 : q - 1;
            f[q] = dist[slot * Np + site];
        }

        // NOTE the unit of electric field here is V/lu
        const double Ex = (f[1] - f[2] + 0.5*(f[7] - f[8] + f[9] - f[10] + f[11] - f[12] + f[13] - f[14]))*4.0;
        const double Ey = (f[3] - f[4] + 0.5*(f[7] - f[8] - f[9] + f[10] + f[15] - f[16] + f[17] - f[18]))*4.0;
        const double Ez = (f[5] - f[6] + 0.5*(f[11] - f[12] - f[13] + f[14] + f[15] - f[16] - f[17] + f[18]))*4.0;
        ElectricField[site + 0 * Np] = Ex;
        ElectricField[site + 1 * Np] = Ey;
        ElectricField[site + 2 * Np] = Ez;

        // Sum of the moving distributions, accumulated as f1 + f2 + ... + f18
        double sum_q = f[1];
        for (int q = 2; q < 19; ++q)
            sum_q += f[q];

        Error[site] = 8.0*(sum_q - f[0]) + rho_e;

        const double psi = 2.0*(f[0]*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));
        Psi[Map[site]] = psi;

        // Write back in ascending slot order
        dist[site] = W0*psi;
        for (int slot = 1; slot <= 6; ++slot)
            dist[slot * Np + site] = W1*psi;
        for (int slot = 7; slot <= 18; ++slot)
            dist[slot * Np + site] = W2*psi;
    }
}
extern "C" void ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
                                          int start, int finish, int Np) {
    // Initialise the 19 distribution slots of every site in [start, finish) from
    // the potential Psi[Map[site]]: slot 0 gets W0 * psi, slots 1..6 get W1 * psi,
    // slots 7..18 get W2 * psi. Slot s of a site lives at dist[s * Np + site].
    const double W0 = 0.5;
    const double W1 = 1.0/24.0;
    const double W2 = 1.0/48.0;

    for (int site = start; site < finish; ++site) {
        const double psi = Psi[Map[site]];

        dist[0 * Np + site] = W0 * psi;
        for (int slot = 1; slot <= 6; ++slot)
            dist[slot * Np + site] = W1 * psi;
        for (int slot = 7; slot <= 18; ++slot)
            dist[slot * Np + site] = W2 * psi;
    }
}

View File

@ -1,7 +1,7 @@
#include <stdio.h>
#define NBLOCKS 1024
#define NTHREADS 256
#define NTHREADS 512
__global__ void dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
int n;

View File

@ -290,7 +290,7 @@ __global__ void dvc_ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *distev
//__launch_bounds__(512,4)
__global__ void
dvc_ScaLBL_AAodd_Compact(char * ID, int *d_neighborList, double *dist, int Np) {
dvc_ScaLBL_AAodd_Compact(int *d_neighborList, double *dist, int Np) {
int n;
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
@ -1321,7 +1321,7 @@ dvc_ScaLBL_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_se
//__launch_bounds__(512,4)
__global__ void dvc_ScaLBL_AAeven_Compact(char * ID, double *dist, int Np) {
__global__ void dvc_ScaLBL_AAeven_Compact( double *dist, int Np) {
int n;
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
@ -2390,18 +2390,18 @@ extern "C" void ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *disteven, d
}
}
// Host launcher for dvc_ScaLBL_AAeven_Compact: requests an L1-preferring cache
// configuration, launches NBLOCKS x NTHREADS threads on d_dist, and prints any
// error reported by cudaGetLastError().
// The stale signature/launch lines that still carried the `char *ID` argument are
// removed, and the error message now names this function instead of
// ScaLBL_D3Q19_Init.
extern "C" void ScaLBL_D3Q19_AAeven_Compact( double *d_dist, int Np) {
    cudaFuncSetCacheConfig(dvc_ScaLBL_AAeven_Compact, cudaFuncCachePreferL1);
    dvc_ScaLBL_AAeven_Compact<<<NBLOCKS,NTHREADS>>>(d_dist, Np);
    cudaError_t err = cudaGetLastError();
    if (cudaSuccess != err){
        printf("CUDA error in ScaLBL_D3Q19_AAeven_Compact: %s \n",cudaGetErrorString(err));
    }
}
extern "C" void ScaLBL_D3Q19_AAodd_Compact(char * ID, int *d_neighborList, double *d_dist, int Np) {
extern "C" void ScaLBL_D3Q19_AAodd_Compact( int *d_neighborList, double *d_dist, int Np) {
cudaFuncSetCacheConfig(dvc_ScaLBL_AAodd_Compact, cudaFuncCachePreferL1);
dvc_ScaLBL_AAodd_Compact<<<NBLOCKS,NTHREADS>>>(ID,d_neighborList, d_dist,Np);
dvc_ScaLBL_AAodd_Compact<<<NBLOCKS,NTHREADS>>>(d_neighborList, d_dist,Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",cudaGetErrorString(err));

View File

@ -6,6 +6,16 @@
#define NTHREADS 256
// Poll the CUDA runtime with cudaGetLastError() and, if an error is pending, print
// it tagged with KERNEL (a C string naming the launcher that invoked the macro).
// The do { } while (0) wrapper makes the macro usable as a single statement.
#define CHECK_ERROR(KERNEL)                                                  \
    do {                                                                     \
        const cudaError_t status = cudaGetLastError();                       \
        if (status != cudaSuccess) {                                         \
            printf("error in %s (kernel): %s \n", KERNEL,                    \
                   cudaGetErrorString(status));                              \
        }                                                                    \
    } while (0)
__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
@ -740,28 +750,19 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z(int *d_neighbor
// Launches dvc_ScaLBL_Solid_Dirichlet_D3Q7 with count/512+1 blocks of 512 threads
// and reports a launch error once via CHECK_ERROR (the duplicate inline
// cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_Solid_Dirichlet_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
    CHECK_ERROR("ScaLBL_Solid_Dirichlet_D3Q7");
}
// Launches dvc_ScaLBL_Solid_Neumann_D3Q7 with count/512+1 blocks of 512 threads
// and reports a launch error once via CHECK_ERROR (the duplicate inline
// cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_Solid_Neumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
    CHECK_ERROR("ScaLBL_Solid_Neumann_D3Q7");
}
// Launches dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7 with count/512+1 blocks of
// 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist, double *BoundaryValue,int *BoundaryLabel, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BoundaryLabel, BounceBackDist_list, BounceBackSolid_list, count);
    CHECK_ERROR("ScaLBL_Solid_DirichletAndNeumann_D3Q7");
}
extern "C" void ScaLBL_Solid_SlippingVelocityBC_D3Q19(double *dist, double *zeta_potential, double *ElectricField, double *SolidGrad,
@ -775,211 +776,142 @@ extern "C" void ScaLBL_Solid_SlippingVelocityBC_D3Q19(double *dist, double *zeta
BounceBackDist_list, BounceBackSolid_list, FluidBoundary_list,
lattice_weight, lattice_cx, lattice_cy, lattice_cz,
count, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in ScaLBL_Solid_SlippingVelocityBC_D3Q19 (kernel): %s \n",cudaGetErrorString(err));
}
CHECK_ERROR("ScaLBL_Solid_SlippingVelocityBC_D3Q19");
}
// Launches dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z with count/512+1 blocks
// of 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<<GRID,512>>>(list, dist, Vin, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z");
}
// Launches dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z with count/512+1 blocks
// of 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<<GRID,512>>>(list, dist, Vout, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z");
}
// Launches dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z with count/512+1 blocks
// of 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Vin, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z");
}
// Launches dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z with count/512+1 blocks
// of 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Vout, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z");
}
// Launches dvc_ScaLBL_Poisson_D3Q7_BC_z with count/512+1 blocks of 512 threads
// and reports a launch error once via CHECK_ERROR (the duplicate inline
// cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_Poisson_D3Q7_BC_z<<<GRID,512>>>(list, Map, Psi, Vin, count);
    CHECK_ERROR("ScaLBL_Poisson_D3Q7_BC_z");
}
// Launches dvc_ScaLBL_Poisson_D3Q7_BC_Z with count/512+1 blocks of 512 threads
// and reports a launch error once via CHECK_ERROR (the duplicate inline
// cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_Poisson_D3Q7_BC_Z<<<GRID,512>>>(list, Map, Psi, Vout, count);
    CHECK_ERROR("ScaLBL_Poisson_D3Q7_BC_Z");
}
// Launches dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z with count/512+1 blocks
// of 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<<GRID,512>>>(list, dist, Cin, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z");
}
// Launches dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z with count/512+1 blocks
// of 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<<GRID,512>>>(list, dist, Cout, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z");
}
// Launches dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z with count/512+1 blocks
// of 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Cin, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z");
}
// Launches dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z with count/512+1 blocks
// of 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Cout, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z");
}
//------------Diff-----------------
// Launches dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z with count/512+1 blocks of
// 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z");
}
// Launches dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z with count/512+1 blocks of
// 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z");
}
// Launches dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z with count/512+1 blocks of
// 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z");
}
// Launches dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z with count/512+1 blocks of
// 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z");
}
//----------DiffAdvc-------------
// Launches dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z with count/512+1 blocks
// of 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z");
}
// Launches dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z with count/512+1 blocks
// of 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z");
}
// Launches dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z with count/512+1 blocks
// of 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z");
}
// Launches dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z with count/512+1 blocks
// of 512 threads and reports a launch error once via CHECK_ERROR (the duplicate
// inline cudaGetLastError() poll, which cleared the error first, is removed).
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    int GRID = count / 512 + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z");
}
//----------DiffAdvcElec-------------
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
                                                              double Di, double zi, double Vt, int count, int Np){
    // Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z.
    // Uses 512-thread blocks and enough blocks to cover `count` entries
    // (always at least one block, even when count is 0).
    const int threadsPerBlock = 512;
    int GRID = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z<<<GRID, threadsPerBlock>>>(
        list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);

    // Print (but do not abort on) any error recorded by the launch.
    // NOTE(review): cudaGetLastError() resets the stored error; if CHECK_ERROR
    // also relies on cudaGetLastError() it will not see this failure -- confirm
    // against the macro definition.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z (kernel): %s \n",cudaGetErrorString(err));
    }
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
                                                              double Di, double zi, double Vt, int count, int Np){
    // Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z.
    // Uses 512-thread blocks and enough blocks to cover `count` entries
    // (always at least one block, even when count is 0).
    const int threadsPerBlock = 512;
    int GRID = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z<<<GRID, threadsPerBlock>>>(
        list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);

    // Print (but do not abort on) any error recorded by the launch.
    // NOTE(review): cudaGetLastError() resets the stored error; if CHECK_ERROR
    // also relies on cudaGetLastError() it will not see this failure -- confirm
    // against the macro definition.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z (kernel): %s \n",cudaGetErrorString(err));
    }
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
                                                             double Di, double zi, double Vt, int count, int Np){
    // Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z.
    // Uses 512-thread blocks and enough blocks to cover `count` entries
    // (always at least one block, even when count is 0).
    const int threadsPerBlock = 512;
    int GRID = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z<<<GRID, threadsPerBlock>>>(
        d_neighborList, list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);

    // Print (but do not abort on) any error recorded by the launch.
    // NOTE(review): cudaGetLastError() resets the stored error; if CHECK_ERROR
    // also relies on cudaGetLastError() it will not see this failure -- confirm
    // against the macro definition.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z (kernel): %s \n",cudaGetErrorString(err));
    }
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z");
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
                                                             double Di, double zi, double Vt, int count, int Np){
    // Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z.
    // Uses 512-thread blocks and enough blocks to cover `count` entries
    // (always at least one block, even when count is 0).
    const int threadsPerBlock = 512;
    int GRID = count / threadsPerBlock + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z<<<GRID, threadsPerBlock>>>(
        d_neighborList, list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);

    // Print (but do not abort on) any error recorded by the launch.
    // NOTE(review): cudaGetLastError() resets the stored error; if CHECK_ERROR
    // also relies on cudaGetLastError() it will not see this failure -- confirm
    // against the macro definition.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess){
        printf("CUDA error in ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z (kernel): %s \n",cudaGetErrorString(err));
    }
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z");
}
//-------------------------------

View File

@ -3,7 +3,207 @@
//#include <cuda_profiler_api.h>
#define NBLOCKS 1024
#define NTHREADS 256
#define NTHREADS 512
extern "C" void Membrane_D3Q19_Unpack(int q, int *list, int *links, int start, int linkCount,
                                      double *recvbuf, double *dist, int N) {
    //....................................................................................
    // Copy received values into distribution q for a set of links.
    //   links[start + k]   -> position `idx` of the k-th link within this segment
    //   list[start + idx]  -> destination site n (a negative n means "skip")
    //   recvbuf[start+idx] -> value stored to dist[q * N + n]
    // Per the original author's note, recvbuf holds the distribution OPPOSITE to q
    // (swap rule), and dist may hold even or odd distributions in stream layout.
    //....................................................................................
    for (int k = 0; k < linkCount; ++k) {
        const int slot = start + links[start + k];
        // site index supplied by the sending (non-local) process
        const int site = list[slot];
        if (site < 0)
            continue;
        dist[q * N + site] = recvbuf[slot];
    }
}
extern "C" void Membrane_D3Q19_Transport(int q, int *list, int *links, double *coef, int start, int offset,
                                         int linkCount, double *recvbuf, double *dist, int N){
    //....................................................................................
    // Unpack received values into distribution q, scaled by a per-link coefficient.
    //   links[start + link] -> position `idx` of the link within this segment
    //   list[start + idx]   -> destination site n (a negative n means "skip")
    //   coef[start + idx]   -> transfer coefficient alpha for that link
    //   dist[q * N + n]      = alpha * recvbuf[start + idx]
    // Only links in the range [offset, linkCount) are processed.
    //
    // Fix: the link position is now read from `links`, matching
    // Membrane_D3Q19_Unpack. The previous code read it from `list`, which left
    // the `links` argument unused and used a site id (possibly negative) as an index.
    //....................................................................................
    for (int link = offset; link < linkCount; link++) {
        const int idx = links[start + link];
        // site index supplied by the sending (non-local) process
        const int n = list[start + idx];
        if (n < 0)
            continue;
        const double alpha = coef[start + idx];
        dist[q * N + n] = alpha * recvbuf[start + idx];
    }
}
__global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
        double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
        int memLinks, int Nx, int Ny, int Nz, int Np){
    // For each membrane link, store a pair of mass-transfer coefficients:
    //   coef[2*link]   (aq) and coef[2*link+1] (ap).
    // The pair defaults to (MassFractionIn, MassFractionOut) and switches to the
    // Threshold* pair when Psi at the first site minus Psi at the second site
    // exceeds Threshold. Distance, Nx, Ny and Nz are not used by this kernel.

    // number of links each thread handles
    const int S = memLinks/NBLOCKS/NTHREADS + 1;
    for (int s=0; s<S; s++){
        const int link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
        if (link >= memLinks)
            continue;

        // the two distribution entries joined by this link
        const int iq = membrane[2*link];
        const int ip = membrane[2*link+1];
        // reduce to site indices, then translate through Map to index Psi
        const int nqm = Map[iq%Np];
        const int npm = Map[ip%Np];

        /* membrane potential for this link */
        const double membranePotential = Psi[nqm] - Psi[npm];
        const bool overThreshold = membranePotential > Threshold;

        /* Save the mass transfer coefficients */
        coef[2*link]   = overThreshold ? ThresholdMassFractionIn  : MassFractionIn;
        coef[2*link+1] = overThreshold ? ThresholdMassFractionOut : MassFractionOut;
    }
}
// Assign the mass-transfer coefficient pair (aq, ap) for every entry of a halo
// receive list, writing coef[2*idx] = aq and coef[2*idx+1] = ap.
//   Cqx,Cqy,Cqz    - lattice direction of the link; subtracted from the local
//                    (i,j,k) to find the site on the other side of the link
//   Map            - maps a list site index to an index into Distance / Psi
//   Distance, Psi  - fields sampled at both ends of the link
//   Threshold      - membrane-potential threshold selecting the coefficient pair
//   d3q7_recvlist  - site index per entry (negative entries keep aq = ap = 1)
//   d3q7_linkList  - label per entry (label <= 0 keeps aq = ap = 1)
//   count          - number of entries processed (bounds check is idx < count)
// start, N and Nz are accepted but not used in this kernel.
__global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
const int Cqx, const int Cqy, int const Cqz,
int *Map, double *Distance, double *Psi, double Threshold,
double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
const int N, const int Nx, const int Ny, const int Nz) {
int n, idx, nqm, npm, label, i, j, k;
double distanceLocal, distanceNonlocal;
double psiLocal, psiNonlocal, membranePotential;
double ap,aq; // coefficient
/* second enforce custom rule for membrane links */
// NOTE(review): the per-thread iteration count S is derived from (count-nlinks)
// while the bounds check below uses count -- confirm this is intended.
int S = (count-nlinks)/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
idx = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (idx < count) {
n = d3q7_recvlist[idx];
label = d3q7_linkList[idx];
ap = 1.0; // regular streaming rule
aq = 1.0;
// only labelled entries with a valid (non-negative) site get membrane coefficients
if (label > 0 && !(n < 0)){
nqm = Map[n];
distanceLocal = Distance[nqm];
psiLocal = Psi[nqm];
// Recover (i,j,k) from the linear index nqm, using an Nx-by-Ny plane layout
k = nqm/(Nx*Ny); j = (nqm-Nx*Ny*k)/Nx; i = nqm-Nx*Ny*k-Nx*j;
// Step back along the link direction to the site on the other side
i -= Cqx; j -= Cqy; k -= Cqz;
npm = k*Nx*Ny + j*Nx + i;
distanceNonlocal = Distance[npm]; // loaded but not used below
psiNonlocal = Psi[npm];
membranePotential = psiLocal - psiNonlocal;
// default pair; overridden by the branches below
aq = MassFractionIn;
ap = MassFractionOut;
/* link is inside membrane */
if (distanceLocal > 0.0){
// in/out roles are swapped relative to the default when the potential is below -Threshold
if (membranePotential < Threshold*(-1.0)){
ap = MassFractionIn;
aq = MassFractionOut;
}
else {
ap = ThresholdMassFractionIn;
aq = ThresholdMassFractionOut;
}
}
// distanceLocal <= 0: switch to the threshold pair only above +Threshold
else if (membranePotential > Threshold){
aq = ThresholdMassFractionIn;
ap = ThresholdMassFractionOut;
}
}
// store the pair for this entry (1.0, 1.0 when no membrane rule applied)
coef[2*idx]=aq;
coef[2*idx+1]=ap;
}
}
}
__global__ void dvc_ScaLBL_D3Q7_Membrane_Unpack(int q,
        int *d3q7_recvlist, double *recvbuf, int count,
        double *dist, int N, double *coef) {
    //....................................................................................
    // Blend received values into distribution q using per-entry coefficients:
    //     dist[q*N + n] = (1 - aq) * dist[q*N + n] + ap * recvbuf[idx]
    // with n = d3q7_recvlist[idx], aq = coef[2*idx], ap = coef[2*idx+1].
    // Entries whose site index n is negative are left untouched.
    //....................................................................................
    // number of entries each thread handles
    const int S = count/NBLOCKS/NTHREADS + 1;
    for (int s=0; s<S; s++){
        const int idx = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
        if (idx >= count)
            continue;

        const int n = d3q7_recvlist[idx];
        if (n < 0)
            continue;

        // mass transfer coefficients for this entry
        const double aq = coef[2*idx];
        const double ap = coef[2*idx+1];
        const double fq = dist[q * N + n];   // value currently stored locally
        const double fp = recvbuf[idx];      // value received from the neighbor
        const double fqq = (1-aq)*fq+ap*fp;
        dist[q * N + n] = fqq;
    }
}
__global__ void dvc_ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef,
        double *dist, double *Den, int memLinks, int Np){
    // Exchange distribution values across each membrane link and update Den to match.
    // For link L with entries iq = membrane[2L], ip = membrane[2L+1] and
    // coefficients aq = coef[2L], ap = coef[2L+1]:
    //     fqq = (1-aq)*fq + ap*fp      fpp = (1-ap)*fp + aq*fq
    // The change in each distribution is added to Den at the owning site (index % Np).
    // NOTE(review): the Den updates are plain read-modify-write; if two links handled
    // by different threads can share a site this would race -- confirm with the caller.

    // number of links each thread handles
    const int S = memLinks/NBLOCKS/NTHREADS + 1;
    for (int s=0; s<S; s++){
        const int link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
        if (link >= memLinks)
            continue;

        const double aq = coef[2*link];
        const double ap = coef[2*link+1];
        const int iq = membrane[2*link];
        const int ip = membrane[2*link+1];
        const int nq = iq%Np;
        const int np = ip%Np;

        const double fq = dist[iq];
        const double fp = dist[ip];
        const double fqq = (1-aq)*fq+ap*fp;
        const double fpp = (1-ap)*fp+aq*fq;

        // read both densities before writing either, as in the original ordering
        double Cq = Den[nq];
        double Cp = Den[np];
        Cq += fqq - fq;
        Cp += fpp - fp;
        Den[nq] = Cq;
        Den[np] = Cp;

        dist[iq] = fqq;
        dist[ip] = fpp;
    }
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
int n,nread;
@ -106,6 +306,7 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, doub
double Ex,Ey,Ez;//electrical field
double flux_diffusive_x,flux_diffusive_y,flux_diffusive_z;
double f0,f1,f2,f3,f4,f5,f6;
double X,Y,Z,factor_x,factor_y,factor_z;
int nr1,nr2,nr3,nr4,nr5,nr6;
int S = Np/NBLOCKS/NTHREADS + 1;
@ -114,80 +315,96 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, doub
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
Ci=Den[n];
Ex=ElectricField[n+0*Np];
Ey=ElectricField[n+1*Np];
Ez=ElectricField[n+2*Np];
ux=Velocity[n+0*Np];
uy=Velocity[n+1*Np];
uz=Velocity[n+2*Np];
uEPx=zi*Di/Vt*Ex;
uEPy=zi*Di/Vt*Ey;
uEPz=zi*Di/Vt*Ez;
//Load data
Ex = ElectricField[n + 0 * Np];
Ey = ElectricField[n + 1 * Np];
Ez = ElectricField[n + 2 * Np];
ux = Velocity[n + 0 * Np];
uy = Velocity[n + 1 * Np];
uz = Velocity[n + 2 * Np];
uEPx = zi * Di / Vt * Ex;
uEPy = zi * Di / Vt * Ey;
uEPz = zi * Di / Vt * Ez;
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
// q=2
nr2 = neighborList[n+Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n+2*Np]; // neighbor 4
f3 = dist[nr3];
// q=4
nr4 = neighborList[n+3*Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n+4*Np];
f5 = dist[nr5];
// q=6
nr6 = neighborList[n+5*Np];
f6 = dist[nr6];
// compute diffusive flux
flux_diffusive_x = (1.0-0.5*rlx)*((f1-f2)-ux*Ci);
flux_diffusive_y = (1.0-0.5*rlx)*((f3-f4)-uy*Ci);
flux_diffusive_z = (1.0-0.5*rlx)*((f5-f6)-uz*Ci);
FluxDiffusive[n+0*Np] = flux_diffusive_x;
FluxDiffusive[n+1*Np] = flux_diffusive_y;
FluxDiffusive[n+2*Np] = flux_diffusive_z;
FluxAdvective[n+0*Np] = ux*Ci;
FluxAdvective[n+1*Np] = uy*Ci;
FluxAdvective[n+2*Np] = uz*Ci;
FluxElectrical[n+0*Np] = uEPx*Ci;
FluxElectrical[n+1*Np] = uEPy*Ci;
FluxElectrical[n+2*Np] = uEPz*Ci;
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
// q=2
nr2 = neighborList[n + Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n + 2 * Np]; // neighbor 4
f3 = dist[nr3];
// q=4
nr4 = neighborList[n + 3 * Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n + 4 * Np];
f5 = dist[nr5];
// q=6
nr6 = neighborList[n + 5 * Np];
f6 = dist[nr6];
// q=0
dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci;
//dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// compute diffusive flux
Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
FluxDiffusive[n + 0 * Np] = flux_diffusive_x;
FluxDiffusive[n + 1 * Np] = flux_diffusive_y;
FluxDiffusive[n + 2 * Np] = flux_diffusive_z;
FluxAdvective[n + 0 * Np] = ux * Ci;
FluxAdvective[n + 1 * Np] = uy * Ci;
FluxAdvective[n + 2 * Np] = uz * Ci;
FluxElectrical[n + 0 * Np] = uEPx * Ci;
FluxElectrical[n + 1 * Np] = uEPy * Ci;
FluxElectrical[n + 2 * Np] = uEPz * Ci;
Den[n] = Ci;
// q = 1
dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));
//dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
/* use logistic function to prevent negative distributions*/
X = 4.0 * (ux + uEPx);
Y = 4.0 * (uy + uEPy);
Z = 4.0 * (uz + uEPz);
factor_x = X / sqrt(1 + X*X);
factor_y = Y / sqrt(1 + Y*Y);
factor_z = Z / sqrt(1 + Z*Z);
// q=2
dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));
//dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q=0
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
// q = 3
dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));
//dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 1
dist[nr2] =
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
//f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
// q = 4
dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));
//dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 5
dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));
//dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q=2
dist[nr1] =
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
//f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
// q = 3
dist[nr4] =
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y );
//f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
// q = 4
dist[nr3] =
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
//f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
// q = 5
dist[nr6] =
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
//f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
// q = 6
dist[nr5] =
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
// q = 6
dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));
//dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
}
}
}
@ -201,6 +418,7 @@ __global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *F
double Ex,Ey,Ez;//electrical field
double flux_diffusive_x,flux_diffusive_y,flux_diffusive_z;
double f0,f1,f2,f3,f4,f5,f6;
double X,Y,Z,factor_x,factor_y,factor_z;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
@ -208,67 +426,83 @@ __global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *F
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
Ci=Den[n];
Ex=ElectricField[n+0*Np];
Ey=ElectricField[n+1*Np];
Ez=ElectricField[n+2*Np];
ux=Velocity[n+0*Np];
uy=Velocity[n+1*Np];
uz=Velocity[n+2*Np];
uEPx=zi*Di/Vt*Ex;
uEPy=zi*Di/Vt*Ey;
uEPz=zi*Di/Vt*Ez;
//Load data
//Ci = Den[n];
Ex = ElectricField[n + 0 * Np];
Ey = ElectricField[n + 1 * Np];
Ez = ElectricField[n + 2 * Np];
ux = Velocity[n + 0 * Np];
uy = Velocity[n + 1 * Np];
uz = Velocity[n + 2 * Np];
uEPx = zi * Di / Vt * Ex;
uEPy = zi * Di / Vt * Ey;
uEPz = zi * Di / Vt * Ez;
f0 = dist[n];
f1 = dist[2*Np+n];
f2 = dist[1*Np+n];
f3 = dist[4*Np+n];
f4 = dist[3*Np+n];
f5 = dist[6*Np+n];
f6 = dist[5*Np+n];
// compute diffusive flux
flux_diffusive_x = (1.0-0.5*rlx)*((f1-f2)-ux*Ci);
flux_diffusive_y = (1.0-0.5*rlx)*((f3-f4)-uy*Ci);
flux_diffusive_z = (1.0-0.5*rlx)*((f5-f6)-uz*Ci);
FluxDiffusive[n+0*Np] = flux_diffusive_x;
FluxDiffusive[n+1*Np] = flux_diffusive_y;
FluxDiffusive[n+2*Np] = flux_diffusive_z;
FluxAdvective[n+0*Np] = ux*Ci;
FluxAdvective[n+1*Np] = uy*Ci;
FluxAdvective[n+2*Np] = uz*Ci;
FluxElectrical[n+0*Np] = uEPx*Ci;
FluxElectrical[n+1*Np] = uEPy*Ci;
FluxElectrical[n+2*Np] = uEPz*Ci;
f0 = dist[n];
f1 = dist[2 * Np + n];
f2 = dist[1 * Np + n];
f3 = dist[4 * Np + n];
f4 = dist[3 * Np + n];
f5 = dist[6 * Np + n];
f6 = dist[5 * Np + n];
// q=0
dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci;
//dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci*(1.0 - 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// compute diffusive flux
Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
FluxDiffusive[n + 0 * Np] = flux_diffusive_x;
FluxDiffusive[n + 1 * Np] = flux_diffusive_y;
FluxDiffusive[n + 2 * Np] = flux_diffusive_z;
FluxAdvective[n + 0 * Np] = ux * Ci;
FluxAdvective[n + 1 * Np] = uy * Ci;
FluxAdvective[n + 2 * Np] = uz * Ci;
FluxElectrical[n + 0 * Np] = uEPx * Ci;
FluxElectrical[n + 1 * Np] = uEPy * Ci;
FluxElectrical[n + 2 * Np] = uEPz * Ci;
Den[n] = Ci;
/* use logistic function to prevent negative distributions*/
X = 4.0 * (ux + uEPx);
Y = 4.0 * (uy + uEPy);
Z = 4.0 * (uz + uEPz);
factor_x = X / sqrt(1 + X*X);
factor_y = Y / sqrt(1 + Y*Y);
factor_z = Z / sqrt(1 + Z*Z);
// q = 1
dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));
//dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q=0
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
// q=2
dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));
//dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx)+8.0*(ux+uEPx)*(ux+uEPx)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 1
dist[1 * Np + n] =
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
//f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
// q = 3
dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));
//dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q=2
dist[2 * Np + n] =
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
//f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
// q = 4
dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));
//dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy)+8.0*(uy+uEPy)*(uy+uEPy)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 3
dist[3 * Np + n] =
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y);
//f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
// q = 5
dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));
//dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 4
dist[4 * Np + n] =
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
//f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
// q = 6
dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));
//dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz)+8.0*(uz+uEPz)*(uz+uEPz)- 2.0*((ux+uEPx)*(ux+uEPx) + (uy+uEPy)*(uy+uEPy) + (uz+uEPz)*(uz+uEPz)));
// q = 5
dist[5 * Np + n] =
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
//f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
// q = 6
dist[6 * Np + n] =
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
//f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
}
}
}
@ -314,7 +548,7 @@ __global__ void dvc_ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, in
}
}
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, double IonValence, int ion_component, int start, int finish, int Np){
int n;
double Ci;//ion concentration of species i
@ -327,13 +561,278 @@ __global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDe
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
Ci = Den[n+ion_component*Np];
CD = ChargeDensity[n];
if (ion_component == 0) CD=0.0;
CD_tmp = F*IonValence*Ci;
ChargeDensity[n] = CD*(ion_component>0) + CD_tmp;
ChargeDensity[n] = CD + CD_tmp;
// Ci = Den[n+ion_component*Np];
// CD = ChargeDensity[n];
// CD_tmp = F*IonValence*Ci;
// ChargeDensity[n] = CD*(ion_component>0) + CD_tmp;
}
}
}
// Ion transport update for sites n in [start, finish), "v0" variant.
// For each site it:
//   - takes the concentration Ci from Den[n] (it is NOT recomputed from the
//     distributions; the re-summation is commented out below),
//   - forms uEP = zi*Di/Vt * E from ElectricField,
//   - writes FluxDiffusive, FluxAdvective and FluxElectrical (three components,
//     stored with stride Np),
//   - relaxes the seven distributions with rate rlx toward weights 0.25 (q=0)
//     and 0.125*(1 +/- 4*(u+uEP)) (q=1..6).
// Distributions 1..6 are read through neighborList and each result is written to
// the location its opposite direction was read from (f1 is read from nr1 and the
// q=1 result is stored at nr2, and so on), so all reads must precede the writes.
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_v0(int *neighborList, double *dist,
double *Den, double *FluxDiffusive,
double *FluxAdvective,
double *FluxElectrical, double *Velocity,
double *ElectricField, double Di, int zi,
double rlx, double Vt, int start,
int finish, int Np) {
int n;
double Ci;
double ux, uy, uz;
double uEPx, uEPy, uEPz; //electrochemical induced velocity
double Ex, Ey, Ez; //electrical field
double flux_diffusive_x, flux_diffusive_y, flux_diffusive_z;
double f0, f1, f2, f3, f4, f5, f6;
//double X,Y,Z,factor_x, factor_y, factor_z;
int nr1, nr2, nr3, nr4, nr5, nr6;
// number of sites each thread handles
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
Ci = Den[n];
Ex = ElectricField[n + 0 * Np];
Ey = ElectricField[n + 1 * Np];
Ez = ElectricField[n + 2 * Np];
ux = Velocity[n + 0 * Np];
uy = Velocity[n + 1 * Np];
uz = Velocity[n + 2 * Np];
// velocity induced by the electric field: zi*Di/Vt * E
uEPx = zi * Di / Vt * Ex;
uEPy = zi * Di / Vt * Ey;
uEPz = zi * Di / Vt * Ez;
// q=0
f0 = dist[n];
// q=1 (read location comes from neighborList entry 0)
nr1 = neighborList[n];
f1 = dist[nr1];
// q=2 (neighborList entry 1)
nr2 = neighborList[n + Np];
f2 = dist[nr2];
// q=3 (neighborList entry 2)
nr3 = neighborList[n + 2 * Np];
f3 = dist[nr3];
// q=4 (neighborList entry 3)
nr4 = neighborList[n + 3 * Np];
f4 = dist[nr4];
// q=5 (neighborList entry 4)
nr5 = neighborList[n + 4 * Np];
f5 = dist[nr5];
// q=6 (neighborList entry 5)
nr6 = neighborList[n + 5 * Np];
f6 = dist[nr6];
// compute diffusive flux
//Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
FluxDiffusive[n + 0 * Np] = flux_diffusive_x;
FluxDiffusive[n + 1 * Np] = flux_diffusive_y;
FluxDiffusive[n + 2 * Np] = flux_diffusive_z;
FluxAdvective[n + 0 * Np] = ux * Ci;
FluxAdvective[n + 1 * Np] = uy * Ci;
FluxAdvective[n + 2 * Np] = uz * Ci;
FluxElectrical[n + 0 * Np] = uEPx * Ci;
FluxElectrical[n + 1 * Np] = uEPy * Ci;
FluxElectrical[n + 2 * Np] = uEPz * Ci;
//Den[n] = Ci;
/* disabled in this variant: bounded factor X/sqrt(1+X*X) in place of the linear term */
//X = 4.0 * (ux + uEPx);
//Y = 4.0 * (uy + uEPy);
//Z = 4.0 * (uz + uEPz);
//factor_x = X / sqrt(1 + X*X);
//factor_y = Y / sqrt(1 + Y*Y);
//factor_z = Z / sqrt(1 + Z*Z);
// q=0
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
// q = 1 (stored where f2 was read)
dist[nr2] =
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
// f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
// q=2 (stored where f1 was read)
dist[nr1] =
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
// f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
// q = 3 (stored where f4 was read)
dist[nr4] =
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
// f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y );
// q = 4 (stored where f3 was read)
dist[nr3] =
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
// f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
// q = 5 (stored where f6 was read)
dist[nr6] =
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
// f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
// q = 6 (stored where f5 was read)
dist[nr5] =
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
// f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
}
}
}
// Ion transport update for sites n in [start, finish), "v0" variant without a
// neighbor list. For each site it:
//   - takes the concentration Ci from Den[n] (it is NOT recomputed from the
//     distributions; the re-summation is commented out below),
//   - forms uEP = zi*Di/Vt * E from ElectricField,
//   - writes FluxDiffusive, FluxAdvective and FluxElectrical (three components,
//     stored with stride Np),
//   - relaxes the seven distributions with rate rlx toward weights 0.25 (q=0)
//     and 0.125*(1 +/- 4*(u+uEP)) (q=1..6).
// Direction pairs are stored swapped on input: f1 is read from slot 2 and f2
// from slot 1 (likewise 3/4 and 5/6), while result q is written to slot q.
// All reads therefore have to happen before any of the writes.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_v0(
double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective,
double *FluxElectrical, double *Velocity, double *ElectricField, double Di,
int zi, double rlx, double Vt, int start, int finish, int Np) {
int n;
double Ci;
double ux, uy, uz;
double uEPx, uEPy, uEPz; //electrochemical induced velocity
double Ex, Ey, Ez; //electrical field
double flux_diffusive_x, flux_diffusive_y, flux_diffusive_z;
double f0, f1, f2, f3, f4, f5, f6;
//double X,Y,Z, factor_x, factor_y, factor_z;
// number of sites each thread handles
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
Ci = Den[n];
Ex = ElectricField[n + 0 * Np];
Ey = ElectricField[n + 1 * Np];
Ez = ElectricField[n + 2 * Np];
ux = Velocity[n + 0 * Np];
uy = Velocity[n + 1 * Np];
uz = Velocity[n + 2 * Np];
// velocity induced by the electric field: zi*Di/Vt * E
uEPx = zi * Di / Vt * Ex;
uEPy = zi * Di / Vt * Ey;
uEPz = zi * Di / Vt * Ez;
// read all seven distributions; each pair (1,2), (3,4), (5,6) is read from swapped slots
f0 = dist[n];
f1 = dist[2 * Np + n];
f2 = dist[1 * Np + n];
f3 = dist[4 * Np + n];
f4 = dist[3 * Np + n];
f5 = dist[6 * Np + n];
f6 = dist[5 * Np + n];
// compute diffusive flux
//Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
FluxDiffusive[n + 0 * Np] = flux_diffusive_x;
FluxDiffusive[n + 1 * Np] = flux_diffusive_y;
FluxDiffusive[n + 2 * Np] = flux_diffusive_z;
FluxAdvective[n + 0 * Np] = ux * Ci;
FluxAdvective[n + 1 * Np] = uy * Ci;
FluxAdvective[n + 2 * Np] = uz * Ci;
FluxElectrical[n + 0 * Np] = uEPx * Ci;
FluxElectrical[n + 1 * Np] = uEPy * Ci;
FluxElectrical[n + 2 * Np] = uEPz * Ci;
//Den[n] = Ci;
/* disabled in this variant: bounded factor X/sqrt(1+X*X) in place of the linear term */
//X = 4.0 * (ux + uEPx);
//Y = 4.0 * (uy + uEPy);
//Z = 4.0 * (uz + uEPz);
//factor_x = X / sqrt(1 + X*X);
//factor_y = Y / sqrt(1 + Y*Y);
//factor_z = Z / sqrt(1 + Z*Z);
// q=0
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
// q = 1
dist[1 * Np + n] =
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
// f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
// q=2
dist[2 * Np + n] =
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
// f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
// q = 3
dist[3 * Np + n] =
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
// f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y);
// q = 4
dist[4 * Np + n] =
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
// f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
// q = 5
dist[5 * Np + n] =
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
// f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
// q = 6
dist[6 * Np + n] =
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
// f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
}
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_v0(
double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective,
double *FluxElectrical, double *Velocity, double *ElectricField, double Di,
int zi, double rlx, double Vt, int start, int finish, int Np) {
dvc_ScaLBL_D3Q7_AAeven_Ion_v0<<<NBLOCKS,NTHREADS >>>(dist,
Den, FluxDiffusive, FluxAdvective,
FluxElectrical, Velocity,
ElectricField, Di, zi,
rlx, Vt, start, finish, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("cuda error in dvc_ScaLBL_D3Q7_AAeven_Ion_v0: %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_v0(int *neighborList, double *dist,
double *Den, double *FluxDiffusive,
double *FluxAdvective,
double *FluxElectrical, double *Velocity,
double *ElectricField, double Di, int zi,
double rlx, double Vt, int start,
int finish, int Np) {
dvc_ScaLBL_D3Q7_AAodd_Ion_v0<<<NBLOCKS,NTHREADS >>>(neighborList, dist,
Den, FluxDiffusive, FluxAdvective,
FluxElectrical, Velocity,
ElectricField, Di, zi,
rlx, Vt, start,
finish, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("cuda error in dvc_ScaLBL_D3Q7_AAodd_Ion_v0: %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
@ -408,7 +907,7 @@ extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np)
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, double IonValence, int ion_component, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<<NBLOCKS,NTHREADS >>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np);
@ -419,3 +918,65 @@ extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int memLinks, int Nx, int Ny, int Nz, int Np){
dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef<<<NBLOCKS,NTHREADS >>>(membrane, Map, Distance, Psi, coef,
Threshold, MassFractionIn, MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut,
memLinks, Nx, Ny, Nz, Np);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef: %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
const int Cqx, const int Cqy, int const Cqz,
int *Map, double *Distance, double *Psi, double Threshold,
double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
const int N, const int Nx, const int Ny, const int Nz) {
int GRID = count / NTHREADS + 1;
dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo<<<GRID,NTHREADS >>>(
Cqx, Cqy, Cqz, Map, Distance, Psi, Threshold,
MassFractionIn, MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut,
d3q7_recvlist, d3q7_linkList, coef, start, nlinks, count, N, Nx, Ny, Nz);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo: %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q,
int *d3q7_recvlist, double *recvbuf, int count,
double *dist, int N, double *coef){
int GRID = count / NTHREADS + 1;
dvc_ScaLBL_D3Q7_Membrane_Unpack<<<GRID,NTHREADS >>>(q, d3q7_recvlist, recvbuf,count,
dist, N, coef);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_Unpack: %s \n",cudaGetErrorString(err));
}
}
// Host entry point for dvc_ScaLBL_D3Q7_Membrane_IonTransport.
// Uses the fixed NBLOCKS x NTHREADS launch configuration, forwards all
// arguments unchanged and prints a message on a reported CUDA error.
extern "C" void ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef,
                                                  double *dist, double *Den,
                                                  int memLinks, int Np) {
    dvc_ScaLBL_D3Q7_Membrane_IonTransport<<<NBLOCKS, NTHREADS>>>(
        membrane, coef, dist, Den, memLinks, Np);

    const cudaError_t launchStatus = cudaGetLastError();
    if (launchStatus != cudaSuccess) {
        printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_IonTransport: %s \n",
               cudaGetErrorString(launchStatus));
    }
}

View File

@ -4,8 +4,8 @@
//*************************************************************************
#include <cuda.h>
#define NBLOCKS 560
#define NTHREADS 128
#define NBLOCKS 1024
#define NTHREADS 512
__global__ void INITIALIZE(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
{

View File

@ -271,6 +271,413 @@ __global__ void dvc_ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Ps
}
}
// Computes the electric potential for the sites n in [start, finish) from the
// 19 distributions stored at dist[q*Np + n], q = 0..18, and writes
//     Psi[Map[n]] = (sum over q of dist[q*Np + n]) - 0.5 * rho_e
// where rho_e = Den_charge[n] / epsilon_LB, or 0 when UseSlippingVelBC is set.
//
// Parameters:
//   Map              site index n -> index into Psi
//   dist             distributions, read only here
//   Den_charge       per-site charge density, read unless UseSlippingVelBC
//   Psi              output potential
//   epsilon_LB       divisor applied to Den_charge
//   UseSlippingVelBC when true the charge term is dropped (rho_e = 0)
//   start, finish    half-open range of sites to process
//   Np               stride between consecutive q-slices of dist
//
// Sites are divided among the launched threads with a grid-stride loop, so
// each site is handled exactly once for any 1-D launch configuration, rather
// than having every thread walk the whole range and repeat identical stores.
__global__ void dvc_ScaLBL_D3Q19_AAeven_Poisson_ElectricPotential(
    int *Map, double *dist, double *Den_charge, double *Psi, double epsilon_LB,
    bool UseSlippingVelBC, int start, int finish, int Np) {
    const int firstSite = start + static_cast<int>(blockIdx.x * blockDim.x + threadIdx.x);
    const int stride = static_cast<int>(blockDim.x * gridDim.x);

    for (int n = firstSite; n < finish; n += stride) {
        const double rho_e = UseSlippingVelBC ? 0.0 : Den_charge[n] / epsilon_LB;

        // Accumulate in increasing q: dist[n], dist[Np+n], dist[2*Np+n], ...
        // This is the same left-to-right order as the explicit 19-term sum it
        // replaces, so the floating-point result is unchanged.
        double psi = dist[n];
        for (int q = 1; q < 19; q++) {
            psi += dist[q * Np + n];
        }

        Psi[Map[n]] = psi - 0.5 * rho_e;
    }
}
// D3Q19 lattice-Boltzmann Poisson update in which the distributions of each
// site are gathered through neighborList (the "AAodd" variant, by its name).
// For every site n in [start, finish) assigned to this thread the kernel:
//   1. reads f0 from dist[n] and f1..f18 from dist[neighborList[n + (q-1)*Np]];
//   2. computes psi from f0, the sum of f1..f18, rlx = 1/tau and rho_e, and
//      stores it in Psi[Map[n]];
//   3. computes Ex, Ey, Ez from differences of the f's and stores them in
//      ElectricField[n], ElectricField[n + Np] and ElectricField[n + 2*Np];
//   4. overwrites all 19 slots it read from with W0*psi, W1*psi or W2*psi.
// rho_e is Den_charge[n] / epsilon_LB, or 0 when UseSlippingVelBC is set.
// All reads of dist happen before any write, so statement order matters.
__global__ void dvc_ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
double *dist, double *Den_charge,
double *Psi, double *ElectricField,
double tau, double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
int n;
double psi; //electric potential
double Ex, Ey, Ez; //electric field
double rho_e; //local charge density
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
f16, f17, f18;
// nrq holds the index into dist from which fq is gathered
int nr1, nr2, nr3, nr4, nr5, nr6, nr7, nr8, nr9, nr10, nr11, nr12, nr13,
nr14, nr15, nr16, nr17, nr18;
double error,sum_q;
double rlx = 1.0 / tau;
int idx;
// Weights applied to psi when refilling dist: W0 for q=0, W1 for q=1..6,
// W2 for q=7..18 (W0 + 6*W1 + 12*W2 = 1).
double W0 = 0.5;
double W1 = 1.0/24.0;
double W2 = 1.0/48.0;
// Each thread handles up to S sites; block b covers the S*blockDim.x
// consecutive sites beginning at start + S*b*blockDim.x.
// NOTE(review): S is derived from the NBLOCKS/NTHREADS macros, so covering
// all of [start, finish) presumably relies on launching with that same
// configuration -- confirm at the call site.
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
//and thus the net space charge density is zero.
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
// q=2
nr2 = neighborList[n + Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n + 2 * Np]; // neighbor 4
f3 = dist[nr3];
// q = 4
nr4 = neighborList[n + 3 * Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n + 4 * Np];
f5 = dist[nr5];
// q = 6
nr6 = neighborList[n + 5 * Np];
f6 = dist[nr6];
// q=7
nr7 = neighborList[n + 6 * Np];
f7 = dist[nr7];
// q = 8
nr8 = neighborList[n + 7 * Np];
f8 = dist[nr8];
// q=9
nr9 = neighborList[n + 8 * Np];
f9 = dist[nr9];
// q = 10
nr10 = neighborList[n + 9 * Np];
f10 = dist[nr10];
// q=11
nr11 = neighborList[n + 10 * Np];
f11 = dist[nr11];
// q=12
nr12 = neighborList[n + 11 * Np];
f12 = dist[nr12];
// q=13
nr13 = neighborList[n + 12 * Np];
f13 = dist[nr13];
// q=14
nr14 = neighborList[n + 13 * Np];
f14 = dist[nr14];
// q=15
nr15 = neighborList[n + 14 * Np];
f15 = dist[nr15];
// q=16
nr16 = neighborList[n + 15 * Np];
f16 = dist[nr16];
// q=17
//fq = dist[18*Np+n];
nr17 = neighborList[n + 16 * Np];
f17 = dist[nr17];
// q=18
nr18 = neighborList[n + 17 * Np];
f18 = dist[nr18];
sum_q = f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15+f16+f17+f18;
// NOTE(review): error is computed here but never read or stored in this kernel.
error = 8.0*(sum_q - f0) + rho_e;
psi = 2.0*(f0*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));
idx = Map[n];
Psi[idx] = psi;
Ex = (f1 - f2 + 0.5*(f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14))*4.0; //NOTE the unit of electric field here is V/lu
Ey = (f3 - f4 + 0.5*(f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18))*4.0;
Ez = (f5 - f6 + 0.5*(f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18))*4.0;
ElectricField[n + 0 * Np] = Ex;
ElectricField[n + 1 * Np] = Ey;
ElectricField[n + 2 * Np] = Ez;
// Refill the distributions. The value labelled q is written to the slot its
// paired direction was gathered from (q=1 -> nr2, q=2 -> nr1, ...).
// q = 0
dist[n] = W0*psi; //f0 * (1.0 - rlx) - (1.0-0.5*rlx)*W0*rho_e;
// q = 1
dist[nr2] = W1*psi; //f1 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 2
dist[nr1] = W1*psi; //f2 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 3
dist[nr4] = W1*psi; //f3 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 4
dist[nr3] = W1*psi; //f4 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 5
dist[nr6] = W1*psi; //f5 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 6
dist[nr5] = W1*psi; //f6 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
//........................................................................
// q = 7
dist[nr8] = W2*psi; //f7 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 8
dist[nr7] = W2*psi; //f8 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 9
dist[nr10] = W2*psi; //f9 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 10
dist[nr9] = W2*psi; //f10 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 11
dist[nr12] = W2*psi; //f11 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 12
dist[nr11] = W2*psi; //f12 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 13
dist[nr14] = W2*psi; //f13 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q= 14
dist[nr13] = W2*psi; //f14 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 15
dist[nr16] = W2*psi; //f15 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 16
dist[nr15] = W2*psi; //f16 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 17
dist[nr18] = W2*psi; //f17 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 18
dist[nr17] = W2*psi; //f18 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
}
}
}
// D3Q19 lattice-Boltzmann Poisson update that works on each site's own slots
// dist[q*Np + n] (the "AAeven" variant, by its name).
// For every site n in [start, finish) assigned to this thread the kernel:
//   1. reads f0..f18 from dist, taking each odd/even pair from swapped slots
//      (f1 from slot 2, f2 from slot 1, and so on);
//   2. computes Ex, Ey, Ez from differences of the f's and stores them in
//      ElectricField[n], ElectricField[n + Np] and ElectricField[n + 2*Np];
//   3. computes psi from f0, the sum of f1..f18, rlx = 1/tau and rho_e, and
//      stores it in Psi[Map[n]];
//   4. overwrites slots 0..18 with W0*psi, W1*psi or W2*psi.
// rho_e is Den_charge[n] / epsilon_LB, or 0 when UseSlippingVelBC is set.
// NOTE(review): the Error parameter is never read or written in this kernel.
__global__ void dvc_ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
double *Den_charge, double *Psi,
double *ElectricField, double *Error, double tau,
double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
int n;
double psi; //electric potential
double Ex, Ey, Ez; //electric field
double rho_e; //local charge density
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
f16, f17, f18;
double error,sum_q;
double rlx = 1.0 / tau;
int idx;
// Weights applied to psi when refilling dist: W0 for q=0, W1 for q=1..6,
// W2 for q=7..18 (W0 + 6*W1 + 12*W2 = 1).
double W0 = 0.5;
double W1 = 1.0/24.0;
double W2 = 1.0/48.0;
// Each thread handles up to S sites; block b covers the S*blockDim.x
// consecutive sites beginning at start + S*b*blockDim.x.
// NOTE(review): S is derived from the NBLOCKS/NTHREADS macros, so covering
// all of [start, finish) presumably relies on launching with that same
// configuration -- confirm at the call site.
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
//and thus the net space charge density is zero.
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
f0 = dist[n];
f1 = dist[2 * Np + n];
f2 = dist[1 * Np + n];
f3 = dist[4 * Np + n];
f4 = dist[3 * Np + n];
f5 = dist[6 * Np + n];
f6 = dist[5 * Np + n];
f7 = dist[8 * Np + n];
f8 = dist[7 * Np + n];
f9 = dist[10 * Np + n];
f10 = dist[9 * Np + n];
f11 = dist[12 * Np + n];
f12 = dist[11 * Np + n];
f13 = dist[14 * Np + n];
f14 = dist[13 * Np + n];
f15 = dist[16 * Np + n];
f16 = dist[15 * Np + n];
f17 = dist[18 * Np + n];
f18 = dist[17 * Np + n];
Ex = (f1 - f2 + 0.5*(f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14))*4.0; //NOTE the unit of electric field here is V/lu
Ey = (f3 - f4 + 0.5*(f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18))*4.0;
Ez = (f5 - f6 + 0.5*(f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18))*4.0;
ElectricField[n + 0 * Np] = Ex;
ElectricField[n + 1 * Np] = Ey;
ElectricField[n + 2 * Np] = Ez;
sum_q = f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15+f16+f17+f18;
// NOTE(review): error is computed here but never read or stored in this kernel.
error = 8.0*(sum_q - f0) + rho_e;
psi = 2.0*(f0*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));
idx = Map[n];
Psi[idx] = psi;
// Refill every slot of this site; all reads above are complete at this point.
// q = 0
dist[n] = W0*psi;//
// q = 1
dist[1 * Np + n] = W1*psi;//f1 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 2
dist[2 * Np + n] = W1*psi;//f2 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 3
dist[3 * Np + n] = W1*psi;//f3 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 4
dist[4 * Np + n] = W1*psi;//f4 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 5
dist[5 * Np + n] = W1*psi;//f5 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 6
dist[6 * Np + n] = W1*psi;//f6 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 7..18
dist[7 * Np + n] = W2*psi;//f7 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[8 * Np + n] = W2*psi;//f8* (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[9 * Np + n] = W2*psi;//f9 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[10 * Np + n] = W2*psi;//f10 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[11 * Np + n] = W2*psi;//f11 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[12 * Np + n] = W2*psi;//f12 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[13 * Np + n] = W2*psi;//f13 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[14 * Np + n] = W2*psi;//f14 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[15 * Np + n] = W2*psi;//f15 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[16 * Np + n] = W2*psi;//f16 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[17 * Np + n] = W2*psi;//f17 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[18 * Np + n] = W2*psi;//f18 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
//........................................................................
}
}
}
// Initializes the 19 Poisson distributions of every site n in [start, finish)
// handled by this thread from the potential at that site:
//     dist[q*Np + n] = W_q * Psi[Map[n]]
// with W_q = 1/2 for q = 0, 1/24 for q = 1..6 and 1/48 for q = 7..18.
__global__ void dvc_ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
                                              int start, int finish, int Np) {
    const double W0 = 0.5;
    const double W1 = 1.0/24.0;
    const double W2 = 1.0/48.0;

    // Each thread visits up to sitesPerThread sites; a block covers
    // sitesPerThread*blockDim.x consecutive sites.
    const int sitesPerThread = Np/NBLOCKS/NTHREADS + 1;

    for (int pass = 0; pass < sitesPerThread; pass++) {
        const int n = sitesPerThread*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (!(n < finish)) {
            continue;
        }

        const double psi = Psi[Map[n]];

        // q = 0
        dist[n] = W0 * psi;
        // q = 1..6
        for (int q = 1; q <= 6; q++) {
            dist[q * Np + n] = W1 * psi;
        }
        // q = 7..18
        for (int q = 7; q <= 18; q++) {
            dist[q * Np + n] = W2 * psi;
        }
    }
}
// Host entry point for dvc_ScaLBL_D3Q19_AAodd_Poisson.
// Launches the kernel on the fixed NBLOCKS x NTHREADS configuration with all
// arguments forwarded unchanged; prints a message on a reported CUDA error.
extern "C" void ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
                                          double *dist, double *Den_charge,
                                          double *Psi, double *ElectricField,
                                          double tau, double epsilon_LB, bool UseSlippingVelBC,
                                          int start, int finish, int Np) {
    dvc_ScaLBL_D3Q19_AAodd_Poisson<<<NBLOCKS, NTHREADS>>>(
        neighborList, Map, dist, Den_charge, Psi, ElectricField,
        tau, epsilon_LB, UseSlippingVelBC, start, finish, Np);

    const cudaError_t launchStatus = cudaGetLastError();
    if (launchStatus != cudaSuccess) {
        printf("CUDA error in dvc_ScaLBL_D3Q19_AAodd_Poisson: %s \n",
               cudaGetErrorString(launchStatus));
    }
}
// Host entry point for dvc_ScaLBL_D3Q19_AAeven_Poisson.
// Launches the kernel on the fixed NBLOCKS x NTHREADS configuration with all
// arguments forwarded unchanged; prints a message on a reported CUDA error.
extern "C" void ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
                                           double *Den_charge, double *Psi,
                                           double *ElectricField, double *Error, double tau,
                                           double epsilon_LB, bool UseSlippingVelBC,
                                           int start, int finish, int Np) {
    dvc_ScaLBL_D3Q19_AAeven_Poisson<<<NBLOCKS, NTHREADS>>>(
        Map, dist, Den_charge, Psi, ElectricField, Error,
        tau, epsilon_LB, UseSlippingVelBC, start, finish, Np);

    const cudaError_t launchStatus = cudaGetLastError();
    if (launchStatus != cudaSuccess) {
        printf("CUDA error in dvc_ScaLBL_D3Q19_AAeven_Poisson: %s \n",
               cudaGetErrorString(launchStatus));
    }
}
// Host entry point for dvc_ScaLBL_D3Q19_Poisson_Init.
// Launches the kernel on the fixed NBLOCKS x NTHREADS configuration with all
// arguments forwarded unchanged; prints a message on a reported CUDA error.
extern "C" void ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
                                         int start, int finish, int Np) {
    dvc_ScaLBL_D3Q19_Poisson_Init<<<NBLOCKS, NTHREADS>>>(
        Map, dist, Psi, start, finish, Np);

    const cudaError_t launchStatus = cudaGetLastError();
    if (launchStatus != cudaSuccess) {
        printf("CUDA error in ScaLBL_D3Q19_Poisson_Init: %s \n",
               cudaGetErrorString(launchStatus));
    }
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
//cudaProfilerStart();

View File

@ -0,0 +1,18 @@
import numpy as np
import matplotlib.pylab as plt
# Build a 40x40x40 uint8 label image: 1 everywhere, 2 inside a sphere of
# radius 12.5 voxels centred at (cx, cy, cz), and write it as a raw file.
D = np.ones((40, 40, 40), dtype="uint8")
cx = 20
cy = 20
cz = 20

# Open index grids that broadcast to the full (40, 40, 40) volume.
i, j, k = np.ogrid[0:40, 0:40, 0:40]

# Distance of every voxel from the centre. The y offset is measured from cy;
# measuring it from cx only gave the right answer while cx == cy.
dist = np.sqrt((i - cx) * (i - cx) + (j - cy) * (j - cy) + (k - cz) * (k - cz))
D[dist < 12.5] = 2

D.tofile("bubble_40x40x40.raw")

View File

@ -0,0 +1,77 @@
import numpy as np
import matplotlib.pylab as plt
# Build a 40x40x40 uint8 label image (1 outside, 2 inside a sphere of radius
# 15.5 voxels centred at (cx, cy, cz)) plus six double-precision initial
# concentration fields, and write each of them as a raw file.
D = np.ones((40, 40, 40), dtype="uint8")
cx = 20
cy = 20
cz = 20

# Open index grids that broadcast to the full (40, 40, 40) volume.
i, j, k = np.ogrid[0:40, 0:40, 0:40]

# Distance of every voxel from the centre. The y offset is measured from cy;
# measuring it from cx only gave the right answer while cx == cy.
dist = np.sqrt((i - cx) * (i - cx) + (j - cy) * (j - cy) + (k - cz) * (k - cz))
inside = dist < 15.5
D[inside] = 2

D.tofile("cell_40x40x40.raw")

# Concentrations outside the cell ...
C1 = np.full((40, 40, 40), 4.0e-6, dtype="double")    # K
C2 = np.full((40, 40, 40), 150.0e-6, dtype="double")  # Na
C3 = np.full((40, 40, 40), 116.0e-6, dtype="double")  # Cl
C4 = np.full((40, 40, 40), 29.0e-6, dtype="double")   # HCO3
# C5 would be 2.4e-6 for Ca (disabled)
C5 = np.zeros((40, 40, 40), dtype="double")
C6 = np.zeros((40, 40, 40), dtype="double")

# ... and inside the cell
C1[inside] = 145.0e-6
C2[inside] = 12.0e-6
C3[inside] = 4.0e-6
C4[inside] = 12.0e-6  # 12 mmol / L
# C5 would be 0.10e-6 inside, i.e. 100 nmol / L (disabled)

# Add up the net charge (cations minus anions) and average it per voxel.
# The values are accumulated one at a time in C order rather than with
# np.sum so the floating-point result matches a plain i, j, k loop exactly.
TotalCharge = 0
for net in (C1 + C2 - C3 - C4).ravel():
    TotalCharge += net
TotalCharge /= (40 * 40 * 40)

print("Total charge " + str(TotalCharge))

# Fill one uniform balancing field so the average charge becomes zero.
if TotalCharge < 0:
    # need more cation
    C5.fill(abs(TotalCharge))
    C6.fill(0.0)
else:
    # need more anion
    C5.fill(0.0)
    C6.fill(abs(TotalCharge))

C1.tofile("cell_concentration_K_40x40x40.raw")
C2.tofile("cell_concentration_Na_40x40x40.raw")
C3.tofile("cell_concentration_Cl_40x40x40.raw")
C4.tofile("cell_concentration_HCO3_40x40x40.raw")
C5.tofile("cell_concentration_cation_40x40x40.raw")
C6.tofile("cell_concentration_anion_40x40x40.raw")

75
example/Bubble/cell.db Normal file
View File

@ -0,0 +1,75 @@
MultiphysController {
timestepMax = 60
num_iter_Ion_List = 2
analysis_interval = 50
tolerance = 1.0e-9
visualization_interval = 100 // Frequency to write visualization data
analysis_interval = 50 // Frequency to perform analysis
}
Stokes {
tau = 1.0
F = 0, 0, 0
ElectricField = 0, 0, 0 //body electric field; user-input unit: [V/m]
nu_phys = 0.889e-6 //fluid kinematic viscosity; user-input unit: [m^2/sec]
}
Ions {
IonConcentrationFile = "cell_concentration_K_40x40x40.raw", "double", "cell_concentration_Na_40x40x40.raw", "double", "cell_concentration_Cl_40x40x40.raw", "double", "cell_concentration_HCO3_40x40x40.raw", "double", "cell_concentration_anion_40x40x40.raw", "double", "cell_concentration_cation_40x40x40.raw", "double"
temperature = 293.15 //unit [K]
number_ion_species = 6 //number of ions
tauList = 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
IonDiffusivityList = 1.0e-9, 1.0e-9, 1.0e-9, 1.0e-9, 1.0e-9, 1.0e-9 //user-input unit: [m^2/sec]
IonValenceList = 1, 1, -1, -1, 1, -1 //valence charge of ions; dimensionless; positive/negative integer
IonConcentrationList = 1.0e-6, 1.0e-6, 1.0e-6, 1.0e-6, 1.0e-6, 1.0e-6 //user-input unit: [mol/m^3]
BC_Solid = 0 //solid boundary condition; 0=non-flux BC; 1=surface ion concentration
//SolidLabels = 0 //solid labels for assigning solid boundary condition; ONLY for BC_Solid=1
//SolidValues = 1.0e-5 // user-input surface ion concentration unit: [mol/m^2]; ONLY for BC_Solid=1
FluidVelDummy = 0.0, 0.0, 1.0e-2 // dummy fluid velocity for debugging
}
Poisson {
epsilonR = 78.5 //fluid dielectric constant [dimensionless]
BC_Inlet = 0 // ->1: fixed electric potential; ->2: sine/cosine periodic electric potential
BC_Outlet = 0 // ->1: fixed electric potential; ->2: sine/cosine periodic electric potential
//--------------------------------------------------------------------------
//--------------------------------------------------------------------------
BC_Solid = 2 //solid boundary condition; 1=surface potential; 2=surface charge density
SolidLabels = 0 //solid labels for assigning solid boundary condition
SolidValues = 0 //if surface potential, unit=[V]; if surface charge density, unit=[C/m^2]
WriteLog = true //write convergence log for LB-Poisson solver
// ------------------------------- Testing Utilities ----------------------------------------
// ONLY for code debugging; the following settings test sine/cosine voltage BCs; disabled by default
TestPeriodic = false
TestPeriodicTime = 1.0 //unit:[sec]
TestPeriodicTimeConv = 0.01 //unit:[sec]
TestPeriodicSaveInterval = 0.2 //unit:[sec]
//------------------------------ advanced setting ------------------------------------
timestepMax = 100000 //max timestep for obtaining steady-state electrical potential
analysis_interval = 200 //timestep checking steady-state convergence
tolerance = 1.0e-6 //stopping criterion for steady-state solution
}
Domain {
Filename = "cell_40x40x40.raw"
nproc = 1, 1, 1 // Number of processors (Npx,Npy,Npz)
n = 40, 40, 40 // Size of local domain (Nx,Ny,Nz)
N = 40, 40, 40 // size of the input image
voxel_length = 1.0 //resolution; user-input unit: [um]
BC = 0 // Boundary condition type
ReadType = "8bit"
ReadValues = 0, 1, 2
WriteValues = 0, 1, 2
}
Analysis {
analysis_interval = 100
subphase_analysis_interval = 50 // Frequency to perform analysis
restart_interval = 5000 // Frequency to write restart data
restart_file = "Restart" // Filename to use for restart file (will append rank)
N_threads = 4 // Number of threads to use
load_balance = "independent" // Load balance method to use: "none", "default", "independent"
}
Visualization {
save_electric_potential = true
save_concentration = true
save_velocity = true
}
Membrane {
MembraneLabels = 2
}

View File

@ -0,0 +1,117 @@
MultiphysController {
timestepMax = 20000
visualization_interval = 1000 // Frequency to write visualization data
analysis_interval = 20 // Frequency to perform analysis
}
Stokes {
epsilonR = 78.5 //fluid dielectric constant [dimensionless]
tau = 1.0
F = 0, 0, 0
rho_phys = 998.2
nu_phys = 1.003e-6 //fluid kinematic viscosity; user-input unit: [m^2/sec]
BC = 3 // Pressure constant BC
din = 1.0 // Inlet pressure
dout = 1.0 // Outlet pressure
UseElectroosmoticVelocityBC = true
SolidLabels = 0, -1
ZetaPotentialSolidList = -0.005, -0.03 // unit [V]
}
Ions {
temperature = 310.15 //unit [K]
//number_ion_species = 5 //number of ions
//tauList = 1.0, 1.0, 1.0, 1.0, 1.0 // H+, OH-, Na+, Cl-, Fe3+
//IonDiffusivityList = 9.3e-9, 5.3e-9, 1.3e-9, 2.0e-9, 0.604e-9 //user-input unit: [m^2/sec]
//IonValenceList = 1, -1, 1, -1, 3 //valence charge of ions; dimensionless; positive/negative integer
//IonConcentrationList = 1.0e-4, 1.0e-4, 100, 100, 0 //user-input unit: [mol/m^3]
number_ion_species = 2 //number of ions
//IonConcentrationFile = "Pseudo3D_plane_membrane_concentration_Na_z192_xy64.raw", "double", "Pseudo3D_plane_membrane_concentration_Na_z192_xy64.raw", "double"
tauList = 1.0,1.0 // Na+, anion
IonDiffusivityList = 1e-9,1e-9 //user-input unit: [m^2/sec]
IonValenceList = 1,-1 //valence charge of ions; dimensionless; positive/negative integer
IonConcentrationList = 145e-3,145e-3 //user-input unit: [mol/m^3]
MembraneIonConcentrationList = 15e-3, 15e-3
BC_InletList = 0,0 //boundary condition for inlet; 0=periodic; 1=ion concentration; 2=ion flux
BC_OutletList = 0,0 //boundary condition for outlet; 0=periodic; 1=ion concentration; 2=ion flux
InletValueList = 15e-3, 15e-3 //if ion concentration unit=[mol/m^3]; if flux (inward) unit=[mol/m^2/sec]
OutletValueList = 145e-3, 145e-3 //if ion concentration unit=[mol/m^3]; if flux (inward) unit=[mol/m^2/sec]
BC_Solid = 0 //solid boundary condition; 0=non-flux BC; 1=surface ion concentration
//SolidLabels = 0 //solid labels for assigning solid boundary condition; ONLY for BC_Solid=1
//SolidValues = 1.0e-5 // user-input surface ion concentration unit: [mol/m^2]; ONLY for BC_Solid=1
FluidVelDummy = 0.0, 0.0, 0.0 // dummy fluid velocity for debugging
}
Poisson {
epsilonR = 80.4 //fluid dielectric constant [dimensionless]
tau = 4.5
BC_Inlet = 0 // ->1: fixed electric potential; ->2: sine/cosine periodic electric potential
BC_Outlet = 0 // ->1: fixed electric potential; ->2: sine/cosine periodic electric potential
InitialValueLabels = 1,2//a list of labels of fluid nodes
InitialValues = 60.6e-3, 0 //unit: [V]
//------- Boundary Voltage for BC = 1 (Inlet & Outlet) ---------------------
Vin = 60.6e-3 //ONLY for BC_Inlet = 1; electrical potential at inlet
Vout = 0 //ONLY for BC_Outlet = 1; electrical potential at outlet
//--------------------------------------------------------------------------
//------- Boundary Voltage for BC = 2 (Inlet & Outlet) ---------------------
//Vin0 = 0.01 //(ONLY for BC_Inlet = 2); unit:[Volt]
//freqIn = 1.0 //(ONLY for BC_Inlet = 2); unit:[Hz]
//t0_In = 0.0 //(ONLY for BC_Inlet = 2); unit:[sec]
//Vin_Type = 1 //(ONLY for BC_Inlet = 2); 1->sin(); 2->cos()
//Vout0 = 0.01 //(ONLY for BC_Outlet = 2); unit:[Volt]
//freqOut = 1.0 //(ONLY for BC_Outlet = 2); unit:[Hz]
//t0_Out = 0.0 //(ONLY for BC_Outlet = 2); unit:[sec]
//Vout_Type = 1 //(ONLY for BC_Outlet = 2); 1->sin(); 2->cos()
//--------------------------------------------------------------------------
BC_SolidList = 1 //solid boundary condition; 1=surface potential; 2=surface charge density
SolidLabels = 0 //solid labels for assigning solid boundary condition
SolidValues = -0.001 //if surface potential, unit=[V]; if surface charge density, unit=[C/m^2]
WriteLog = true //write convergence log for LB-Poisson solver
// ------------------------------- Testing Utilities ----------------------------------------
// ONLY for code debugging; the following settings test sine/cosine voltage BCs; disabled by default
TestPeriodic = false
TestPeriodicTime = 1.0 //unit:[sec]
TestPeriodicTimeConv = 0.01 //unit:[sec]
TestPeriodicSaveInterval = 0.2 //unit:[sec]
//------------------------------ advanced setting ------------------------------------
timestepMax = 10000 //max timestep for obtaining steady-state electrical potential
analysis_interval = 200 //timestep checking steady-state convergence
tolerance = 1.0e-6 //stopping criterion for steady-state solution
}
Membrane {
MembraneLabels = 1
VoltageThreshold = 100.0, 100.0
MassFractionIn = 1,0
MassFractionOut = 1,0
ThresholdMassFractionIn = 1, 0
ThresholdMassFractionOut = 1, 0
}
Domain {
Filename = "Pseudo3D_double_plane_membrane_z192_xy64_InsideLabel1_OutsideLabel2.raw"
nproc = 1, 1, 3 // Number of processors (Npx,Npy,Npz)
n = 64, 64, 64 // Size of local domain (Nx,Ny,Nz)
N = 64, 64, 192 // size of the input image
voxel_length = 0.01 //resolution; user-input unit: [um]
BC = 0 // Boundary condition type
ReadType = "8bit"
ReadValues = 2, 1
WriteValues = 2, 1
//InletLayers = 0, 0, 1
//OutletLayers = 0, 0, 1
//InletLayersPhase = 1
//OutletLayersPhase = 1
//checkerSize = 3 // size of the checker to use
}
Analysis {
}
Visualization {
save_electric_potential = true
save_concentration = true
#save_velocity = true
#save_pressure = true
save_8bit_raw = true
}

View File

@ -0,0 +1,90 @@
import numpy as np
import math
import matplotlib.pyplot as plt
# ---- physical constants ----
k_B_const = 1.380649e-23         # Boltzmann constant [J/K]
N_A_const = 6.02214076e23        # Avogadro constant [1/mol]
e_const = 1.602176634e-19        # elementary charge [C]
epsilon0_const = 8.85418782e-12  # vacuum permittivity [C/V/m]

# ---- material properties ----
epsilonr_water = 80.4
T = 310.15  # [K]

# ---- ion concentrations inside / outside the membrane, [mol/m^3] ----
C_Na_in = 15e-3
C_Na_out = 145e-3
C_K_in = 150e-3
C_K_out = 4e-3
C_Cl_in = 10e-3
C_Cl_out = 110e-3

# ---- Debye length ----
# For the definition of the Debye length in an electrolyte solution see
# Eq. (42) in Yoshida et al., "Coupled LB method for simulating electrokinetic
# flows", DOI:10.1016/j.cnsns.2014.03.005
thermal_term = epsilonr_water*epsilon0_const*k_B_const*T
prefactor = math.sqrt(thermal_term/2.0/N_A_const/e_const**2)
inverse_conc_in = np.array([1.0/C_Na_in, 1.0/C_K_in, 1.0/C_Cl_in])
inverse_conc_out = np.array([1.0/C_Na_out, 1.0/C_K_out, 1.0/C_Cl_out])
debye_length_in = prefactor*np.sqrt(inverse_conc_in)
debye_length_out = prefactor*np.sqrt(inverse_conc_out)
print("Debye length inside membrane in [m]")
print(debye_length_in)
print("Debye length outside membrane in [m]")
print(debye_length_out)

# ---- domain set-up ----
cube_length_z = 192
cube_length_xy = 64
# LBPM domain resolution
h = 0.01  # [um]
resolution_message = "Image resolution = %.6g [um] (= %.6g [m])" % (h, h*1e-6)
print(resolution_message)

# Label 2 everywhere, then label 1 for the slab spanning the middle half in z.
domain_shape = (cube_length_z, cube_length_xy, cube_length_xy)
domain = np.full(domain_shape, 2, dtype=np.int8)
zgrid, ygrid, xgrid = np.meshgrid(np.arange(cube_length_z),
                                  np.arange(cube_length_xy),
                                  np.arange(cube_length_xy),
                                  indexing='ij')
domain_centre = cube_length_xy/2
z_lower = cube_length_z/4
z_upper = cube_length_z*0.75
make_bubble = (zgrid >= z_lower) & (zgrid <= z_upper)
domain[make_bubble] = 1

# ---- save the label image ----
file_name = "Pseudo3D_double_plane_membrane_z192_xy64_InsideLabel1_OutsideLabel2.raw"
domain.tofile(file_name)
print("save file: " + file_name)

# ---- disabled: debug plot of the label image ----
#plt.figure(1)
#plt.pcolormesh(domain[:,int(domain_centre),:])
#plt.colorbar()
#plt.axis("equal")
#plt.show()

# ---- disabled: generate 3D initial ion concentration fields ----
#domain_Na = C_Na_out*np.ones_like(domain,dtype=np.float64)
#domain_Na[make_bubble] = C_Na_in
#domain_K = C_K_out*np.ones_like(domain,dtype=np.float64)
#domain_K[make_bubble] = C_K_in
#domain_Cl = C_Cl_out*np.ones_like(domain,dtype=np.float64)
#domain_Cl[make_bubble] = C_Cl_in
#
#domain_Na.tofile("Pseudo3D_plane_membrane_concentration_Na_z192_xy64.raw")
#domain_K.tofile("Pseudo3D_plane_membrane_concentration_K_z192_xy64.raw")
#domain_Cl.tofile("Pseudo3D_plane_membrane_concentration_Cl_z192_xy64.raw")

# ---- disabled: debug plots of the concentration fields ----
# NOTE(review): these lines use bubble_centre, which this script never
# defines; domain_centre is presumably what was intended -- confirm before
# re-enabling.
#plt.figure(2)
#plt.subplot(1,3,1)
#plt.title("Na concentration")
#plt.pcolormesh(domain_Na[:,int(bubble_centre),:])
#plt.colorbar()
#plt.axis("equal")
#plt.subplot(1,3,2)
#plt.title("K concentration")
#plt.pcolormesh(domain_K[:,int(bubble_centre),:])
#plt.colorbar()
#plt.axis("equal")
#plt.subplot(1,3,3)
#plt.title("Cl concentration")
#plt.pcolormesh(domain_Cl[:,int(bubble_centre),:])
#plt.colorbar()
#plt.axis("equal")
#plt.show()

View File

@ -0,0 +1,86 @@
MultiphysController {
timestepMax = 25000
num_iter_Ion_List = 4
analysis_interval = 100
tolerance = 1.0e-9
visualization_interval = 1000 // Frequency to write visualization data
}
Stokes {
tau = 1.0
F = 0, 0, 0
ElectricField = 0, 0, 0 //body electric field; user-input unit: [V/m]
nu_phys = 0.889e-6 //fluid kinematic viscosity; user-input unit: [m^2/sec]
}
Ions {
MembraneIonConcentrationList = 150.0e-3, 10.0e-3, 15.0e-3, 155.0e-3 //user-input unit: [mol/m^3]
temperature = 293.15 //unit [K]
number_ion_species = 4 //number of ions
tauList = 1.0, 1.0, 1.0, 1.0
IonDiffusivityList = 1.0e-9, 1.0e-9, 1.0e-9, 1.0e-9 //user-input unit: [m^2/sec]
IonValenceList = 1, -1, 1, -1 //valence charge of ions; dimensionless; positive/negative integer
IonConcentrationList = 4.0e-3, 20.0e-3, 16.0e-3, 0.0e-3 //user-input unit: [mol/m^3]
BC_Solid = 0 //solid boundary condition; 0=non-flux BC; 1=surface ion concentration
//SolidLabels = 0 //solid labels for assigning solid boundary condition; ONLY for BC_Solid=1
//SolidValues = 1.0e-5 // user-input surface ion concentration unit: [mol/m^2]; ONLY for BC_Solid=1
FluidVelDummy = 0.0, 0.0, 0.0 // dummy fluid velocity for debugging
BC_InletList = 0, 0, 0, 0
BC_OutletList = 0, 0, 0, 0
}
Poisson {
lattice_scheme = "D3Q19"
epsilonR = 78.5 //fluid dielectric constant [dimensionless]
BC_Inlet = 0 // ->1: fixed electric potential; ->2: sine/cosine periodic electric potential
BC_Outlet = 0 // ->1: fixed electric potential; ->2: sine/cosine periodic electric potential
//--------------------------------------------------------------------------
//--------------------------------------------------------------------------
BC_Solid = 2 //solid boundary condition; 1=surface potential; 2=surface charge density
SolidLabels = 0 //solid labels for assigning solid boundary condition
SolidValues = 0 //if surface potential, unit=[V]; if surface charge density, unit=[C/m^2]
WriteLog = true //write convergence log for LB-Poisson solver
// ------------------------------- Testing Utilities ----------------------------------------
// ONLY for code debugging; the following settings test sine/cosine voltage BCs; disabled by default
TestPeriodic = false
TestPeriodicTime = 1.0 //unit:[sec]
TestPeriodicTimeConv = 0.01 //unit:[sec]
TestPeriodicSaveInterval = 0.2 //unit:[sec]
//------------------------------ advanced setting ------------------------------------
timestepMax = 4000 //max timestep for obtaining steady-state electrical potential
analysis_interval = 25 //timestep checking steady-state convergence
tolerance = 1.0e-10 //stopping criterion for steady-state solution
InitialValueLabels = 1, 2
InitialValues = 0.0, 0.0
}
Domain {
Filename = "Bacterium.swc"
nproc = 2, 1, 1 // Number of processors (Npx,Npy,Npz)
n = 64, 64, 64 // Size of local domain (Nx,Ny,Nz)
N = 128, 64, 64 // size of the input image
voxel_length = 0.01 //resolution; user-input unit: [um]
BC = 0 // Boundary condition type
ReadType = "swc"
ReadValues = 0, 1, 2
WriteValues = 0, 1, 2
}
Analysis {
analysis_interval = 100
subphase_analysis_interval = 50 // Frequency to perform analysis
restart_interval = 5000 // Frequency to write restart data
restart_file = "Restart" // Filename to use for restart file (will append rank)
N_threads = 4 // Number of threads to use
load_balance = "independent" // Load balance method to use: "none", "default", "independent"
}
Visualization {
save_electric_potential = true
save_concentration = true
save_velocity = false
}
Membrane {
MembraneLabels = 2
VoltageThreshold = 0.0, 0.0, 0.0, 0.0
MassFractionIn = 1e-1, 1.0, 5e-3, 0.0
MassFractionOut = 1e-1, 1.0, 5e-3, 0.0
ThresholdMassFractionIn = 1e-1, 1.0, 5e-3, 0.0
ThresholdMassFractionOut = 1e-1, 1.0, 5e-3, 0.0
}

View File

@ -0,0 +1,8 @@
# id,type,x,y,z,r,pid
1 1 0.30 0.32 0.32 0.15 -1
2 1 0.35 0.32 0.32 0.16 1
3 1 0.43 0.32 0.32 0.17 2
4 1 0.60 0.32 0.32 0.18 3
5 1 0.77 0.32 0.32 0.17 4
6 1 0.85 0.32 0.32 0.16 5
7 1 0.90 0.32 0.32 0.15 6

View File

@ -0,0 +1,41 @@
import numpy as np
import matplotlib.pylab as plt
# Build a 64x64x64 uint8 label image (1 outside, 2 inside a sphere of the
# given radius centred in the domain) and matching Na / Cl concentration
# fields, and write each of them as a raw file.
Nx = 64
Ny = 64
Nz = 64
cx = Nx/2
cy = Ny/2
cz = Nz/2
radius = 12

D = np.ones((Nx, Ny, Nz), dtype="uint8")

# Open index grids that broadcast to the full (Nx, Ny, Nz) volume.
i, j, k = np.ogrid[0:Nx, 0:Ny, 0:Nz]

# Distance of every voxel from the centre. The y offset is measured from cy;
# measuring it from cx is only correct while Nx == Ny.
dist = np.sqrt((i - cx) * (i - cx) + (j - cy) * (j - cy) + (k - cz) * (k - cz))
inside = dist < radius
D[inside] = 2

D.tofile("cell_64x64x64.raw")

# Concentrations outside the cell ...
C1 = np.full((Nx, Ny, Nz), 125.0e-6, dtype="double")  # Na
C2 = np.full((Nx, Ny, Nz), 125.0e-6, dtype="double")  # Cl
# ... and inside the cell
C1[inside] = 110.0e-6
C2[inside] = 110.0e-6

C1.tofile("cell_concentration_Na_64x64x64.raw")
C2.tofile("cell_concentration_Cl_64x64x64.raw")

View File

@ -0,0 +1,80 @@
MultiphysController {
timestepMax = 2000
num_iter_Ion_List = 2
analysis_interval = 40
tolerance = 1.0e-9
visualization_interval = 40 // Frequency to write visualization data
}
Stokes {
tau = 1.0
F = 0, 0, 0
ElectricField = 0, 0, 0 //body electric field; user-input unit: [V/m]
nu_phys = 0.889e-6 //fluid kinematic viscosity; user-input unit: [m^2/sec]
}
Ions {
IonConcentrationFile = "cell_concentration_Na_64x64x64.raw", "double", "cell_concentration_Cl_64x64x64.raw", "double"
temperature = 293.15 //unit [K]
number_ion_species = 2 //number of ions
tauList = 1.0, 1.0
IonDiffusivityList = 1.0e-9, 1.0e-9 //user-input unit: [m^2/sec]
IonValenceList = 1, -1 //valence charge of ions; dimensionless; positive/negative integer
IonConcentrationList = 1.0e-6, 1.0e-6 //user-input unit: [mol/m^3]
BC_Solid = 0 //solid boundary condition; 0=non-flux BC; 1=surface ion concentration
//SolidLabels = 0 //solid labels for assigning solid boundary condition; ONLY for BC_Solid=1
//SolidValues = 1.0e-5 // user-input surface ion concentration unit: [mol/m^2]; ONLY for BC_Solid=1
FluidVelDummy = 0.0, 0.0, 0.0 // dummy fluid velocity for debugging
}
Poisson {
epsilonR = 78.5 //fluid dielectric constant [dimensionless]
BC_Inlet = 0 // ->1: fixed electric potential; ->2: sine/cosine periodic electric potential
BC_Outlet = 0 // ->1: fixed electric potential; ->2: sine/cosine periodic electric potential
//--------------------------------------------------------------------------
//--------------------------------------------------------------------------
BC_Solid = 2 //solid boundary condition; 1=surface potential; 2=surface charge density
SolidLabels = 0 //solid labels for assigning solid boundary condition
SolidValues = 0 //if surface potential, unit=[V]; if surface charge density, unit=[C/m^2]
WriteLog = true //write convergence log for LB-Poisson solver
// ------------------------------- Testing Utilities ----------------------------------------
// ONLY for code debugging; the following settings test sine/cosine voltage BCs; disabled by default
TestPeriodic = false
TestPeriodicTime = 1.0 //unit:[sec]
TestPeriodicTimeConv = 0.01 //unit:[sec]
TestPeriodicSaveInterval = 0.2 //unit:[sec]
//------------------------------ advanced setting ------------------------------------
timestepMax = 4000 //max timestep for obtaining steady-state electrical potential
analysis_interval = 25 //timestep checking steady-state convergence
tolerance = 1.0e-10 //stopping criterion for steady-state solution
}
Domain {
Filename = "cell_64x64x64.raw"
nproc = 1, 1, 1 // Number of processors (Npx,Npy,Npz)
n = 64, 64, 64 // Size of local domain (Nx,Ny,Nz)
N = 64, 64, 64 // size of the input image
voxel_length = 0.1 //resolution; user-input unit: [um]
BC = 0 // Boundary condition type
ReadType = "8bit"
ReadValues = 0, 1, 2
WriteValues = 0, 1, 2
}
Analysis {
analysis_interval = 100
subphase_analysis_interval = 50 // Frequency to perform analysis
restart_interval = 5000 // Frequency to write restart data
restart_file = "Restart" // Filename to use for restart file (will append rank)
N_threads = 4 // Number of threads to use
load_balance = "independent" // Load balance method to use: "none", "default", "independent"
}
Visualization {
save_electric_potential = true
save_concentration = true
save_velocity = true
}
Membrane {
MembraneLabels = 2
VoltageThreshold = 0.0, 0.0
MassFractionIn = 1e-2, 1e-8
MassFractionOut = 1e-2, 1e-8
ThresholdMassFractionIn = 1e-2, 1e-8
ThresholdMassFractionOut = 1e-2, 1e-8
}

View File

@ -0,0 +1,36 @@
#!/bin/bash
#SBATCH -A CSC380
#SBATCH -J Color-dense
#SBATCH -o %x-%j.out
#SBATCH -t 0:10:00
#SBATCH -p batch
#SBATCH -N 1
#SBATCH --exclusive
# Slurm job: runs lbpm_color_simulator on input.db with 8 tasks on one node,
# one GPU per task. Output goes to <job-name>-<job-id>.out.
# MODULE ENVIRONMENT
module load PrgEnv-amd
module load rocm/4.5.0
module load cray-mpich
module load cray-hdf5-parallel
#module load craype-accel-amd-gfx908
## These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
# NOTE(review): presumably enables GPU-aware MPI in Cray MPICH -- confirm against site docs
export MPICH_GPU_SUPPORT_ENABLED=1
#export MPL_MBX_SIZE=1024000000
# Put the Cray library directories ahead of anything already on the path
export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
# Directory holding the LBPM executables (user-specific install path)
export LBPM_BIN=/ccs/proj/csc380/mcclurej/crusher/LBPM/bin
echo "Running Color LBM"
# One CPU id per task (8 entries for the 8 tasks launched below)
MYCPUBIND="--cpu-bind=verbose,map_cpu:57,33,25,1,9,17,41,49"
srun --verbose -N1 -n8 --cpus-per-gpu=8 --gpus-per-task=1 --gpu-bind=closest ${MYCPUBIND} $LBPM_BIN/lbpm_color_simulator input.db
#srun --verbose -N1 -n2 --mem-per-gpu=8g --cpus-per-gpu=1 --gpus-per-node=2 --gpu-bind=closest $LBPM_BIN/lbpm_permeability_simulator input.db
exit;

View File

@ -0,0 +1,36 @@
#!/bin/bash
#SBATCH -A CSC380
#SBATCH -J MRT-a2
#SBATCH -o %x-%j.out
#SBATCH -t 0:10:00
#SBATCH -p batch
#SBATCH -N 1
#SBATCH --exclusive
# Slurm job: runs lbpm_permeability_simulator on input.db with 8 tasks on one
# node, one GPU per task. Output goes to <job-name>-<job-id>.out.
# MODULE ENVIRONMENT
module load PrgEnv-amd
module load rocm/4.5.0
module load cray-mpich
module load cray-hdf5-parallel
#module load craype-accel-amd-gfx908
## These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
# NOTE(review): presumably enables GPU-aware MPI in Cray MPICH -- confirm against site docs
export MPICH_GPU_SUPPORT_ENABLED=1
#export MPL_MBX_SIZE=1024000000
# Put the Cray library directories ahead of anything already on the path
export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
# Directory holding the LBPM executables (user-specific install path)
export LBPM_BIN=/ccs/proj/csc380/mcclurej/crusher/LBPM/bin
# Banner names the executable actually launched below (it previously said
# "Color LBM", which is a different simulator).
echo "Running MRT permeability simulator"
# One CPU id per task (8 entries for the 8 tasks launched below)
MYCPUBIND="--cpu-bind=verbose,map_cpu:57,33,25,1,9,17,41,49"
srun --verbose -N1 -n8 --cpus-per-gpu=8 --gpus-per-task=1 --gpu-bind=closest ${MYCPUBIND} $LBPM_BIN/lbpm_permeability_simulator input.db
#srun --verbose -N1 -n2 --mem-per-gpu=8g --cpus-per-gpu=1 --gpus-per-node=2 --gpu-bind=closest $LBPM_BIN/lbpm_permeability_simulator input.db
exit;

View File

@ -0,0 +1,69 @@
MRT {
timestepMax = 10000
analysis_interval = 20000
tau = 0.7
F = 0, 0, 5.0e-5
Restart = false
din = 1.0
dout = 1.0
flux = 0.0
}
Color {
tauA = 0.7;
tauB = 0.7;
rhoA = 1.0;
rhoB = 1.0;
alpha = 1e-2;
beta = 0.95;
F = 0, 0, 1.0e-5
Restart = false
flux = 0.0 // voxels per timestep
timestepMax = 10000
// rescale_force_after_timestep = 100000
ComponentLabels = 0, -1, -2
ComponentAffinity = -1.0, -1.0, -0.9
// protocol = "image sequence"
// capillary_number = 1e-5
}
Domain {
Filename = "a2_2048x2048x8192.raw"
nproc = 2, 2, 2 // Number of processors (Npx,Npy,Npz)
offset = 0, 0, 0
n = 382, 382, 382 // Size of local domain (Nx,Ny,Nz)
N = 2048, 2048, 1024 // size of the input image
voxel_length = 1.0 // voxel resolution; user-input unit: [um]
BC = 0 // Boundary condition type
//Sw = 0.2
ReadType = "8bit"
ReadValues = 0, 1, 2, -1, -2
WriteValues = 0, 1, 2, -1, -2
ComponentLabels = 0, -1, -2
InletLayers = 0, 0, 5
OutletLayers = 0, 0, 5
}
Analysis {
visualization_interval = 1000000
//morph_interval = 100000
//morph_delta = -0.08
analysis_interval = 20000 // Frequency to perform analysis
min_steady_timesteps = 15000000
max_steady_timesteps = 15000000
restart_interval = 500000 // Frequency to write restart data
restart_file = "Restart" // Filename to use for restart file (will append rank)
N_threads = 0 // Number of threads to use
load_balance = "default" // Load balance method to use: "none", "default", "independent"
}
Visualization {
save_8bit_raw = true
write_silo = true
}
FlowAdaptor {
}

View File

@ -0,0 +1,36 @@
#!/bin/bash
#SBATCH -A CSC380
#SBATCH -J MPI-multinode
#SBATCH -o %x-%j.out
#SBATCH -t 6:00:00
#SBATCH -p batch
#SBATCH -N 8
#SBATCH --exclusive
# Slurm job: runs the TestCommD3Q19 communication test on multinode.db with
# 8 tasks spread over 8 nodes (one task and one GPU per node).
# MODULE ENVIRONMENT
module load PrgEnv-amd
module load rocm/4.5.0
module load cray-mpich
module load cray-hdf5-parallel
#module load craype-accel-amd-gfx908
## These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
# NOTE(review): presumably enables GPU-aware MPI in Cray MPICH -- confirm against site docs
export MPICH_GPU_SUPPORT_ENABLED=1
#export MPL_MBX_SIZE=1024000000
# Put the Cray library directories ahead of anything already on the path
export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
# Directory holding the LBPM test executables (user-specific install path)
export LBPM_BIN=/ccs/proj/csc380/mcclurej/crusher/LBPM/tests
# Banner names the executable actually launched below (it previously said
# "Color LBM", which this job does not run).
echo "Running TestCommD3Q19 (multi-node)"
# Single CPU id: each node hosts exactly one task
MYCPUBIND="--cpu-bind=verbose,map_cpu:57"
srun --verbose -N8 -n8 --cpus-per-gpu=8 --gpus-per-task=1 --gpu-bind=closest ${MYCPUBIND} $LBPM_BIN/TestCommD3Q19 multinode.db
#srun --verbose -N1 -n2 --mem-per-gpu=8g --cpus-per-gpu=1 --gpus-per-node=2 --gpu-bind=closest $LBPM_BIN/lbpm_permeability_simulator input.db
exit;

View File

@ -0,0 +1,36 @@
#!/bin/bash
#SBATCH -A CSC380
#SBATCH -J MPI-singlenode
#SBATCH -o %x-%j.out
#SBATCH -t 0:10:00
#SBATCH -p batch
#SBATCH -N 1
#SBATCH --exclusive
# Slurm job: runs the TestCommD3Q19 communication test with 8 tasks on a
# single node, one GPU per task.
# MODULE ENVIRONMENT
module load PrgEnv-amd
module load rocm/4.5.0
module load cray-mpich
module load cray-hdf5-parallel
#module load craype-accel-amd-gfx908
## These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
# NOTE(review): presumably enables GPU-aware MPI in Cray MPICH -- confirm against site docs
export MPICH_GPU_SUPPORT_ENABLED=1
#export MPL_MBX_SIZE=1024000000
# Put the Cray library directories ahead of anything already on the path
export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
# Directory holding the LBPM test executables (user-specific install path)
export LBPM_BIN=/ccs/proj/csc380/mcclurej/crusher/LBPM/tests
# Banner names the executable actually launched below (it previously said
# "Color LBM", which this job does not run).
echo "Running TestCommD3Q19 (single node)"
# One CPU id per task (8 entries for the 8 tasks launched below)
MYCPUBIND="--cpu-bind=verbose,map_cpu:57,33,25,1,9,17,41,49"
# NOTE(review): the input is multinode.db even though this is the single-node
# job -- presumably the same 8-rank input is reused on purpose; confirm.
srun --verbose -N1 -n8 --cpus-per-gpu=8 --gpus-per-task=1 --gpu-bind=closest ${MYCPUBIND} $LBPM_BIN/TestCommD3Q19 multinode.db
#srun --verbose -N1 -n2 --mem-per-gpu=8g --cpus-per-gpu=1 --gpus-per-node=2 --gpu-bind=closest $LBPM_BIN/lbpm_permeability_simulator input.db
exit;

View File

@ -0,0 +1,9 @@
import numpy as np
# Edge length, in voxels, of the cubic test image; N**3 one-byte labels is 1 GiB.
N = 1024
# Uniformly random labels drawn from {1, 2} (randint's `high` bound is exclusive).
data = np.random.randint(low=1,high=3,size=(N,N,N),dtype=np.uint8)
# Raw, header-less dump; the name encodes the image dimensions.
data.tofile("dense_1024x1024x1024.raw")

View File

@ -0,0 +1,69 @@
MRT {
timestepMax = 100
analysis_interval = 20000
tau = 0.7
F = 0, 0, 5.0e-5
Restart = false
din = 1.0
dout = 1.0
flux = 0.0
}
Color {
tauA = 0.7;
tauB = 0.7;
rhoA = 1.0;
rhoB = 1.0;
alpha = 1e-2;
beta = 0.95;
F = 0, 0, 0.0
Restart = false
flux = 0.0 // voxels per timestep
timestepMax = 10
// rescale_force_after_timestep = 100000
ComponentLabels = 0, -1, -2
ComponentAffinity = -1.0, -1.0, -0.9
// protocol = "image sequence"
// capillary_number = 1e-5
}
Domain {
Filename = "dense_1024x1024x1024.raw"
nproc = 2, 2, 2 // Number of processors (Npx,Npy,Npz)
offset = 0, 0, 0
n = 222, 222, 222 // Size of local domain (Nx,Ny,Nz)
N = 1024, 1024, 1024 // size of the input image
voxel_length = 1.0 // voxel resolution; user-input unit: [um]
BC = 0 // Boundary condition type
//Sw = 0.2
ReadType = "8bit"
ReadValues = 0, 1, 2, -1, -2
WriteValues = 0, 1, 2, -1, -2
ComponentLabels = 0, -1, -2
InletLayers = 0, 0, 5
OutletLayers = 0, 0, 5
}
Analysis {
visualization_interval = 1000000
//morph_interval = 100000
//morph_delta = -0.08
analysis_interval = 20000 // Frequency to perform analysis
min_steady_timesteps = 15000000
max_steady_timesteps = 15000000
restart_interval = 500000 // Frequency to write restart data
restart_file = "Restart" // Filename to use for restart file (will append rank)
N_threads = 0 // Number of threads to use
load_balance = "default" // Load balance method to use: "none", "default", "independent"
}
Visualization {
save_8bit_raw = true
write_silo = true
}
FlowAdaptor {
}

View File

@ -0,0 +1,14 @@
#!/bin/bash
#SBATCH -A CSC380
#SBATCH -J sphere_test
#SBATCH -o %x-%j.out
#SBATCH -t 00:05:00
#SBATCH -p caar
#SBATCH -N 1
# Slurm job: runs GenerateSphereTest on input.db as a single task on one node.
module load rocm/4.2.0
# Directory holding the LBPM test executables (user-specific install path)
export LBPM_DIR=/ccs/proj/csc380/mcclurej/spock/install/lbpm/tests
srun -n1 --ntasks-per-node=1 $LBPM_DIR/GenerateSphereTest input.db

View File

@ -0,0 +1,17 @@
#!/bin/bash
#SBATCH -A CSC380
#SBATCH -J sphere_test
#SBATCH -o %x-%j.out
#SBATCH -t 00:05:00
#SBATCH -p caar
#SBATCH -N 1
# Slurm job: runs TestCommD3Q19 on spheres322.db as a single task with one GPU.
module load rocm/4.2.0
# Directory holding the LBPM test executables (user-specific install path)
export LBPM_DIR=/ccs/proj/csc380/mcclurej/spock/install/lbpm/tests
# NOTE(review): presumably selects the on-node single-copy transfer mechanism
# used by Cray MPICH -- confirm against the MPICH man page.
export MPICH_SMP_SINGLE_COPY_MODE=CMA
# Alternative launch (disabled): the color simulator on the same input.
#srun -n1 --ntasks-per-node=1 --accel-bind=g --gpus-per-task=1 $LBPM_DIR/lbpm_color_simulator spheres322.db
srun -n1 --ntasks-per-node=1 --accel-bind=g --gpus-per-task=1 $LBPM_DIR/TestCommD3Q19 spheres322.db

View File

@ -0,0 +1,32 @@
#!/bin/bash
#SBATCH -A CSC380
#SBATCH -J sphere_test
#SBATCH -o %x-%j.out
#SBATCH -e %x-%j.err
#SBATCH -t 00:05:00
#SBATCH -p caar
#SBATCH -N 1
# Slurm job: runs TestCommD3Q19 on test.db as a single task with one GPU, with
# MPICH debug output switched on. stdout and stderr go to separate files.
module load craype-accel-amd-gfx908
module load PrgEnv-cray
#module load rocm
module load rocm/4.2.0
# Directory holding the LBPM test executables (user-specific install path)
export LBPM_DIR=/ccs/proj/csc380/mcclurej/spock/install/lbpm/tests
#export MPICH_RDMA_ENABLED_CUDA=1
#export MPICH_ENV_DISPLAY=1
#export MPICH_GPU_SUPPORT_ENABLED=1
# NOTE(review): the MPICH_* / MPIR_* settings below are MPI-library tuning and
# debug switches; their exact effect is not visible here -- confirm against
# the Cray MPICH documentation before changing them.
export MPICH_GPU_NO_ASYNC_MEMCPY=0
export MPICH_SMP_SINGLE_COPY_MODE=CMA
#export MPICH_DBG_FILENAME="./mpich-dbg.log"
export MPICH_DBG_CLASS=ALL
export MPICH_DBG_LEVEL=VERBOSE
export MPICH_DBG=yes
#export PMI_DEBUG=1
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
# This is the active setting; the earlier occurrence of the same variable is commented out.
export MPICH_GPU_SUPPORT_ENABLED=1
# Alternative launch (disabled): the color simulator on spheres322.db.
#srun -n1 --ntasks-per-node=1 --accel-bind=g --gpus-per-task=1 $LBPM_DIR/lbpm_color_simulator spheres322.db
# --export=ALL forwards the environment set above to the launched task.
srun -n1 --ntasks-per-node=1 --accel-bind=g --gpus-per-task=1 --verbose --export=ALL $LBPM_DIR/TestCommD3Q19 test.db

View File

@ -0,0 +1,54 @@
MRT {
tau = 1.0 // relaxation time
F = 0, 0, 1e-4 // external body force applied to system
timestepMax = 1000 // max number of timesteps
din = 1.0
dout = 1.0
Restart = false
flux = 0.0
}
Color {
tauA = 0.7; // relaxation time for fluid A
tauB = 0.7; // relaxation time for fluid B
rhoA = 1.0; // mass density for fluid A
rhoB = 1.0; // mass density for fluid B
alpha = 1e-3; // controls interfacial tension between fluids
beta = 0.95; // controls interface width
F = 0, 0, 1.0e-5 // external body force applied to the system
Restart = false // restart from checkpoint file?
din = 1.0 // density at inlet (if external BC is applied)
dout = 1.0 // density at outlet (if external BC is applied )
timestepMax = 10 // maximum number of timesteps to simulate
flux = 0.0 // volumetric flux in voxels per timestep (if flux BC is applied)
ComponentLabels = 0 // comma separated list of solid mineral labels
ComponentAffinity = -1.0 // comma separated list of phase indicator field values to assign for each mineral label
}
Domain {
nproc = 1, 1, 1 // Number of processors (Npx,Npy,Npz)
n = 318, 320, 320 // Size of local domain (Nx,Ny,Nz)
N = 320, 320, 320
nspheres = 1896 // Number of spheres (only needed if using a sphere packing)
L = 1, 1, 1 // Length of domain (x,y,z)
BC = 0 // Boundary condition type
// BC = 0 for periodic BC
// BC = 1 for pressure BC (applied in z direction)
// BC = 4 for flux BC (applied in z direction)
ReadType = "8bit"
ReadValues = 0, 1, 2 // list of labels within the binary file (read)
WriteValues = 0, 1, 2 // list of labels within the output files (write)
}
Analysis {
analysis_interval = 1000 // Frequency to perform analysis
restart_interval = 50000 // Frequency to write restart data
visualization_interval = 50000 // Frequency to write visualization data
restart_file = "Restart" // Filename to use for restart file (will append rank)
N_threads = 4 // Number of threads to use
load_balance = "independent" // Load balance method to use: "none", "default", "independent"
}
Visualization {
}

View File

View File

@ -0,0 +1,54 @@
MRT {
tau = 1.0 // relaxation time
F = 0, 0, 1e-4 // external body force applied to system
timestepMax = 1000 // max number of timesteps
din = 1.0
dout = 1.0
Restart = false
flux = 0.0
}
Color {
tauA = 0.7; // relaxation time for fluid A
tauB = 0.7; // relaxation time for fluid B
rhoA = 1.0; // mass density for fluid A
rhoB = 1.0; // mass density for fluid B
alpha = 1e-3; // controls interfacial tension between fluids
beta = 0.95; // controls interface width
F = 0, 0, 1.0e-5 // external body force applied to the system
Restart = false // restart from checkpoint file?
din = 1.0 // density at inlet (if external BC is applied)
dout = 1.0 // density at outlet (if external BC is applied )
timestepMax = 10 // maximum number of timesteps to simulate
flux = 0.0 // volumetric flux in voxels per timestep (if flux BC is applied)
ComponentLabels = 0 // comma separated list of solid mineral labels
ComponentAffinity = -1.0 // comma separated list of phase indicator field values to assign for each mineral label
}
Domain {
nproc = 1, 1, 1 // Number of processors (Npx,Npy,Npz)
n = 240, 240, 240 // Size of local domain (Nx,Ny,Nz)
N = 320, 320, 320
nspheres = 1896 // Number of spheres (only needed if using a sphere packing)
L = 1, 1, 1 // Length of domain (x,y,z)
BC = 0 // Boundary condition type
// BC = 0 for periodic BC
// BC = 1 for pressure BC (applied in z direction)
// BC = 4 for flux BC (applied in z direction)
ReadType = "8bit"
ReadValues = 0, 1, 2 // list of labels within the binary file (read)
WriteValues = 0, 1, 2 // list of labels within the output files (write)
}
Analysis {
analysis_interval = 1000 // Frequency to perform analysis
restart_interval = 50000 // Frequency to write restart data
visualization_interval = 50000 // Frequency to write visualization data
restart_file = "Restart" // Filename to use for restart file (will append rank)
N_threads = 4 // Number of threads to use
load_balance = "independent" // Load balance method to use: "none", "default", "independent"
}
Visualization {
}

View File

@ -18,9 +18,11 @@
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 256
#define NTHREADS 512
__global__ void dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
__global__ void
__launch_bounds__(512,1) dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
int n;
// conserved moments
double rho,ux,uy,uz,uu;
@ -138,7 +140,8 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_BGK(double *dist, int start, int finish,
}
}
__global__ void dvc_ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
__global__ void
__launch_bounds__(512,1) dvc_ScaLBL_D3Q19_AAodd_BGK(int *neighborList, double *dist, int start, int finish, int Np, double rlx, double Fx, double Fy, double Fz){
int n;
// conserved moments
double rho,ux,uy,uz,uu;

View File

@ -1,10 +0,0 @@
SET( HIP_SEPERABLE_COMPILATION ON )
FILE( GLOB HIP_SOURCES "*.cu" )
SET_SOURCE_FILES_PROPERTIES( ${HIP_SOURCES} PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1 )
HIP_ADD_LIBRARY( lbpm-hip ${HIP_SOURCES} SHARED HIPCC_OPTIONS ${HIP_HIPCC_OPTIONS} HCC_OPTIONS ${HIP_HCC_OPTIONS} NVCC_OPTIONS ${HIP_NVCC_OPTIONS} ${HIP_NVCC_FLAGS} )
#TARGET_LINK_LIBRARIES( lbpm-hip /opt/rocm-3.3.0/lib/libhip_hcc.so )
#TARGET_LINK_LIBRARIES( lbpm-wia lbpm-hip )
#ADD_DEPENDENCIES( lbpm-hip copy-include )

View File

@ -21,6 +21,21 @@
#define NBLOCKS 1024
#define NTHREADS 256
__device__ __constant__ double mrt_V1=0.05263157894736842;
__device__ __constant__ double mrt_V2=0.012531328320802;
__device__ __constant__ double mrt_V3=0.04761904761904762;
__device__ __constant__ double mrt_V4=0.004594820384294068;
__device__ __constant__ double mrt_V5=0.01587301587301587;
__device__ __constant__ double mrt_V6=0.0555555555555555555555555;
__device__ __constant__ double mrt_V7=0.02777777777777778;
__device__ __constant__ double mrt_V8=0.08333333333333333;
__device__ __constant__ double mrt_V9=0.003341687552213868;
__device__ __constant__ double mrt_V10=0.003968253968253968;
__device__ __constant__ double mrt_V11=0.01388888888888889;
__device__ __constant__ double mrt_V12=0.04166666666666666;
__global__ void dvc_ScaLBL_Color_Init(char *ID, double *Den, double *Phi, double das, double dbs, int Nx, int Ny, int Nz)
{
//int i,j,k;
@ -541,7 +556,7 @@ __global__ void dvc_ColorCollide( char *ID, double *disteven, double *distodd,
}
__global__ void
__launch_bounds__(512,2)
__launch_bounds__(256,1)
dvc_ScaLBL_D3Q19_ColorCollide( char *ID, double *disteven, double *distodd, double *phi, double *ColorGrad,
double *Velocity, int Nx, int Ny, int Nz, double rlx_setA, double rlx_setB,
double alpha, double beta, double Fx, double Fy, double Fz)
@ -1257,7 +1272,8 @@ __global__ void dvc_ScaLBL_SetSlice_z(double *Phi, double value, int Nx, int Ny
__global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi,
__global__ void
__launch_bounds__(256,1) dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *Aq, double *Bq, double *Den, double *Phi,
double *Velocity, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){
int ijk,nn,n;
@ -1273,19 +1289,6 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
double ux,uy,uz;
double phi,tau,rho0,rlx_setA,rlx_setB;
const double mrt_V1=0.05263157894736842;
const double mrt_V2=0.012531328320802;
const double mrt_V3=0.04761904761904762;
const double mrt_V4=0.004594820384294068;
const double mrt_V5=0.01587301587301587;
const double mrt_V6=0.0555555555555555555555555;
const double mrt_V7=0.02777777777777778;
const double mrt_V8=0.08333333333333333;
const double mrt_V9=0.003341687552213868;
const double mrt_V10=0.003968253968253968;
const double mrt_V11=0.01388888888888889;
const double mrt_V12=0.04166666666666666;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
@ -1295,9 +1298,10 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
// read the component number densities
nA = Den[n];
nB = Den[Np + n];
nAB = 1.0/(nA+nB);
// compute phase indicator field
phi=(nA-nB)/(nA+nB);
phi=(nA-nB)*nAB;
// local density
rho0=rhoA + 0.5*(1.0-phi)*(rhoB-rhoA);
@ -1372,11 +1376,11 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
//...........Normalize the Color Gradient.................................
C = sqrt(nx*nx+ny*ny+nz*nz);
double ColorMag = C;
if (C==0.0) ColorMag=1.0;
nx = nx/ColorMag;
ny = ny/ColorMag;
nz = nz/ColorMag;
double iColorMag = 1.0/C;
if (C==0.0) iColorMag=1.0;
nx = nx*iColorMag;
ny = ny*iColorMag;
nz = nz*iColorMag;
// q=0
fq = dist[n];
@ -1651,19 +1655,20 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
//........................................................................
//..............carry out relaxation process..............................
//..........Toelke, Freudiger et al. 2006.................................
double irho0 = 1.0/rho0;
if (C == 0.0) nx = ny = nz = 0.0;
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) -19*alpha*C - m1);
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0)- m2);
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)*irho0 - 11*rho) -19*alpha*C - m1);
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)*irho0)- m2);
m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4);
m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6);
m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8);
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)*irho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
m10 = m10 + rlx_setA*( - m10);
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)*irho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
m12 = m12 + rlx_setA*( - m12);
m13 = m13 + rlx_setA*( (jx*jy/rho0) + 0.5*alpha*C*nx*ny - m13);
m14 = m14 + rlx_setA*( (jy*jz/rho0) + 0.5*alpha*C*ny*nz - m14);
m15 = m15 + rlx_setA*( (jx*jz/rho0) + 0.5*alpha*C*nx*nz - m15);
m13 = m13 + rlx_setA*( (jx*jy*irho0) + 0.5*alpha*C*nx*ny - m13);
m14 = m14 + rlx_setA*( (jy*jz*irho0) + 0.5*alpha*C*ny*nz - m14);
m15 = m15 + rlx_setA*( (jx*jz*irho0) + 0.5*alpha*C*nx*nz - m15);
m16 = m16 + rlx_setB*( - m16);
m17 = m17 + rlx_setB*( - m17);
m18 = m18 + rlx_setB*( - m18);
@ -1776,9 +1781,9 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
//........................................................................
// write the velocity
ux = jx / rho0;
uy = jy / rho0;
uz = jz / rho0;
ux = jx*irho0;
uy = jy*irho0;
uz = jz*irho0;
Velocity[n] = ux;
Velocity[Np+n] = uy;
Velocity[2*Np+n] = uz;
@ -1786,7 +1791,6 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
// Instantiate mass transport distributions
// Stationary value - distribution 0
nAB = 1.0/(nA+nB);
Aq[n] = 0.3333333333333333*nA;
Bq[n] = 0.3333333333333333*nB;
@ -1839,8 +1843,8 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_Color(int *Map, double *dist, double *A
}
}
__global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den,
__global__ void
__launch_bounds__(256,1) dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double *dist, double *Aq, double *Bq, double *Den,
double *Phi, double *Velocity, double rhoA, double rhoB, double tauA, double tauB, double alpha, double beta,
double Fx, double Fy, double Fz, int strideY, int strideZ, int start, int finish, int Np){
@ -1861,19 +1865,6 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double
double ux,uy,uz;
double phi,tau,rho0,rlx_setA,rlx_setB;
const double mrt_V1=0.05263157894736842;
const double mrt_V2=0.012531328320802;
const double mrt_V3=0.04761904761904762;
const double mrt_V4=0.004594820384294068;
const double mrt_V5=0.01587301587301587;
const double mrt_V6=0.0555555555555555555555555;
const double mrt_V7=0.02777777777777778;
const double mrt_V8=0.08333333333333333;
const double mrt_V9=0.003341687552213868;
const double mrt_V10=0.003968253968253968;
const double mrt_V11=0.01388888888888889;
const double mrt_V12=0.04166666666666666;
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
@ -1882,9 +1873,10 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double
// read the component number densities
nA = Den[n];
nB = Den[Np + n];
nAB = 1.0/(nA+nB);
// compute phase indicator field
phi=(nA-nB)/(nA+nB);
phi=(nA-nB)*nAB;
// local density
rho0=rhoA + 0.5*(1.0-phi)*(rhoB-rhoA);
@ -1959,11 +1951,11 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double
//...........Normalize the Color Gradient.................................
C = sqrt(nx*nx+ny*ny+nz*nz);
double ColorMag = C;
if (C==0.0) ColorMag=1.0;
nx = nx/ColorMag;
ny = ny/ColorMag;
nz = nz/ColorMag;
double iColorMag = 1.0/C;
if (C==0.0) iColorMag=1.0;
nx = nx*iColorMag;
ny = ny*iColorMag;
nz = nz*iColorMag;
// q=0
fq = dist[n];
@ -2290,18 +2282,19 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double
//..............carry out relaxation process..............................
//..........Toelke, Freudiger et al. 2006.................................
if (C == 0.0) nx = ny = nz = 0.0;
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)/rho0 - 11*rho) -19*alpha*C - m1);
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)/rho0)- m2);
double irho0=1.0/rho0;
m1 = m1 + rlx_setA*((19*(jx*jx+jy*jy+jz*jz)*irho0 - 11*rho) -19*alpha*C - m1);
m2 = m2 + rlx_setA*((3*rho - 5.5*(jx*jx+jy*jy+jz*jz)*irho0)- m2);
m4 = m4 + rlx_setB*((-0.6666666666666666*jx)- m4);
m6 = m6 + rlx_setB*((-0.6666666666666666*jy)- m6);
m8 = m8 + rlx_setB*((-0.6666666666666666*jz)- m8);
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
m9 = m9 + rlx_setA*(((2*jx*jx-jy*jy-jz*jz)*irho0) + 0.5*alpha*C*(2*nx*nx-ny*ny-nz*nz) - m9);
m10 = m10 + rlx_setA*( - m10);
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)/rho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
m11 = m11 + rlx_setA*(((jy*jy-jz*jz)*irho0) + 0.5*alpha*C*(ny*ny-nz*nz)- m11);
m12 = m12 + rlx_setA*( - m12);
m13 = m13 + rlx_setA*( (jx*jy/rho0) + 0.5*alpha*C*nx*ny - m13);
m14 = m14 + rlx_setA*( (jy*jz/rho0) + 0.5*alpha*C*ny*nz - m14);
m15 = m15 + rlx_setA*( (jx*jz/rho0) + 0.5*alpha*C*nx*nz - m15);
m13 = m13 + rlx_setA*( (jx*jy*irho0) + 0.5*alpha*C*nx*ny - m13);
m14 = m14 + rlx_setA*( (jy*jz*irho0) + 0.5*alpha*C*ny*nz - m14);
m15 = m15 + rlx_setA*( (jx*jz*irho0) + 0.5*alpha*C*nx*nz - m15);
m16 = m16 + rlx_setB*( - m16);
m17 = m17 + rlx_setB*( - m17);
m18 = m18 + rlx_setB*( - m18);
@ -2426,16 +2419,15 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_Color(int *neighborList, int *Map, double
dist[nread] = fq;
// write the velocity
ux = jx / rho0;
uy = jy / rho0;
uz = jz / rho0;
ux = jx*irho0;
uy = jy*irho0;
uz = jz*irho0;
Velocity[n] = ux;
Velocity[Np+n] = uy;
Velocity[2*Np+n] = uz;
// Instantiate mass transport distributions
// Stationary value - distribution 0
nAB = 1.0/(nA+nB);
Aq[n] = 0.3333333333333333*nA;
Bq[n] = 0.3333333333333333*nB;
@ -3677,7 +3669,8 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_ColorMass(int *neighborList, double *Aq,
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq,
__global__ void
__launch_bounds__(256,1) dvc_ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, double *Aq, double *Bq,
double *Den, double *Phi, int start, int finish, int Np){
int idx,n,nread;
double fq,nA,nB;
@ -3747,7 +3740,8 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_PhaseField(int *neighborList, int *Map, d
}
}
__global__ void dvc_ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, double *Den, double *Phi,
__global__ void
__launch_bounds__(256,1) dvc_ScaLBL_D3Q7_AAeven_PhaseField(int *Map, double *Aq, double *Bq, double *Den, double *Phi,
int start, int finish, int Np){
int idx,n;
double fq,nA,nB;

View File

@ -19,7 +19,7 @@
#include "hip/hip_cooperative_groups.h"
#define NBLOCKS 1024
#define NTHREADS 256
#define NTHREADS 512
/*
1. constants that are known at compile time should be defined using preprocessor macros (e.g. #define) or via C/C++ const variables at global/file scope.
@ -321,10 +321,10 @@ __global__ void dvc_ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *distev
}
}
//__launch_bounds__(512,4)
//__launch_bounds__(512,1)
__global__ void
dvc_ScaLBL_AAodd_Compact(char * ID, int *d_neighborList, double *dist, int Np) {
dvc_ScaLBL_AAodd_Compact( int *d_neighborList, double *dist, int Np) {
int n;
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
@ -463,7 +463,8 @@ dvc_ScaLBL_AAodd_Compact(char * ID, int *d_neighborList, double *dist, int Np) {
}
__global__ void
__global__ void
__launch_bounds__(512,1)
dvc_ScaLBL_AAodd_MRT(int *neighborList, double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz) {
int n;
@ -932,7 +933,8 @@ dvc_ScaLBL_AAodd_MRT(int *neighborList, double *dist, int start, int finish, int
//__launch_bounds__(512,1)
__global__ void
__global__ void
__launch_bounds__(512,1)
dvc_ScaLBL_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz) {
int n;
@ -1353,9 +1355,9 @@ dvc_ScaLBL_AAeven_MRT(double *dist, int start, int finish, int Np, double rlx_se
}
}
//__launch_bounds__(512,4)
//__launch_bounds__(512,1)
__global__ void dvc_ScaLBL_AAeven_Compact(char * ID, double *dist, int Np) {
__global__ void dvc_ScaLBL_AAeven_Compact( double *dist, int Np) {
int n;
double f0,f1,f2,f3,f4,f5,f6,f7,f8,f9;
@ -2374,12 +2376,12 @@ __global__ void dvc_ScaLBL_D3Q19_Init_Simple(char *ID, double *f_even, double *f
extern "C" void ScaLBL_D3Q19_Pack(int q, int *list, int start, int count, double *sendbuf, double *dist, int N){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q19_Pack <<<GRID,512 >>>(q, list, start, count, sendbuf, dist, N);
dvc_ScaLBL_D3Q19_Pack <<<NBLOCKS,NTHREADS >>>(q, list, start, count, sendbuf, dist, N);
}
extern "C" void ScaLBL_D3Q19_Unpack(int q, int *list, int start, int count, double *recvbuf, double *dist, int N){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q19_Unpack <<<GRID,512 >>>(q, list, start, count, recvbuf, dist, N);
dvc_ScaLBL_D3Q19_Unpack <<<NBLOCKS,NTHREADS >>>(q, list, start, count, recvbuf, dist, N);
}
//*************************************************************************
@ -2423,19 +2425,17 @@ extern "C" void ScaLBL_D3Q19_Swap_Compact(int *neighborList, double *disteven, d
printf("CUDA error in ScaLBL_D3Q19_Swap: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q19_AAeven_Compact(char * ID, double *d_dist, int Np) {
extern "C" void ScaLBL_D3Q19_AAeven_Compact( double *d_dist, int Np) {
hipFuncSetCacheConfig( (void*) dvc_ScaLBL_AAeven_Compact, hipFuncCachePreferL1);
dvc_ScaLBL_AAeven_Compact<<<NBLOCKS,NTHREADS>>>(ID, d_dist, Np);
dvc_ScaLBL_AAeven_Compact<<<NBLOCKS,NTHREADS>>>( d_dist, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q19_AAodd_Compact(char * ID, int *d_neighborList, double *d_dist, int Np) {
extern "C" void ScaLBL_D3Q19_AAodd_Compact( int *d_neighborList, double *d_dist, int Np) {
hipFuncSetCacheConfig( (void*) dvc_ScaLBL_AAodd_Compact, hipFuncCachePreferL1);
dvc_ScaLBL_AAodd_Compact<<<NBLOCKS,NTHREADS>>>(ID,d_neighborList, d_dist,Np);
dvc_ScaLBL_AAodd_Compact<<<NBLOCKS,NTHREADS>>>(d_neighborList, d_dist,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("CUDA error in ScaLBL_D3Q19_Init: %s \n",hipGetErrorString(err));

View File

@ -1,567 +0,0 @@
#include <math.h>
#include <stdio.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 560
#define NTHREADS 128
// Dirichlet-type bounce-back update on solid boundary links (D3Q7).
// Each thread whose global index is below `count` handles one list entry:
//   dist[iq] <- -dist[iq] + 0.25*BoundaryValue[ib]
// with iq = BounceBackDist_list[i] and ib = BounceBackSolid_list[i].
__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // surplus threads have nothing to do

    const int distIdx  = BounceBackDist_list[tid];  // entry of dist that gets overwritten
    const int solidIdx = BounceBackSolid_list[tid]; // entry of BoundaryValue that is read
    const double wallValue = BoundaryValue[solidIdx];
    const double current   = dist[distIdx];
    // 0.25: D3Q7 lattice constant; the original note calls it the speed of sound
    // (presumably cs^2 -- confirm).
    dist[distIdx] = -1.0*current + wallValue*0.25;
}
// Neumann-type bounce-back update on solid boundary links (D3Q7).
// Each thread whose global index is below `count` handles one list entry:
//   dist[iq] <- dist[iq] + BoundaryValue[ib]
__global__ void dvc_ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // surplus threads have nothing to do

    const int distIdx  = BounceBackDist_list[tid];  // entry of dist that gets overwritten
    const int solidIdx = BounceBackSolid_list[tid]; // entry of BoundaryValue that is read
    const double wallValue = BoundaryValue[solidIdx];
    const double current   = dist[distIdx];
    dist[distIdx] = current + wallValue;
}
// Mixed bounce-back update on solid boundary links (D3Q7).
// BoundaryLabel[ib] selects the rule per solid site:
//   1 -> Dirichlet: dist[iq] <- -dist[iq] + 0.25*BoundaryValue[ib]
//   2 -> Neumann:   dist[iq] <-  dist[iq] + BoundaryValue[ib]
// Any other label leaves dist[iq] untouched.
__global__ void dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist, double *BoundaryValue,int *BoundaryLabel, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // surplus threads have nothing to do

    const int distIdx  = BounceBackDist_list[tid];
    const int solidIdx = BounceBackSolid_list[tid];
    const double wallValue = BoundaryValue[solidIdx]; // boundary value at the solid site
    const int    bcType    = BoundaryLabel[solidIdx]; // boundary-condition selector at the solid site
    const double current   = dist[distIdx];

    if (bcType == 1){
        // Dirichlet BC; 0.25 is the D3Q7 constant from the original note (presumably cs^2 -- confirm)
        dist[distIdx] = -1.0*current + wallValue*0.25;
    }
    else if (bcType == 2){
        // Neumann BC
        dist[distIdx] = current + wallValue;
    }
}
// Even-step potential boundary condition for the sites in `list`.
// Six distributions are read directly from `dist` (stride Np per slot); the value written
// to slot 6 is chosen so that all seven values add up to Vin.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // NOTE(review): f1/f2 and f3/f4 are read from swapped slots -- presumably the
    // even-step storage convention of the AA scheme; confirm.
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f6 = dist[5*Np+site];
    // the unknown distribution closes the sum to the prescribed value
    const double f5 = Vin - (f0+f1+f2+f3+f4+f6);
    dist[6*Np+site] = f5;
}
// Even-step potential boundary condition for the sites in `list` (opposite face to BC_z).
// Six distributions are read directly from `dist` (stride Np per slot); the value written
// to slot 5 is chosen so that all seven values add up to Vout.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // NOTE(review): slots are read pairwise swapped -- presumably the even-step
    // storage convention of the AA scheme; confirm.
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f5 = dist[6*Np+site];
    // the unknown distribution closes the sum to the prescribed value
    const double f6 = Vout - (f0+f1+f2+f3+f4+f5);
    dist[5*Np+site] = f6;
}
// Odd-step potential boundary condition for the sites in `list`.
// Known distributions are gathered through d_neighborList (stride Np per direction);
// the unknown one is set so the seven values sum to Vin and is written to the
// location given by d_neighborList[site+4*Np].
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f6 = dist[d_neighborList[site+5*Np]];

    // destination of the unknown distribution
    const int target = d_neighborList[site+4*Np];
    const double f5 = Vin - (f0+f1+f2+f3+f4+f6);
    dist[target] = f5;
}
// Odd-step potential boundary condition for the sites in `list` (opposite face to BC_z).
// Known distributions are gathered through d_neighborList (stride Np per direction);
// the unknown one is set so the seven values sum to Vout and is written to the
// location given by d_neighborList[site+5*Np].
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f5 = dist[d_neighborList[site+4*Np]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];

    // destination of the unknown distribution
    const int target = d_neighborList[site+5*Np];
    const double f6 = Vout - (f0+f1+f2+f3+f4+f5);
    dist[target] = f6;
}
// Overwrites Psi with the constant Vin at every listed site:
//   Psi[ Map[ list[i] ] ] = Vin   for i in [0, count)
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site   = list[tid];
    const int mapped = Map[site]; // index into Psi for this site
    Psi[mapped] = Vin;
}
// Overwrites Psi with the constant Vout at every listed site:
//   Psi[ Map[ list[i] ] ] = Vout   for i in [0, count)
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site   = list[tid];
    const int mapped = Map[site]; // index into Psi for this site
    Psi[mapped] = Vout;
}
// Even-step concentration boundary condition for the sites in `list`.
// Six distributions are read directly from `dist` (stride Np per slot); the value written
// to slot 6 is chosen so that all seven values add up to Cin.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f6 = dist[5*Np+site];
    // the unknown distribution closes the sum to the prescribed concentration
    const double f5 = Cin - (f0+f1+f2+f3+f4+f6);
    dist[6*Np+site] = f5;
}
// Even-step concentration boundary condition for the sites in `list` (opposite face to BC_z).
// Six distributions are read directly from `dist` (stride Np per slot); the value written
// to slot 5 is chosen so that all seven values add up to Cout.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f5 = dist[6*Np+site];
    // the unknown distribution closes the sum to the prescribed concentration
    const double f6 = Cout - (f0+f1+f2+f3+f4+f5);
    dist[5*Np+site] = f6;
}
// Odd-step concentration boundary condition for the sites in `list`.
// Known distributions are gathered through d_neighborList (stride Np per direction);
// the unknown one is set so the seven values sum to Cin and is written to the
// location given by d_neighborList[site+4*Np].
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f6 = dist[d_neighborList[site+5*Np]];

    // destination of the unknown distribution
    const int target = d_neighborList[site+4*Np];
    const double f5 = Cin - (f0+f1+f2+f3+f4+f6);
    dist[target] = f5;
}
// Odd-step concentration boundary condition for the sites in `list` (opposite face to BC_z).
// Known distributions are gathered through d_neighborList (stride Np per direction);
// the unknown one is set so the seven values sum to Cout and is written to the
// location given by d_neighborList[site+5*Np].
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f5 = dist[d_neighborList[site+4*Np]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];

    // destination of the unknown distribution
    const int target = d_neighborList[site+5*Np];
    const double f6 = Cout - (f0+f1+f2+f3+f4+f5);
    dist[target] = f6;
}
// Even-step ion flux boundary condition for the sites in `list`.
// FluxIn is the inward flux (per the original note). The unknown distribution is computed
// from the six known ones, the relaxation time tau and the local VelocityZ value, then
// stored in slot 6 of `dist` (stride Np per slot).
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f6 = dist[5*Np+site];
    const double knownSum = f0+f1+f2+f3+f4+f6; // sum of every distribution except the unknown
    const double uz = VelocityZ[site];

    const double numer = FluxIn+(1.0-0.5/tau)*f6-0.5*uz*knownSum/tau;
    const double denom = 1.0-0.5/tau+0.5*uz/tau;
    dist[6*Np+site] = numer/denom;
}
// Even-step ion flux boundary condition for the sites in `list` (opposite face to BC_z).
// FluxIn is the inward flux (per the original note). The unknown distribution is computed
// from the six known ones, the relaxation time tau and the local VelocityZ value, then
// stored in slot 5 of `dist` (stride Np per slot).
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f5 = dist[6*Np+site];
    const double knownSum = f0+f1+f2+f3+f4+f5; // sum of every distribution except the unknown
    const double uz = VelocityZ[site];

    const double numer = FluxIn+(1.0-0.5/tau)*f5+0.5*uz*knownSum/tau;
    const double denom = 1.0-0.5/tau-0.5*uz/tau;
    dist[5*Np+site] = numer/denom;
}
// Odd-step ion flux boundary condition for the sites in `list`.
// FluxIn is the inward flux (per the original note). Known distributions are gathered
// through d_neighborList (stride Np per direction); the unknown one is computed from them,
// tau and VelocityZ, and written to the location given by d_neighborList[site+4*Np].
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f6 = dist[d_neighborList[site+5*Np]];
    const double knownSum = f0+f1+f2+f3+f4+f6; // sum of every distribution except the unknown
    const double uz = VelocityZ[site];

    const double numer = FluxIn+(1.0-0.5/tau)*f6-0.5*uz*knownSum/tau;
    const double denom = 1.0-0.5/tau+0.5*uz/tau;

    // destination of the unknown distribution
    const int target = d_neighborList[site+4*Np];
    dist[target] = numer/denom;
}
// Odd-step ion flux boundary condition for the sites in `list` (opposite face to BC_z).
// FluxIn is the inward flux (per the original note). Known distributions are gathered
// through d_neighborList (stride Np per direction); the unknown one is computed from them,
// tau and VelocityZ, and written to the location given by d_neighborList[site+5*Np].
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f5 = dist[d_neighborList[site+4*Np]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double knownSum = f0+f1+f2+f3+f4+f5; // sum of every distribution except the unknown
    const double uz = VelocityZ[site];

    const double numer = FluxIn+(1.0-0.5/tau)*f5+0.5*uz*knownSum/tau;
    const double denom = 1.0-0.5/tau-0.5*uz/tau;

    // destination of the unknown distribution
    const int target = d_neighborList[site+5*Np];
    dist[target] = numer/denom;
}
//*************************************************************************
extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Solid_Dirichlet_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_Solid_Dirichlet_D3Q7 (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Solid_Neumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_Solid_Neumann_D3Q7 (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist, double *BoundaryValue,int *BoundaryLabel, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7<<<GRID,512>>>(dist, BoundaryValue, BoundaryLabel, BounceBackDist_list, BounceBackSolid_list, count);
cudaError_t err = cudaGetLastError();
if (cudaSuccess != err){
printf("hip error in ScaLBL_Solid_DirichletAndNeumann_D3Q7 (kernel): %s \n",cudaGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<<GRID,512>>>(list, dist, Vin, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<<GRID,512>>>(list, dist, Vout, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Vin, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Vout, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Poisson_D3Q7_BC_z<<<GRID,512>>>(list, Map, Psi, Vin, count);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_Poisson_D3Q7_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count){
int GRID = count / 512 + 1;
dvc_ScaLBL_Poisson_D3Q7_BC_Z<<<GRID,512>>>(list, Map, Psi, Vout, count);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_Poisson_D3Q7_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<<GRID,512>>>(list, dist, Cin, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<<GRID,512>>>(list, dist, Cout, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<<GRID,512>>>(d_neighborList, list, dist, Cin, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, Cout, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z<<<GRID,512>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_z (kernel): %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
int GRID = count / 512 + 1;
dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z<<<GRID,512>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion_Flux_BC_Z (kernel): %s \n",hipGetErrorString(err));
}
}

917
hip/D3Q7BC.hip Normal file
View File

@ -0,0 +1,917 @@
#include <math.h>
#include <stdio.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 256
// CHECK_ERROR(KERNEL): reads the HIP runtime's last error via hipGetLastError() and,
// if it is not hipSuccess, prints the error string together with KERNEL to stdout.
// KERNEL is passed to printf's %s, so it must be a C string (e.g. a literal kernel name).
// The error is only reported, not propagated. The do { ... } while(0) wrapper makes
// the macro expand to a single statement.
#define CHECK_ERROR(KERNEL) \
do { \
auto err = hipGetLastError(); \
if ( hipSuccess != err ){ \
auto errString = hipGetErrorString(err); \
printf("error in %s (kernel): %s \n",KERNEL,errString); \
} \
} while(0)
// Dirichlet-type bounce-back update on solid boundary links (D3Q7).
// Each thread whose global index is below `count` handles one list entry:
//   dist[iq] <- -dist[iq] + 0.25*BoundaryValue[ib]
// with iq = BounceBackDist_list[i] and ib = BounceBackSolid_list[i].
__global__ void dvc_ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // surplus threads have nothing to do

    const int distIdx  = BounceBackDist_list[tid];  // entry of dist that gets overwritten
    const int solidIdx = BounceBackSolid_list[tid]; // entry of BoundaryValue that is read
    const double wallValue = BoundaryValue[solidIdx];
    const double current   = dist[distIdx];
    // 0.25: D3Q7 lattice constant; the original note calls it the speed of sound
    // (presumably cs^2 -- confirm).
    dist[distIdx] = -1.0*current + wallValue*0.25;
}
// Neumann-type bounce-back update on solid boundary links (D3Q7).
// Each thread whose global index is below `count` handles one list entry:
//   dist[iq] <- dist[iq] + BoundaryValue[ib]
__global__ void dvc_ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // surplus threads have nothing to do

    const int distIdx  = BounceBackDist_list[tid];  // entry of dist that gets overwritten
    const int solidIdx = BounceBackSolid_list[tid]; // entry of BoundaryValue that is read
    const double wallValue = BoundaryValue[solidIdx];
    const double current   = dist[distIdx];
    dist[distIdx] = current + wallValue;
}
// Mixed bounce-back update on solid boundary links (D3Q7).
// BoundaryLabel[ib] selects the rule per solid site:
//   1 -> Dirichlet: dist[iq] <- -dist[iq] + 0.25*BoundaryValue[ib]
//   2 -> Neumann:   dist[iq] <-  dist[iq] + BoundaryValue[ib]
// Any other label leaves dist[iq] untouched.
__global__ void dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist, double *BoundaryValue,int *BoundaryLabel, int *BounceBackDist_list, int *BounceBackSolid_list, int count)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return; // surplus threads have nothing to do

    const int distIdx  = BounceBackDist_list[tid];
    const int solidIdx = BounceBackSolid_list[tid];
    const double wallValue = BoundaryValue[solidIdx]; // boundary value at the solid site
    const int    bcType    = BoundaryLabel[solidIdx]; // boundary-condition selector at the solid site
    const double current   = dist[distIdx];

    if (bcType == 1){
        // Dirichlet BC; 0.25 is the D3Q7 constant from the original note (presumably cs^2 -- confirm)
        dist[distIdx] = -1.0*current + wallValue*0.25;
    }
    else if (bcType == 2){
        // Neumann BC
        dist[distIdx] = current + wallValue;
    }
}
// Slipping-velocity bounce-back correction on solid boundary links (D3Q19).
// For each of the `count` links:
//   1. load the electric field and solid-gradient vector at the fluid boundary node
//      (both stored component-wise with stride Np),
//   2. remove the field component along the solid-gradient direction to get the tangential field,
//   3. build a slip velocity from the zeta potential at the solid site and that tangential field,
//   4. subtract 2*w*rho0*cs2_inv*(c . u_slip) from the bounce-back distribution.
// lattice_weight / lattice_c{x,y,z} are indexed per link (same index as the lists).
// NOTE(review): the 1.0e-12 factor together with h and time_conv looks like a unit
// conversion (h presumably in microns) -- confirm against the caller.
__global__ void dvc_ScaLBL_Solid_SlippingVelocityBC_D3Q19(double *dist, double *zeta_potential, double *ElectricField, double *SolidGrad,
        double epsilon_LB, double tau, double rho0,double den_scale, double h, double time_conv,
        int *BounceBackDist_list, int *BounceBackSolid_list, int *FluidBoundary_list,
        double *lattice_weight, float *lattice_cx, float *lattice_cy, float *lattice_cz,
        int count, int Np)
{
    const double cs2_inv = 3.0;               // inverse of cs^2 for D3Q19
    const double nu_LB = (tau-0.5)/cs2_inv;   // lattice viscosity derived from tau

    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int distIdx  = BounceBackDist_list[tid];
    const int solidIdx = BounceBackSolid_list[tid];
    const int fluidIdx = FluidBoundary_list[tid];
    const double zeta    = zeta_potential[solidIdx]; // zeta potential taken from the solid site
    const double current = dist[distIdx];

    // electric field and solid-gradient vector at the fluid boundary node
    const double Ex = ElectricField[fluidIdx+0*Np];
    const double Ey = ElectricField[fluidIdx+1*Np];
    const double Ez = ElectricField[fluidIdx+2*Np];
    const double nsx = SolidGrad[fluidIdx+0*Np];
    const double nsy = SolidGrad[fluidIdx+1*Np];
    const double nsz = SolidGrad[fluidIdx+2*Np];

    // projection of E on the solid-gradient direction, then the tangential remainder
    const double E_normal = Ex*nsx+Ey*nsy+Ez*nsz;
    const double Etx = Ex - E_normal*nsx;
    const double Ety = Ey - E_normal*nsy;
    const double Etz = Ez - E_normal*nsz;

    // slip velocity at the fluid boundary node
    const double ubx = -epsilon_LB*zeta*Etx/(nu_LB*rho0)*time_conv*time_conv/(h*h*1.0e-12)/den_scale;
    const double uby = -epsilon_LB*zeta*Ety/(nu_LB*rho0)*time_conv*time_conv/(h*h*1.0e-12)/den_scale;
    const double ubz = -epsilon_LB*zeta*Etz/(nu_LB*rho0)*time_conv*time_conv/(h*h*1.0e-12)/den_scale;

    // per-link lattice weight and (single-precision) lattice velocity
    const double weight = lattice_weight[tid];
    const float cx = lattice_cx[tid];
    const float cy = lattice_cy[tid];
    const float cz = lattice_cz[tid];

    dist[distIdx] = current - 2.0*weight*rho0*cs2_inv*(cx*ubx+cy*uby+cz*ubz);
}
// Even-step potential boundary condition for the sites in `list`.
// Six distributions are read directly from `dist` (stride Np per slot); the value written
// to slot 6 is chosen so that all seven values add up to Vin.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // NOTE(review): f1/f2 and f3/f4 are read from swapped slots -- presumably the
    // even-step storage convention of the AA scheme; confirm.
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f6 = dist[5*Np+site];
    // the unknown distribution closes the sum to the prescribed value
    const double f5 = Vin - (f0+f1+f2+f3+f4+f6);
    dist[6*Np+site] = f5;
}
// Even-step potential boundary condition for the sites in `list` (opposite face to BC_z).
// Six distributions are read directly from `dist` (stride Np per slot); the value written
// to slot 5 is chosen so that all seven values add up to Vout.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    // NOTE(review): slots are read pairwise swapped -- presumably the even-step
    // storage convention of the AA scheme; confirm.
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f5 = dist[6*Np+site];
    // the unknown distribution closes the sum to the prescribed value
    const double f6 = Vout - (f0+f1+f2+f3+f4+f5);
    dist[5*Np+site] = f6;
}
// Odd-step potential boundary condition for the sites in `list`.
// Known distributions are gathered through d_neighborList (stride Np per direction);
// the unknown one is set so the seven values sum to Vin and is written to the
// location given by d_neighborList[site+4*Np].
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f6 = dist[d_neighborList[site+5*Np]];

    // destination of the unknown distribution
    const int target = d_neighborList[site+4*Np];
    const double f5 = Vin - (f0+f1+f2+f3+f4+f6);
    dist[target] = f5;
}
// Odd-step potential boundary condition for the sites in `list` (opposite face to BC_z).
// Known distributions are gathered through d_neighborList (stride Np per direction);
// the unknown one is set so the seven values sum to Vout and is written to the
// location given by d_neighborList[site+5*Np].
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f5 = dist[d_neighborList[site+4*Np]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];

    // destination of the unknown distribution
    const int target = d_neighborList[site+5*Np];
    const double f6 = Vout - (f0+f1+f2+f3+f4+f5);
    dist[target] = f6;
}
// Overwrites Psi with the constant Vin at every listed site:
//   Psi[ Map[ list[i] ] ] = Vin   for i in [0, count)
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site   = list[tid];
    const int mapped = Map[site]; // index into Psi for this site
    Psi[mapped] = Vin;
}
// Overwrites Psi with the constant Vout at every listed site:
//   Psi[ Map[ list[i] ] ] = Vout   for i in [0, count)
__global__ void dvc_ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site   = list[tid];
    const int mapped = Map[site]; // index into Psi for this site
    Psi[mapped] = Vout;
}
// Even-step concentration boundary condition for the sites in `list`.
// Six distributions are read directly from `dist` (stride Np per slot); the value written
// to slot 6 is chosen so that all seven values add up to Cin.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f6 = dist[5*Np+site];
    // the unknown distribution closes the sum to the prescribed concentration
    const double f5 = Cin - (f0+f1+f2+f3+f4+f6);
    dist[6*Np+site] = f5;
}
// Even-step concentration boundary condition for the sites in `list` (opposite face to BC_z).
// Six distributions are read directly from `dist` (stride Np per slot); the value written
// to slot 5 is chosen so that all seven values add up to Cout.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f5 = dist[6*Np+site];
    // the unknown distribution closes the sum to the prescribed concentration
    const double f6 = Cout - (f0+f1+f2+f3+f4+f5);
    dist[5*Np+site] = f6;
}
// Odd-step concentration boundary condition for the sites in `list`.
// Known distributions are gathered through d_neighborList (stride Np per direction);
// the unknown one is set so the seven values sum to Cin and is written to the
// location given by d_neighborList[site+4*Np].
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f6 = dist[d_neighborList[site+5*Np]];

    // destination of the unknown distribution
    const int target = d_neighborList[site+4*Np];
    const double f5 = Cin - (f0+f1+f2+f3+f4+f6);
    dist[target] = f5;
}
// Odd-step concentration boundary condition for the sites in `list` (opposite face to BC_z).
// Known distributions are gathered through d_neighborList (stride Np per direction);
// the unknown one is set so the seven values sum to Cout and is written to the
// location given by d_neighborList[site+5*Np].
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f5 = dist[d_neighborList[site+4*Np]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];

    // destination of the unknown distribution
    const int target = d_neighborList[site+5*Np];
    const double f6 = Cout - (f0+f1+f2+f3+f4+f5);
    dist[target] = f6;
}
// Even-step ion flux boundary condition ("Diff" variant) for the sites in `list`.
// FluxIn is the inward flux (per the original note). The unknown distribution is computed
// from the six known ones, tau and the local VelocityZ value, then stored in slot 6
// of `dist` (stride Np per slot).
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f6 = dist[5*Np+site];
    const double knownSum = f0+f1+f2+f3+f4+f6; // sum of every distribution except the unknown
    const double uz = VelocityZ[site];

    const double numer = FluxIn+(1.0-0.5/tau)*(f6+uz*knownSum);
    dist[6*Np+site] = numer/(1.0-0.5/tau)/(1.0-uz);
}
// Even-step ion flux boundary condition ("Diff" variant, opposite face to BC_z).
// FluxIn is the inward flux (per the original note). The unknown distribution is computed
// from the six known ones, tau and the local VelocityZ value, then stored in slot 5
// of `dist` (stride Np per slot).
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f5 = dist[6*Np+site];
    const double knownSum = f0+f1+f2+f3+f4+f5; // sum of every distribution except the unknown
    const double uz = VelocityZ[site];

    const double numer = FluxIn+(1.0-0.5/tau)*(f5-uz*knownSum);
    dist[5*Np+site] = numer/(1.0-0.5/tau)/(1.0+uz);
}
// Odd-step ion flux boundary condition ("Diff" variant) for the sites in `list`.
// FluxIn is the inward flux (per the original note). Known distributions are gathered
// through d_neighborList (stride Np per direction); the unknown one is computed from them,
// tau and VelocityZ, and written to the location given by d_neighborList[site+4*Np].
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f6 = dist[d_neighborList[site+5*Np]];
    const double knownSum = f0+f1+f2+f3+f4+f6; // sum of every distribution except the unknown
    const double uz = VelocityZ[site];

    const double numer = FluxIn+(1.0-0.5/tau)*(f6+uz*knownSum);
    const double f5 = numer/(1.0-0.5/tau)/(1.0-uz);

    // destination of the unknown distribution
    const int target = d_neighborList[site+4*Np];
    dist[target] = f5;
}
// Odd-step ion flux boundary condition ("Diff" variant, opposite face to BC_z).
// FluxIn is the inward flux (per the original note). Known distributions are gathered
// through d_neighborList (stride Np per direction); the unknown one is computed from them,
// tau and VelocityZ, and written to the location given by d_neighborList[site+5*Np].
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f5 = dist[d_neighborList[site+4*Np]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double knownSum = f0+f1+f2+f3+f4+f5; // sum of every distribution except the unknown
    const double uz = VelocityZ[site];

    const double numer = FluxIn+(1.0-0.5/tau)*(f5-uz*knownSum);
    const double f6 = numer/(1.0-0.5/tau)/(1.0+uz);

    // destination of the unknown distribution
    const int target = d_neighborList[site+5*Np];
    dist[target] = f6;
}
// Even-step ion flux boundary condition ("DiffAdvc" variant) for the sites in `list`.
// FluxIn is the inward flux (per the original note). The unknown distribution is computed
// from the six known ones, tau and the local VelocityZ value, then stored in slot 6
// of `dist` (stride Np per slot).
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f6 = dist[5*Np+site];
    const double knownSum = f0+f1+f2+f3+f4+f6; // sum of every distribution except the unknown
    const double uz = VelocityZ[site];

    const double numer = FluxIn+(1.0-0.5/tau)*f6-0.5*uz*knownSum/tau;
    const double denom = 1.0-0.5/tau+0.5*uz/tau;
    dist[6*Np+site] = numer/denom;
}
// Even-step ion flux boundary condition ("DiffAdvc" variant, opposite face to BC_z).
// FluxIn is the inward flux (per the original note). The unknown distribution is computed
// from the six known ones, tau and the local VelocityZ value, then stored in slot 5
// of `dist` (stride Np per slot).
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[2*Np+site];
    const double f2 = dist[1*Np+site];
    const double f3 = dist[4*Np+site];
    const double f4 = dist[3*Np+site];
    const double f5 = dist[6*Np+site];
    const double knownSum = f0+f1+f2+f3+f4+f5; // sum of every distribution except the unknown
    const double uz = VelocityZ[site];

    const double numer = FluxIn+(1.0-0.5/tau)*f5+0.5*uz*knownSum/tau;
    const double denom = 1.0-0.5/tau-0.5*uz/tau;
    dist[5*Np+site] = numer/denom;
}
// Odd-step ion flux boundary condition ("DiffAdvc" variant) for the sites in `list`.
// FluxIn is the inward flux (per the original note). Known distributions are gathered
// through d_neighborList (stride Np per direction); the unknown one is computed from them,
// tau and VelocityZ, and written to the location given by d_neighborList[site+4*Np].
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
    if (tid >= count) return;

    const int site = list[tid];
    const double f0 = dist[site];
    const double f1 = dist[d_neighborList[site]];
    const double f3 = dist[d_neighborList[site+2*Np]];
    const double f2 = dist[d_neighborList[site+Np]];
    const double f4 = dist[d_neighborList[site+3*Np]];
    const double f6 = dist[d_neighborList[site+5*Np]];
    const double knownSum = f0+f1+f2+f3+f4+f6; // sum of every distribution except the unknown
    const double uz = VelocityZ[site];

    const double numer = FluxIn+(1.0-0.5/tau)*f6-0.5*uz*knownSum/tau;
    const double denom = 1.0-0.5/tau+0.5*uz/tau;

    // destination of the unknown distribution
    const int target = d_neighborList[site+4*Np];
    dist[target] = numer/denom;
}
// Flux boundary kernel (advection-diffusion form, "AAodd" addressing).
// Same closure as the AAeven variant, but every distribution other than f0 is
// located through d_neighborList: entry d_neighborList[n+k*Np] gives the index
// into dist for the (k+1)-th direction of node n.
// NOTE(review): the "_Z" suffix presumably denotes the upper z face -- confirm with the caller.
//   d_neighborList - indirection table into dist
//   list           - node indices to process (count entries)
//   FluxIn, tau    - prescribed flux and relaxation time
//   VelocityZ      - per-node z-velocity, indexed by n
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np)
{
    //NOTE: FluxIn is the inward flux
    const int idx = blockIdx.x*blockDim.x + threadIdx.x;
    if (idx >= count) return; // surplus threads in the last block have nothing to do

    const int n = list[idx];

    // Known distributions, gathered through the neighbor table
    const double f0 = dist[n];
    const double f1 = dist[d_neighborList[n]];
    const double f3 = dist[d_neighborList[n+2*Np]];
    const double f5 = dist[d_neighborList[n+4*Np]];
    const double f2 = dist[d_neighborList[n+Np]];
    const double f4 = dist[d_neighborList[n+3*Np]];
    const double fsum_partial = f0+f1+f2+f3+f4+f5;
    const double uz = VelocityZ[n];

    // Solve the flux condition for the unknown distribution
    const double numer = FluxIn+(1.0-0.5/tau)*f5+0.5*uz*fsum_partial/tau;
    const double denom = 1.0-0.5/tau-0.5*uz/tau;
    const double f6 = numer/denom;

    // Store it at the location the neighbor table assigns to direction 6
    const int nr6 = d_neighborList[n+5*Np];
    dist[nr6] = f6;
}
// Flux boundary kernel including an electric-field term (direct "AAeven" addressing).
// One thread handles one entry of `list`: it reads six of the seven D3Q7
// distributions at node n = list[idx], forms uEPz = zi*Di/Vt*Ez from the local
// field value, and overwrites the slot at dist[6*Np+n] with the closure result.
// NOTE(review): the "_z" suffix presumably denotes the lower z face -- confirm with the caller.
//   list            - node indices to process (count entries)
//   dist            - distribution array, component k of node n addressed as dist[k*Np+n]
//   FluxIn, tau     - prescribed flux and relaxation time
//   VelocityZ       - per-node z-velocity, indexed by n
//   ElectricField_Z - per-node z-component of the electric field, indexed by n
//   Di, zi, Vt      - scalars entering uEPz = zi*Di/Vt*Ez
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np)
{
    //NOTE: FluxIn is the inward flux
    const int idx = blockIdx.x*blockDim.x + threadIdx.x;
    if (idx >= count) return; // surplus threads in the last block have nothing to do

    const int n = list[idx];

    // Known distributions (slot numbers are swapped pairwise relative to the f-index)
    const double f0 = dist[n];
    const double f1 = dist[2*Np+n];
    const double f2 = dist[1*Np+n];
    const double f3 = dist[4*Np+n];
    const double f4 = dist[3*Np+n];
    const double f6 = dist[5*Np+n];
    const double fsum_partial = f0+f1+f2+f3+f4+f6;

    const double uz = VelocityZ[n];
    const double Ez = ElectricField_Z[n];   // electrical field
    const double uEPz = zi*Di/Vt*Ez;        // electrochemical induced velocity

    // Solve the flux condition for the one remaining distribution
    const double numer = FluxIn+(1.0-0.5/tau)*f6-(0.5*uz/tau+uEPz)*fsum_partial;
    const double denom = 1.0-0.5/tau+0.5*uz/tau+uEPz;
    dist[6*Np+n] = numer/denom;
}
// Flux boundary kernel including an electric-field term (direct "AAeven" addressing).
// One thread handles one entry of `list`: it reads six of the seven D3Q7
// distributions at node n = list[idx], forms uEPz = zi*Di/Vt*Ez from the local
// field value, and overwrites the slot at dist[5*Np+n] with the closure result.
// NOTE(review): the "_Z" suffix presumably denotes the upper z face -- confirm with the caller.
//   list            - node indices to process (count entries)
//   dist            - distribution array, component k of node n addressed as dist[k*Np+n]
//   FluxIn, tau     - prescribed flux and relaxation time
//   VelocityZ       - per-node z-velocity, indexed by n
//   ElectricField_Z - per-node z-component of the electric field, indexed by n
//   Di, zi, Vt      - scalars entering uEPz = zi*Di/Vt*Ez
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np)
{
    //NOTE: FluxIn is the inward flux
    const int idx = blockIdx.x*blockDim.x + threadIdx.x;
    if (idx >= count) return; // surplus threads in the last block have nothing to do

    const int n = list[idx];

    // Known distributions (slot numbers are swapped pairwise relative to the f-index)
    const double f0 = dist[n];
    const double f1 = dist[2*Np+n];
    const double f2 = dist[1*Np+n];
    const double f3 = dist[4*Np+n];
    const double f4 = dist[3*Np+n];
    const double f5 = dist[6*Np+n];
    const double fsum_partial = f0+f1+f2+f3+f4+f5;

    const double uz = VelocityZ[n];
    const double Ez = ElectricField_Z[n];   // electrical field
    const double uEPz = zi*Di/Vt*Ez;        // electrochemical induced velocity

    // Solve the flux condition for the one remaining distribution
    const double numer = FluxIn+(1.0-0.5/tau)*f5+(0.5*uz/tau+uEPz)*fsum_partial;
    const double denom = 1.0-0.5/tau-0.5*uz/tau-uEPz;
    dist[5*Np+n] = numer/denom;
}
// Flux boundary kernel including an electric-field term ("AAodd" addressing).
// Same closure as the AAeven variant, but every distribution other than f0 is
// located through d_neighborList: entry d_neighborList[n+k*Np] gives the index
// into dist for the (k+1)-th direction of node n.
// NOTE(review): the "_z" suffix presumably denotes the lower z face -- confirm with the caller.
//   d_neighborList  - indirection table into dist
//   list            - node indices to process (count entries)
//   FluxIn, tau     - prescribed flux and relaxation time
//   VelocityZ       - per-node z-velocity, indexed by n
//   ElectricField_Z - per-node z-component of the electric field, indexed by n
//   Di, zi, Vt      - scalars entering uEPz = zi*Di/Vt*Ez
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np)
{
    //NOTE: FluxIn is the inward flux
    const int idx = blockIdx.x*blockDim.x + threadIdx.x;
    if (idx >= count) return; // surplus threads in the last block have nothing to do

    const int n = list[idx];

    // Known distributions, gathered through the neighbor table
    const double f0 = dist[n];
    const double f1 = dist[d_neighborList[n]];
    const double f3 = dist[d_neighborList[n+2*Np]];
    const double f2 = dist[d_neighborList[n+Np]];
    const double f4 = dist[d_neighborList[n+3*Np]];
    const double f6 = dist[d_neighborList[n+5*Np]];
    const double fsum_partial = f0+f1+f2+f3+f4+f6;

    const double uz = VelocityZ[n];
    const double Ez = ElectricField_Z[n];   // electrical field
    const double uEPz = zi*Di/Vt*Ez;        // electrochemical induced velocity

    // Solve the flux condition for the unknown distribution
    const double numer = FluxIn+(1.0-0.5/tau)*f6-(0.5*uz/tau+uEPz)*fsum_partial;
    const double denom = 1.0-0.5/tau+0.5*uz/tau+uEPz;
    const double f5 = numer/denom;

    // Store it at the location the neighbor table assigns to direction 5
    const int nr5 = d_neighborList[n+4*Np];
    dist[nr5] = f5;
}
// Flux boundary kernel including an electric-field term ("AAodd" addressing).
// Same closure as the AAeven variant, but every distribution other than f0 is
// located through d_neighborList: entry d_neighborList[n+k*Np] gives the index
// into dist for the (k+1)-th direction of node n.
// NOTE(review): the "_Z" suffix presumably denotes the upper z face -- confirm with the caller.
//   d_neighborList  - indirection table into dist
//   list            - node indices to process (count entries)
//   FluxIn, tau     - prescribed flux and relaxation time
//   VelocityZ       - per-node z-velocity, indexed by n
//   ElectricField_Z - per-node z-component of the electric field, indexed by n
//   Di, zi, Vt      - scalars entering uEPz = zi*Di/Vt*Ez
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np)
{
    //NOTE: FluxIn is the inward flux
    const int idx = blockIdx.x*blockDim.x + threadIdx.x;
    if (idx >= count) return; // surplus threads in the last block have nothing to do

    const int n = list[idx];

    // Known distributions, gathered through the neighbor table
    const double f0 = dist[n];
    const double f1 = dist[d_neighborList[n]];
    const double f3 = dist[d_neighborList[n+2*Np]];
    const double f5 = dist[d_neighborList[n+4*Np]];
    const double f2 = dist[d_neighborList[n+Np]];
    const double f4 = dist[d_neighborList[n+3*Np]];
    const double fsum_partial = f0+f1+f2+f3+f4+f5;

    const double uz = VelocityZ[n];
    const double Ez = ElectricField_Z[n];   // electrical field
    const double uEPz = zi*Di/Vt*Ez;        // electrochemical induced velocity

    // Solve the flux condition for the unknown distribution
    const double numer = FluxIn+(1.0-0.5/tau)*f5+(0.5*uz/tau+uEPz)*fsum_partial;
    const double denom = 1.0-0.5/tau-0.5*uz/tau-uEPz;
    const double f6 = numer/denom;

    // Store it at the location the neighbor table assigns to direction 6
    const int nr6 = d_neighborList[n+5*Np];
    dist[nr6] = f6;
}
//*************************************************************************
// Host launcher for dvc_ScaLBL_Solid_Dirichlet_D3Q7: one thread per list entry
// (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_Solid_Dirichlet_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_Solid_Dirichlet_D3Q7<<<gridSize,blockSize>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
    CHECK_ERROR("ScaLBL_Solid_Dirichlet_D3Q7");
}
// Host launcher for dvc_ScaLBL_Solid_Neumann_D3Q7: one thread per list entry
// (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_Solid_Neumann_D3Q7(double *dist, double *BoundaryValue, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_Solid_Neumann_D3Q7<<<gridSize,blockSize>>>(dist, BoundaryValue, BounceBackDist_list, BounceBackSolid_list, count);
    CHECK_ERROR("ScaLBL_Solid_Neumann_D3Q7");
}
// Host launcher for dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_Solid_DirichletAndNeumann_D3Q7(double *dist, double *BoundaryValue,int *BoundaryLabel, int *BounceBackDist_list, int *BounceBackSolid_list, int count){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_Solid_DirichletAndNeumann_D3Q7<<<gridSize,blockSize>>>(dist, BoundaryValue, BoundaryLabel, BounceBackDist_list, BounceBackSolid_list, count);
    CHECK_ERROR("ScaLBL_Solid_DirichletAndNeumann_D3Q7");
}
// Host launcher for dvc_ScaLBL_Solid_SlippingVelocityBC_D3Q19. All arguments are
// forwarded unchanged; the grid is sized from `count` with 512 threads per block,
// and CHECK_ERROR is invoked after the launch.
extern "C" void ScaLBL_Solid_SlippingVelocityBC_D3Q19(double *dist, double *zeta_potential, double *ElectricField, double *SolidGrad,
        double epsilon_LB, double tau, double rho0,double den_scale, double h, double time_conv,
        int *BounceBackDist_list, int *BounceBackSolid_list, int *FluidBoundary_list,
        double *lattice_weight, float *lattice_cx, float *lattice_cy, float *lattice_cz,
        int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_Solid_SlippingVelocityBC_D3Q19<<<gridSize,blockSize>>>(
            dist, zeta_potential, ElectricField, SolidGrad,
            epsilon_LB, tau, rho0, den_scale, h, time_conv,
            BounceBackDist_list, BounceBackSolid_list, FluidBoundary_list,
            lattice_weight, lattice_cx, lattice_cy, lattice_cz,
            count, Np);
    CHECK_ERROR("ScaLBL_Solid_SlippingVelocityBC_D3Q19");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z(int *list, double *dist, double Vin, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z<<<gridSize,blockSize>>>(list, dist, Vin, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_z");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z(int *list, double *dist, double Vout, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z<<<gridSize,blockSize>>>(list, dist, Vout, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Poisson_Potential_BC_Z");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z(int *d_neighborList, int *list, double *dist, double Vin, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z<<<gridSize,blockSize>>>(d_neighborList, list, dist, Vin, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_z");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z(int *d_neighborList, int *list, double *dist, double Vout, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z<<<gridSize,blockSize>>>(d_neighborList, list, dist, Vout, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Poisson_Potential_BC_Z");
}
// Host launcher for dvc_ScaLBL_Poisson_D3Q7_BC_z: one thread per list entry
// (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_Poisson_D3Q7_BC_z(int *list, int *Map, double *Psi, double Vin, int count){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_Poisson_D3Q7_BC_z<<<gridSize,blockSize>>>(list, Map, Psi, Vin, count);
    CHECK_ERROR("ScaLBL_Poisson_D3Q7_BC_z");
}
// Host launcher for dvc_ScaLBL_Poisson_D3Q7_BC_Z: one thread per list entry
// (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_Poisson_D3Q7_BC_Z(int *list, int *Map, double *Psi, double Vout, int count){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_Poisson_D3Q7_BC_Z<<<gridSize,blockSize>>>(list, Map, Psi, Vout, count);
    CHECK_ERROR("ScaLBL_Poisson_D3Q7_BC_Z");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z(int *list, double *dist, double Cin, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z<<<gridSize,blockSize>>>(list, dist, Cin, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_z");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z(int *list, double *dist, double Cout, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z<<<gridSize,blockSize>>>(list, dist, Cout, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Concentration_BC_Z");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z(int *d_neighborList, int *list, double *dist, double Cin, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z<<<gridSize,blockSize>>>(d_neighborList, list, dist, Cin, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_z");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z(int *d_neighborList, int *list, double *dist, double Cout, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z<<<gridSize,blockSize>>>(d_neighborList, list, dist, Cout, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Concentration_BC_Z");
}
//------------Diff-----------------
// Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z<<<gridSize,blockSize>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_z");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z<<<gridSize,blockSize>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_Diff_BC_Z");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z<<<gridSize,blockSize>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_z");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z<<<gridSize,blockSize>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_Diff_BC_Z");
}
//----------DiffAdvc-------------
// Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z<<<gridSize,blockSize>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_z");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z<<<gridSize,blockSize>>>(list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvc_BC_Z");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z<<<gridSize,blockSize>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_z");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z: one thread per
// list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z<<<gridSize,blockSize>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvc_BC_Z");
}
//----------DiffAdvcElec-------------
// Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z: one thread
// per list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z<<<gridSize,blockSize>>>(list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_z");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z: one thread
// per list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z(int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z<<<gridSize,blockSize>>>(list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAeven_Ion_Flux_DiffAdvcElec_BC_Z");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z: one thread
// per list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z<<<gridSize,blockSize>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_z");
}
// Host launcher for dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z: one thread
// per list entry (count entries), 512 threads per block, followed by CHECK_ERROR.
extern "C" void ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z(int *d_neighborList, int *list, double *dist, double FluxIn, double tau, double *VelocityZ, double *ElectricField_Z,
        double Di, double zi, double Vt, int count, int Np){
    const int blockSize = 512;
    // Integer division plus one: always at least one block, and enough to cover count
    const int gridSize = count / blockSize + 1;
    dvc_ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z<<<gridSize,blockSize>>>(d_neighborList, list, dist, FluxIn, tau, VelocityZ, ElectricField_Z, Di, zi, Vt, count, Np);
    CHECK_ERROR("ScaLBL_D3Q7_AAodd_Ion_Flux_DiffAdvcElec_BC_Z");
}
//-------------------------------

View File

@ -2726,10 +2726,10 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined(int *Map, double *
}
}
__global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi,
__global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField_alt(int *neighborList, int *Map, double *hq, double *Den, double *Phi,
double rhoA, double rhoB, int start, int finish, int Np){
int idx,nread;
int n,idx,nread;
double fq,phi;
// for (int n=start; n<finish; n++){
@ -2787,7 +2787,7 @@ __global__ void dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList,
__global__ void dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq, double *Den, double *Phi,
double rhoA, double rhoB, int start, int finish, int Np){
int idx;
int n,idx;
double fq,phi;
// for (int n=start; n<finish; n++){
int S = Np/NBLOCKS/NTHREADS + 1;
@ -2833,7 +2833,6 @@ __global__ void dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double
idx = Map[n];
Phi[idx] = phi;
}
}
}
__global__ void dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double *Pressure,
@ -3396,7 +3395,7 @@ extern "C" void ScaLBL_FreeLeeModel_PhaseField_Init(int *Map, double *Phi, doubl
extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel,
double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np)
{
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField, hipFuncCachePreferL1);
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q7_AAodd_FreeLee_PhaseField<<<NBLOCKS,NTHREADS >>>(neighborList, Map, hq, Den, Phi, ColorGrad, Vel,
rhoA, rhoB, tauM, W, start, finish, Np);
hipError_t err = hipGetLastError();
@ -3406,9 +3405,9 @@ extern "C" void ScaLBL_D3Q7_AAodd_FreeLee_PhaseField(int *neighborList, int *Map
}
extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( int *Map, double *hq, double *Den, double *Phi, double *ColorGrad, double *Vel,
double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np){
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField, hipFuncCachePreferL1);
double rhoA, double rhoB, double tauM, double W, int start, int finish, int Np)
{
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q7_AAeven_FreeLee_PhaseField<<<NBLOCKS,NTHREADS >>>( Map, hq, Den, Phi, ColorGrad, Vel, rhoA, rhoB, tauM, W, start, finish, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
@ -3419,7 +3418,7 @@ extern "C" void ScaLBL_D3Q7_AAeven_FreeLee_PhaseField( int *Map, double *hq, dou
extern "C" void ScaLBL_D3Q7_ComputePhaseField(int *Map, double *hq, double *Den, double *Phi, double rhoA, double rhoB, int start, int finish, int Np){
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_ComputePhaseField, hipFuncCachePreferL1);
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q7_ComputePhaseField, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q7_ComputePhaseField<<<NBLOCKS,NTHREADS >>>( Map, hq, Den, Phi, rhoA, rhoB, start, finish, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
@ -3432,7 +3431,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel(int *neighborList, int *Map, dou
double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz,
int strideY, int strideZ, int start, int finish, int Np){
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel, hipFuncCachePreferL1);
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel<<<NBLOCKS,NTHREADS >>>(neighborList, Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad,
rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np);
hipError_t err = hipGetLastError();
@ -3445,7 +3444,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double
double rhoA, double rhoB, double tauA, double tauB, double kappa, double beta, double W, double Fx, double Fy, double Fz,
int strideY, int strideZ, int start, int finish, int Np){
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel, hipFuncCachePreferL1);
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel<<<NBLOCKS,NTHREADS >>>(Map, dist, Den, Phi, mu_phi, Vel, Pressure, ColorGrad,
rhoA, rhoB, tauA, tauB, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np);
hipError_t err = hipGetLastError();
@ -3458,7 +3457,7 @@ extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel(int *Map, double *dist, double
extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined(int *Map, double *dist, double *hq, double *Den, double *Phi, double *mu_phi, double *Vel, double *Pressure, double *ColorGrad,
double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz,
int strideY, int strideZ, int start, int finish, int Np){
cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined, cudaFuncCachePreferL1);
//hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_Combined<<<NBLOCKS,NTHREADS >>>(Map, dist, Den, hq, Phi, mu_phi, Vel, Pressure, ColorGrad,
rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np);
hipError_t err = hipGetLastError();
@ -3471,7 +3470,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined(int *neighborList, int
double rhoA, double rhoB, double tauA, double tauB, double tauM, double kappa, double beta, double W, double Fx, double Fy, double Fz,
int strideY, int strideZ, int start, int finish, int Np){
cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined, cudaFuncCachePreferL1);
//hipFuncSetCacheConfig(dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined<<<NBLOCKS,NTHREADS >>>(neighborList, Map, dist, hq, Den, Phi, mu_phi, Vel, Pressure, ColorGrad,
rhoA, rhoB, tauA, tauB, tauM, kappa, beta, W, Fx, Fy, Fz, strideY, strideZ, start, finish, Np);
hipError_t err = hipGetLastError();
@ -3482,7 +3481,7 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_Combined(int *neighborList, int
extern "C" void ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int *Map, double *hq, double *Den, double *Phi,
double rhoA, double rhoB, int start, int finish, int Np){
cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField, cudaFuncCachePreferL1);
//hipFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField<<<NBLOCKS,NTHREADS >>>( neighborList, Map, hq, Den, Phi, rhoA, rhoB, start, finish, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
@ -3492,7 +3491,7 @@ extern "C" void ScaLBL_D3Q7_AAodd_FreeLeeModel_PhaseField(int *neighborList, int
extern "C" void ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq, double *Den, double *Phi, double rhoA, double rhoB, int start, int finish, int Np){
cudaFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField, cudaFuncCachePreferL1);
//hipFuncSetCacheConfig(dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField<<<NBLOCKS,NTHREADS >>>( Map, hq, Den, Phi, rhoA, rhoB, start, finish, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
@ -3503,7 +3502,7 @@ extern "C" void ScaLBL_D3Q7_AAeven_FreeLeeModel_PhaseField(int *Map, double *hq,
extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborList, double *dist, double *Vel, double *Pressure,
double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK, hipFuncCachePreferL1);
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK<<<NBLOCKS,NTHREADS >>>(neighborList, dist, Vel, Pressure,
tau, rho0, Fx, Fy, Fz, start, finish, Np);
hipError_t err = hipGetLastError();
@ -3513,9 +3512,9 @@ extern "C" void ScaLBL_D3Q19_AAodd_FreeLeeModel_SingleFluid_BGK(int *neighborLis
}
extern "C" void ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK(double *dist, double *Vel, double *Pressure,
double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np){
hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK, hipFuncCachePreferL1);
double tau, double rho0, double Fx, double Fy, double Fz, int start, int finish, int Np)
{
//hipFuncSetCacheConfig((void*)dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q19_AAeven_FreeLeeModel_SingleFluid_BGK<<<NBLOCKS,NTHREADS >>>(dist, Vel, Pressure,
tau, rho0, Fx, Fy, Fz, start, finish, Np);
hipError_t err = hipGetLastError();

View File

@ -1,5 +1,6 @@
#include <stdio.h>
#include <math.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 256
@ -1609,7 +1610,9 @@ __global__ void dvc_ScaLBL_D3Q19_AAodd_GreyscaleColor_CP(int *neighborList, int
Fcpy = ny;
Fcpz = nz;
double Fcp_mag=sqrt(Fcpx*Fcpx+Fcpy*Fcpy+Fcpz*Fcpz);
if (Fcp_mag==0.0); Fcpx=Fcpy=Fcpz=0.0;
if (Fcp_mag==0.0) {
Fcpx=Fcpy=Fcpz=0.0;
}
//NOTE for open node (porosity=1.0),Fcp=0.0
Fcpx *= alpha*W*(1.0-porosity)/sqrt(perm);
Fcpy *= alpha*W*(1.0-porosity)/sqrt(perm);
@ -2396,7 +2399,9 @@ __global__ void dvc_ScaLBL_D3Q19_AAeven_GreyscaleColor_CP(int *Map, double *dis
Fcpy = ny;
Fcpz = nz;
double Fcp_mag=sqrt(Fcpx*Fcpx+Fcpy*Fcpy+Fcpz*Fcpz);
if (Fcp_mag==0.0); Fcpx=Fcpy=Fcpz=0.0;
if (Fcp_mag==0.0) {
Fcpx=Fcpy=Fcpz=0.0;
}
//NOTE for open node (porosity=1.0),Fcp=0.0
Fcpx *= alpha*W*(1.0-porosity)/sqrt(perm);
Fcpy *= alpha*W*(1.0-porosity)/sqrt(perm);

View File

@ -1,422 +0,0 @@
#include <stdio.h>
#include <math.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 256
// Sums the seven D3Q7 distributions of each node in [start, finish) and stores
// the total in Den[n]. The rest distribution is read at dist[n]; the other six
// are read through neighborList (entry neighborList[n+q*Np], q = 0..5).
// Each thread visits S = Np/NBLOCKS/NTHREADS + 1 nodes.
__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
    const int S = Np/NBLOCKS/NTHREADS + 1;
    for (int s=0; s<S; s++){
        //........Get 1-D index for this thread....................
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue; // outside the requested range

        // q=0
        double Ci = dist[n];
        // q=1..6, accumulated in that order
        for (int q=0; q<6; q++){
            const int nread = neighborList[n+q*Np];
            Ci += dist[nread];
        }
        Den[n]=Ci;
    }
}
// Sums the seven D3Q7 distributions of each node in [start, finish) and stores
// the total in Den[n]. Distributions are read directly from dist[k*Np+n], with
// the slot numbers swapped pairwise relative to the q-index (q=1 from slot 2,
// q=2 from slot 1, and so on).
// Each thread visits S = Np/NBLOCKS/NTHREADS + 1 nodes.
__global__ void dvc_ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
    const int S = Np/NBLOCKS/NTHREADS + 1;
    for (int s=0; s<S; s++){
        //........Get 1-D index for this thread....................
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue; // outside the requested range

        // Added left to right: q = 0, 1, 2, 3, 4, 5, 6
        Den[n] = dist[n]
               + dist[2*Np+n]
               + dist[1*Np+n]
               + dist[4*Np+n]
               + dist[3*Np+n]
               + dist[6*Np+n]
               + dist[5*Np+n];
    }
}
// Relaxation step for one ion species on the D3Q7 lattice, "AAodd" addressing.
// For every node n in [start, finish) the kernel
//   1. loads the concentration Den[n], the three velocity and electric-field
//      components (stored as component c at index n+c*Np),
//   2. gathers f1..f6 through neighborList (f0 is read at dist[n]),
//   3. writes the diffusive, advective and electrical flux vectors,
//   4. relaxes each distribution towards a linear-in-velocity equilibrium with
//      rate rlx and writes f_q to the location its opposite was read from
//      (f1 -> nr2, f2 -> nr1, ...).
// Each thread visits S = Np/NBLOCKS/NTHREADS + 1 nodes.
//   Di, zi, Vt - scalars forming the field-induced velocity uEP = zi*Di/Vt*E
//   rlx        - relaxation rate
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
        double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
    const int S = Np/NBLOCKS/NTHREADS + 1;
    for (int s=0; s<S; s++){
        //........Get 1-D index for this thread....................
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue; // outside the requested range

        //Load data
        const double Ci = Den[n];
        const double Ex = ElectricField[n+0*Np];
        const double Ey = ElectricField[n+1*Np];
        const double Ez = ElectricField[n+2*Np];
        const double ux = Velocity[n+0*Np];
        const double uy = Velocity[n+1*Np];
        const double uz = Velocity[n+2*Np];
        // electrochemical induced velocity
        const double uEPx = zi*Di/Vt*Ex;
        const double uEPy = zi*Di/Vt*Ey;
        const double uEPz = zi*Di/Vt*Ez;

        // Gather the distributions; the nr* indices are reused for the write-back
        const double f0 = dist[n];
        const int nr1 = neighborList[n];
        const double f1 = dist[nr1];
        const int nr2 = neighborList[n+Np];
        const double f2 = dist[nr2];
        const int nr3 = neighborList[n+2*Np];
        const double f3 = dist[nr3];
        const int nr4 = neighborList[n+3*Np];
        const double f4 = dist[nr4];
        const int nr5 = neighborList[n+4*Np];
        const double f5 = dist[nr5];
        const int nr6 = neighborList[n+5*Np];
        const double f6 = dist[nr6];

        // compute diffusive flux
        FluxDiffusive[n+0*Np] = (1.0-0.5*rlx)*((f1-f2)-ux*Ci);
        FluxDiffusive[n+1*Np] = (1.0-0.5*rlx)*((f3-f4)-uy*Ci);
        FluxDiffusive[n+2*Np] = (1.0-0.5*rlx)*((f5-f6)-uz*Ci);
        // advective and electrical fluxes
        FluxAdvective[n+0*Np] = ux*Ci;
        FluxAdvective[n+1*Np] = uy*Ci;
        FluxAdvective[n+2*Np] = uz*Ci;
        FluxElectrical[n+0*Np] = uEPx*Ci;
        FluxElectrical[n+1*Np] = uEPy*Ci;
        FluxElectrical[n+2*Np] = uEPz*Ci;

        // Relaxation. Only terms linear in the velocity v = u + uEP are kept.
        // Disabled quadratic alternative (previously kept as commented-out code):
        //   q=0 : rlx*0.25 *Ci*(1.0 - 2.0*(vx*vx+vy*vy+vz*vz))
        //   q>0 : rlx*0.125*Ci*(1.0 +/- 4.0*va + 8.0*va*va - 2.0*(vx*vx+vy*vy+vz*vz))
        // q=0
        dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci;
        // q=1 and q=2 (x direction)
        dist[nr2] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));
        dist[nr1] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));
        // q=3 and q=4 (y direction)
        dist[nr4] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));
        dist[nr3] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));
        // q=5 and q=6 (z direction)
        dist[nr6] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));
        dist[nr5] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));
    }
}
// Relaxation step for one ion species on the D3Q7 lattice, direct "AAeven" addressing.
// For every node n in [start, finish) the kernel
//   1. loads the concentration Den[n], the three velocity and electric-field
//      components (stored as component c at index n+c*Np),
//   2. reads f0..f6 from dist[k*Np+n] with slots swapped pairwise (f1 from
//      slot 2, f2 from slot 1, ...),
//   3. writes the diffusive, advective and electrical flux vectors,
//   4. relaxes each distribution towards a linear-in-velocity equilibrium with
//      rate rlx and stores f_q in slot q.
// Each thread visits S = Np/NBLOCKS/NTHREADS + 1 nodes.
//   Di, zi, Vt - scalars forming the field-induced velocity uEP = zi*Di/Vt*E
//   rlx        - relaxation rate
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
        double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
    const int S = Np/NBLOCKS/NTHREADS + 1;
    for (int s=0; s<S; s++){
        //........Get 1-D index for this thread....................
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n >= finish) continue; // outside the requested range

        //Load data
        const double Ci = Den[n];
        const double Ex = ElectricField[n+0*Np];
        const double Ey = ElectricField[n+1*Np];
        const double Ez = ElectricField[n+2*Np];
        const double ux = Velocity[n+0*Np];
        const double uy = Velocity[n+1*Np];
        const double uz = Velocity[n+2*Np];
        // electrochemical induced velocity
        const double uEPx = zi*Di/Vt*Ex;
        const double uEPy = zi*Di/Vt*Ey;
        const double uEPz = zi*Di/Vt*Ez;

        // Read all seven distributions before any of them is overwritten
        const double f0 = dist[n];
        const double f1 = dist[2*Np+n];
        const double f2 = dist[1*Np+n];
        const double f3 = dist[4*Np+n];
        const double f4 = dist[3*Np+n];
        const double f5 = dist[6*Np+n];
        const double f6 = dist[5*Np+n];

        // compute diffusive flux
        FluxDiffusive[n+0*Np] = (1.0-0.5*rlx)*((f1-f2)-ux*Ci);
        FluxDiffusive[n+1*Np] = (1.0-0.5*rlx)*((f3-f4)-uy*Ci);
        FluxDiffusive[n+2*Np] = (1.0-0.5*rlx)*((f5-f6)-uz*Ci);
        // advective and electrical fluxes
        FluxAdvective[n+0*Np] = ux*Ci;
        FluxAdvective[n+1*Np] = uy*Ci;
        FluxAdvective[n+2*Np] = uz*Ci;
        FluxElectrical[n+0*Np] = uEPx*Ci;
        FluxElectrical[n+1*Np] = uEPy*Ci;
        FluxElectrical[n+2*Np] = uEPz*Ci;

        // Relaxation. Only terms linear in the velocity v = u + uEP are kept.
        // Disabled quadratic alternative (previously kept as commented-out code):
        //   q=0 : rlx*0.25 *Ci*(1.0 - 2.0*(vx*vx+vy*vy+vz*vz))
        //   q>0 : rlx*0.125*Ci*(1.0 +/- 4.0*va + 8.0*va*va - 2.0*(vx*vx+vy*vy+vz*vz))
        // q=0
        dist[n] = f0*(1.0-rlx)+rlx*0.25*Ci;
        // q=1 and q=2 (x direction)
        dist[1*Np+n] = f1*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(ux+uEPx));
        dist[2*Np+n] = f2*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(ux+uEPx));
        // q=3 and q=4 (y direction)
        dist[3*Np+n] = f3*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uy+uEPy));
        dist[4*Np+n] = f4*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uy+uEPy));
        // q=5 and q=6 (z direction)
        dist[5*Np+n] = f5*(1.0-rlx) + rlx*0.125*Ci*(1.0+4.0*(uz+uEPz));
        dist[6*Np+n] = f6*(1.0-rlx) + rlx*0.125*Ci*(1.0-4.0*(uz+uEPz));
    }
}
// Kernel: initialise the seven D3Q7 distributions and the density field.
//   dist    - distribution array, seven consecutive blocks of Np values
//   Den     - density array (Np values), set to DenInit
//   DenInit - uniform initial density
//   Np      - number of sites
// Slot q=0 receives 0.25*DenInit, slots q=1..6 receive 0.125*DenInit.
__global__ void dvc_ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
    const double centerValue = 0.25*DenInit;
    const double linkValue = 0.125*DenInit;
    // Number of sites each thread visits, one per pass.
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < passes; pass++){
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (site >= Np) continue;
        dist[site] = centerValue;
        for (int q = 1; q < 7; q++){
            dist[q*Np+site] = linkValue;
        }
        Den[site] = DenInit;
    }
}
// Kernel: initialise the seven D3Q7 distributions from an existing density field.
//   dist - distribution array, seven consecutive blocks of Np values (written)
//   Den  - per-site density (read only)
//   Np   - number of sites
// Slot q=0 receives 0.25*Den[n], slots q=1..6 receive 0.125*Den[n].
__global__ void dvc_ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
    // Number of sites each thread visits, one per pass.
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < passes; pass++){
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (site >= Np) continue;
        const double density = Den[site];
        dist[site] = 0.25*density;
        for (int q = 1; q < 7; q++){
            dist[q*Np+site] = 0.125*density;
        }
    }
}
// Kernel: accumulate the charge-density contribution of one ion species.
//   Den           - ion concentrations, one block of Np values per species
//   ChargeDensity - per-site charge density (accumulated in place)
//   IonValence    - valence of the species being added
//   ion_component - index of the species; values <= 0 restart the accumulation
//   start, finish - half-open range of sites to process
//   Np            - number of sites (stride between species in Den)
// For each site, F*IonValence*Ci is added to the running total. When
// ion_component is not positive the previous total is discarded. The reset is
// done with an explicit zero rather than by multiplying the old value by 0, so
// a stale NaN/Inf left in ChargeDensity cannot survive (NaN*0 is NaN).
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
    const double F = 96485.0;//Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant
    const int S = Np/NBLOCKS/NTHREADS + 1;
    for (int s=0; s<S; s++){
        //........Get 1-D index for this thread....................
        const int n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
        if (n<finish) {
            const double Ci = Den[n+ion_component*Np];
            // Only carry the previous total forward for the second and later species.
            const double CD = (ion_component > 0) ? ChargeDensity[n] : 0.0;
            const double CD_tmp = F*IonValence*Ci;
            ChargeDensity[n] = CD + CD_tmp;
        }
    }
}
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_IonConcentration<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_IonConcentration: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_IonConcentration<<<NBLOCKS,NTHREADS >>>(dist,Den,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_IonConcentration: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Ion<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,FluxDiffusive,FluxAdvective,FluxElectrical,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Ion<<<NBLOCKS,NTHREADS >>>(dist,Den,FluxDiffusive,FluxAdvective,FluxElectrical,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_Init<<<NBLOCKS,NTHREADS >>>(dist,Den,DenInit,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Ion_Init: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_Init_FromFile<<<NBLOCKS,NTHREADS >>>(dist,Den,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Ion_Init_FromFile: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, int IonValence, int ion_component, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<<NBLOCKS,NTHREADS >>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Ion_ChargeDensity: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}

969
hip/Ion.hip Normal file
View File

@ -0,0 +1,969 @@
#include <stdio.h>
#include <math.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 512
// Copy selected receive-buffer entries into distribution q.
//   q         - distribution index; values land in dist[q*N + n]
//   list      - site index for every receive-buffer entry (negative = skip)
//   links     - positions (relative to start) of the entries to unpack
//   start     - offset applied to links, list and recvbuf
//   linkCount - number of entries of links to process
//   recvbuf   - received values
//   dist      - destination distribution array with stride N per direction
//   N         - stride between consecutive distributions in dist
extern "C" void Membrane_D3Q19_Unpack(int q, int *list, int *links, int start, int linkCount,
                                      double *recvbuf, double *dist, int N) {
    for (int k = 0; k < linkCount; ++k) {
        // Position of this link inside the receive list / buffer.
        const int entry = start + links[start + k];
        const int site = list[entry];
        if (site < 0) {
            continue;  // entry does not map to a local site
        }
        dist[q * N + site] = recvbuf[entry];
    }
}
// Copy selected receive-buffer entries into distribution q, scaling each by
// its transport coefficient.
//   q         - distribution index; values land in dist[q*N + n]
//   list      - site index for every receive-buffer entry (negative = skip)
//   links     - positions (relative to start) of the entries to transport
//   coef      - per-entry scaling coefficient, indexed like recvbuf
//   start     - offset applied to links, list, coef and recvbuf
//   offset    - first link to process; links [offset, linkCount) are handled
//   linkCount - end (exclusive) of the link range
//   recvbuf   - received values
//   dist      - destination distribution array with stride N per direction
//   N         - stride between consecutive distributions in dist
// The entry position is taken from `links`, matching Membrane_D3Q19_Unpack.
// Reading it from `list` (as before) used a site index as a list offset and
// left `links` unused.
extern "C" void Membrane_D3Q19_Transport(int q, int *list, int *links, double *coef, int start, int offset,
                                         int linkCount, double *recvbuf, double *dist, int N){
    for (int link = offset; link < linkCount; link++){
        // Position of this link inside the receive list / buffer.
        const int idx = links[start + link];
        // n is the local site that receives the value.
        const int n = list[start + idx];
        if (n < 0) continue;
        const double alpha = coef[start + idx];
        dist[q * N + n] = alpha * recvbuf[start + idx];
    }
}
// Kernel: choose the pair of mass-transfer coefficients for every interior
// membrane link.
//   membrane  - two distribution indices per link (2*link, 2*link+1)
//   Map       - maps a site index to the index used to address Psi
//   Psi       - potential field
//   coef      - output, two coefficients per link
//   Threshold - potential difference above which the threshold fractions apply
//   memLinks  - number of links
//   Np        - number of sites; distribution indices are reduced modulo Np
// Distance, Nx, Ny and Nz are accepted but not used by this kernel.
__global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
        double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
        int memLinks, int Nx, int Ny, int Nz, int Np){
    // Number of links each thread visits, one per pass.
    const int passes = memLinks/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < passes; pass++){
        const int link = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (link >= memLinks) continue;
        // Sites on either end of the link.
        const int siteQ = membrane[2*link] % Np;
        const int siteP = membrane[2*link+1] % Np;
        // Potential difference across the link.
        const double membranePotential = Psi[Map[siteQ]] - Psi[Map[siteP]];
        const bool overThreshold = membranePotential > Threshold;
        coef[2*link] = overThreshold ? ThresholdMassFractionIn : MassFractionIn;
        coef[2*link+1] = overThreshold ? ThresholdMassFractionOut : MassFractionOut;
    }
}
// Kernel: assign the pair of mass-transfer coefficients (aq, ap) for each of
// the `count` entries of a halo receive list.
//   Cqx, Cqy, Cqz  - lattice offset subtracted from the local (i,j,k) to find
//                    the site on the other end of the link
//   Map            - maps a receive-list site to a linear index k*Nx*Ny+j*Nx+i
//   Distance, Psi  - fields addressed by that linear index
//   Threshold      - potential difference that switches coefficient sets
//   d3q7_recvlist  - site index per entry (negative entries keep 1.0/1.0)
//   d3q7_linkList  - label per entry; only label > 0 gets membrane treatment
//   coef           - output: coef[2*idx] = aq, coef[2*idx+1] = ap
//   count          - number of entries processed
// start, nlinks, N and Nz are accepted but not used by this kernel.
__global__ void dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
const int Cqx, const int Cqy, int const Cqz,
int *Map, double *Distance, double *Psi, double Threshold,
double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
const int N, const int Nx, const int Ny, const int Nz) {
//....................................................................................
// For every halo entry, pick the coefficients from the local distance value
// and the potential difference between the two ends of the link.
// Entries that are not membrane links keep the regular streaming rule (1, 1).
//....................................................................................
int n, idx, nqm, npm, label, i, j, k;
double distanceLocal, distanceNonlocal;
double psiLocal, psiNonlocal, membranePotential;
double ap,aq; // coefficient
/* second enforce custom rule for membrane links */
// Each thread handles up to S entries, one per pass.
int S = count/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
idx = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
if (idx < count) {
n = d3q7_recvlist[idx];
label = d3q7_linkList[idx];
ap = 1.0; // regular streaming rule
aq = 1.0;
if (label > 0 && !(n < 0)){
nqm = Map[n];
distanceLocal = Distance[nqm];
psiLocal = Psi[nqm];
// Get the 3-D indices from the send process
k = nqm/(Nx*Ny); j = (nqm-Nx*Ny*k)/Nx; i = nqm-Nx*Ny*k-Nx*j;
// Streaming link the non-local distribution
i -= Cqx; j -= Cqy; k -= Cqz;
npm = k*Nx*Ny + j*Nx + i;
// NOTE(review): distanceNonlocal is loaded but never read afterwards.
distanceNonlocal = Distance[npm];
psiNonlocal = Psi[npm];
membranePotential = psiLocal - psiNonlocal;
// Default membrane coefficients; overridden below depending on side/potential.
aq = MassFractionIn;
ap = MassFractionOut;
/* link is inside membrane */
if (distanceLocal > 0.0){
// Below -Threshold the In/Out fractions are swapped; otherwise the
// threshold fractions are used (ap takes the "In" value here).
if (membranePotential < Threshold*(-1.0)){
ap = MassFractionIn;
aq = MassFractionOut;
}
else {
ap = ThresholdMassFractionIn;
aq = ThresholdMassFractionOut;
}
}
// distanceLocal <= 0: threshold fractions only when the potential exceeds Threshold.
else if (membranePotential > Threshold){
aq = ThresholdMassFractionIn;
ap = ThresholdMassFractionOut;
}
}
// Coefficients are written for every entry, including non-membrane ones.
coef[2*idx]=aq;
coef[2*idx+1]=ap;
}
}
}
// Kernel: blend received halo values into distribution q using the per-entry
// mass-transfer coefficients.
//   q             - distribution index; values live at dist[q*N + n]
//   d3q7_recvlist - site index per receive-buffer entry (negative = skip)
//   recvbuf       - received values, one per entry
//   count         - number of entries
//   dist          - distribution array with stride N per direction
//   N             - stride between consecutive distributions in dist
//   coef          - two coefficients per entry: coef[2*idx] and coef[2*idx+1]
// For each valid entry: dist = (1 - coef[2*idx])*dist + coef[2*idx+1]*recvbuf.
__global__ void dvc_ScaLBL_D3Q7_Membrane_Unpack(int q,
        int *d3q7_recvlist, double *recvbuf, int count,
        double *dist, int N, double *coef) {
    // Number of entries each thread visits, one per pass.
    const int passes = count/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < passes; pass++){
        const int idx = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (idx >= count) continue;
        const int site = d3q7_recvlist[idx];
        if (site < 0) continue;
        const double localCoef = coef[2*idx];
        const double remoteCoef = coef[2*idx+1];
        const double localValue = dist[q * N + site];
        const double remoteValue = recvbuf[idx];
        dist[q * N + site] = (1-localCoef)*localValue+remoteCoef*remoteValue;
    }
}
// Kernel: exchange mass across every interior membrane link and update the
// densities of the two sites involved.
//   membrane - two distribution indices per link: iq = [2*link], ip = [2*link+1]
//   coef     - two coefficients per link: aq = [2*link], ap = [2*link+1]
//   dist     - distribution array; dist[iq] and dist[ip] are rewritten
//   Den      - per-site density, adjusted by the change in each distribution
//   memLinks - number of links
//   Np       - number of sites; iq%Np and ip%Np give the owning sites
// Several distribution slots reduce to the same site under `% Np`, so two
// links handled by different threads can target the same Den entry. The
// density change is therefore applied with atomicAdd instead of a plain
// load/modify/store, which could lose one of the updates.
__global__ void dvc_ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef,
        double *dist, double *Den, int memLinks, int Np){
    const int S = memLinks/NBLOCKS/NTHREADS + 1;
    for (int s=0; s<S; s++){
        //........Get 1-D index for this thread....................
        const int link = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x;
        if (link < memLinks){
            const double aq = coef[2*link];
            const double ap = coef[2*link+1];
            const int iq = membrane[2*link];
            const int ip = membrane[2*link+1];
            const int nq = iq%Np;
            const int np = ip%Np;
            const double fq = dist[iq];
            const double fp = dist[ip];
            // Post-exchange distributions on each side of the link.
            const double fqq = (1-aq)*fq+ap*fp;
            const double fpp = (1-ap)*fp+aq*fq;
            // Apply the net change to the site densities without losing
            // concurrent contributions from other links at the same site.
            atomicAdd(&Den[nq], fqq - fq);
            atomicAdd(&Den[np], fpp - fp);
            dist[iq] = fqq;
            dist[ip] = fpp;
        }
    }
}
// Kernel: compute the ion concentration at each site as the sum of its seven
// distributions, reading q=1..6 through the neighbor list.
//   neighborList  - six blocks of Np entries; block q-1 holds the index in
//                   dist from which distribution q is read
//   dist          - distribution array
//   Den           - output concentration per site
//   start, finish - half-open range of sites to process
//   Np            - number of sites
__global__ void dvc_ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
    // Number of sites each thread visits, one per pass.
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < passes; pass++){
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;
        // q=0 is stored at the site itself.
        double total = dist[site];
        // q=1..6 are gathered in order through the neighbor list.
        for (int dir = 0; dir < 6; dir++){
            const int source = neighborList[site+dir*Np];
            total += dist[source];
        }
        Den[site] = total;
    }
}
// Kernel: compute the ion concentration at each site as the sum of its seven
// distributions, read directly from the site's own slots.
//   dist          - distribution array, seven blocks of Np values
//   Den           - output concentration per site
//   start, finish - half-open range of sites to process
//   Np            - number of sites
// The slots are accumulated in the order 0, 2, 1, 4, 3, 6, 5.
__global__ void dvc_ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
    // Number of sites each thread visits, one per pass.
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < passes; pass++){
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;
        double total = dist[site];
        // Each opposing pair is read with its two slots swapped.
        for (int pair = 0; pair < 3; pair++){
            total += dist[(2*pair+2)*Np+site];
            total += dist[(2*pair+1)*Np+site];
        }
        Den[site] = total;
    }
}
// Kernel: D3Q7 ion update that reads q=1..6 through the neighbor list.
// For every site n in [start, finish) it
//   1. loads the electric field and velocity (three blocks of Np values each),
//   2. forms uEP = zi*Di/Vt*E,
//   3. gathers f0..f6 (f0 from dist[n], f1..f6 via neighborList),
//   4. sets Ci to their sum and stores it in Den,
//   5. writes the diffusive, advective and electrical flux components,
//   6. relaxes each distribution with rate rlx and writes it back in place.
// Each relaxed value is written to the slot its opposite direction was read
// from (f1 -> nr2, f2 -> nr1, ...), so all reads must happen before any write.
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
int n;
double Ci;
double ux,uy,uz;
double uEPx,uEPy,uEPz;//electrochemical induced velocity
double Ex,Ey,Ez;//electrical field
double flux_diffusive_x,flux_diffusive_y,flux_diffusive_z;
double f0,f1,f2,f3,f4,f5,f6;
double X,Y,Z,factor_x,factor_y,factor_z;
int nr1,nr2,nr3,nr4,nr5,nr6;
// Each thread handles up to S sites, one per pass.
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
Ex = ElectricField[n + 0 * Np];
Ey = ElectricField[n + 1 * Np];
Ez = ElectricField[n + 2 * Np];
ux = Velocity[n + 0 * Np];
uy = Velocity[n + 1 * Np];
uz = Velocity[n + 2 * Np];
uEPx = zi * Di / Vt * Ex;
uEPy = zi * Di / Vt * Ey;
uEPz = zi * Di / Vt * Ez;
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
// q=2
nr2 = neighborList[n + Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n + 2 * Np]; // neighbor 4
f3 = dist[nr3];
// q=4
nr4 = neighborList[n + 3 * Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n + 4 * Np];
f5 = dist[nr5];
// q=6
nr6 = neighborList[n + 5 * Np];
f6 = dist[nr6];
// compute diffusive flux
// Ci is recomputed from the distributions rather than read from Den.
Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
FluxDiffusive[n + 0 * Np] = flux_diffusive_x;
FluxDiffusive[n + 1 * Np] = flux_diffusive_y;
FluxDiffusive[n + 2 * Np] = flux_diffusive_z;
FluxAdvective[n + 0 * Np] = ux * Ci;
FluxAdvective[n + 1 * Np] = uy * Ci;
FluxAdvective[n + 2 * Np] = uz * Ci;
FluxElectrical[n + 0 * Np] = uEPx * Ci;
FluxElectrical[n + 1 * Np] = uEPy * Ci;
FluxElectrical[n + 2 * Np] = uEPz * Ci;
Den[n] = Ci;
/* use a bounded sigmoid, X/sqrt(1+X*X), to prevent negative distributions */
// |factor| < 1 for any finite X, so (1 +/- factor) stays positive, unlike
// the unbounded linear term 4*(u+uEP) kept in the commented-out lines below.
X = 4.0 * (ux + uEPx);
Y = 4.0 * (uy + uEPy);
Z = 4.0 * (uz + uEPz);
factor_x = X / sqrt(1 + X*X);
factor_y = Y / sqrt(1 + Y*Y);
factor_z = Z / sqrt(1 + Z*Z);
// q=0
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
// q = 1
dist[nr2] =
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
//f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
// q=2
dist[nr1] =
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
//f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
// q = 3
dist[nr4] =
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y );
//f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
// q = 4
dist[nr3] =
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
//f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
// q = 5
dist[nr6] =
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
//f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
// q = 6
dist[nr5] =
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
}
}
}
// Kernel: D3Q7 ion update that reads and writes only the site's own slots.
// For every site n in [start, finish) it
//   1. loads the electric field and velocity (three blocks of Np values each),
//   2. forms uEP = zi*Di/Vt*E,
//   3. reads f0..f6 from the site's slots with each opposing pair swapped
//      (f1 from slot 2, f2 from slot 1, ...),
//   4. sets Ci to their sum and stores it in Den,
//   5. writes the diffusive, advective and electrical flux components,
//   6. relaxes each distribution with rate rlx and writes fq to slot q.
// Reads and writes touch the same slots, so all reads must precede the writes.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
int n;
double Ci;
double ux,uy,uz;
double uEPx,uEPy,uEPz;//electrochemical induced velocity
double Ex,Ey,Ez;//electrical field
double flux_diffusive_x,flux_diffusive_y,flux_diffusive_z;
double f0,f1,f2,f3,f4,f5,f6;
double X,Y,Z,factor_x,factor_y,factor_z;
// Each thread handles up to S sites, one per pass.
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
//Ci = Den[n];
Ex = ElectricField[n + 0 * Np];
Ey = ElectricField[n + 1 * Np];
Ez = ElectricField[n + 2 * Np];
ux = Velocity[n + 0 * Np];
uy = Velocity[n + 1 * Np];
uz = Velocity[n + 2 * Np];
uEPx = zi * Di / Vt * Ex;
uEPy = zi * Di / Vt * Ey;
uEPz = zi * Di / Vt * Ez;
// Opposing pairs are read swapped: fq comes from the slot of its opposite.
f0 = dist[n];
f1 = dist[2 * Np + n];
f2 = dist[1 * Np + n];
f3 = dist[4 * Np + n];
f4 = dist[3 * Np + n];
f5 = dist[6 * Np + n];
f6 = dist[5 * Np + n];
// compute diffusive flux
// Ci is recomputed from the distributions rather than read from Den.
Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
FluxDiffusive[n + 0 * Np] = flux_diffusive_x;
FluxDiffusive[n + 1 * Np] = flux_diffusive_y;
FluxDiffusive[n + 2 * Np] = flux_diffusive_z;
FluxAdvective[n + 0 * Np] = ux * Ci;
FluxAdvective[n + 1 * Np] = uy * Ci;
FluxAdvective[n + 2 * Np] = uz * Ci;
FluxElectrical[n + 0 * Np] = uEPx * Ci;
FluxElectrical[n + 1 * Np] = uEPy * Ci;
FluxElectrical[n + 2 * Np] = uEPz * Ci;
Den[n] = Ci;
/* use a bounded sigmoid, X/sqrt(1+X*X), to prevent negative distributions */
// |factor| < 1 for any finite X, so (1 +/- factor) stays positive, unlike
// the unbounded linear term 4*(u+uEP) kept in the commented-out lines below.
X = 4.0 * (ux + uEPx);
Y = 4.0 * (uy + uEPy);
Z = 4.0 * (uz + uEPz);
factor_x = X / sqrt(1 + X*X);
factor_y = Y / sqrt(1 + Y*Y);
factor_z = Z / sqrt(1 + Z*Z);
// q=0
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
// q = 1
dist[1 * Np + n] =
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
//f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
// q=2
dist[2 * Np + n] =
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
//f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
// q = 3
dist[3 * Np + n] =
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y);
//f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
// q = 4
dist[4 * Np + n] =
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
//f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
// q = 5
dist[5 * Np + n] =
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
//f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
// q = 6
dist[6 * Np + n] =
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
//f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
}
}
}
// Kernel: initialise the seven D3Q7 distributions and the density field.
//   dist    - distribution array, seven consecutive blocks of Np values
//   Den     - density array (Np values), set to DenInit
//   DenInit - uniform initial density
//   Np      - number of sites
// Slot q=0 receives 0.25*DenInit, slots q=1..6 receive 0.125*DenInit.
__global__ void dvc_ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
    const double centerValue = 0.25*DenInit;
    const double linkValue = 0.125*DenInit;
    // Number of sites each thread visits, one per pass.
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < passes; pass++){
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (site >= Np) continue;
        dist[site] = centerValue;
        for (int q = 1; q < 7; q++){
            dist[q*Np+site] = linkValue;
        }
        Den[site] = DenInit;
    }
}
// Kernel: initialise the seven D3Q7 distributions from an existing density field.
//   dist - distribution array, seven consecutive blocks of Np values (written)
//   Den  - per-site density (read only)
//   Np   - number of sites
// Slot q=0 receives 0.25*Den[n], slots q=1..6 receive 0.125*Den[n].
__global__ void dvc_ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
    // Number of sites each thread visits, one per pass.
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < passes; pass++){
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x;
        if (site >= Np) continue;
        const double density = Den[site];
        dist[site] = 0.25*density;
        for (int q = 1; q < 7; q++){
            dist[q*Np+site] = 0.125*density;
        }
    }
}
// Kernel: accumulate the charge-density contribution of one ion species.
//   Den           - ion concentrations, one block of Np values per species
//   ChargeDensity - per-site charge density (accumulated in place)
//   IonValence    - valence of the species being added
//   ion_component - index of the species; component 0 restarts the total
//   start, finish - half-open range of sites to process
//   Np            - number of sites (stride between species in Den)
__global__ void dvc_ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, double IonValence, int ion_component, int start, int finish, int Np){
    // Faraday's constant; unit[C/mol]; F=e*Na, where Na is the Avogadro constant
    const double faraday = 96485.0;
    // Number of sites each thread visits, one per pass.
    const int passes = Np/NBLOCKS/NTHREADS + 1;
    for (int pass = 0; pass < passes; pass++){
        const int site = passes*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;
        const double concentration = Den[site+ion_component*Np];
        // The first species overwrites whatever the buffer held before.
        const double previous = (ion_component == 0) ? 0.0 : ChargeDensity[site];
        const double contribution = faraday*IonValence*concentration;
        ChargeDensity[site] = previous + contribution;
    }
}
// Kernel: variant of the neighbor-list D3Q7 ion update that takes the
// concentration Ci from Den (Den is not written) and relaxes toward
// 0.125*Ci*(1 +/- 4*(u+uEP)) instead of using the bounded factor.
// For every site n in [start, finish) it loads Ci, the electric field and
// the velocity, gathers f0..f6 (f1..f6 via neighborList), writes the three
// flux fields and then stores the relaxed distributions. Each relaxed value
// is written to the slot its opposite direction was read from
// (f1 -> nr2, f2 -> nr1, ...), so all reads must happen before any write.
__global__ void dvc_ScaLBL_D3Q7_AAodd_Ion_v0(int *neighborList, double *dist,
double *Den, double *FluxDiffusive,
double *FluxAdvective,
double *FluxElectrical, double *Velocity,
double *ElectricField, double Di, int zi,
double rlx, double Vt, int start,
int finish, int Np) {
int n;
double Ci;
double ux, uy, uz;
double uEPx, uEPy, uEPz; //electrochemical induced velocity
double Ex, Ey, Ez; //electrical field
double flux_diffusive_x, flux_diffusive_y, flux_diffusive_z;
double f0, f1, f2, f3, f4, f5, f6;
//double X,Y,Z,factor_x, factor_y, factor_z;
int nr1, nr2, nr3, nr4, nr5, nr6;
// Each thread handles up to S sites, one per pass.
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
Ci = Den[n];
Ex = ElectricField[n + 0 * Np];
Ey = ElectricField[n + 1 * Np];
Ez = ElectricField[n + 2 * Np];
ux = Velocity[n + 0 * Np];
uy = Velocity[n + 1 * Np];
uz = Velocity[n + 2 * Np];
uEPx = zi * Di / Vt * Ex;
uEPy = zi * Di / Vt * Ey;
uEPz = zi * Di / Vt * Ez;
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
// q=2
nr2 = neighborList[n + Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n + 2 * Np]; // neighbor 4
f3 = dist[nr3];
// q=4
nr4 = neighborList[n + 3 * Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n + 4 * Np];
f5 = dist[nr5];
// q=6
nr6 = neighborList[n + 5 * Np];
f6 = dist[nr6];
// compute diffusive flux
// (Ci comes from Den here; the sum below is intentionally disabled.)
//Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
FluxDiffusive[n + 0 * Np] = flux_diffusive_x;
FluxDiffusive[n + 1 * Np] = flux_diffusive_y;
FluxDiffusive[n + 2 * Np] = flux_diffusive_z;
FluxAdvective[n + 0 * Np] = ux * Ci;
FluxAdvective[n + 1 * Np] = uy * Ci;
FluxAdvective[n + 2 * Np] = uz * Ci;
FluxElectrical[n + 0 * Np] = uEPx * Ci;
FluxElectrical[n + 1 * Np] = uEPy * Ci;
FluxElectrical[n + 2 * Np] = uEPz * Ci;
//Den[n] = Ci;
/* bounded-factor alternative, disabled in this variant */
//X = 4.0 * (ux + uEPx);
//Y = 4.0 * (uy + uEPy);
//Z = 4.0 * (uz + uEPz);
//factor_x = X / sqrt(1 + X*X);
//factor_y = Y / sqrt(1 + Y*Y);
//factor_z = Z / sqrt(1 + Z*Z);
// q=0
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
// q = 1
dist[nr2] =
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
// f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
// q=2
dist[nr1] =
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
// f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
// q = 3
dist[nr4] =
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
// f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y );
// q = 4
dist[nr3] =
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
// f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
// q = 5
dist[nr6] =
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
// f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
// q = 6
dist[nr5] =
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
// f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
}
}
}
// Kernel: variant of the in-place D3Q7 ion update that takes the
// concentration Ci from Den (Den is not written) and relaxes toward
// 0.125*Ci*(1 +/- 4*(u+uEP)) instead of using the bounded factor.
// For every site n in [start, finish) it loads Ci, the electric field and
// the velocity, reads f0..f6 from the site's own slots with each opposing
// pair swapped, writes the three flux fields and stores relaxed fq in slot q.
// Reads and writes touch the same slots, so all reads must precede the writes.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Ion_v0(
double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective,
double *FluxElectrical, double *Velocity, double *ElectricField, double Di,
int zi, double rlx, double Vt, int start, int finish, int Np) {
int n;
double Ci;
double ux, uy, uz;
double uEPx, uEPy, uEPz; //electrochemical induced velocity
double Ex, Ey, Ez; //electrical field
double flux_diffusive_x, flux_diffusive_y, flux_diffusive_z;
double f0, f1, f2, f3, f4, f5, f6;
//double X,Y,Z, factor_x, factor_y, factor_z;
// Each thread handles up to S sites, one per pass.
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
Ci = Den[n];
Ex = ElectricField[n + 0 * Np];
Ey = ElectricField[n + 1 * Np];
Ez = ElectricField[n + 2 * Np];
ux = Velocity[n + 0 * Np];
uy = Velocity[n + 1 * Np];
uz = Velocity[n + 2 * Np];
uEPx = zi * Di / Vt * Ex;
uEPy = zi * Di / Vt * Ey;
uEPz = zi * Di / Vt * Ez;
// Opposing pairs are read swapped: fq comes from the slot of its opposite.
f0 = dist[n];
f1 = dist[2 * Np + n];
f2 = dist[1 * Np + n];
f3 = dist[4 * Np + n];
f4 = dist[3 * Np + n];
f5 = dist[6 * Np + n];
f6 = dist[5 * Np + n];
// compute diffusive flux
// (Ci comes from Den here; the sum below is intentionally disabled.)
//Ci = f0 + f1 + f2 + f3 + f4 + f5 + f6;
flux_diffusive_x = (1.0 - 0.5 * rlx) * ((f1 - f2) - ux * Ci);
flux_diffusive_y = (1.0 - 0.5 * rlx) * ((f3 - f4) - uy * Ci);
flux_diffusive_z = (1.0 - 0.5 * rlx) * ((f5 - f6) - uz * Ci);
FluxDiffusive[n + 0 * Np] = flux_diffusive_x;
FluxDiffusive[n + 1 * Np] = flux_diffusive_y;
FluxDiffusive[n + 2 * Np] = flux_diffusive_z;
FluxAdvective[n + 0 * Np] = ux * Ci;
FluxAdvective[n + 1 * Np] = uy * Ci;
FluxAdvective[n + 2 * Np] = uz * Ci;
FluxElectrical[n + 0 * Np] = uEPx * Ci;
FluxElectrical[n + 1 * Np] = uEPy * Ci;
FluxElectrical[n + 2 * Np] = uEPz * Ci;
//Den[n] = Ci;
/* bounded-factor alternative, disabled in this variant */
//X = 4.0 * (ux + uEPx);
//Y = 4.0 * (uy + uEPy);
//Z = 4.0 * (uz + uEPz);
//factor_x = X / sqrt(1 + X*X);
//factor_y = Y / sqrt(1 + Y*Y);
//factor_z = Z / sqrt(1 + Z*Z);
// q=0
dist[n] = f0 * (1.0 - rlx) + rlx * 0.25 * Ci;
// q = 1
dist[1 * Np + n] =
f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (ux + uEPx));
// f1 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_x);
// q=2
dist[2 * Np + n] =
f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (ux + uEPx));
// f2 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_x);
// q = 3
dist[3 * Np + n] =
f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uy + uEPy));
// f3 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_y);
// q = 4
dist[4 * Np + n] =
f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uy + uEPy));
// f4 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_y);
// q = 5
dist[5 * Np + n] =
f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + 4.0 * (uz + uEPz));
// f5 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 + factor_z);
// q = 6
dist[6 * Np + n] =
f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - 4.0 * (uz + uEPz));
// f6 * (1.0 - rlx) + rlx * 0.125 * Ci * (1.0 - factor_z);
}
}
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion_v0(
double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective,
double *FluxElectrical, double *Velocity, double *ElectricField, double Di,
int zi, double rlx, double Vt, int start, int finish, int Np) {
dvc_ScaLBL_D3Q7_AAeven_Ion_v0<<<NBLOCKS,NTHREADS >>>(dist,
Den, FluxDiffusive, FluxAdvective,
FluxElectrical, Velocity,
ElectricField, Di, zi,
rlx, Vt, start, finish, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in dvc_ScaLBL_D3Q7_AAeven_Ion_v0: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion_v0(int *neighborList, double *dist,
double *Den, double *FluxDiffusive,
double *FluxAdvective,
double *FluxElectrical, double *Velocity,
double *ElectricField, double Di, int zi,
double rlx, double Vt, int start,
int finish, int Np) {
dvc_ScaLBL_D3Q7_AAodd_Ion_v0<<<NBLOCKS,NTHREADS >>>(neighborList, dist,
Den, FluxDiffusive, FluxAdvective,
FluxElectrical, Velocity,
ElectricField, Di, zi,
rlx, Vt, start,
finish, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in dvc_ScaLBL_D3Q7_AAodd_Ion_v0: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_IonConcentration(int *neighborList, double *dist, double *Den, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_IonConcentration<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_IonConcentration: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_IonConcentration(double *dist, double *Den, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_IonConcentration<<<NBLOCKS,NTHREADS >>>(dist,Den,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_IonConcentration: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAodd_Ion(int *neighborList, double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Ion<<<NBLOCKS,NTHREADS >>>(neighborList,dist,Den,FluxDiffusive,FluxAdvective,FluxElectrical,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Ion: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Ion(double *dist, double *Den, double *FluxDiffusive, double *FluxAdvective, double *FluxElectrical, double *Velocity, double *ElectricField,
double Di, int zi, double rlx, double Vt, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Ion<<<NBLOCKS,NTHREADS >>>(dist,Den,FluxDiffusive,FluxAdvective,FluxElectrical,Velocity,ElectricField,Di,zi,rlx,Vt,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Ion: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_Init(double *dist, double *Den, double DenInit, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_Init<<<NBLOCKS,NTHREADS >>>(dist,Den,DenInit,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Ion_Init: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_Init_FromFile(double *dist, double *Den, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_Init_FromFile<<<NBLOCKS,NTHREADS >>>(dist,Den,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Ion_Init_FromFile: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Ion_ChargeDensity(double *Den, double *ChargeDensity, double IonValence, int ion_component, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Ion_ChargeDensity<<<NBLOCKS,NTHREADS >>>(Den,ChargeDensity,IonValence,ion_component,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Ion_ChargeDensity: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef(int *membrane, int *Map, double *Distance, double *Psi, double *coef,
double Threshold, double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int memLinks, int Nx, int Ny, int Nz, int Np){
dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef<<<NBLOCKS,NTHREADS >>>(membrane, Map, Distance, Psi, coef,
Threshold, MassFractionIn, MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut,
memLinks, Nx, Ny, Nz, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo(
const int Cqx, const int Cqy, int const Cqz,
int *Map, double *Distance, double *Psi, double Threshold,
double MassFractionIn, double MassFractionOut, double ThresholdMassFractionIn, double ThresholdMassFractionOut,
int *d3q7_recvlist, int *d3q7_linkList, double *coef, int start, int nlinks, int count,
const int N, const int Nx, const int Ny, const int Nz) {
dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo<<<NBLOCKS,NTHREADS>>>(
Cqx, Cqy, Cqz, Map, Distance, Psi, Threshold,
MassFractionIn, MassFractionOut, ThresholdMassFractionIn, ThresholdMassFractionOut,
d3q7_recvlist, d3q7_linkList, coef, start, nlinks, count, N, Nx, Ny, Nz);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_AssignLinkCoef_halo: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_Membrane_Unpack(int q,
int *d3q7_recvlist, double *recvbuf, int count,
double *dist, int N, double *coef){
dvc_ScaLBL_D3Q7_Membrane_Unpack<<<NBLOCKS,NTHREADS >>>(q, d3q7_recvlist, recvbuf,count,
dist, N, coef);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_Unpack: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_Membrane_IonTransport(int *membrane, double *coef,
double *dist, double *Den, int memLinks, int Np){
dvc_ScaLBL_D3Q7_Membrane_IonTransport<<<NBLOCKS,NTHREADS >>>(membrane, coef, dist, Den, memLinks, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("CUDA error in dvc_ScaLBL_D3Q7_Membrane_IonTransport: %s \n",hipGetErrorString(err));
}
}

View File

@ -19,8 +19,8 @@
//*************************************************************************
#include "hip/hip_runtime.h"
#define NBLOCKS 560
#define NTHREADS 128
#define NBLOCKS 1024
#define NTHREADS 256
__global__ void INITIALIZE(char *ID, double *f_even, double *f_odd, int Nx, int Ny, int Nz)
{
@ -122,8 +122,7 @@ __global__ void Compute_VELOCITY(char *ID, double *disteven, double *distodd, do
//*************************************************************************
__global__ void
__launch_bounds__(512,2)
D3Q19_MRT(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz,
__launch_bounds__(256,4) D3Q19_MRT(char *ID, double *disteven, double *distodd, int Nx, int Ny, int Nz,
double rlx_setA, double rlx_setB, double Fx, double Fy, double Fz)
{

View File

@ -65,13 +65,11 @@ __global__ void dvc_ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gr
// Host-side launcher for dvc_ScaLBL_D3Q19_MixedGradient.
// Forwards all arguments unchanged to the kernel using the NBLOCKS x NTHREADS
// launch configuration and prints the HIP error string if hipGetLastError()
// does not return hipSuccess. No status is returned to the caller.
extern "C" void ScaLBL_D3Q19_MixedGradient(int *Map, double *Phi, double *Gradient, int start, int finish, int Np, int Nx, int Ny, int Nz)
{
// The kernel launch is bracketed by hipProfilerStart()/hipProfilerStop().
hipProfilerStart();
dvc_ScaLBL_D3Q19_MixedGradient<<<NBLOCKS,NTHREADS >>>(Map, Phi, Gradient, start, finish, Np, Nx, Ny, Nz);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q19_MixedGradient: %s \n",hipGetErrorString(err));
}
hipProfilerStop();
}

View File

@ -1,332 +0,0 @@
#include <stdio.h>
#include <math.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 256
// Computes the electric potential at each site in [start, finish) as the sum
// of seven entries of dist: the rest entry dist[n] plus six entries located
// through neighborList. The sum is stored in Psi at index Map[n].
//   neighborList : six index tables of length Np (table k starts at k*Np)
//   Map          : per-site index into Psi
//   Np           : stride between tables; also sets the per-thread pass count
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
    // Passes per thread, derived from Np and the compile-time launch constants.
    const int sweeps = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<sweeps; pass++){
        // 1-D site index handled by this thread on this pass.
        const int site = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        // q = 0 is read in place.
        double potential = dist[site];
        // q = 1..6 are read through the neighbor tables, in that order.
        for (int k=0; k<6; k++){
            potential += dist[neighborList[site + k*Np]];
        }
        Psi[Map[site]] = potential;
    }
}
// Computes the electric potential at each site in [start, finish) as the sum
// of the seven entries dist[q*Np + n], q = 0..6, and stores it in Psi at index
// Map[n]. Entries are accumulated in the order 0, 2, 1, 4, 3, 6, 5 (each
// direction pair is read with its two slots swapped).
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
    // Passes per thread, derived from Np and the compile-time launch constants.
    const int sweeps = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<sweeps; pass++){
        // 1-D site index handled by this thread on this pass.
        const int site = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        double potential = dist[site];
        // Walk the three direction pairs (1,2), (3,4), (5,6); within a pair
        // the higher slot is added first, matching the accumulation order above.
        for (int lo=1; lo<6; lo+=2){
            potential += dist[(lo+1)*Np+site];
            potential += dist[lo*Np+site];
        }
        Psi[Map[site]] = potential;
    }
}
// D3Q7 Poisson relaxation step that addresses six of the seven distributions
// through neighborList. For each site in [start, finish) it
//   1. reads the charge term rho_e (0 when UseSlippingVelBC is set, otherwise
//      Den_charge[n]/epsilon_LB) and the potential psi = Psi[Map[n]],
//   2. loads f0 from dist[n] and f1..f6 through the neighbor tables,
//   3. writes the electric field (Ex,Ey,Ez) = (f1-f2, f3-f4, f5-f6)*rlx*4.0
//      into ElectricField[n + {0,1,2}*Np],
//   4. writes the relaxed values back, each pair exchanging slots
//      (the value from nr1 goes to nr2 and vice versa, etc.).
// rlx = 1/tau. All loads complete before any store to dist.
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
    const double rlx = 1.0/tau;
    const double keep = 1.0-rlx;   // weight applied to the pre-collision value
    // Passes per thread, derived from Np and the compile-time launch constants.
    const int sweeps = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<sweeps; pass++){
        // 1-D site index handled by this thread on this pass.
        const int site = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        // With the Helmholtz-Smoluchowski slipping-velocity BC the net space
        // charge density is taken to be zero.
        double rho_e;
        if (UseSlippingVelBC){
            rho_e = 0.0;
        }
        else {
            rho_e = Den_charge[site] / epsilon_LB;
        }
        const double psi = Psi[Map[site]];

        // Neighbor indices for q = 1..6.
        const int nr1 = neighborList[site];
        const int nr2 = neighborList[site+Np];
        const int nr3 = neighborList[site+2*Np];
        const int nr4 = neighborList[site+3*Np];
        const int nr5 = neighborList[site+4*Np];
        const int nr6 = neighborList[site+5*Np];

        // Load all seven distributions before anything is overwritten.
        const double f0 = dist[site];
        const double f1 = dist[nr1];
        const double f2 = dist[nr2];
        const double f3 = dist[nr3];
        const double f4 = dist[nr4];
        const double f5 = dist[nr5];
        const double f6 = dist[nr6];

        // Electric field; per the original notes the unit is V/lu and the
        // factor 4.0 comes from the D3Q7 lattice speed of sound.
        ElectricField[site+0*Np] = (f1-f2)*rlx*4.0;
        ElectricField[site+1*Np] = (f3-f4)*rlx*4.0;
        ElectricField[site+2*Np] = (f5-f6)*rlx*4.0;

        // Term shared by all seven updates.
        const double source = rlx*psi+rho_e;

        dist[site] = f0*keep + 0.25*source;   // q = 0
        dist[nr2] = f1*keep + 0.125*source;   // q = 1
        dist[nr1] = f2*keep + 0.125*source;   // q = 2
        dist[nr4] = f3*keep + 0.125*source;   // q = 3
        dist[nr3] = f4*keep + 0.125*source;   // q = 4
        dist[nr6] = f5*keep + 0.125*source;   // q = 5
        dist[nr5] = f6*keep + 0.125*source;   // q = 6
    }
}
// D3Q7 Poisson relaxation step using direct addressing dist[q*Np + n].
// For each site in [start, finish) it
//   1. reads the charge term rho_e (0 when UseSlippingVelBC is set, otherwise
//      Den_charge[n]/epsilon_LB) and the potential psi = Psi[Map[n]],
//   2. loads f0..f6, reading each direction pair from swapped slots
//      (f1 from slot 2, f2 from slot 1, ...),
//   3. writes the electric field (Ex,Ey,Ez) = (f1-f2, f3-f4, f5-f6)*rlx*4.0
//      into ElectricField[n + {0,1,2}*Np],
//   4. writes the relaxed value of fq into slot q.
// rlx = 1/tau. All loads complete before any store to dist.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
    const double rlx = 1.0/tau;
    const double keep = 1.0-rlx;   // weight applied to the pre-collision value
    // Passes per thread, derived from Np and the compile-time launch constants.
    const int sweeps = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<sweeps; pass++){
        // 1-D site index handled by this thread on this pass.
        const int site = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        // With the Helmholtz-Smoluchowski slipping-velocity BC the net space
        // charge density is taken to be zero.
        double rho_e;
        if (UseSlippingVelBC){
            rho_e = 0.0;
        }
        else {
            rho_e = Den_charge[site] / epsilon_LB;
        }
        const double psi = Psi[Map[site]];

        // Load all seven distributions before anything is overwritten.
        const double f0 = dist[site];
        const double f1 = dist[2*Np+site];
        const double f2 = dist[1*Np+site];
        const double f3 = dist[4*Np+site];
        const double f4 = dist[3*Np+site];
        const double f5 = dist[6*Np+site];
        const double f6 = dist[5*Np+site];

        // Electric field; per the original notes the unit is V/lu and the
        // factor 4.0 comes from the D3Q7 lattice speed of sound.
        ElectricField[site+0*Np] = (f1-f2)*rlx*4.0;
        ElectricField[site+1*Np] = (f3-f4)*rlx*4.0;
        ElectricField[site+2*Np] = (f5-f6)*rlx*4.0;

        // Term shared by all seven updates.
        const double source = rlx*psi+rho_e;

        dist[site] = f0*keep + 0.25*source;        // q = 0
        dist[1*Np+site] = f1*keep + 0.125*source;  // q = 1
        dist[2*Np+site] = f2*keep + 0.125*source;  // q = 2
        dist[3*Np+site] = f3*keep + 0.125*source;  // q = 3
        dist[4*Np+site] = f4*keep + 0.125*source;  // q = 4
        dist[5*Np+site] = f5*keep + 0.125*source;  // q = 5
        dist[6*Np+site] = f6*keep + 0.125*source;  // q = 6
    }
}
// Initializes the seven D3Q7 distributions of every site in [start, finish)
// from the potential Psi[Map[n]]: slot 0 receives 0.25*Psi and slots 1..6
// receive 0.125*Psi each.
__global__ void dvc_ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
    // Passes per thread, derived from Np and the compile-time launch constants.
    const int sweeps = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<sweeps; pass++){
        // 1-D site index handled by this thread on this pass.
        const int site = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        const int cell = Map[site];
        dist[0*Np+site] = 0.25*Psi[cell];
        for (int q=1; q<=6; q++){
            dist[q*Np+site] = 0.125*Psi[cell];
        }
    }
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Psi,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,UseSlippingVelBC,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>(Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,UseSlippingVelBC,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
//cudaProfilerStart();
dvc_ScaLBL_D3Q7_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Poisson_Init: %s \n",hipGetErrorString(err));
}
//cudaProfilerStop();
}

705
hip/Poisson.hip Normal file
View File

@ -0,0 +1,705 @@
#include <stdio.h>
#include <math.h>
#include "hip/hip_runtime.h"
#define NBLOCKS 1024
#define NTHREADS 256
// Computes the electric potential at each site in [start, finish) as the sum
// of seven entries of dist: the rest entry dist[n] plus six entries located
// through neighborList. The sum is stored in Psi at index Map[n].
//   neighborList : six index tables of length Np (table k starts at k*Np)
//   Map          : per-site index into Psi
//   Np           : stride between tables; also sets the per-thread pass count
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
    // Passes per thread, derived from Np and the compile-time launch constants.
    const int sweeps = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<sweeps; pass++){
        // 1-D site index handled by this thread on this pass.
        const int site = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        // q = 0 is read in place.
        double potential = dist[site];
        // q = 1..6 are read through the neighbor tables, in that order.
        for (int k=0; k<6; k++){
            potential += dist[neighborList[site + k*Np]];
        }
        Psi[Map[site]] = potential;
    }
}
// Computes the electric potential at each site in [start, finish) as the sum
// of the seven entries dist[q*Np + n], q = 0..6, and stores it in Psi at index
// Map[n]. Entries are accumulated in the order 0, 2, 1, 4, 3, 6, 5 (each
// direction pair is read with its two slots swapped).
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
    // Passes per thread, derived from Np and the compile-time launch constants.
    const int sweeps = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<sweeps; pass++){
        // 1-D site index handled by this thread on this pass.
        const int site = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        double potential = dist[site];
        // Walk the three direction pairs (1,2), (3,4), (5,6); within a pair
        // the higher slot is added first, matching the accumulation order above.
        for (int lo=1; lo<6; lo+=2){
            potential += dist[(lo+1)*Np+site];
            potential += dist[lo*Np+site];
        }
        Psi[Map[site]] = potential;
    }
}
// D3Q7 Poisson relaxation step that addresses six of the seven distributions
// through neighborList. For each site in [start, finish) it
//   1. reads the charge term rho_e = Den_charge[n]/epsilon_LB and the
//      potential psi = Psi[Map[n]],
//   2. loads f0 from dist[n] and f1..f6 through the neighbor tables,
//   3. writes the electric field (Ex,Ey,Ez) = (f1-f2, f3-f4, f5-f6)*rlx*4.0
//      into ElectricField[n + {0,1,2}*Np],
//   4. writes the relaxed values back, each pair exchanging slots
//      (the value from nr1 goes to nr2 and vice versa, etc.).
// rlx = 1/tau. All loads complete before any store to dist.
// UseSlippingVelBC is accepted for interface compatibility but is not read by
// this kernel: the charge term is always taken from Den_charge.
__global__ void dvc_ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
    const double rlx = 1.0/tau;
    const double keep = 1.0-rlx;   // weight applied to the pre-collision value
    // Passes per thread, derived from Np and the compile-time launch constants.
    const int sweeps = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<sweeps; pass++){
        // 1-D site index handled by this thread on this pass.
        const int site = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        const double rho_e = Den_charge[site] / epsilon_LB;
        const double psi = Psi[Map[site]];

        // Neighbor indices for q = 1..6.
        const int nr1 = neighborList[site];
        const int nr2 = neighborList[site+Np];
        const int nr3 = neighborList[site+2*Np];
        const int nr4 = neighborList[site+3*Np];
        const int nr5 = neighborList[site+4*Np];
        const int nr6 = neighborList[site+5*Np];

        // Load all seven distributions before anything is overwritten.
        const double f0 = dist[site];
        const double f1 = dist[nr1];
        const double f2 = dist[nr2];
        const double f3 = dist[nr3];
        const double f4 = dist[nr4];
        const double f5 = dist[nr5];
        const double f6 = dist[nr6];

        // Electric field; per the original notes the unit is V/lu and the
        // factor 4.0 comes from the D3Q7 lattice speed of sound.
        ElectricField[site+0*Np] = (f1-f2)*rlx*4.0;
        ElectricField[site+1*Np] = (f3-f4)*rlx*4.0;
        ElectricField[site+2*Np] = (f5-f6)*rlx*4.0;

        // Term shared by all seven updates.
        const double source = rlx*psi+rho_e;

        dist[site] = f0*keep + 0.25*source;   // q = 0
        dist[nr2] = f1*keep + 0.125*source;   // q = 1
        dist[nr1] = f2*keep + 0.125*source;   // q = 2
        dist[nr4] = f3*keep + 0.125*source;   // q = 3
        dist[nr3] = f4*keep + 0.125*source;   // q = 4
        dist[nr6] = f5*keep + 0.125*source;   // q = 5
        dist[nr5] = f6*keep + 0.125*source;   // q = 6
    }
}
// D3Q7 Poisson relaxation step using direct addressing dist[q*Np + n].
// For each site in [start, finish) it
//   1. reads the charge term rho_e = Den_charge[n]/epsilon_LB and the
//      potential psi = Psi[Map[n]],
//   2. loads f0..f6, reading each direction pair from swapped slots
//      (f1 from slot 2, f2 from slot 1, ...),
//   3. writes the electric field (Ex,Ey,Ez) = (f1-f2, f3-f4, f5-f6)*rlx*4.0
//      into ElectricField[n + {0,1,2}*Np],
//   4. writes the relaxed value of fq into slot q.
// rlx = 1/tau. All loads complete before any store to dist.
// UseSlippingVelBC is accepted for interface compatibility but is not read by
// this kernel: the charge term is always taken from Den_charge.
__global__ void dvc_ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
    const double rlx = 1.0/tau;
    const double keep = 1.0-rlx;   // weight applied to the pre-collision value
    // Passes per thread, derived from Np and the compile-time launch constants.
    const int sweeps = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<sweeps; pass++){
        // 1-D site index handled by this thread on this pass.
        const int site = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        const double rho_e = Den_charge[site] / epsilon_LB;
        const double psi = Psi[Map[site]];

        // Load all seven distributions before anything is overwritten.
        const double f0 = dist[site];
        const double f1 = dist[2*Np+site];
        const double f2 = dist[1*Np+site];
        const double f3 = dist[4*Np+site];
        const double f4 = dist[3*Np+site];
        const double f5 = dist[6*Np+site];
        const double f6 = dist[5*Np+site];

        // Electric field; per the original notes the unit is V/lu and the
        // factor 4.0 comes from the D3Q7 lattice speed of sound.
        ElectricField[site+0*Np] = (f1-f2)*rlx*4.0;
        ElectricField[site+1*Np] = (f3-f4)*rlx*4.0;
        ElectricField[site+2*Np] = (f5-f6)*rlx*4.0;

        // Term shared by all seven updates.
        const double source = rlx*psi+rho_e;

        dist[site] = f0*keep + 0.25*source;        // q = 0
        dist[1*Np+site] = f1*keep + 0.125*source;  // q = 1
        dist[2*Np+site] = f2*keep + 0.125*source;  // q = 2
        dist[3*Np+site] = f3*keep + 0.125*source;  // q = 3
        dist[4*Np+site] = f4*keep + 0.125*source;  // q = 4
        dist[5*Np+site] = f5*keep + 0.125*source;  // q = 5
        dist[6*Np+site] = f6*keep + 0.125*source;  // q = 6
    }
}
// Initializes the seven D3Q7 distributions of every site in [start, finish)
// from the potential Psi[Map[n]]: slot 0 receives 0.25*Psi and slots 1..6
// receive 0.125*Psi each.
__global__ void dvc_ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
    // Passes per thread, derived from Np and the compile-time launch constants.
    const int sweeps = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<sweeps; pass++){
        // 1-D site index handled by this thread on this pass.
        const int site = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        const int cell = Map[site];
        dist[0*Np+site] = 0.25*Psi[cell];
        for (int q=1; q<=6; q++){
            dist[q*Np+site] = 0.125*Psi[cell];
        }
    }
}
// D3Q19 Poisson update that addresses 18 of the 19 distributions through
// neighborList. For each site n in [start, finish) the kernel
//   1. reads the charge term rho_e (0 when UseSlippingVelBC is set, otherwise
//      Den_charge[n]/epsilon_LB),
//   2. loads f0 from dist[n] and f1..f18 from dist[neighborList[n + k*Np]],
//   3. computes psi = 2*(f0*(1-rlx) + rlx*(sum of f1..f18 + 0.125*rho_e)) and
//      stores it in Psi[Map[n]],
//   4. writes the electric field into ElectricField[n + {0,1,2}*Np],
//   5. overwrites all 19 distributions with weight*psi (W0 for q=0, W1 for
//      q=1..6, W2 for q=7..18), each pair exchanging neighbor slots.
// rlx = 1/tau.
__global__ void dvc_ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
double *dist, double *Den_charge,
double *Psi, double *ElectricField,
double tau, double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
int n;
double psi; //electric potential
double Ex, Ey, Ez; //electric field
double rho_e; //local charge density
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
f16, f17, f18;
int nr1, nr2, nr3, nr4, nr5, nr6, nr7, nr8, nr9, nr10, nr11, nr12, nr13,
nr14, nr15, nr16, nr17, nr18;
double error,sum_q;
double rlx = 1.0 / tau;
int idx;
// Lattice weights: W0 + 6*W1 + 12*W2 = 1.
double W0 = 0.5;
double W1 = 1.0/24.0;
double W2 = 1.0/48.0;
// Number of passes each thread makes, derived from Np and the launch constants.
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
//and thus the net space charge density is zero.
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
// q=0
f0 = dist[n];
// q=1
nr1 = neighborList[n]; // neighbor 2 ( > 10Np => odd part of dist)
f1 = dist[nr1]; // reading the f1 data into register fq
nr2 = neighborList[n + Np]; // neighbor 1 ( < 10Np => even part of dist)
f2 = dist[nr2]; // reading the f2 data into register fq
// q=3
nr3 = neighborList[n + 2 * Np]; // neighbor 4
f3 = dist[nr3];
// q = 4
nr4 = neighborList[n + 3 * Np]; // neighbor 3
f4 = dist[nr4];
// q=5
nr5 = neighborList[n + 4 * Np];
f5 = dist[nr5];
// q = 6
nr6 = neighborList[n + 5 * Np];
f6 = dist[nr6];
// q=7
nr7 = neighborList[n + 6 * Np];
f7 = dist[nr7];
// q = 8
nr8 = neighborList[n + 7 * Np];
f8 = dist[nr8];
// q=9
nr9 = neighborList[n + 8 * Np];
f9 = dist[nr9];
// q = 10
nr10 = neighborList[n + 9 * Np];
f10 = dist[nr10];
// q=11
nr11 = neighborList[n + 10 * Np];
f11 = dist[nr11];
// q=12
nr12 = neighborList[n + 11 * Np];
f12 = dist[nr12];
// q=13
nr13 = neighborList[n + 12 * Np];
f13 = dist[nr13];
// q=14
nr14 = neighborList[n + 13 * Np];
f14 = dist[nr14];
// q=15
nr15 = neighborList[n + 14 * Np];
f15 = dist[nr15];
// q=16
nr16 = neighborList[n + 15 * Np];
f16 = dist[nr16];
// q=17
//fq = dist[18*Np+n];
nr17 = neighborList[n + 16 * Np];
f17 = dist[nr17];
// q=18
nr18 = neighborList[n + 17 * Np];
f18 = dist[nr18];
sum_q = f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15+f16+f17+f18;
// NOTE(review): error is assigned here but never read or stored in this
// kernel, and this kernel has no Error output parameter.
error = 8.0*(sum_q - f0) + rho_e;
psi = 2.0*(f0*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));
idx = Map[n];
Psi[idx] = psi;
// Field components are differences of opposing distributions scaled by 4.0;
// rlx does not enter these expressions.
Ex = (f1 - f2 + 0.5*(f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14))*4.0; //NOTE the unit of electric field here is V/lu
Ey = (f3 - f4 + 0.5*(f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18))*4.0;
Ez = (f5 - f6 + 0.5*(f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18))*4.0;
ElectricField[n + 0 * Np] = Ex;
ElectricField[n + 1 * Np] = Ey;
ElectricField[n + 2 * Np] = Ez;
// Every distribution is reset to weight*psi. The expressions kept in the
// trailing comments are an earlier formulation that is not compiled.
// q = 0
dist[n] = W0*psi; //f0 * (1.0 - rlx) - (1.0-0.5*rlx)*W0*rho_e;
// q = 1
dist[nr2] = W1*psi; //f1 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 2
dist[nr1] = W1*psi; //f2 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 3
dist[nr4] = W1*psi; //f3 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 4
dist[nr3] = W1*psi; //f4 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 5
dist[nr6] = W1*psi; //f5 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 6
dist[nr5] = W1*psi; //f6 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
//........................................................................
// q = 7
dist[nr8] = W2*psi; //f7 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 8
dist[nr7] = W2*psi; //f8 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 9
dist[nr10] = W2*psi; //f9 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 10
dist[nr9] = W2*psi; //f10 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 11
dist[nr12] = W2*psi; //f11 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 12
dist[nr11] = W2*psi; //f12 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 13
dist[nr14] = W2*psi; //f13 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q= 14
dist[nr13] = W2*psi; //f14 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 15
dist[nr16] = W2*psi; //f15 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 16
dist[nr15] = W2*psi; //f16 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 17
dist[nr18] = W2*psi; //f17 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
// q = 18
dist[nr17] = W2*psi;
}
}
}
// D3Q19 Poisson update using direct addressing dist[q*Np + n].
// For each site n in [start, finish) the kernel
//   1. reads the charge term rho_e (0 when UseSlippingVelBC is set, otherwise
//      Den_charge[n]/epsilon_LB),
//   2. loads f0..f18, reading each direction pair from swapped slots
//      (f1 from slot 2, f2 from slot 1, ...),
//   3. writes the electric field into ElectricField[n + {0,1,2}*Np],
//   4. computes psi = 2*(f0*(1-rlx) + rlx*(sum of f1..f18 + 0.125*rho_e)) and
//      stores it in Psi[Map[n]],
//   5. overwrites all 19 distributions with weight*psi (W0 for q=0, W1 for
//      q=1..6, W2 for q=7..18).
// rlx = 1/tau.
// NOTE(review): the Error parameter is never read or written in this body, and
// the local `error` is computed but unused. Presumably the residual was meant
// to be stored in Error -- confirm against the caller and the other backends.
__global__ void dvc_ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
double *Den_charge, double *Psi,
double *ElectricField, double *Error, double tau,
double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
int n;
double psi; //electric potential
double Ex, Ey, Ez; //electric field
double rho_e; //local charge density
double f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15,
f16, f17, f18;
double error,sum_q;
double rlx = 1.0 / tau;
int idx;
// Lattice weights: W0 + 6*W1 + 12*W2 = 1.
double W0 = 0.5;
double W1 = 1.0/24.0;
double W2 = 1.0/48.0;
// Number of passes each thread makes, derived from Np and the launch constants.
int S = Np/NBLOCKS/NTHREADS + 1;
for (int s=0; s<S; s++){
//........Get 1-D index for this thread....................
n = S*blockIdx.x*blockDim.x + s*blockDim.x + threadIdx.x + start;
if (n<finish) {
//Load data
//When Helmholtz-Smoluchowski slipping velocity BC is used, the bulk fluid is considered as electroneutral
//and thus the net space charge density is zero.
rho_e = (UseSlippingVelBC==1) ? 0.0 : Den_charge[n] / epsilon_LB;
// Each pair (1,2), (3,4), ... is loaded from the partner's slot.
f0 = dist[n];
f1 = dist[2 * Np + n];
f2 = dist[1 * Np + n];
f3 = dist[4 * Np + n];
f4 = dist[3 * Np + n];
f5 = dist[6 * Np + n];
f6 = dist[5 * Np + n];
f7 = dist[8 * Np + n];
f8 = dist[7 * Np + n];
f9 = dist[10 * Np + n];
f10 = dist[9 * Np + n];
f11 = dist[12 * Np + n];
f12 = dist[11 * Np + n];
f13 = dist[14 * Np + n];
f14 = dist[13 * Np + n];
f15 = dist[16 * Np + n];
f16 = dist[15 * Np + n];
f17 = dist[18 * Np + n];
f18 = dist[17 * Np + n];
// Field components are differences of opposing distributions scaled by 4.0;
// rlx does not enter these expressions.
Ex = (f1 - f2 + 0.5*(f7 - f8 + f9 - f10 + f11 - f12 + f13 - f14))*4.0; //NOTE the unit of electric field here is V/lu
Ey = (f3 - f4 + 0.5*(f7 - f8 - f9 + f10 + f15 - f16 + f17 - f18))*4.0;
Ez = (f5 - f6 + 0.5*(f11 - f12 - f13 + f14 + f15 - f16 - f17 + f18))*4.0;
ElectricField[n + 0 * Np] = Ex;
ElectricField[n + 1 * Np] = Ey;
ElectricField[n + 2 * Np] = Ez;
sum_q = f1+f2+f3+f4+f5+f6+f7+f8+f9+f10+f11+f12+f13+f14+f15+f16+f17+f18;
// Computed but not used below (see the NOTE(review) in the header).
error = 8.0*(sum_q - f0) + rho_e;
psi = 2.0*(f0*(1.0 - rlx) + rlx*(sum_q + 0.125*rho_e));
idx = Map[n];
Psi[idx] = psi;
// Every distribution is reset to weight*psi. The expressions kept in the
// trailing comments are an earlier formulation that is not compiled.
// q = 0
dist[n] = W0*psi;//
// q = 1
dist[1 * Np + n] = W1*psi;//f1 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 2
dist[2 * Np + n] = W1*psi;//f2 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 3
dist[3 * Np + n] = W1*psi;//f3 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 4
dist[4 * Np + n] = W1*psi;//f4 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 5
dist[5 * Np + n] = W1*psi;//f5 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 6
dist[6 * Np + n] = W1*psi;//f6 * (1.0 - rlx) +W1* (rlx * psi) - (1.0-0.5*rlx)*0.05555555555555555*rho_e;
// q = 7..18
dist[7 * Np + n] = W2*psi;//f7 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[8 * Np + n] = W2*psi;//f8* (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[9 * Np + n] = W2*psi;//f9 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[10 * Np + n] = W2*psi;//f10 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[11 * Np + n] = W2*psi;//f11 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[12 * Np + n] = W2*psi;//f12 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[13 * Np + n] = W2*psi;//f13 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[14 * Np + n] = W2*psi;//f14 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[15 * Np + n] = W2*psi;//f15 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[16 * Np + n] = W2*psi;//f16 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[17 * Np + n] = W2*psi;//f17 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
dist[18 * Np + n] = W2*psi;//f18 * (1.0 - rlx) +W2* (rlx * psi) - (1.0-0.5*rlx)*0.02777777777777778*rho_e;
//........................................................................
}
}
}
// Initializes the 19 D3Q19 distributions of every site in [start, finish)
// from the potential Psi[Map[n]]: slot 0 receives 0.5*Psi, slots 1..6 receive
// Psi/24 each and slots 7..18 receive Psi/48 each.
__global__ void dvc_ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
        int start, int finish, int Np) {
    // Lattice weights: rest, first six directions, remaining twelve directions.
    const double wRest = 0.5;
    const double wAxis = 1.0/24.0;
    const double wDiag = 1.0/48.0;

    // Passes per thread, derived from Np and the compile-time launch constants.
    const int sweeps = Np/NBLOCKS/NTHREADS + 1;
    for (int pass=0; pass<sweeps; pass++){
        // 1-D site index handled by this thread on this pass.
        const int site = sweeps*blockIdx.x*blockDim.x + pass*blockDim.x + threadIdx.x + start;
        if (site >= finish) continue;

        const int cell = Map[site];
        dist[0 * Np + site] = wRest * Psi[cell];
        for (int q=1; q<=6; q++){
            dist[q * Np + site] = wAxis * Psi[cell];
        }
        for (int q=7; q<=18; q++){
            dist[q * Np + site] = wDiag * Psi[cell];
        }
    }
}
extern "C" void ScaLBL_D3Q19_AAodd_Poisson(int *neighborList, int *Map,
double *dist, double *Den_charge,
double *Psi, double *ElectricField,
double tau, double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
hipFuncSetCacheConfig( (void*) dvc_ScaLBL_D3Q19_AAodd_Poisson, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q19_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList, Map,
dist, Den_charge, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, start, finish, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("Hip error in dvc_ScaLBL_D3Q19_AAodd_Poisson: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q19_AAeven_Poisson(int *Map, double *dist,
double *Den_charge, double *Psi,
double *ElectricField, double *Error, double tau,
double epsilon_LB, bool UseSlippingVelBC,
int start, int finish, int Np) {
hipFuncSetCacheConfig( (void*) dvc_ScaLBL_D3Q19_AAeven_Poisson, hipFuncCachePreferL1);
dvc_ScaLBL_D3Q19_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>( Map, dist, Den_charge, Psi,
ElectricField, Error, tau, epsilon_LB, UseSlippingVelBC, start, finish, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("Hip error in dvc_ScaLBL_D3Q19_AAeven_Poisson: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q19_Poisson_Init(int *Map, double *dist, double *Psi,
int start, int finish, int Np){
//hipProfilerStart();
dvc_ScaLBL_D3Q19_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map, dist, Psi, start, finish, Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("Hip error in ScaLBL_D3Q19_Poisson_Init: %s \n",hipGetErrorString(err));
}
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(int *neighborList,int *Map, double *dist, double *Psi, int start, int finish, int Np){
//hipProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Psi,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
}
//hipProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(int *Map, double *dist, double *Psi, int start, int finish, int Np){
//hipProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential: %s \n",hipGetErrorString(err));
}
//hipProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAodd_Poisson(int *neighborList, int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
//hipProfilerStart();
dvc_ScaLBL_D3Q7_AAodd_Poisson<<<NBLOCKS,NTHREADS >>>(neighborList,Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,UseSlippingVelBC,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAodd_Poisson: %s \n",hipGetErrorString(err));
}
//hipProfilerStop();
}
extern "C" void ScaLBL_D3Q7_AAeven_Poisson(int *Map, double *dist, double *Den_charge, double *Psi, double *ElectricField, double tau, double epsilon_LB,bool UseSlippingVelBC,int start, int finish, int Np){
//hipProfilerStart();
dvc_ScaLBL_D3Q7_AAeven_Poisson<<<NBLOCKS,NTHREADS >>>(Map,dist,Den_charge,Psi,ElectricField,tau,epsilon_LB,UseSlippingVelBC,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_AAeven_Poisson: %s \n",hipGetErrorString(err));
}
//hipProfilerStop();
}
extern "C" void ScaLBL_D3Q7_Poisson_Init(int *Map, double *dist, double *Psi, int start, int finish, int Np){
//hipProfilerStart();
dvc_ScaLBL_D3Q7_Poisson_Init<<<NBLOCKS,NTHREADS >>>(Map,dist,Psi,start,finish,Np);
hipError_t err = hipGetLastError();
if (hipSuccess != err){
printf("hip error in ScaLBL_D3Q7_Poisson_Init: %s \n",hipGetErrorString(err));
}
//hipProfilerStop();
}

538
models/BGKModel.cpp Normal file
View File

@ -0,0 +1,538 @@
/*
Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University
Copyright Equinor ASA
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* BGK (single-relaxation-time) LBM Model
*/
#include "models/BGKModel.h"
#include "analysis/distance.h"
#include "common/ReadMicroCT.h"
/*
 * Construct a BGK model.
 *
 * RANK and NP are stored in `rank` and `nprocs`; COMM is copied into `comm`.
 * Every scalar member is given a defined value and every device pointer is
 * set to nullptr, so that the destructor is safe even when Create() was
 * never called. ANALYSIS_INTERVAL and tolerance start at the same defaults
 * that ReadParams() assigns.
 *
 * Initialisers are listed in member declaration order.
 */
ScaLBL_BGKModel::ScaLBL_BGKModel(int RANK, int NP, const Utilities::MPI &COMM)
    : Restart(false), pBC(false), timestep(0), timestepMax(0),
      ANALYSIS_INTERVAL(1000), BoundaryCondition(0), tau(0), mu(0), Fx(0),
      Fy(0), Fz(0), flux(0), din(0), dout(0), tolerance(1.0e-8), Nx(0), Ny(0),
      Nz(0), N(0), Np(0), rank(RANK), nprocx(0), nprocy(0), nprocz(0),
      nprocs(NP), Lx(0), Ly(0), Lz(0), NeighborList(nullptr), fq(nullptr),
      Velocity(nullptr), Pressure(nullptr), comm(COMM) {}

/*
 * Release the device buffers allocated in Create().
 *
 * Pointers that were never allocated are still nullptr (see constructor)
 * and are skipped.
 * NOTE(review): the class is still implicitly copyable; copying an instance
 * after Create() would lead to a double free here. Consider deleting the
 * copy operations in the header.
 */
ScaLBL_BGKModel::~ScaLBL_BGKModel() {
    if (NeighborList != nullptr)
        ScaLBL_FreeDeviceMemory(NeighborList);
    if (fq != nullptr)
        ScaLBL_FreeDeviceMemory(fq);
    if (Pressure != nullptr)
        ScaLBL_FreeDeviceMemory(Pressure);
    if (Velocity != nullptr)
        ScaLBL_FreeDeviceMemory(Velocity);
}
/*
 * Load the input database from `filename` and read the model parameters.
 *
 * Sections used: "Domain", "BGK" (stored in mrt_db) and "Visualization".
 * Each parameter keeps its default unless the corresponding key exists in
 * the "BGK" section. The boundary condition is taken from the key
 * "BoundaryCondition" of the "BGK" section when present, otherwise from the
 * key "BC" of the "Domain" section.
 */
void ScaLBL_BGKModel::ReadParams(string filename) {
    // read the input database
    db = std::make_shared<Database>(filename);
    domain_db = db->getDatabase("Domain");
    mrt_db = db->getDatabase("BGK");
    vis_db = db->getDatabase("Visualization");

    // defaults, used when a key is absent
    tau = 1.0;
    timestepMax = 100000;
    ANALYSIS_INTERVAL = 1000;
    tolerance = 1.0e-8;
    Fx = Fy = 0.0;
    Fz = 1.0e-5;
    dout = 1.0;
    din = 1.0;

    // BGK model parameters
    if (mrt_db->keyExists("timestepMax")) {
        timestepMax = mrt_db->getScalar<int>("timestepMax");
    }
    if (mrt_db->keyExists("analysis_interval")) {
        ANALYSIS_INTERVAL = mrt_db->getScalar<int>("analysis_interval");
    }
    if (mrt_db->keyExists("tolerance")) {
        tolerance = mrt_db->getScalar<double>("tolerance");
    }
    if (mrt_db->keyExists("tau")) {
        tau = mrt_db->getScalar<double>("tau");
    }
    if (mrt_db->keyExists("F")) {
        // fetch the vector once instead of once per component
        const auto F = mrt_db->getVector<double>("F");
        Fx = F[0];
        Fy = F[1];
        Fz = F[2];
    }
    if (mrt_db->keyExists("Restart")) {
        Restart = mrt_db->getScalar<bool>("Restart");
    }
    if (mrt_db->keyExists("din")) {
        din = mrt_db->getScalar<double>("din");
    }
    if (mrt_db->keyExists("dout")) {
        dout = mrt_db->getScalar<double>("dout");
    }
    if (mrt_db->keyExists("flux")) {
        flux = mrt_db->getScalar<double>("flux");
    }

    // Boundary condition: read the same key that was tested for. The previous
    // code tested "BoundaryCondition" but then read "BC" from the BGK section.
    if (mrt_db->keyExists("BoundaryCondition")) {
        BoundaryCondition = mrt_db->getScalar<int>("BoundaryCondition");
    } else if (domain_db->keyExists("BC")) {
        BoundaryCondition = domain_db->getScalar<int>("BC");
    }

    // viscosity derived from the relaxation time
    mu = (tau - 0.5) / 3.0;
}
void ScaLBL_BGKModel::SetDomain() {
Dm = std::shared_ptr<Domain>(
new Domain(domain_db, comm)); // full domain for analysis
Mask = std::shared_ptr<Domain>(
new Domain(domain_db, comm)); // mask domain removes immobile phases
// domain parameters
Nx = Dm->Nx;
Ny = Dm->Ny;
Nz = Dm->Nz;
Lx = Dm->Lx;
Ly = Dm->Ly;
Lz = Dm->Lz;
N = Nx * Ny * Nz;
Distance.resize(Nx, Ny, Nz);
Velocity_x.resize(Nx, Ny, Nz);
Velocity_y.resize(Nx, Ny, Nz);
Velocity_z.resize(Nx, Ny, Nz);
for (int i = 0; i < Nx * Ny * Nz; i++)
Dm->id[i] = 1; // initialize this way
//Averages = std::shared_ptr<TwoPhase> ( new TwoPhase(Dm) ); // TwoPhase analysis object
comm.barrier();
Dm->CommInit();
comm.barrier();
rank = Dm->rank();
nprocx = Dm->nprocx();
nprocy = Dm->nprocy();
nprocz = Dm->nprocz();
}
void ScaLBL_BGKModel::ReadInput() {
sprintf(LocalRankString, "%05d", Dm->rank());
sprintf(LocalRankFilename, "%s%s", "ID.", LocalRankString);
sprintf(LocalRestartFile, "%s%s", "Restart.", LocalRankString);
if (domain_db->keyExists("Filename")) {
auto Filename = domain_db->getScalar<std::string>("Filename");
Mask->Decomp(Filename);
} else if (domain_db->keyExists("GridFile")) {
// Read the local domain data
auto input_id = readMicroCT(*domain_db, comm);
// Fill the halo (assuming GCW of 1)
array<int, 3> size0 = {(int)input_id.size(0), (int)input_id.size(1),
(int)input_id.size(2)};
ArraySize size1 = {(size_t)Mask->Nx, (size_t)Mask->Ny,
(size_t)Mask->Nz};
ASSERT((int)size1[0] == size0[0] + 2 && (int)size1[1] == size0[1] + 2 &&
(int)size1[2] == size0[2] + 2);
fillHalo<signed char> fill(comm, Mask->rank_info, size0, {1, 1, 1}, 0,
1);
Array<signed char> id_view;
id_view.viewRaw(size1, Mask->id.data());
fill.copy(input_id, id_view);
fill.fill(id_view);
} else {
Mask->ReadIDs();
}
// Generate the signed distance map
// Initialize the domain and communication
Array<char> id_solid(Nx, Ny, Nz);
// Solve for the position of the solid phase
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
int n = k * Nx * Ny + j * Nx + i;
// Initialize the solid phase
if (Mask->id[n] > 0)
id_solid(i, j, k) = 1;
else
id_solid(i, j, k) = 0;
}
}
}
// Initialize the signed distance function
for (int k = 0; k < Nz; k++) {
for (int j = 0; j < Ny; j++) {
for (int i = 0; i < Nx; i++) {
// Initialize distance to +/- 1
Distance(i, j, k) = 2.0 * double(id_solid(i, j, k)) - 1.0;
}
}
}
// MeanFilter(Averages->SDs);
if (rank == 0)
printf("Initialized solid phase -- Converting to Signed Distance "
"function \n");
CalcDist(Distance, id_solid, *Dm);
if (rank == 0)
cout << "Domain set." << endl;
}
/*
 * Create the data structures needed to run the LBM:
 *   - copy the Mask labels into Dm and initialise Mask communication
 *   - build the ScaLBL communicator and the memory-optimised site layout
 *   - allocate the device buffers (NeighborList, fq, Pressure, Velocity)
 *   - copy the neighbor list to the device
 *
 * The host-side neighbor list is owned by a unique_ptr so it is released on
 * exit (it was previously leaked). Byte counts are held in size_t so that
 * products such as 19 * Np * sizeof(double) cannot overflow int.
 */
void ScaLBL_BGKModel::Create() {
    // rank as reported by Mask, used for console output in this function
    const int mask_rank = Mask->rank();

    //.........................................................
    // Initialize communication structures in averaging domain
    for (int i = 0; i < Nx * Ny * Nz; i++)
        Dm->id[i] = Mask->id[i];
    Mask->CommInit();
    Np = Mask->PoreCount();

    //...........................................................................
    if (mask_rank == 0)
        printf("Create ScaLBL_Communicator \n");
    // Create a communicator for the device (will use optimized layout)
    ScaLBL_Comm =
        std::shared_ptr<ScaLBL_Communicator>(new ScaLBL_Communicator(Mask));

    // host buffer is sized from the padded site count
    const int Npad = (Np / 16 + 2) * 16;
    if (mask_rank == 0)
        printf("Set up memory efficient layout \n");
    Map.resize(Nx, Ny, Nz);
    Map.fill(-2);
    std::unique_ptr<int[]> neighborList(
        new int[18 * static_cast<size_t>(Npad)]);
    // Np is replaced by the value returned from the layout routine
    Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map, neighborList.get(),
                                              Mask->id.data(), Np, 1);
    comm.barrier();

    //...........................................................................
    //                MAIN  VARIABLES ALLOCATED HERE
    //...........................................................................
    if (mask_rank == 0)
        printf("Allocating distributions \n");
    //......................device distributions.................................
    const size_t site_count = static_cast<size_t>(Np);
    const size_t dist_mem_size = site_count * sizeof(double);
    const size_t neighborSize = 18 * site_count * sizeof(int);
    //...........................................................................
    ScaLBL_AllocateDeviceMemory((void **)&NeighborList, neighborSize);
    ScaLBL_AllocateDeviceMemory((void **)&fq, 19 * dist_mem_size);
    ScaLBL_AllocateDeviceMemory((void **)&Pressure, dist_mem_size);
    ScaLBL_AllocateDeviceMemory((void **)&Velocity, 3 * dist_mem_size);

    //...........................................................................
    // Update GPU data structures
    if (mask_rank == 0)
        printf("Setting up device map and neighbor list \n");
    // copy the neighbor list
    ScaLBL_CopyToDevice(NeighborList, neighborList.get(), neighborSize);
    comm.barrier();

    double MLUPS = ScaLBL_Comm->GetPerformance(NeighborList, fq, Np);
    printf("  MLPUS=%f from rank %i\n", MLUPS, mask_rank);
}
/*
 * Initialise the distributions: calls ScaLBL_D3Q19_Init on fq for Np sites.
 * Rank 0 prints a progress message first.
 */
void ScaLBL_BGKModel::Initialize() {
    const bool isRoot = (rank == 0);
    if (isRoot) {
        printf("Initializing distributions \n");
    }
    ScaLBL_D3Q19_Init(fq, Np);
}
void ScaLBL_BGKModel::Run() {
double rlx = 1.0 / tau;
Minkowski Morphology(Mask);
if (rank == 0) {
bool WriteHeader = false;
FILE *log_file = fopen("Permeability.csv", "r");
if (log_file != NULL)
fclose(log_file);
else
WriteHeader = true;
if (WriteHeader) {
log_file = fopen("Permeability.csv", "a+");
fprintf(log_file, "time Fx Fy Fz mu Vs As Js Xs vx vy vz k\n");
fclose(log_file);
}
}
//.......create and start timer............
ScaLBL_DeviceBarrier();
comm.barrier();
if (rank == 0)
printf("Beginning AA timesteps, timestepMax = %i \n", timestepMax);
if (rank == 0)
printf("********************************************************\n");
timestep = 0;
double error = 1.0;
double flow_rate_previous = 0.0;
auto t1 = std::chrono::system_clock::now();
while (timestep < timestepMax && error > tolerance) {
//************************************************************************/
/* timestep++;
ScaLBL_Comm.SendD3Q19AA(dist); //READ FROM NORMAL
ScaLBL_D3Q19_AAodd_BGK(NeighborList, dist, ScaLBL_Comm.first_interior, ScaLBL_Comm.last_interior, Np, rlx, Fx, Fy, Fz);
ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAodd_BGK(NeighborList, dist, 0, ScaLBL_Comm.next, Np, rlx, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); MPI_Barrier(comm);
timestep++;
ScaLBL_Comm.SendD3Q19AA(dist); //READ FORM NORMAL
ScaLBL_D3Q19_AAeven_BGK(dist, ScaLBL_Comm.first_interior, ScaLBL_Comm.last_interior, Np, rlx, Fx, Fy, Fz);
ScaLBL_Comm.RecvD3Q19AA(dist); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAeven_BGK(dist, 0, ScaLBL_Comm.next, Np, rlx, Fx, Fy, Fz);
ScaLBL_DeviceBarrier(); MPI_Barrie
*/
timestep++;
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_D3Q19_AAodd_BGK(NeighborList, fq, ScaLBL_Comm->FirstInterior(),
ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
// Set boundary conditions
if (BoundaryCondition == 3) {
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
} else if (BoundaryCondition == 4) {
din =
ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
} else if (BoundaryCondition == 5) {
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
}
ScaLBL_D3Q19_AAodd_BGK(NeighborList, fq, 0, ScaLBL_Comm->LastExterior(),
Np, rlx, Fx, Fy, Fz);
ScaLBL_DeviceBarrier();
comm.barrier();
timestep++;
ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL
ScaLBL_D3Q19_AAeven_BGK(fq, ScaLBL_Comm->FirstInterior(),
ScaLBL_Comm->LastInterior(), Np, rlx, Fx, Fy, Fz);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
// Set boundary conditions
if (BoundaryCondition == 3) {
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, din, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
} else if (BoundaryCondition == 4) {
din =
ScaLBL_Comm->D3Q19_Flux_BC_z(NeighborList, fq, flux, timestep);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, dout, timestep);
} else if (BoundaryCondition == 5) {
ScaLBL_Comm->D3Q19_Reflection_BC_z(fq);
ScaLBL_Comm->D3Q19_Reflection_BC_Z(fq);
}
ScaLBL_D3Q19_AAeven_BGK(fq, 0, ScaLBL_Comm->LastExterior(), Np,
rlx, Fx, Fy, Fz);
ScaLBL_DeviceBarrier();
comm.barrier();
//************************************************************************/
if (timestep % ANALYSIS_INTERVAL == 0) {
ScaLBL_D3Q19_Momentum(fq, Velocity, Np);
ScaLBL_DeviceBarrier();
comm.barrier();
ScaLBL_Comm->RegularLayout(Map, &Velocity[0], Velocity_x);
ScaLBL_Comm->RegularLayout(Map, &Velocity[Np], Velocity_y);
ScaLBL_Comm->RegularLayout(Map, &Velocity[2 * Np], Velocity_z);
double count_loc = 0;
double count;
double vax, vay, vaz;
double vax_loc, vay_loc, vaz_loc;
vax_loc = vay_loc = vaz_loc = 0.f;
for (int k = 1; k < Nz - 1; k++) {
for (int j = 1; j < Ny - 1; j++) {
for (int i = 1; i < Nx - 1; i++) {
if (Distance(i, j, k) > 0) {
vax_loc += Velocity_x(i, j, k);
vay_loc += Velocity_y(i, j, k);
vaz_loc += Velocity_z(i, j, k);
count_loc += 1.0;
}
}
}
}
vax = Dm->Comm.sumReduce(vax_loc);
vay = Dm->Comm.sumReduce(vay_loc);
vaz = Dm->Comm.sumReduce(vaz_loc);
count = Dm->Comm.sumReduce(count_loc);
vax /= count;
vay /= count;
vaz /= count;
double force_mag = sqrt(Fx * Fx + Fy * Fy + Fz * Fz);
double dir_x = Fx / force_mag;
double dir_y = Fy / force_mag;
double dir_z = Fz / force_mag;
if (force_mag == 0.0) {
// default to z direction
dir_x = 0.0;
dir_y = 0.0;
dir_z = 1.0;
force_mag = 1.0;
}
double flow_rate = (vax * dir_x + vay * dir_y + vaz * dir_z);
error = fabs(flow_rate - flow_rate_previous) / fabs(flow_rate);
flow_rate_previous = flow_rate;
//if (rank==0) printf("Computing Minkowski functionals \n");
Morphology.ComputeScalar(Distance, 0.f);
//Morphology.PrintAll();
double mu = (tau - 0.5) / 3.f;
double Vs = Morphology.V();
double As = Morphology.A();
double Hs = Morphology.H();
double Xs = Morphology.X();
Vs = Dm->Comm.sumReduce(Vs);
As = Dm->Comm.sumReduce(As);
Hs = Dm->Comm.sumReduce(Hs);
Xs = Dm->Comm.sumReduce(Xs);
double h = Dm->voxel_length;
double absperm =
h * h * mu * Mask->Porosity() * flow_rate / force_mag;
if (rank == 0) {
printf(" %f\n", absperm);
FILE *log_file = fopen("Permeability.csv", "a");
fprintf(log_file,
"%i %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g "
"%.8g %.8g\n",
timestep, Fx, Fy, Fz, mu, h * h * h * Vs, h * h * As,
h * Hs, Xs, vax, vay, vaz, absperm);
fclose(log_file);
}
}
}
//************************************************************************/
if (rank == 0)
printf("---------------------------------------------------------------"
"----\n");
// Compute the walltime per timestep
auto t2 = std::chrono::system_clock::now();
double cputime = std::chrono::duration<double>(t2 - t1).count() / timestep;
// Performance obtained from each node
double MLUPS = double(Np) / cputime / 1000000;
if (rank == 0)
printf("********************************************************\n");
if (rank == 0)
printf("CPU time = %f \n", cputime);
if (rank == 0)
printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
MLUPS *= nprocs;
if (rank == 0)
printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
if (rank == 0)
printf("********************************************************\n");
}
void ScaLBL_BGKModel::VelocityField() {
auto format = vis_db->getWithDefault<string>("format", "silo");
/* memcpy(Morphology.SDn.data(), Distance.data(), Nx*Ny*Nz*sizeof(double));
Morphology.Initialize();
Morphology.UpdateMeshValues();
Morphology.ComputeLocal();
Morphology.Reduce();
double count_loc=0;
double count;
double vax,vay,vaz;
double vax_loc,vay_loc,vaz_loc;
vax_loc = vay_loc = vaz_loc = 0.f;
for (int n=0; n<ScaLBL_Comm->LastExterior(); n++){
vax_loc += VELOCITY[n];
vay_loc += VELOCITY[Np+n];
vaz_loc += VELOCITY[2*Np+n];
count_loc+=1.0;
}
for (int n=ScaLBL_Comm->FirstInterior(); n<ScaLBL_Comm->LastInterior(); n++){
vax_loc += VELOCITY[n];
vay_loc += VELOCITY[Np+n];
vaz_loc += VELOCITY[2*Np+n];
count_loc+=1.0;
}
MPI_Allreduce(&vax_loc,&vax,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vay_loc,&vay,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&vaz_loc,&vaz,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
MPI_Allreduce(&count_loc,&count,1,MPI_DOUBLE,MPI_SUM,Mask->Comm);
vax /= count;
vay /= count;
vaz /= count;
double mu = (tau-0.5)/3.f;
if (rank==0) printf("Fx Fy Fz mu Vs As Js Xs vx vy vz\n");
if (rank==0) printf("%.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g %.8g\n",Fx, Fy, Fz, mu,
Morphology.V(),Morphology.A(),Morphology.J(),Morphology.X(),vax,vay,vaz);
*/
vis_db = db->getDatabase("Visualization");
if (vis_db->getWithDefault<bool>("write_silo", false)) {
std::vector<IO::MeshDataStruct> visData;
fillHalo<double> fillData(Dm->Comm, Dm->rank_info,
{Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2},
{1, 1, 1}, 0, 1);
auto VxVar = std::make_shared<IO::Variable>();
auto VyVar = std::make_shared<IO::Variable>();
auto VzVar = std::make_shared<IO::Variable>();
auto SignDistVar = std::make_shared<IO::Variable>();
IO::initialize("", format, "false");
// Create the MeshDataStruct
visData.resize(1);
visData[0].meshName = "domain";
visData[0].mesh = std::make_shared<IO::DomainMesh>(
Dm->rank_info, Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2, Dm->Lx, Dm->Ly,
Dm->Lz);
SignDistVar->name = "SignDist";
SignDistVar->type = IO::VariableType::VolumeVariable;
SignDistVar->dim = 1;
SignDistVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
visData[0].vars.push_back(SignDistVar);
VxVar->name = "Velocity_x";
VxVar->type = IO::VariableType::VolumeVariable;
VxVar->dim = 1;
VxVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
visData[0].vars.push_back(VxVar);
VyVar->name = "Velocity_y";
VyVar->type = IO::VariableType::VolumeVariable;
VyVar->dim = 1;
VyVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
visData[0].vars.push_back(VyVar);
VzVar->name = "Velocity_z";
VzVar->type = IO::VariableType::VolumeVariable;
VzVar->dim = 1;
VzVar->data.resize(Dm->Nx - 2, Dm->Ny - 2, Dm->Nz - 2);
visData[0].vars.push_back(VzVar);
Array<double> &SignData = visData[0].vars[0]->data;
Array<double> &VelxData = visData[0].vars[1]->data;
Array<double> &VelyData = visData[0].vars[2]->data;
Array<double> &VelzData = visData[0].vars[3]->data;
ASSERT(visData[0].vars[0]->name == "SignDist");
ASSERT(visData[0].vars[1]->name == "Velocity_x");
ASSERT(visData[0].vars[2]->name == "Velocity_y");
ASSERT(visData[0].vars[3]->name == "Velocity_z");
fillData.copy(Distance, SignData);
fillData.copy(Velocity_x, VelxData);
fillData.copy(Velocity_y, VelyData);
fillData.copy(Velocity_z, VelzData);
IO::writeData(timestep, visData, Dm->Comm);
}
}

94
models/BGKModel.h Normal file
View File

@ -0,0 +1,94 @@
/*
Copyright 2013--2018 James E. McClure, Virginia Polytechnic & State University
Copyright Equinor ASA
This file is part of the Open Porous Media project (OPM).
OPM is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OPM is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with OPM. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* BGK (single-relaxation-time) LBM Model
*/
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <iostream>
#include <exception>
#include <stdexcept>
#include <fstream>
#include "common/ScaLBL.h"
#include "common/Communication.h"
#include "common/MPI.h"
#include "analysis/Minkowski.h"
#include "ProfilerApp.h"
// Driver class for the BGK lattice Boltzmann model: reads its parameters
// from an input database, sets up the domain and device data, runs the
// time loop and writes visualization output.
class ScaLBL_BGKModel {
public:
// RANK and NP are stored in rank / nprocs; COMM is copied into comm.
ScaLBL_BGKModel(int RANK, int NP, const Utilities::MPI &COMM);
~ScaLBL_BGKModel();
// functions, listed in the order in which they should be run
// Loads the database from file and reads the "Domain", "BGK" and
// "Visualization" sections.
void ReadParams(string filename);
// NOTE(review): no definition of this overload is visible in BGKModel.cpp -- confirm
void ReadParams(std::shared_ptr<Database> db0);
// Creates Dm and Mask, sizes the analysis arrays, initializes communication on Dm.
void SetDomain();
// Reads the voxel labels into Mask and computes the signed distance map.
void ReadInput();
// Builds the communicator and site layout, allocates the device buffers.
void Create();
// Initializes the distributions fq.
void Initialize();
// Runs the time loop; rank 0 appends results to Permeability.csv.
void Run();
// Writes Distance and the velocity components when write_silo is enabled.
void VelocityField();
// Restart is read from the "BGK" section; pBC is not referenced in BGKModel.cpp.
bool Restart, pBC;
// current timestep and the maximum number of timesteps
int timestep, timestepMax;
// number of timesteps between two analyses in Run()
int ANALYSIS_INTERVAL;
// boundary condition id; Run() handles the values 3, 4 and 5 specially
int BoundaryCondition;
// relaxation time and the viscosity derived from it, mu = (tau - 0.5) / 3
double tau, mu;
// body force components; flux is passed to D3Q19_Flux_BC_z when BoundaryCondition == 4
double Fx, Fy, Fz, flux;
// values passed to D3Q19_Pressure_BC_z (din) and D3Q19_Pressure_BC_Z (dout)
double din, dout;
// Run() stops once the relative change of the flow rate is no longer above this
double tolerance;
// local grid dimensions, N = Nx*Ny*Nz, and the site count Np set in Create()
int Nx, Ny, Nz, N, Np;
int rank, nprocx, nprocy, nprocz, nprocs;
// domain lengths copied from Dm
double Lx, Ly, Lz;
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
// input database
std::shared_ptr<Database> db;
std::shared_ptr<Database> domain_db;
// holds the "BGK" section of the input database (despite the name)
std::shared_ptr<Database> mrt_db;
std::shared_ptr<Database> vis_db;
// Map is filled by MemoryOptimizedLayoutAA in Create(); Distance holds the signed distance
IntArray Map;
DoubleArray Distance;
// buffers allocated with ScaLBL_AllocateDeviceMemory in Create()
int *NeighborList;
double *fq;
double *Velocity;
double *Pressure;
//Minkowski Morphology;
// velocity components in the regular (Nx, Ny, Nz) layout, filled in Run()
DoubleArray Velocity_x;
DoubleArray Velocity_y;
DoubleArray Velocity_z;
private:
Utilities::MPI comm;
// filenames
// rank formatted with "%05d", and the "ID." / "Restart." names built from it
char LocalRankString[8];
char LocalRankFilename[40];
char LocalRestartFile[40];
//int rank,nprocs;
// NOTE(review): no definition of LoadParams is visible in BGKModel.cpp -- confirm
void LoadParams(std::shared_ptr<Database> db0);
};

View File

@ -115,8 +115,7 @@ void ScaLBL_ColorModel::ReadParams(string filename) {
inletB = 0.f;
outletA = 0.f;
outletB = 1.f;
BoundaryCondition = 0;
if (color_db->keyExists("BC")) {
BoundaryCondition = color_db->getScalar<int>("BC");
@ -388,6 +387,10 @@ void ScaLBL_ColorModel::AssignComponentLabels(double *phase) {
AFFINITY, volume_fraction);
}
}
// clean up
delete [] label_count;
delete [] label_count_global;
}
void ScaLBL_ColorModel::Create() {
@ -483,12 +486,22 @@ void ScaLBL_ColorModel::Create() {
// copy the neighbor list
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
ScaLBL_Comm->Barrier();
delete[] neighborList;
// initialize phi based on PhaseLabel (include solid component labels)
double *PhaseLabel;
PhaseLabel = new double[N];
PhaseLabel = new double[Nx*Ny*Nz];
ScaLBL_Comm->Barrier();
AssignComponentLabels(PhaseLabel);
ScaLBL_CopyToDevice(Phi, PhaseLabel, N * sizeof(double));
ScaLBL_Comm->Barrier();
ScaLBL_CopyToDevice(Phi, PhaseLabel, Nx*Ny*Nz * sizeof(double));
ScaLBL_Comm->Barrier();
if (rank == 0)
printf("Model created \n");
delete[] PhaseLabel;
}
@ -815,28 +828,28 @@ double ScaLBL_ColorModel::Run(int returntime) {
}
if (TRIGGER_FORCE_RESCALE) {
RESCALE_FORCE = false;
TRIGGER_FORCE_RESCALE = false;
double RESCALE_FORCE_FACTOR = capillary_number / Ca;
if (RESCALE_FORCE_FACTOR > 2.0)
RESCALE_FORCE_FACTOR = 2.0;
if (RESCALE_FORCE_FACTOR < 0.5)
RESCALE_FORCE_FACTOR = 0.5;
Fx *= RESCALE_FORCE_FACTOR;
Fy *= RESCALE_FORCE_FACTOR;
Fz *= RESCALE_FORCE_FACTOR;
force_mag = sqrt(Fx * Fx + Fy * Fy + Fz * Fz);
if (force_mag > 1e-3) {
Fx *= 1e-3 / force_mag; // impose ceiling for stability
Fy *= 1e-3 / force_mag;
Fz *= 1e-3 / force_mag;
}
if (rank == 0)
printf(" -- adjust force by factor %f \n ",
capillary_number / Ca);
Averages->SetParams(rhoA, rhoB, tauA, tauB, Fx, Fy, Fz, alpha,
beta);
color_db->putVector<double>("F", {Fx, Fy, Fz});
RESCALE_FORCE = false;
TRIGGER_FORCE_RESCALE = false;
double RESCALE_FORCE_FACTOR = capillary_number / Ca;
if (RESCALE_FORCE_FACTOR > 2.0)
RESCALE_FORCE_FACTOR = 2.0;
if (RESCALE_FORCE_FACTOR < 0.5)
RESCALE_FORCE_FACTOR = 0.5;
Fx *= RESCALE_FORCE_FACTOR;
Fy *= RESCALE_FORCE_FACTOR;
Fz *= RESCALE_FORCE_FACTOR;
force_mag = sqrt(Fx * Fx + Fy * Fy + Fz * Fz);
if (force_mag > 1e-3) {
Fx *= 1e-3 / force_mag; // impose ceiling for stability
Fy *= 1e-3 / force_mag;
Fz *= 1e-3 / force_mag;
}
if (rank == 0)
printf(" -- adjust force by factor %f \n ",
capillary_number / Ca);
Averages->SetParams(rhoA, rhoB, tauA, tauB, Fx, Fy, Fz, alpha,
beta);
color_db->putVector<double>("F", {Fx, Fy, Fz});
}
if (isSteady) {
Averages->Full();

View File

@ -13,7 +13,20 @@ ScaLBL_IonModel::ScaLBL_IonModel(int RANK, int NP, const Utilities::MPI &COMM)
nprocy(0), nprocz(0), fluidVelx_dummy(0), fluidVely_dummy(0),
fluidVelz_dummy(0), BoundaryConditionInlet(0), BoundaryConditionOutlet(0),
BoundaryConditionSolid(0), Lx(0), Ly(0), Lz(0), comm(COMM) {}
ScaLBL_IonModel::~ScaLBL_IonModel() {}
/*
 * Release the model's buffers through ScaLBL_FreeDeviceMemory, in the same
 * order as they are listed below.
 */
ScaLBL_IonModel::~ScaLBL_IonModel() {
    void *buffers[] = {NeighborList,  dvcMap,         fq,
                       Ci,            ChargeDensity,  FluxDiffusive,
                       FluxAdvective, FluxElectrical, IonSolid,
                       FluidVelocityDummy};
    for (void *buffer : buffers) {
        ScaLBL_FreeDeviceMemory(buffer);
    }
}
void ScaLBL_IonModel::ReadParams(string filename, vector<int> &num_iter) {
@ -48,8 +61,13 @@ void ScaLBL_IonModel::ReadParams(string filename, vector<int> &num_iter) {
Ex_dummy = 0.0; //for debugging, unit [V/m]
Ey_dummy = 0.0; //for debugging, unit [V/m]
Ez_dummy = 0.0; //for debugging, unit [V/m]
sprintf(LocalRankString, "%05d", rank);
sprintf(LocalRestartFile, "%s%s", "Restart.", LocalRankString);
//--------------------------------------------------------------------------//
BoundaryConditionSolid = 0;
// Read domain parameters
if (domain_db->keyExists("voxel_length")) { //default unit: um/lu
h = domain_db->getScalar<double>("voxel_length");
@ -170,7 +188,7 @@ void ScaLBL_IonModel::ReadParams(string filename, vector<int> &num_iter) {
BoundaryConditionSolid = ion_db->getScalar<int>("BC_Solid");
}
// Read boundary condition for ion transport
// BC = 0: normal periodic BC
// BC = 0: zero-flux bounce-back BC
// BC = 1: fixed ion concentration; unit=[mol/m^3]
// BC = 2: fixed ion flux (inward flux); unit=[mol/m^2/sec]
BoundaryConditionInlet.push_back(0);
@ -282,7 +300,7 @@ void ScaLBL_IonModel::ReadParams(string filename, vector<int> &num_iter) {
void ScaLBL_IonModel::ReadParams(string filename) {
//NOTE: the maximum iteration timesteps for ions are left unspecified
// it relies on the multiphys controller to compute the max timestep
USE_MEMBRANE = true;
// read the input database
db = std::make_shared<Database>(filename);
domain_db = db->getDatabase("Domain");
@ -314,13 +332,18 @@ void ScaLBL_IonModel::ReadParams(string filename) {
Ex_dummy = 0.0; //for debugging, unit [V/m]
Ey_dummy = 0.0; //for debugging, unit [V/m]
Ez_dummy = 0.0; //for debugging, unit [V/m]
sprintf(LocalRankString, "%05d", rank);
sprintf(LocalRestartFile, "%s%s", "Restart.", LocalRankString);
//--------------------------------------------------------------------------//
// Read domain parameters
if (domain_db->keyExists("voxel_length")) { //default unit: um/lu
h = domain_db->getScalar<double>("voxel_length");
}
if (ion_db->keyExists("use_membrane")) {
USE_MEMBRANE = ion_db->getScalar<bool>("use_membrane");
}
// LB-Ion Model parameters
//if (ion_db->keyExists( "timestepMax" )){
// timestepMax = ion_db->getScalar<int>( "timestepMax" );
@ -328,6 +351,9 @@ void ScaLBL_IonModel::ReadParams(string filename) {
if (ion_db->keyExists("tolerance")) {
tolerance = ion_db->getScalar<double>("tolerance");
}
if (ion_db->keyExists("Restart")) {
Restart = ion_db->getScalar<bool>("Restart");
}
if (ion_db->keyExists("temperature")) {
T = ion_db->getScalar<int>("temperature");
//re-calculate thermal voltage
@ -421,14 +447,113 @@ void ScaLBL_IonModel::ReadParams(string filename) {
1.0e-18); //LB ion concentration has unit [mol/lu^3]
}
}
if (USE_MEMBRANE){
membrane_db = db->getDatabase("Membrane");
/* get membrane permeability parameters*/
if (membrane_db->keyExists("MassFractionIn")) {
if (rank == 0) printf(".... Read membrane permeability (MassFractionIn) \n");
MassFractionIn.clear();
MassFractionIn = membrane_db->getVector<double>("MassFractionIn");
if (MassFractionIn.size() != number_ion_species) {
ERROR("Error: number_ion_species and membrane permeability (MassFractionIn) must be "
"the same length! \n");
}
}
else{
MassFractionIn.resize(IonConcentration.size());
for (size_t i = 0; i < IonConcentration.size(); i++) {
MassFractionIn[i] = 0.0;
}
}
if (membrane_db->keyExists("MassFractionOut")) {
if (rank == 0) printf(".... Read membrane permeability (MassFractionOut) \n");
MassFractionOut.clear();
MassFractionOut = membrane_db->getVector<double>("MassFractionOut");
if (MassFractionIn.size() != number_ion_species) {
ERROR("Error: number_ion_species and membrane permeability (MassFractionOut) must be "
"the same length! \n");
}
}
else{
MassFractionOut.resize(IonConcentration.size());
for (size_t i = 0; i < IonConcentration.size(); i++) {
MassFractionOut[i] = 0.0;
}
}
if (membrane_db->keyExists("ThresholdMassFractionIn")) {
if (rank == 0) printf(".... Read membrane permeability (ThresholdMassFractionIn) \n");
ThresholdMassFractionIn.clear();
ThresholdMassFractionIn = membrane_db->getVector<double>("ThresholdMassFractionIn");
if (ThresholdMassFractionIn.size() != number_ion_species) {
ERROR("Error: number_ion_species and membrane permeability (ThresholdMassFractionIn) must be "
"the same length! \n");
}
}
else{
ThresholdMassFractionIn.resize(IonConcentration.size());
for (size_t i = 0; i < IonConcentration.size(); i++) {
ThresholdMassFractionIn[i] = 0.0;
}
}
if (membrane_db->keyExists("ThresholdMassFractionOut")) {
if (rank == 0) printf(".... Read membrane permeability (ThresholdMassFractionOut) \n");
ThresholdMassFractionOut.clear();
ThresholdMassFractionOut = membrane_db->getVector<double>("ThresholdMassFractionOut");
if (ThresholdMassFractionOut.size() != number_ion_species) {
ERROR("Error: number_ion_species and membrane permeability (ThresholdMassFractionOut) must be "
"the same length! \n");
}
}
else{
ThresholdMassFractionOut.resize(IonConcentration.size());
for (size_t i = 0; i < IonConcentration.size(); i++) {
ThresholdMassFractionOut[i] = 0.0;
}
}
if (membrane_db->keyExists("ThresholdVoltage")) {
if (rank == 0) printf(".... Read membrane threshold (ThresholdVoltage) \n");
ThresholdVoltage.clear();
ThresholdVoltage = membrane_db->getVector<double>("ThresholdVoltage");
if (ThresholdVoltage.size() != number_ion_species) {
ERROR("Error: number_ion_species and membrane voltage threshold (ThresholdVoltage) must be "
"the same length! \n");
}
}
else{
ThresholdVoltage.resize(IonConcentration.size());
for (size_t i = 0; i < IonConcentration.size(); i++) {
ThresholdVoltage[i] = 0.0;
}
}
if (ion_db->keyExists("MembraneIonConcentrationList")) {
if (rank == 0) printf(".... Read MembraneIonConcentrationList \n");
MembraneIonConcentration.clear();
MembraneIonConcentration = ion_db->getVector<double>("MembraneIonConcentrationList");
if (MembraneIonConcentration.size() != number_ion_species) {
ERROR("Error: number_ion_species and MembraneIonConcentrationList must be "
"the same length! \n");
}
else {
for (size_t i = 0; i < MembraneIonConcentration.size(); i++) {
MembraneIonConcentration[i] =
MembraneIonConcentration[i] *
(h * h * h *
1.0e-18); //LB ion concentration has unit [mol/lu^3]
}
}
}
}
//Read solid boundary condition specific to Ion model
BoundaryConditionSolid = 0;
if (ion_db->keyExists("BC_Solid")) {
BoundaryConditionSolid = ion_db->getScalar<int>("BC_Solid");
}
// Read boundary condition for ion transport
// BC = 0: normal periodic BC
// BC = 0: zero-flux bounce-back BC
// BC = 1: fixed ion concentration; unit=[mol/m^3]
// BC = 2: fixed ion flux (inward flux); unit=[mol/m^2/sec]
BoundaryConditionInlet.push_back(0);
@ -583,6 +708,72 @@ void ScaLBL_IonModel::SetDomain() {
nprocz = Dm->nprocz();
}
/*
 * Build the membrane data structure.
 *
 * Voxels whose label (Dm->id) matches one of the "MembraneLabels" from the
 * "Membrane" database are marked as inside (membrane_id = 0); all others are
 * outside (membrane_id = 1). MembraneDistance is seeded with -1 inside and
 * +1 outside, converted with CalcDist(), and then passed to
 * IonMembrane->Create() together with Map. Rank 0 prints the volume fraction
 * of each membrane label.
 *
 * Changes relative to the previous version: the per-label counters are held
 * in std::vector instead of new[]/delete[], the label search uses `break`,
 * the size_t count is printed with %zu, and the volume-fraction denominator
 * is computed in double so the product cannot overflow int.
 */
void ScaLBL_IonModel::SetMembrane() {
    membrane_db = db->getDatabase("Membrane");

    /* set distance based on labels inside the membrane (all other labels will be outside) */
    auto MembraneLabels = membrane_db->getVector<int>("MembraneLabels");

    IonMembrane = std::shared_ptr<Membrane>(
        new Membrane(ScaLBL_Comm, NeighborList, Np));

    const size_t NLABELS = MembraneLabels.size();
    std::vector<double> label_count(NLABELS, 0.0);
    std::vector<double> label_count_global(NLABELS, 0.0);
    Array<char> membrane_id(Nx, Ny, Nz);

    /* classify every voxel and seed the signed distance ( - inside / + outside) */
    MembraneDistance.resize(Nx, Ny, Nz);
    MembraneDistance.fill(0);
    for (int k = 0; k < Nz; k++) {
        for (int j = 0; j < Ny; j++) {
            for (int i = 0; i < Nx; i++) {
                char inside_flag = 1; // default value: outside
                const signed char LABEL = Dm->id[k * Nx * Ny + j * Nx + i];
                for (size_t m = 0; m < NLABELS; m++) {
                    if (LABEL == MembraneLabels[m]) {
                        label_count[m] += 1.0;
                        inside_flag = 0; // inside
                        break;
                    }
                }
                membrane_id(i, j, k) = inside_flag;
                MembraneDistance(i, j, k) = 2.0 * double(inside_flag) - 1.0;
            }
        }
    }

    for (size_t m = 0; m < NLABELS; m++) {
        label_count_global[m] = Dm->Comm.sumReduce(label_count[m]);
    }

    if (rank == 0) {
        printf("   Membrane labels: %zu \n", NLABELS);
        // number of interior voxels over all processes, computed in double
        const double total_voxels = double(Nx - 2) * double(Ny - 2) *
                                    double(Nz - 2) * double(nprocs);
        for (size_t m = 0; m < NLABELS; m++) {
            const double volume_fraction = label_count_global[m] / total_voxels;
            printf("   label=%d, volume fraction = %f\n", MembraneLabels[m],
                   volume_fraction);
        }
    }

    CalcDist(MembraneDistance, membrane_id, *Dm);

    /* create the membrane data structure */
    if (rank == 0)
        printf("Creating membrane data structure...\n");
    MembraneCount = IonMembrane->Create(MembraneDistance, Map);
}
void ScaLBL_IonModel::ReadInput() {
sprintf(LocalRankString, "%05d", Dm->rank());
@ -709,6 +900,33 @@ void ScaLBL_IonModel::AssignSolidBoundary(double *ion_solid) {
}
}
/**
 * Fill the concentration array of one ion species from the membrane geometry.
 *
 * For every lattice site (i,j,k) whose Map entry is non-negative, Ci[Map(i,j,k)]
 * is set to MembraneIonConcentration[ic] where MembraneDistance is negative and
 * to IonConcentration[ic] everywhere else. Sites with a negative Map entry are
 * left untouched.
 *
 * @param Ci  output array indexed by the values stored in Map
 * @param ic  index of the ion species
 *
 * Rank 0 additionally prints both values divided by h^3 * 1.0e-18, which the
 * message labels as [mol/m^3].
 */
void ScaLBL_IonModel::AssignIonConcentrationMembrane( double *Ci, int ic) {
    if (rank == 0){
        printf(".... Set concentration(%i): inside=%.6g [mol/m^3], outside=%.6g [mol/m^3] \n", ic,
               MembraneIonConcentration[ic]/(h*h*h*1.0e-18), IonConcentration[ic]/(h*h*h*1.0e-18));
    }

    for (int k = 0; k < Nz; k++) {
        for (int j = 0; j < Ny; j++) {
            for (int i = 0; i < Nx; i++) {
                const int site = Map(i, j, k);
                if (site < 0)
                    continue; // no entry for this site in the compact layout

                const bool inside = MembraneDistance(i, j, k) < 0.0;
                Ci[site] = inside ? MembraneIonConcentration[ic]
                                  : IonConcentration[ic];
            }
        }
    }
}
void ScaLBL_IonModel::AssignIonConcentration_FromFile(
double *Ci, const vector<std::string> &File_ion, int ic) {
double *Ci_host;
@ -764,7 +982,7 @@ void ScaLBL_IonModel::Create() {
Map.fill(-2);
auto neighborList = new int[18 * Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map, neighborList,
Mask->id.data(), Np, 1);
Mask->id.data(), Npad, 1);
comm.barrier();
//...........................................................................
@ -778,6 +996,7 @@ void ScaLBL_IonModel::Create() {
int neighborSize = 18 * (Np * sizeof(int));
//...........................................................................
ScaLBL_AllocateDeviceMemory((void **)&NeighborList, neighborSize);
ScaLBL_AllocateDeviceMemory((void **)&dvcMap, sizeof(int) * Np);
ScaLBL_AllocateDeviceMemory((void **)&fq,
number_ion_species * 7 * dist_mem_size);
ScaLBL_AllocateDeviceMemory((void **)&Ci,
@ -794,6 +1013,36 @@ void ScaLBL_IonModel::Create() {
if (rank == 0)
printf("LB Ion Solver: Setting up device map and neighbor list \n");
// copy the neighbor list
int *TmpMap;
TmpMap = new int[Np];
for (int k = 1; k < Nz - 1; k++) {
for (int j = 1; j < Ny - 1; j++) {
for (int i = 1; i < Nx - 1; i++) {
int idx = Map(i, j, k);
if (!(idx < 0))
TmpMap[idx] = k * Nx * Ny + j * Nx + i;
}
}
}
// check that TmpMap is valid
for (int idx = 0; idx < ScaLBL_Comm->LastExterior(); idx++) {
auto n = TmpMap[idx];
if (n > Nx * Ny * Nz) {
printf("Bad value! idx=%i \n", n);
TmpMap[idx] = Nx * Ny * Nz - 1;
}
}
for (int idx = ScaLBL_Comm->FirstInterior();
idx < ScaLBL_Comm->LastInterior(); idx++) {
auto n = TmpMap[idx];
if (n > Nx * Ny * Nz) {
printf("Bad value! idx=%i \n", n);
TmpMap[idx] = Nx * Ny * Nz - 1;
}
}
ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int) * Np);
ScaLBL_Comm->Barrier();
ScaLBL_CopyToDevice(NeighborList, neighborList, neighborSize);
comm.barrier();
@ -814,6 +1063,13 @@ void ScaLBL_IonModel::Create() {
ScaLBL_Comm->Barrier();
delete[] IonSolid_host;
}
else {
IonSolid = NULL;
}
delete[] TmpMap;
delete[] neighborList;
}
void ScaLBL_IonModel::Initialize() {
@ -822,10 +1078,26 @@ void ScaLBL_IonModel::Initialize() {
*/
if (rank == 0)
printf("LB Ion Solver: initializing D3Q7 distributions\n");
if (ion_db->keyExists("IonConcentrationFile")) {
//USE_MEMBRANE = true;
if (USE_MEMBRANE){
double *Ci_host;
if (rank == 0)
printf(" ...initializing based on membrane list \n");
Ci_host = new double[number_ion_species * Np];
for (size_t ic = 0; ic < number_ion_species; ic++) {
AssignIonConcentrationMembrane( &Ci_host[ic * Np], ic);
}
ScaLBL_CopyToDevice(Ci, Ci_host, number_ion_species * sizeof(double) * Np);
comm.barrier();
for (size_t ic = 0; ic < number_ion_species; ic++) {
ScaLBL_D3Q7_Ion_Init_FromFile(&fq[ic * Np * 7], &Ci[ic * Np], Np);
}
delete[] Ci_host;
}
else if (ion_db->keyExists("IonConcentrationFile")) {
//NOTE: "IonConcentrationFile" is a vector, including "file_name, datatype"
auto File_ion = ion_db->getVector<std::string>("IonConcentrationFile");
if (File_ion.size() == 2 * number_ion_species) {
if (File_ion.size() == 2*number_ion_species) {
double *Ci_host;
Ci_host = new double[number_ion_species * Np];
for (size_t ic = 0; ic < number_ion_species; ic++) {
@ -844,12 +1116,50 @@ void ScaLBL_IonModel::Initialize() {
ERROR("Error: Number of user-input ion concentration files should "
"be equal to number of ion species!\n");
}
} else {
}
else {
for (size_t ic = 0; ic < number_ion_species; ic++) {
ScaLBL_D3Q7_Ion_Init(&fq[ic * Np * 7], &Ci[ic * Np],
IonConcentration[ic], Np);
}
}
/** RESTART **/
if (Restart == true) {
if (rank == 0) {
printf(" ION MODEL: Reading restart file! \n");
}
double*cDist;
double *Ci_host;
cDist = new double[7 * number_ion_species * Np];
Ci_host = new double[number_ion_species * Np];
ifstream File(LocalRestartFile, ios::binary);
int idx;
double value,sum;
// Read the distributions
for (size_t ic = 0; ic < number_ion_species; ic++){
for (int n = 0; n < Np; n++) {
sum = 0.0;
for (int q = 0; q < 7; q++) {
File.read((char *)&value, sizeof(value));
cDist[ic * 7 * Np + q * Np + n] = value;
sum += value;
}
Ci_host[ic * Np + n] = sum;
}
}
File.close();
// Copy the restart data to the GPU
ScaLBL_CopyToDevice(Ci, Ci_host, Np * number_ion_species* sizeof(double));
ScaLBL_CopyToDevice(fq, cDist, 7 * Np * number_ion_species *sizeof(double));
ScaLBL_Comm->Barrier();
comm.barrier();
delete[] Ci_host;
delete[] cDist;
}
/** END RESTART **/
if (rank == 0)
printf("LB Ion Solver: initializing charge density\n");
for (size_t ic = 0; ic < number_ion_species; ic++) {
@ -984,6 +1294,7 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField) {
//ScaLBL_Comm->Barrier(); comm.barrier();
//auto t1 = std::chrono::system_clock::now();
auto t1 = std::chrono::system_clock::now();
for (size_t ic = 0; ic < number_ion_species; ic++) {
timestep = 0;
while (timestep < timestepMax[ic]) {
@ -1052,13 +1363,13 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField) {
ScaLBL_Comm->LastExterior(), Np);
//LB-Ion collison
ScaLBL_D3Q7_AAodd_Ion(
ScaLBL_D3Q7_AAodd_Ion_v0(
NeighborList, &fq[ic * Np * 7], &Ci[ic * Np],
&FluxDiffusive[3 * ic * Np], &FluxAdvective[3 * ic * Np],
&FluxElectrical[3 * ic * Np], Velocity, ElectricField,
IonDiffusivity[ic], IonValence[ic], rlx[ic], Vt,
ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_D3Q7_AAodd_Ion(
ScaLBL_D3Q7_AAodd_Ion_v0(
NeighborList, &fq[ic * Np * 7], &Ci[ic * Np],
&FluxDiffusive[3 * ic * Np], &FluxAdvective[3 * ic * Np],
&FluxElectrical[3 * ic * Np], Velocity, ElectricField,
@ -1136,13 +1447,13 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField) {
Np);
//LB-Ion collison
ScaLBL_D3Q7_AAeven_Ion(
ScaLBL_D3Q7_AAeven_Ion_v0(
&fq[ic * Np * 7], &Ci[ic * Np], &FluxDiffusive[3 * ic * Np],
&FluxAdvective[3 * ic * Np], &FluxElectrical[3 * ic * Np],
Velocity, ElectricField, IonDiffusivity[ic], IonValence[ic],
rlx[ic], Vt, ScaLBL_Comm->FirstInterior(),
ScaLBL_Comm->LastInterior(), Np);
ScaLBL_D3Q7_AAeven_Ion(
ScaLBL_D3Q7_AAeven_Ion_v0(
&fq[ic * Np * 7], &Ci[ic * Np], &FluxDiffusive[3 * ic * Np],
&FluxAdvective[3 * ic * Np], &FluxElectrical[3 * ic * Np],
Velocity, ElectricField, IonDiffusivity[ic], IonValence[ic],
@ -1166,6 +1477,154 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField) {
ScaLBL_Comm->LastExterior(), Np);
}
//************************************************************************/
if (rank == 0)
printf("---------------------------------------------------------------"
"----\n");
// Compute the walltime per timestep
auto t2 = std::chrono::system_clock::now();
double cputime = std::chrono::duration<double>(t2 - t1).count() / timestep;
// Performance obtained from each node
double MLUPS = double(Np) / cputime / 1000000;
if (rank == 0)
printf("********************************************************\n");
if (rank == 0)
printf("CPU time = %f \n", cputime);
if (rank == 0)
printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
MLUPS *= nprocs;
if (rank == 0)
printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
if (rank == 0)
printf("********************************************************\n");
}
void ScaLBL_IonModel::RunMembrane(double *Velocity, double *ElectricField, double *Psi) {
//Input parameter:
//1. Velocity is from StokesModel
//2. ElectricField is from Poisson model
//LB-related parameter
vector<double> rlx;
for (size_t ic = 0; ic < tau.size(); ic++) {
rlx.push_back(1.0 / tau[ic]);
}
//.......create and start timer............
//double starttime,stoptime,cputime;
//ScaLBL_Comm->Barrier(); comm.barrier();
//auto t1 = std::chrono::system_clock::now();
for (size_t ic = 0; ic < number_ion_species; ic++) {
/* set the mass transfer coefficients for the membrane */
IonMembrane->AssignCoefficients(dvcMap, Psi, ThresholdVoltage[ic],MassFractionIn[ic],
MassFractionOut[ic],ThresholdMassFractionIn[ic],ThresholdMassFractionOut[ic]);
timestep = 0;
while (timestep < timestepMax[ic]) {
//************************************************************************/
// *************ODD TIMESTEP*************//
timestep++;
//LB-Ion collison
IonMembrane->SendD3Q7AA(&fq[ic * Np * 7]); //READ FORM NORMAL
ScaLBL_D3Q7_AAodd_Ion(
IonMembrane->NeighborList, &fq[ic * Np * 7], &Ci[ic * Np],
&FluxDiffusive[3 * ic * Np], &FluxAdvective[3 * ic * Np],
&FluxElectrical[3 * ic * Np], Velocity, ElectricField,
IonDiffusivity[ic], IonValence[ic], rlx[ic], Vt,
ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
IonMembrane->RecvD3Q7AA(&fq[ic * Np * 7]); //WRITE INTO OPPOSITE
ScaLBL_D3Q7_AAodd_Ion(
IonMembrane->NeighborList, &fq[ic * Np * 7], &Ci[ic * Np],
&FluxDiffusive[3 * ic * Np], &FluxAdvective[3 * ic * Np],
&FluxElectrical[3 * ic * Np], Velocity, ElectricField,
IonDiffusivity[ic], IonValence[ic], rlx[ic], Vt, 0,
ScaLBL_Comm->LastExterior(), Np);
IonMembrane->IonTransport(&fq[ic * Np * 7],&Ci[ic * Np]);
/* if (BoundaryConditionSolid == 1) {
//TODO IonSolid may also be species-dependent
ScaLBL_Comm->SolidDirichletD3Q7(&fq[ic * Np * 7], IonSolid);
}
ScaLBL_Comm->Barrier();
comm.barrier();
*/
// *************EVEN TIMESTEP*************//
timestep++;
//LB-Ion collison
IonMembrane->SendD3Q7AA(&fq[ic * Np * 7]); //READ FORM NORMAL
ScaLBL_D3Q7_AAeven_Ion(
&fq[ic * Np * 7], &Ci[ic * Np], &FluxDiffusive[3 * ic * Np],
&FluxAdvective[3 * ic * Np], &FluxElectrical[3 * ic * Np],
Velocity, ElectricField, IonDiffusivity[ic], IonValence[ic],
rlx[ic], Vt, ScaLBL_Comm->FirstInterior(),
ScaLBL_Comm->LastInterior(), Np);
IonMembrane->RecvD3Q7AA(&fq[ic * Np * 7]); //WRITE INTO OPPOSITE
ScaLBL_D3Q7_AAeven_Ion(
&fq[ic * Np * 7], &Ci[ic * Np], &FluxDiffusive[3 * ic * Np],
&FluxAdvective[3 * ic * Np], &FluxElectrical[3 * ic * Np],
Velocity, ElectricField, IonDiffusivity[ic], IonValence[ic],
rlx[ic], Vt, 0, ScaLBL_Comm->LastExterior(), Np);
IonMembrane->IonTransport(&fq[ic * Np * 7],&Ci[ic * Np]);
ScaLBL_Comm->Barrier();
comm.barrier();
/*
if (BoundaryConditionSolid == 1) {
//TODO IonSolid may also be species-dependent
ScaLBL_Comm->SolidDirichletD3Q7(&fq[ic * Np * 7], IonSolid);
}
ScaLBL_Comm->Barrier();
comm.barrier();
*/
}
}
//Compute charge density for Poisson equation
for (size_t ic = 0; ic < number_ion_species; ic++) {
int Valence = IonValence[ic];
ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, Valence, ic,
ScaLBL_Comm->FirstInterior(),
ScaLBL_Comm->LastInterior(), Np);
ScaLBL_D3Q7_Ion_ChargeDensity(Ci, ChargeDensity, Valence, ic, 0,
ScaLBL_Comm->LastExterior(), Np);
}
/* DoubleArray Charge(Nx,Ny,Nz);
ScaLBL_Comm->RegularLayout(Map, ChargeDensity, Charge);
double charge_sum=0.0;
double charge_sum_total=0.0;
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
charge_sum += Charge(i,j,k);
}
}
}
printf(" Local charge value = %.8g (rank=%i)\n",charge_sum, rank);
ScaLBL_Comm->Barrier();
comm.barrier();
*/
ScaLBL_Comm->Barrier();
comm.barrier();
//if (rank==0) printf(" IonMembrane: completeted full step \n");
//fflush(stdout);
//************************************************************************/
//if (rank==0) printf("-------------------------------------------------------------------\n");
//// Compute the walltime per timestep
//auto t2 = std::chrono::system_clock::now();
@ -1181,6 +1640,33 @@ void ScaLBL_IonModel::Run(double *Velocity, double *ElectricField) {
//if (rank==0) printf("********************************************************\n");
}
void ScaLBL_IonModel::Checkpoint(){
if (rank == 0) {
printf(" ION MODEL: Writing restart file! \n");
}
int idx;
double value,sum;
double*cDist;
cDist = new double[7 * number_ion_species * Np];
ScaLBL_CopyToHost(cDist, fq, 7 * Np * number_ion_species *sizeof(double));
ofstream File(LocalRestartFile, ios::binary);
for (size_t ic = 0; ic < number_ion_species; ic++){
for (int n = 0; n < Np; n++) {
// Write the distributions
for (int q = 0; q < 7; q++) {
value = cDist[ic * Np * 7 + q * Np + n];
File.write((char *)&value, sizeof(value));
}
}
}
File.close();
delete[] cDist;
}
void ScaLBL_IonModel::getIonConcentration(DoubleArray &IonConcentration,
const size_t ic) {
//This function wirte out the data in a normal layout (by aggregating all decomposed domains)

View File

@ -1,7 +1,6 @@
/*
* Ion transport LB Model
*/
#ifndef ScaLBL_IonModel_INC
#define ScaLBL_IonModel_INC
@ -16,6 +15,7 @@
#include "common/ScaLBL.h"
#include "common/Communication.h"
#include "common/Membrane.h"
#include "common/MPI.h"
#include "analysis/Minkowski.h"
#include "ProfilerApp.h"
@ -30,10 +30,12 @@ public:
void ReadParams(string filename);
void ReadParams(std::shared_ptr<Database> db0);
void SetDomain();
void SetMembrane();
void ReadInput();
void Create();
void Initialize();
void Run(double *Velocity, double *ElectricField);
void RunMembrane(double *Velocity, double *ElectricField, double *Psi);
void getIonConcentration(DoubleArray &IonConcentration, const size_t ic);
void getIonConcentration_debug(int timestep);
void getIonFluxDiffusive(DoubleArray &IonFlux_x, DoubleArray &IonFlux_y,
@ -47,9 +49,10 @@ public:
void getIonFluxElectrical_debug(int timestep);
void DummyFluidVelocity();
void DummyElectricField();
void Checkpoint();
double CalIonDenConvergence(vector<double> &ci_avg_previous);
//bool Restart,pBC;
bool Restart;
int timestep;
vector<int> timestepMax;
int BoundaryConditionSolid;
@ -66,10 +69,14 @@ public:
vector<double> IonDiffusivity; //User input unit [m^2/sec]
vector<int> IonValence;
vector<double> IonConcentration; //unit [mol/m^3]
vector<double>
Cin; //inlet boundary value, can be either concentration [mol/m^3] or flux [mol/m^2/sec]
vector<double>
Cout; //outlet boundary value, can be either concentration [mol/m^3] or flux [mol/m^2/sec]
vector<double> MembraneIonConcentration; //unit [mol/m^3]
vector<double> ThresholdVoltage;
vector<double> MassFractionIn;
vector<double> MassFractionOut;
vector<double> ThresholdMassFractionIn;
vector<double> ThresholdMassFractionOut;
vector<double> Cin; //inlet boundary value, can be either concentration [mol/m^3] or flux [mol/m^2/sec]
vector<double> Cout; //outlet boundary value, can be either concentration [mol/m^3] or flux [mol/m^2/sec]
vector<double> tau;
vector<double> time_conv;
@ -80,7 +87,7 @@ public:
std::shared_ptr<Domain> Dm; // this domain is for analysis
std::shared_ptr<Domain> Mask; // this domain is for lbm
std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm;
// input database
// input databaseF
std::shared_ptr<Database> db;
std::shared_ptr<Database> domain_db;
std::shared_ptr<Database> ion_db;
@ -88,6 +95,7 @@ public:
IntArray Map;
DoubleArray Distance;
int *NeighborList;
int *dvcMap;
double *fq;
double *Ci;
double *ChargeDensity;
@ -97,7 +105,14 @@ public:
double *FluxDiffusive;
double *FluxAdvective;
double *FluxElectrical;
/* these support membrane capabilities */
bool USE_MEMBRANE;
std::shared_ptr<Database> membrane_db;
std::shared_ptr<Membrane> IonMembrane;
DoubleArray MembraneDistance;
int MembraneCount; // number of links the cross the membrane
private:
Utilities::MPI comm;
@ -113,6 +128,8 @@ private:
void AssignIonConcentration_FromFile(double *Ci,
const vector<std::string> &File_ion,
int ic);
void AssignIonConcentrationMembrane( double *Ci, int ic);
void IonConcentration_LB_to_Phys(DoubleArray &Den_reg);
void IonFlux_LB_to_Phys(DoubleArray &Den_reg, const size_t ic);
};

View File

@ -41,6 +41,7 @@ void ScaLBL_MRTModel::ReadParams(string filename) {
tau = 1.0;
timestepMax = 100000;
ANALYSIS_INTERVAL = 1000;
tolerance = 1.0e-8;
Fx = Fy = 0.0;
Fz = 1.0e-5;
@ -51,6 +52,9 @@ void ScaLBL_MRTModel::ReadParams(string filename) {
if (mrt_db->keyExists("timestepMax")) {
timestepMax = mrt_db->getScalar<int>("timestepMax");
}
if (mrt_db->keyExists("analysis_interval")) {
ANALYSIS_INTERVAL = mrt_db->getScalar<int>("analysis_interval");
}
if (mrt_db->keyExists("tolerance")) {
tolerance = mrt_db->getScalar<double>("tolerance");
}
@ -323,7 +327,7 @@ void ScaLBL_MRTModel::Run() {
comm.barrier();
//************************************************************************/
if (timestep % 1000 == 0) {
if (timestep % ANALYSIS_INTERVAL == 0) {
ScaLBL_D3Q19_Momentum(fq, Velocity, Np);
ScaLBL_DeviceBarrier();
comm.barrier();

View File

@ -48,6 +48,7 @@ public:
bool Restart, pBC;
int timestep, timestepMax;
int ANALYSIS_INTERVAL;
int BoundaryCondition;
double tau, mu;
double Fx, Fy, Fz, flux;

View File

@ -4,7 +4,7 @@ ScaLBL_Multiphys_Controller::ScaLBL_Multiphys_Controller(
int RANK, int NP, const Utilities::MPI &COMM)
: rank(RANK), nprocs(NP), Restart(0), timestepMax(0), num_iter_Stokes(0),
num_iter_Ion(0), analysis_interval(0), visualization_interval(0),
tolerance(0), time_conv_max(0), comm(COMM) {}
tolerance(0), time_conv_max(0), time_conv_MainLoop(0), comm(COMM) {}
ScaLBL_Multiphys_Controller::~ScaLBL_Multiphys_Controller() {}
void ScaLBL_Multiphys_Controller::ReadParams(string filename) {
@ -16,12 +16,14 @@ void ScaLBL_Multiphys_Controller::ReadParams(string filename) {
// Default parameters
timestepMax = 10000;
Restart = false;
restart_interval = 100000;
num_iter_Stokes = 1;
num_iter_Ion.push_back(1);
analysis_interval = 500;
visualization_interval = 10000;
tolerance = 1.0e-6;
time_conv_max = 0.0;
time_conv_MainLoop = 0.0;
// load input parameters
if (study_db->keyExists("timestepMax")) {
@ -34,6 +36,9 @@ void ScaLBL_Multiphys_Controller::ReadParams(string filename) {
visualization_interval =
study_db->getScalar<int>("visualization_interval");
}
if (study_db->keyExists("restart_interval")) {
restart_interval = study_db->getScalar<int>("restart_interval");
}
if (study_db->keyExists("tolerance")) {
tolerance = study_db->getScalar<double>("tolerance");
}
@ -150,6 +155,53 @@ vector<int> ScaLBL_Multiphys_Controller::getIonNumIter_PNP_coupling(
return num_iter_ion;
}
/**
 * Return the number of internal iterations for the ion transport solver,
 * one entry per element of IonTimeConv.
 *
 * The element with the largest time conversion factor (the first one if
 * several are equal) gets 2 iterations. Every other element gets
 * 2 * max / own value, rounded to the nearest even integer.
 *
 * @param IonTimeConv  time conversion factor of each ion species
 * @return iteration counts in the same order as IonTimeConv; an empty input
 *         yields a single entry equal to 2
 */
vector<int> ScaLBL_Multiphys_Controller::getIonNumIter_NernstPlanck_coupling(
    const vector<double> &IonTimeConv) {
    vector<int> num_iter_ion;

    // With no species the reference entry is still emitted
    if (IonTimeConv.empty()) {
        num_iter_ion.push_back(2);
        return num_iter_ion;
    }

    const size_t max_pos = distance(
        IonTimeConv.begin(),
        max_element(IonTimeConv.begin(), IonTimeConv.end()));

    num_iter_ion.reserve(IonTimeConv.size());
    for (size_t pos = 0; pos < IonTimeConv.size(); pos++) {
        if (pos == max_pos) {
            // the element with max time_conv always runs 2 iterations
            num_iter_ion.push_back(2);
            continue;
        }
        //the factor 2 is the number of iterations for the element has max time_conv
        const double scaled = 2 * IonTimeConv[max_pos] / IonTimeConv[pos];
        num_iter_ion.push_back(int(round(scaled / 2) * 2));
    }
    return num_iter_ion;
}
void ScaLBL_Multiphys_Controller::getTimeConvMax_PNP_coupling(
double StokesTimeConv, const vector<double> &IonTimeConv) {
//Return maximum of the time converting factor from Stokes and ion solvers

View File

@ -28,7 +28,7 @@ public:
const vector<double> &IonTimeConv);
vector<int> getIonNumIter_PNP_coupling(double StokesTimeConv,
const vector<double> &IonTimeConv);
//void getIonNumIter_PNP_coupling(double StokesTimeConv,vector<double> &IonTimeConv,vector<int> &IonTimeMax);
vector<int> getIonNumIter_NernstPlanck_coupling(const vector<double> &IonTimeConv);
void getTimeConvMax_PNP_coupling(double StokesTimeConv,
const vector<double> &IonTimeConv);
@ -38,8 +38,10 @@ public:
vector<int> num_iter_Ion;
int analysis_interval;
int visualization_interval;
int restart_interval;
double tolerance;
double time_conv_max;
double time_conv_MainLoop;
//double SchmidtNum;//Schmidt number = kinematic_viscosity/mass_diffusivity
int rank, nprocs;

View File

@ -32,6 +32,14 @@ ScaLBL_Poisson::ScaLBL_Poisson(int RANK, int NP, const Utilities::MPI& COMM):
}
// Destructor: passes each device buffer of the Poisson solver to
// ScaLBL_FreeDeviceMemory and closes the TIMELOG file if it is set.
// NOTE(review): this presumes every pointer below is either allocated or null
// when the destructor runs (e.g. if Create() was never called) -- confirm that
// the constructor initializes them.
ScaLBL_Poisson::~ScaLBL_Poisson()
{
ScaLBL_FreeDeviceMemory(NeighborList);
ScaLBL_FreeDeviceMemory(dvcMap);
ScaLBL_FreeDeviceMemory(Psi);
ScaLBL_FreeDeviceMemory(Psi_BCLabel);
ScaLBL_FreeDeviceMemory(ElectricField);
ScaLBL_FreeDeviceMemory(ResidualError);
ScaLBL_FreeDeviceMemory(fq);
// only close the log when a file handle is present
if ( TIMELOG )
fclose( TIMELOG );
}
@ -42,7 +50,7 @@ void ScaLBL_Poisson::ReadParams(string filename){
domain_db = db->getDatabase( "Domain" );
electric_db = db->getDatabase( "Poisson" );
k2_inv = 4.0;//speed of sound for D3Q7 lattice
k2_inv = 3.0;//speed of sound for D3Q19 lattice
tau = 0.5+k2_inv;
timestepMax = 100000;
tolerance = 1.0e-6;//stopping criterion for obtaining steady-state electricla potential
@ -58,11 +66,18 @@ void ScaLBL_Poisson::ReadParams(string filename){
TestPeriodicTime = 1.0;//unit: [sec]
TestPeriodicTimeConv = 0.01; //unit [sec/lt]
TestPeriodicSaveInterval = 0.1; //unit [sec]
Restart = "false";
// LB-Poisson Model parameters
if (electric_db->keyExists( "Restart" )){
Restart = electric_db->getScalar<bool>("Restart");
}
if (electric_db->keyExists( "timestepMax" )){
timestepMax = electric_db->getScalar<int>( "timestepMax" );
}
if (electric_db->keyExists( "tau" )){
tau = electric_db->getScalar<double>( "tau" );
}
if (electric_db->keyExists( "analysis_interval" )){
analysis_interval = electric_db->getScalar<int>( "analysis_interval" );
}
@ -71,6 +86,7 @@ void ScaLBL_Poisson::ReadParams(string filename){
}
//'tolerance_method' can be {"MSE","MSE_max"}
tolerance_method = electric_db->getWithDefault<std::string>( "tolerance_method", "MSE" );
lattice_scheme = electric_db->getWithDefault<std::string>( "lattice_scheme", "D3Q7" );
if (electric_db->keyExists( "epsilonR" )){
epsilonR = electric_db->getScalar<double>( "epsilonR" );
}
@ -122,6 +138,10 @@ void ScaLBL_Poisson::ReadParams(string filename){
//Re-calcualte model parameters if user updates input
epsilon0_LB = epsilon0*(h*1.0e-6);//unit:[C/(V*lu)]
epsilon_LB = epsilon0_LB*epsilonR;//electric permittivity
/* restart string */
sprintf(LocalRankString, "%05d", rank);
sprintf(LocalRestartFile, "%s%s", "Psi.", LocalRankString);
if (rank==0) printf("***********************************************************************************\n");
if (rank==0) printf("LB-Poisson Solver: steady-state MaxTimeStep = %i; steady-state tolerance = %.3g \n", timestepMax,tolerance);
@ -136,6 +156,15 @@ void ScaLBL_Poisson::ReadParams(string filename){
else{
if (rank==0) printf("LB-Poisson Solver: tolerance_method=%s cannot be identified!\n",tolerance_method.c_str());
}
if (lattice_scheme.compare("D3Q7")==0){
if (rank==0) printf("LB-Poisson Solver: Use D3Q7 lattice structure.\n");
}
else if (lattice_scheme.compare("D3Q19")==0){
if (rank==0) printf("LB-Poisson Solver: Use D3Q19 lattice structure.\n");
}
else{
if (rank==0) printf("LB-Poisson Solver: lattice_scheme=%s cannot be identified!\n",lattice_scheme.c_str());
}
}
void ScaLBL_Poisson::SetDomain(){
@ -182,7 +211,7 @@ void ScaLBL_Poisson::ReadInput(){
sprintf(LocalRankString,"%05d",Dm->rank());
sprintf(LocalRankFilename,"%s%s","ID.",LocalRankString);
sprintf(LocalRestartFile,"%s%s","Restart.",LocalRankString);
sprintf(LocalRestartFile,"%s%s","Psi.",LocalRankString);
if (domain_db->keyExists( "Filename" )){
@ -330,7 +359,7 @@ void ScaLBL_Poisson::Create(){
if (rank==0) printf ("LB-Poisson Solver: Set up memory efficient layout \n");
Map.resize(Nx,Ny,Nz); Map.fill(-2);
auto neighborList= new int[18*Npad];
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Np,1);
Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Mask->id.data(),Npad,1);
comm.barrier();
//...........................................................................
@ -345,11 +374,16 @@ void ScaLBL_Poisson::Create(){
ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Np);
//ScaLBL_AllocateDeviceMemory((void **) &dvcID, sizeof(signed char)*Nx*Ny*Nz);
ScaLBL_AllocateDeviceMemory((void **) &fq, 7*dist_mem_size);
ScaLBL_AllocateDeviceMemory((void **) &Psi, sizeof(double)*Nx*Ny*Nz);
ScaLBL_AllocateDeviceMemory((void **) &Psi_BCLabel, sizeof(int)*Nx*Ny*Nz);
ScaLBL_AllocateDeviceMemory((void **) &ElectricField, 3*sizeof(double)*Np);
ScaLBL_AllocateDeviceMemory((void **) &ResidualError, sizeof(double)*Np);
if (lattice_scheme.compare("D3Q7")==0){
ScaLBL_AllocateDeviceMemory((void **) &fq, 7*dist_mem_size);
}
else if (lattice_scheme.compare("D3Q19")==0){
ScaLBL_AllocateDeviceMemory((void **) &fq, 19*dist_mem_size);
}
//...........................................................................
// Update GPU data structures
@ -366,6 +400,8 @@ void ScaLBL_Poisson::Create(){
}
}
}
comm.barrier();
if (rank==0) printf (" .... LB-Poisson Solver: check neighbor list \n");
// check that TmpMap is valid
for (int idx=0; idx<ScaLBL_Comm->LastExterior(); idx++){
auto n = TmpMap[idx];
@ -381,6 +417,8 @@ void ScaLBL_Poisson::Create(){
TmpMap[idx] = Nx*Ny*Nz-1;
}
}
comm.barrier();
if (rank==0) printf (" .... LB-Poisson Solver: copy neighbor list to GPU \n");
ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np);
ScaLBL_Comm->Barrier();
delete [] TmpMap;
@ -394,6 +432,7 @@ void ScaLBL_Poisson::Create(){
//ScaLBL_Comm->Barrier();
//Initialize solid boundary for electric potential
// DON'T USE WITH CELLULAR SYSTEM (NO SOLID -- NEED Membrane SOLUTION)
ScaLBL_Comm->SetupBounceBackList(Map, Mask->id.data(), Np);
comm.barrier();
}
@ -407,7 +446,57 @@ void ScaLBL_Poisson::Potential_Init(double *psi_init){
Vin = 1.0; //Boundary-z (inlet) electric potential
Vout = 1.0; //Boundary-Z (outlet) electric potential
if (BoundaryConditionInlet>0){
if (BoundaryConditionInlet==0 && BoundaryConditionOutlet==0){
signed char VALUE=0;
double AFFINITY=0.f;
auto LabelList = electric_db->getVector<int>( "InitialValueLabels" );
auto AffinityList = electric_db->getVector<double>( "InitialValues" );
size_t NLABELS = LabelList.size();
if (NLABELS != AffinityList.size()){
ERROR("Error: LB-Poisson Solver: InitialValueLabels and InitialValues must be of the same length! \n");
}
std::vector<double> label_count( NLABELS, 0.0 );
std::vector<double> label_count_global( NLABELS, 0.0 );
for (int k=0;k<Nz;k++){
for (int j=0;j<Ny;j++){
for (int i=0;i<Nx;i++){
int n = k*Nx*Ny+j*Nx+i;
VALUE=Mask->id[n];
AFFINITY=0.f;
// Assign the affinity from the paired list
for (unsigned int idx=0; idx < NLABELS; idx++){
if (VALUE == LabelList[idx]){
AFFINITY=AffinityList[idx];
label_count[idx] += 1.0;
idx = NLABELS;
}
}
int idx=Map(i,j,k);
if (!(idx<0)) psi_init[n] = AFFINITY;
}
}
}
for (size_t idx=0; idx<NLABELS; idx++)
label_count_global[idx]=Dm->Comm.sumReduce( label_count[idx]);
if (rank==0){
printf("LB-Poisson Solver: number of Poisson initial-value labels: %lu \n",NLABELS);
for (unsigned int idx=0; idx<NLABELS; idx++){
VALUE=LabelList[idx];
AFFINITY=AffinityList[idx];
double volume_fraction = double(label_count_global[idx])/double((Nx-2)*(Ny-2)*(Nz-2)*nprocs);
printf(" label=%d, initial potential=%.3g [V], volume fraction=%.2g\n",VALUE,AFFINITY,volume_fraction);
}
}
}
else if (BoundaryConditionInlet>0 && BoundaryConditionOutlet>0){
//read input parameters for inlet
switch (BoundaryConditionInlet){
case 1:
if (electric_db->keyExists( "Vin" )){
@ -431,8 +520,7 @@ void ScaLBL_Poisson::Potential_Init(double *psi_init){
}
break;
}
}
if (BoundaryConditionOutlet>0){
//read input parameters for outlet
switch (BoundaryConditionOutlet){
case 1:
if (electric_db->keyExists( "Vout" )){
@ -456,31 +544,51 @@ void ScaLBL_Poisson::Potential_Init(double *psi_init){
}
break;
}
}
//By default only periodic BC is applied and Vin=Vout=1.0, i.e. there is no potential gradient along Z-axis
if (BoundaryConditionInlet==2) Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,PhaseShift_In,0);
if (BoundaryConditionOutlet==2) Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,PhaseShift_Out,0);
double slope = (Vout-Vin)/(Nz-2);
double psi_linearized;
for (int k=0;k<Nz;k++){
if (k==0 || k==1){
psi_linearized = Vin;
}
else if (k==Nz-1 || k==Nz-2){
psi_linearized = Vout;
}
else{
psi_linearized = slope*(k-1)+Vin;
}
for (int j=0;j<Ny;j++){
for (int i=0;i<Nx;i++){
int n = k*Nx*Ny+j*Nx+i;
if (Mask->id[n]>0){
psi_init[n] = psi_linearized;
//calcualte inlet/outlet voltage for the case of BCInlet/Outlet=2
if (BoundaryConditionInlet==2) Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,PhaseShift_In,0);
if (BoundaryConditionOutlet==2) Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,PhaseShift_Out,0);
//initialize a linear electrical potential between inlet and outlet
double slope = (Vout-Vin)/(Nz-2);
double psi_linearized;
for (int k=0;k<Nz;k++){
if (k==0 || k==1){
psi_linearized = Vin;
}
else if (k==Nz-1 || k==Nz-2){
psi_linearized = Vout;
}
else{
psi_linearized = slope*(k-1)+Vin;
}
for (int j=0;j<Ny;j++){
for (int i=0;i<Nx;i++){
int n = k*Nx*Ny+j*Nx+i;
if (Mask->id[n]>0){
psi_init[n] = psi_linearized;
}
}
}
}
}
else{//mixed periodic and non-periodic BCs are not supported!
ERROR("Error: check the type of inlet and outlet boundary condition! Mixed periodic and non-periodic BCs are found!\n");
}
/** RESTART **/
if (Restart == true) {
if (rank == 0) {
printf(" POISSON MODEL: Reading restart file! \n");
}
ifstream File(LocalRestartFile, ios::binary);
double value;
// Read the distributions
for (int n = 0; n < Nx*Ny*Nz; n++) {
File.read((char *)&value, sizeof(value));
psi_init[n] = value;
}
File.close();
}
/** END RESTART **/
}
double ScaLBL_Poisson::getBoundaryVoltagefromPeriodicBC(double V0, double freq, double phase_shift, int time_step){
@ -493,7 +601,12 @@ void ScaLBL_Poisson::Initialize(double time_conv_from_Study){
* "time_conv_from_Study" is the phys to LB time conversion factor, unit=[sec/lt]
* which is used for periodic voltage input for inlet and outlet boundaries
*/
if (rank==0) printf ("LB-Poisson Solver: initializing D3Q7 distributions\n");
if (lattice_scheme.compare("D3Q7")==0){
if (rank==0) printf ("LB-Poisson Solver: initializing D3Q7 distributions\n");
}
else if (lattice_scheme.compare("D3Q19")==0){
if (rank==0) printf ("LB-Poisson Solver: initializing D3Q19 distributions\n");
}
//NOTE the initialization involves two steps:
//1. assign solid boundary value (surface potential or surface change density)
//2. Initialize electric potential for pore nodes
@ -507,8 +620,15 @@ void ScaLBL_Poisson::Initialize(double time_conv_from_Study){
ScaLBL_CopyToDevice(Psi, psi_host, Nx*Ny*Nz*sizeof(double));
ScaLBL_CopyToDevice(Psi_BCLabel, psi_BCLabel_host, Nx*Ny*Nz*sizeof(int));
ScaLBL_Comm->Barrier();
ScaLBL_D3Q7_Poisson_Init(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_D3Q7_Poisson_Init(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
if (lattice_scheme.compare("D3Q7")==0){
ScaLBL_D3Q7_Poisson_Init(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_D3Q7_Poisson_Init(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
}
else if (lattice_scheme.compare("D3Q19")==0){
/* switch to d3Q19 model */
ScaLBL_D3Q19_Poisson_Init(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_D3Q19_Poisson_Init(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
}
delete [] psi_host;
delete [] psi_BCLabel_host;
@ -523,136 +643,226 @@ void ScaLBL_Poisson::Initialize(double time_conv_from_Study){
//}
}
//void ScaLBL_Poisson::Run(double *ChargeDensity, bool UseSlippingVelBC, int timestep_from_Study){
//
// //.......create and start timer............
// //double starttime,stoptime,cputime;
// //comm.barrier();
// //auto t1 = std::chrono::system_clock::now();
// double *host_Error;
// host_Error = new double [Np];
//
// timestep=0;
// double error = 1.0;
// while (timestep < timestepMax && error > tolerance) {
// //************************************************************************/
// // *************ODD TIMESTEP*************//
// timestep++;
// //SolveElectricPotentialAAodd(timestep_from_Study,ChargeDensity, UseSlippingVelBC);//update electric potential
// SolvePoissonAAodd(ChargeDensity, UseSlippingVelBC);//perform collision
// ScaLBL_Comm->Barrier(); comm.barrier();
//
// // *************EVEN TIMESTEP*************//
// timestep++;
// //SolveElectricPotentialAAeven(timestep_from_Study,ChargeDensity, UseSlippingVelBC);//update electric potential
// SolvePoissonAAeven(ChargeDensity, UseSlippingVelBC);//perform collision
// ScaLBL_Comm->Barrier(); comm.barrier();
// //************************************************************************/
//
//
// // Check convergence of steady-state solution
// if (timestep==2){
// //save electric potential for convergence check
// }
// if (timestep%analysis_interval==0){
// /* get the elecric potential */
// ScaLBL_CopyToHost(Psi_host.data(),Psi,sizeof(double)*Nx*Ny*Nz);
// if (rank==0) printf(" ... getting Poisson solver error \n");
// double err = 0.0;
// double max_error = 0.0;
// ScaLBL_CopyToHost(host_Error,ResidualError,sizeof(double)*Np);
// for (int idx=0; idx<Np; idx++){
// err = host_Error[idx]*host_Error[idx];
// if (err > max_error ){
// max_error = err;
// }
// }
// error=Dm->Comm.maxReduce(max_error);
//
// /* compute the eletric field */
// //ScaLBL_D3Q19_Poisson_getElectricField(fq, ElectricField, tau, Np);
//
// }
// }
// if(WriteLog==true){
// getConvergenceLog(timestep,error);
// }
//
// //************************************************************************/
// ////if (rank==0) printf("LB-Poission Solver: a steady-state solution is obtained\n");
// ////if (rank==0) printf("---------------------------------------------------------------------------\n");
// //// Compute the walltime per timestep
// //auto t2 = std::chrono::system_clock::now();
// //double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
// //// Performance obtained from each node
// //double MLUPS = double(Np)/cputime/1000000;
//
// //if (rank==0) printf("******************* LB-Poisson Solver Statistics ********************\n");
// //if (rank==0) printf("CPU time = %f \n", cputime);
// //if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
// //MLUPS *= nprocs;
// //if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
// //if (rank==0) printf("*********************************************************************\n");
//
//}
void ScaLBL_Poisson::Run(double *ChargeDensity, bool UseSlippingVelBC, int timestep_from_Study){
//.......create and start timer............
//double starttime,stoptime,cputime;
//comm.barrier();
//auto t1 = std::chrono::system_clock::now();
double error = 1.0;
if (lattice_scheme.compare("D3Q7")==0){
timestep=0;
while (timestep < timestepMax && error > tolerance) {
//************************************************************************/
// *************ODD TIMESTEP*************//
timestep++;
SolveElectricPotentialAAodd(timestep_from_Study);//update electric potential
SolvePoissonAAodd(ChargeDensity, UseSlippingVelBC);//perform collision
ScaLBL_Comm->Barrier(); comm.barrier();
timestep=0;
double error = 1.0;
while (timestep < timestepMax && error > tolerance) {
//************************************************************************/
// *************ODD TIMESTEP*************//
timestep++;
SolveElectricPotentialAAodd(timestep_from_Study);//update electric potential
SolvePoissonAAodd(ChargeDensity, UseSlippingVelBC);//perform collision
ScaLBL_Comm->Barrier(); comm.barrier();
// *************EVEN TIMESTEP*************//
timestep++;
SolveElectricPotentialAAeven(timestep_from_Study);//update electric potential
SolvePoissonAAeven(ChargeDensity, UseSlippingVelBC);//perform collision
ScaLBL_Comm->Barrier(); comm.barrier();
//************************************************************************/
// *************EVEN TIMESTEP*************//
timestep++;
SolveElectricPotentialAAeven(timestep_from_Study);//update electric potential
SolvePoissonAAeven(ChargeDensity, UseSlippingVelBC);//perform collision
ScaLBL_Comm->Barrier(); comm.barrier();
//************************************************************************/
// Check convergence of steady-state solution
if (timestep==2){
//save electric potential for convergence check
ScaLBL_CopyToHost(Psi_previous.data(),Psi,sizeof(double)*Nx*Ny*Nz);
}
if (timestep%analysis_interval==0){
if (tolerance_method.compare("MSE")==0){
double count_loc=0;
double count;
double MSE_loc=0.0;
ScaLBL_CopyToHost(Psi_host.data(),Psi,sizeof(double)*Nx*Ny*Nz);
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
if (Distance(i,j,k) > 0){
MSE_loc += (Psi_host(i,j,k) - Psi_previous(i,j,k))*(Psi_host(i,j,k) - Psi_previous(i,j,k));
count_loc+=1.0;
// Check convergence of steady-state solution
if (timestep==2){
//save electric potential for convergence check
ScaLBL_CopyToHost(Psi_previous.data(),Psi,sizeof(double)*Nx*Ny*Nz);
}
if (timestep%analysis_interval==0){
if (tolerance_method.compare("MSE")==0){
double count_loc=0;
double count;
double MSE_loc=0.0;
ScaLBL_CopyToHost(Psi_host.data(),Psi,sizeof(double)*Nx*Ny*Nz);
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
if (Distance(i,j,k) > 0){
MSE_loc += (Psi_host(i,j,k) - Psi_previous(i,j,k))*(Psi_host(i,j,k) - Psi_previous(i,j,k));
count_loc+=1.0;
}
}
}
}
error=Dm->Comm.sumReduce(MSE_loc);
count=Dm->Comm.sumReduce(count_loc);
error /= count;
}
error=Dm->Comm.sumReduce(MSE_loc);
count=Dm->Comm.sumReduce(count_loc);
error /= count;
}
else if (tolerance_method.compare("MSE_max")==0){
vector<double>MSE_loc;
double MSE_loc_max;
ScaLBL_CopyToHost(Psi_host.data(),Psi,sizeof(double)*Nx*Ny*Nz);
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
if (Distance(i,j,k) > 0){
MSE_loc.push_back((Psi_host(i,j,k) - Psi_previous(i,j,k))*(Psi_host(i,j,k) - Psi_previous(i,j,k)));
else if (tolerance_method.compare("MSE_max")==0){
vector<double>MSE_loc;
double MSE_loc_max;
ScaLBL_CopyToHost(Psi_host.data(),Psi,sizeof(double)*Nx*Ny*Nz);
for (int k=1; k<Nz-1; k++){
for (int j=1; j<Ny-1; j++){
for (int i=1; i<Nx-1; i++){
if (Distance(i,j,k) > 0){
MSE_loc.push_back((Psi_host(i,j,k) - Psi_previous(i,j,k))*(Psi_host(i,j,k) - Psi_previous(i,j,k)));
}
}
}
}
vector<double>::iterator it_max = max_element(MSE_loc.begin(),MSE_loc.end());
unsigned int idx_max=distance(MSE_loc.begin(),it_max);
MSE_loc_max=MSE_loc[idx_max];
error=Dm->Comm.maxReduce(MSE_loc_max);
}
vector<double>::iterator it_max = max_element(MSE_loc.begin(),MSE_loc.end());
unsigned int idx_max=distance(MSE_loc.begin(),it_max);
MSE_loc_max=MSE_loc[idx_max];
error=Dm->Comm.maxReduce(MSE_loc_max);
else{
ERROR("Error: user-specified tolerance_method cannot be identified; check you input database! \n");
}
ScaLBL_CopyToHost(Psi_previous.data(),Psi,sizeof(double)*Nx*Ny*Nz);
}
else{
ERROR("Error: user-specified tolerance_method cannot be identified; check you input database! \n");
}
ScaLBL_CopyToHost(Psi_previous.data(),Psi,sizeof(double)*Nx*Ny*Nz);
//legacy code that tried to use residual to check convergence
//ScaLBL_D3Q7_PoissonResidualError(NeighborList,dvcMap,ResidualError,Psi,ChargeDensity,epsilon_LB,Nx,Nx*Ny,ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior());
//ScaLBL_D3Q7_PoissonResidualError(NeighborList,dvcMap,ResidualError,Psi,ChargeDensity,epsilon_LB,Nx,Nx*Ny,0, ScaLBL_Comm->LastExterior());
//ScaLBL_Comm->Barrier(); comm.barrier();
//vector<double> ResidualError_host(Np);
//double error_loc_max;
////calculate the maximum residual error
//ScaLBL_CopyToHost(&ResidualError_host[0],ResidualError,sizeof(double)*Np);
//vector<double>::iterator it_temp1,it_temp2;
//it_temp1=ResidualError_host.begin();
//advance(it_temp1,ScaLBL_Comm->LastExterior());
//vector<double>::iterator it_max = max_element(ResidualError_host.begin(),it_temp1);
//unsigned int idx_max1 = distance(ResidualError_host.begin(),it_max);
//it_temp1=ResidualError_host.begin();
//it_temp2=ResidualError_host.begin();
//advance(it_temp1,ScaLBL_Comm->FirstInterior());
//advance(it_temp2,ScaLBL_Comm->LastInterior());
//it_max = max_element(it_temp1,it_temp2);
//unsigned int idx_max2 = distance(ResidualError_host.begin(),it_max);
//if (ResidualError_host[idx_max1]>ResidualError_host[idx_max2]){
// error_loc_max=ResidualError_host[idx_max1];
//}
//else{
// error_loc_max=ResidualError_host[idx_max2];
//}
//error = Dm->Comm.maxReduce(error_loc_max);
}
}
}
else if (lattice_scheme.compare("D3Q19")==0){
double *host_Error;
host_Error = new double [Np];
timestep=0;
auto t1 = std::chrono::system_clock::now();
while (timestep < timestepMax && error > tolerance) {
//************************************************************************/
// *************ODD TIMESTEP*************//
timestep++;
//SolveElectricPotentialAAodd(timestep_from_Study,ChargeDensity, UseSlippingVelBC);//update electric potential
SolvePoissonAAodd(ChargeDensity, UseSlippingVelBC);//perform collision
ScaLBL_Comm->Barrier(); comm.barrier();
// *************EVEN TIMESTEP*************//
timestep++;
//SolveElectricPotentialAAeven(timestep_from_Study,ChargeDensity, UseSlippingVelBC);//update electric potential
SolvePoissonAAeven(ChargeDensity, UseSlippingVelBC);//perform collision
ScaLBL_Comm->Barrier(); comm.barrier();
//************************************************************************/
// Check convergence of steady-state solution
if (timestep==2){
//save electric potential for convergence check
}
if (timestep%analysis_interval==0){
/* get the electric potential */
ScaLBL_CopyToHost(Psi_host.data(),Psi,sizeof(double)*Nx*Ny*Nz);
if (rank==0) printf(" ... getting Poisson solver error \n");
double err = 0.0;
double max_error = 0.0;
ScaLBL_CopyToHost(host_Error,ResidualError,sizeof(double)*Np);
for (int idx=0; idx<Np; idx++){
err = host_Error[idx]*host_Error[idx];
if (err > max_error ){
max_error = err;
}
}
error=Dm->Comm.maxReduce(max_error);
/* compute the electric field */
//ScaLBL_D3Q19_Poisson_getElectricField(fq, ElectricField, tau, Np);
}
}
if (rank == 0)
printf("---------------------------------------------------------------"
"----\n");
// Compute the walltime per timestep
auto t2 = std::chrono::system_clock::now();
double cputime = std::chrono::duration<double>(t2 - t1).count() / timestep;
// Performance obtained from each node
double MLUPS = double(Np) / cputime / 1000000;
if (rank == 0)
printf("********************************************************\n");
if (rank == 0)
printf("CPU time = %f \n", cputime);
if (rank == 0)
printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
MLUPS *= nprocs;
if (rank == 0)
printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
if (rank == 0)
printf("********************************************************\n");
delete [] host_Error;
}
//************************************************************************/
if(WriteLog==true){
getConvergenceLog(timestep,error);
}
//************************************************************************/
////if (rank==0) printf("LB-Poission Solver: a steady-state solution is obtained\n");
////if (rank==0) printf("---------------------------------------------------------------------------\n");
//// Compute the walltime per timestep
//auto t2 = std::chrono::system_clock::now();
//double cputime = std::chrono::duration<double>( t2 - t1 ).count() / timestep;
//// Performance obtained from each node
//double MLUPS = double(Np)/cputime/1000000;
//if (rank==0) printf("******************* LB-Poisson Solver Statistics ********************\n");
//if (rank==0) printf("CPU time = %f \n", cputime);
//if (rank==0) printf("Lattice update rate (per core)= %f MLUPS \n", MLUPS);
//MLUPS *= nprocs;
//if (rank==0) printf("Lattice update rate (total)= %f MLUPS \n", MLUPS);
//if (rank==0) printf("*********************************************************************\n");
}
void ScaLBL_Poisson::getConvergenceLog(int timestep,double error){
if ( rank == 0 ) {
fprintf(TIMELOG,"%i %.5g\n",timestep,error);
@ -661,94 +871,217 @@ void ScaLBL_Poisson::getConvergenceLog(int timestep,double error){
}
void ScaLBL_Poisson::SolveElectricPotentialAAodd(int timestep_from_Study){
ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL
ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE
ScaLBL_Comm->Barrier();
// Set boundary conditions
if (BoundaryConditionInlet > 0){
switch (BoundaryConditionInlet){
case 1:
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
break;
case 2:
Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,PhaseShift_In,timestep_from_Study);
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
break;
if (lattice_scheme.compare("D3Q7")==0){
ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FROM NORMAL
ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE
ScaLBL_Comm->Barrier();
// Set boundary conditions
if (BoundaryConditionInlet > 0){
switch (BoundaryConditionInlet){
case 1:
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
break;
case 2:
Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,PhaseShift_In,timestep_from_Study);
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
break;
}
}
}
if (BoundaryConditionOutlet > 0){
switch (BoundaryConditionOutlet){
case 1:
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
break;
case 2:
Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,PhaseShift_Out,timestep_from_Study);
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
break;
if (BoundaryConditionOutlet > 0){
switch (BoundaryConditionOutlet){
case 1:
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
break;
case 2:
Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,PhaseShift_Out,timestep_from_Study);
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
break;
}
}
}
//-------------------------//
ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
//-------------------------//
ScaLBL_D3Q7_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
}
else if (lattice_scheme.compare("D3Q19")==0){
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
//ScaLBL_D3Q19_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, ChargeDensity, Psi, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_Comm->Barrier();
// Set boundary conditions
if (BoundaryConditionInlet > 0){
switch (BoundaryConditionInlet){
case 1:
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, Vin, timestep);
break;
case 2:
Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,PhaseShift_In,timestep_from_Study);
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, Vin, timestep);
break;
}
}
if (BoundaryConditionOutlet > 0){
switch (BoundaryConditionOutlet){
case 1:
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, Vout, timestep);
break;
case 2:
Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,PhaseShift_Out,timestep_from_Study);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, Vout, timestep);
break;
}
}
//-------------------------//
//ScaLBL_D3Q19_AAodd_Poisson_ElectricPotential(NeighborList, dvcMap, fq, ChargeDensity, Psi, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
}
}
void ScaLBL_Poisson::SolveElectricPotentialAAeven(int timestep_from_Study){
ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL
ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE
ScaLBL_Comm->Barrier();
// Set boundary conditions
if (BoundaryConditionInlet > 0){
switch (BoundaryConditionInlet){
case 1:
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
break;
case 2:
Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,PhaseShift_In,timestep_from_Study);
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
break;
if (lattice_scheme.compare("D3Q7")==0){
ScaLBL_Comm->SendD3Q7AA(fq, 0); //READ FORM NORMAL
ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q7AA(fq, 0); //WRITE INTO OPPOSITE
ScaLBL_Comm->Barrier();
// Set boundary conditions
if (BoundaryConditionInlet > 0){
switch (BoundaryConditionInlet){
case 1:
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
break;
case 2:
Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,PhaseShift_In,timestep_from_Study);
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_z(NeighborList, fq, Vin, timestep);
break;
}
}
}
if (BoundaryConditionOutlet > 0){
switch (BoundaryConditionOutlet){
case 1:
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
break;
case 2:
Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,PhaseShift_Out,timestep_from_Study);
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
break;
if (BoundaryConditionOutlet > 0){
switch (BoundaryConditionOutlet){
case 1:
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
break;
case 2:
Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,PhaseShift_Out,timestep_from_Study);
ScaLBL_Comm->D3Q7_Poisson_Potential_BC_Z(NeighborList, fq, Vout, timestep);
break;
}
}
}
//-------------------------//
ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
//-------------------------//
ScaLBL_D3Q7_AAeven_Poisson_ElectricPotential(dvcMap, fq, Psi, 0, ScaLBL_Comm->LastExterior(), Np);
}
else if (lattice_scheme.compare("D3Q19")==0){
ScaLBL_Comm->SendD3Q19AA(fq); //READ FORM NORMAL
//ScaLBL_D3Q19_AAeven_Poisson_ElectricPotential(dvcMap, fq, ChargeDensity, Psi, epsilon_LB, UseSlippingVelBC,
// ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_Comm->Barrier();
// Set boundary conditions
if (BoundaryConditionInlet > 0){
switch (BoundaryConditionInlet){
case 1:
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, Vin, timestep);
break;
case 2:
Vin = getBoundaryVoltagefromPeriodicBC(Vin0,freqIn,PhaseShift_In,timestep_from_Study);
ScaLBL_Comm->D3Q19_Pressure_BC_z(NeighborList, fq, Vin, timestep);
break;
}
}
if (BoundaryConditionOutlet > 0){
switch (BoundaryConditionOutlet){
case 1:
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, Vout, timestep);
break;
case 2:
Vout = getBoundaryVoltagefromPeriodicBC(Vout0,freqOut,PhaseShift_Out,timestep_from_Study);
ScaLBL_Comm->D3Q19_Pressure_BC_Z(NeighborList, fq, Vout, timestep);
break;
}
}
//-------------------------//
//ScaLBL_D3Q19_AAeven_Poisson_ElectricPotential(dvcMap, fq, ChargeDensity, Psi, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
}
}
void ScaLBL_Poisson::SolvePoissonAAodd(double *ChargeDensity, bool UseSlippingVelBC){
ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
//TODO: perhaps add another ScaLBL_Comm routine to update Psi values on solid boundary nodes.
//something like:
//ScaLBL_Comm->SolidDirichletBoundaryUpdates(Psi, Psi_BCLabel, timestep);
ScaLBL_Comm->SolidDirichletAndNeumannD3Q7(fq, Psi, Psi_BCLabel);
//if (BoundaryConditionSolid==1){
// ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi);
//}
//else if (BoundaryConditionSolid==2){
// ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi);
//}
if (lattice_scheme.compare("D3Q7")==0){
ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_D3Q7_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
//TODO: perhaps add another ScaLBL_Comm routine to update Psi values on solid boundary nodes.
//something like:
//ScaLBL_Comm->SolidDirichletBoundaryUpdates(Psi, Psi_BCLabel, timestep);
ScaLBL_Comm->SolidDirichletAndNeumannD3Q7(fq, Psi, Psi_BCLabel);
//if (BoundaryConditionSolid==1){
// ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi);
//}
//else if (BoundaryConditionSolid==2){
// ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi);
//}
}
else if (lattice_scheme.compare("D3Q19")==0){
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_D3Q19_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
//ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAodd_Poisson(NeighborList, dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_Comm->Barrier();
//TODO: perhaps add another ScaLBL_Comm routine to update Psi values on solid boundary nodes.
//something like:
//ScaLBL_Comm->SolidDirichletAndNeumannD3Q7(fq, Psi, Psi_BCLabel);
}
}
void ScaLBL_Poisson::SolvePoissonAAeven(double *ChargeDensity, bool UseSlippingVelBC){
ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_Comm->SolidDirichletAndNeumannD3Q7(fq, Psi, Psi_BCLabel);
//if (BoundaryConditionSolid==1){
// ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi);
//}
//else if (BoundaryConditionSolid==2){
// ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi);
//}
if (lattice_scheme.compare("D3Q7")==0){
ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_D3Q7_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, tau, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_Comm->SolidDirichletAndNeumannD3Q7(fq, Psi, Psi_BCLabel);
//if (BoundaryConditionSolid==1){
// ScaLBL_Comm->SolidDirichletD3Q7(fq, Psi);
//}
//else if (BoundaryConditionSolid==2){
// ScaLBL_Comm->SolidNeumannD3Q7(fq, Psi);
//}
}
else if (lattice_scheme.compare("D3Q19")==0){
ScaLBL_Comm->SendD3Q19AA(fq); //READ FROM NORMAL
ScaLBL_D3Q19_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, ResidualError, tau, epsilon_LB, UseSlippingVelBC, ScaLBL_Comm->FirstInterior(), ScaLBL_Comm->LastInterior(), Np);
ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
// ScaLBL_Comm->RecvD3Q19AA(fq); //WRITE INTO OPPOSITE
ScaLBL_D3Q19_AAeven_Poisson(dvcMap, fq, ChargeDensity, Psi, ElectricField, ResidualError, tau, epsilon_LB, UseSlippingVelBC, 0, ScaLBL_Comm->LastExterior(), Np);
ScaLBL_Comm->Barrier();
//ScaLBL_Comm->SolidDirichletAndNeumannD3Q7(fq, Psi, Psi_BCLabel);
}
}
void ScaLBL_Poisson::Checkpoint(){
if (rank == 0) {
printf(" POISSON MODEL: Writing restart file! \n");
}
double value;
double *cPsi;
cPsi = new double[Nx*Ny*Nz];
ScaLBL_CopyToHost(cPsi, Psi, Nx*Ny*Nz *sizeof(double));
ofstream File(LocalRestartFile, ios::binary);
for (int n = 0; n < Nx*Ny*Nz; n++) {
value = cPsi[n];
File.write((char *)&value, sizeof(value));
}
File.close();
delete[] cPsi;
}
void ScaLBL_Poisson::DummyChargeDensity(){

View File

@ -40,9 +40,11 @@ public:
void getElectricField(DoubleArray &Values_x, DoubleArray &Values_y,
DoubleArray &Values_z);
void getElectricField_debug(int timestep);
void Checkpoint();
void DummyChargeDensity(); //for debugging
//bool Restart,pBC;
bool Restart;
int timestep, timestepMax;
int analysis_interval;
int BoundaryConditionInlet;
@ -51,6 +53,7 @@ public:
double tau;
double tolerance;
std::string tolerance_method;
std::string lattice_scheme;
double k2_inv;
double epsilon0, epsilon0_LB, epsilonR, epsilon_LB;
double Vin, Vout;
@ -108,8 +111,8 @@ private:
void AssignSolidBoundary(double *poisson_solid, int *poisson_solid_label);
void Potential_Init(double *psi_init);
void ElectricField_LB_to_Phys(DoubleArray &Efield_reg);
void SolveElectricPotentialAAodd(int timestep_from_Study);
void SolveElectricPotentialAAeven(int timestep_from_Study);
void SolveElectricPotentialAAodd(int timestep_from_Study);
//void SolveElectricField();
void SolvePoissonAAodd(double *ChargeDensity, bool UseSlippingVelBC);
void SolvePoissonAAeven(double *ChargeDensity, bool UseSlippingVelBC);

View File

@ -0,0 +1,47 @@
# Configure script for LBPM in the Cray PrgEnv-amd / ROCm 4.5.0 environment.
# This variant configures with HIP disabled (USE_HIP=0) and with MPI and HDF5
# enabled; SILO, the timer library and Doxygen are switched off.
# The source tree is expected at ~/LBPM-WIA; run this from the build directory.

#module load cmake/3.21.3
#module load PrgEnv-gnu
module load PrgEnv-amd
module load rocm/4.5.0
module load cray-mpich
module load cray-hdf5-parallel
#module load craype-accel-amd-gfx908
## These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
# Need a new version of cmake
export CMAKE_DIR=/gpfs/alpine/csc380/proj-shared/LBPM/cmake-3.21.0/bin
#-I${MPICH_DIR}/include
#-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa
# Shell assignments take no spaces around '=' (with spaces the shell tries to
# run a command named HIPFLAGS). Exported so that child processes such as
# cmake can see it.
export HIPFLAGS="--amdgpu-target=gfx90a"
# configure
# Remove everything matching CMake* in the current directory (stale cache and
# CMakeFiles) so the configure starts clean. Run from the build directory only:
# the glob would also match a CMakeLists.txt.
rm -rf CMake*
${CMAKE_DIR}/cmake \
    -D CMAKE_BUILD_TYPE:STRING=Release \
    -D CMAKE_C_COMPILER:PATH=cc \
    -D CMAKE_CXX_COMPILER:PATH=CC \
    -D CMAKE_CXX_STANDARD=14 \
    -D DISABLE_GOLD:BOOL=TRUE \
    -D DISABLE_LTO:BOOL=TRUE \
    -D CMAKE_C_FLAGS="-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -I${HDF5_DIR}/include" \
    -D CMAKE_CXX_FLAGS="-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -I${HDF5_DIR}/include" \
    -D LINK_LIBRARIES="${ROCM_PATH}/lib/libamdhip64.so;${CRAY_MPICH_ROOTDIR}/gtl/lib/libmpi_gtl_hsa.so" \
    -D USE_HIP=0 \
    -D CMAKE_HIP_COMPILER_TOOLKIT_ROOT=$ROCM_PATH/hip \
    -D USE_MPI=1 \
    -D MPI_SKIP_SEARCH=1 \
    -D MPIEXEC="srun" \
    -D USE_HDF5=1 \
    -D HDF5_DIRECTORY="${HDF5_DIR}" \
    -D USE_SILO=0 \
    -D USE_TIMER=0 \
    -D USE_DOXYGEN:BOOL=false \
    ~/LBPM-WIA

View File

@ -0,0 +1,52 @@
# Configure script for LBPM in the Cray PrgEnv-amd / ROCm 4.5.0 environment.
# This variant configures with HIP enabled (USE_HIP=1) and with MPI and HDF5
# enabled; SILO, the timer library and Doxygen are switched off.
# The source tree is expected at ~/LBPM-WIA; run this from the build directory.
#module load cmake/3.21.3
#module load PrgEnv-gnu
module load PrgEnv-amd
module load rocm/4.5.0
module load cray-mpich
module load cray-hdf5-parallel
# NOTE(review): this loads the gfx908 accelerator module, while the GTL
# variables exported below (and the commented HIPFLAGS example) are keyed on
# gfx90a - confirm which GPU target is intended.
module load craype-accel-amd-gfx908
## These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx90a="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx90a="-lmpi_gtl_hsa"
export LD_LIBRARY_PATH=${CRAY_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}
# Need a new version of cmake
export CMAKE_DIR=/gpfs/alpine/csc380/proj-shared/LBPM/cmake-3.21.0/bin
#-I${MPICH_DIR}/include
#-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa
#export HIPFLAGS="--amdgpu-target=gfx90a --save-temps"
#--amdgpu-spill-vgpr-to-agpr=0
#THIS IS HOW TO CHECK FOR SPILLS (example)
# hipcc -c -g -ggdb --save-temps Color.hip
# -munsafe-fp-atomics
# configure
# Removes everything matching CMake* in the current directory (stale cache and
# CMakeFiles) before configuring. Run from the build directory only: the glob
# would also match a CMakeLists.txt.
rm -rf CMake*
# The MPI and GTL libraries are passed explicitly through the compiler flags
# and LINK_LIBRARIES; MPI_SKIP_SEARCH=1 is set alongside them.
${CMAKE_DIR}/cmake \
-D CMAKE_BUILD_TYPE:STRING=Release \
-D CMAKE_C_COMPILER:PATH=cc \
-D CMAKE_CXX_COMPILER:PATH=CC \
-D CMAKE_CXX_STANDARD=14 \
-D DISABLE_GOLD:BOOL=TRUE \
-D DISABLE_LTO:BOOL=TRUE \
-D CMAKE_C_FLAGS="-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -I${HDF5_DIR}/include" \
-D CMAKE_CXX_FLAGS="-L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -I${HDF5_DIR}/include" \
-D LINK_LIBRARIES="${ROCM_PATH}/lib/libamdhip64.so;${CRAY_MPICH_ROOTDIR}/gtl/lib/libmpi_gtl_hsa.so" \
-D USE_HIP=1 \
-D CMAKE_HIP_COMPILER_TOOLKIT_ROOT=$ROCM_PATH/hip \
-D USE_MPI=1 \
-D MPI_SKIP_SEARCH=1 \
-D MPIEXEC="srun" \
-D USE_HDF5=1 \
-D HDF5_DIRECTORY="${HDF5_DIR}" \
-D USE_SILO=0 \
-D USE_TIMER=0 \
-D USE_DOXYGEN:BOOL=false \
~/LBPM-WIA

View File

@ -1,14 +1,13 @@
export TPL_ROOT=/ccs/home/mbt/repos
export TPL_BUILDER=/ccs/home/mbt/repos/TPL-builder
export TPL_ROOT=/ccs/proj/csc380/mcclurej/spock
export TPL_BUILDER=/ccs/home/mcclurej/tpl-builder
export TPL_WEBPAGE=http://bitbucket.org/AdvancedMultiPhysics/tpl-builder/downloads
export INSTALL_DIR=/ccs/home/mbt/spock/install
export INSTALL_DIR=/ccs/proj/csc380/mcclurej/spock/install
module load cmake
module load llvm-amdgpu
module load hip
cmake \
-D CMAKE_BUILD_TYPE=Release \
-D CXX_STD=14 \
@ -24,7 +23,7 @@ cmake \
-D ENABLE_SHARED:BOOL=OFF \
-D PROCS_INSTALL=8 \
-D TPL_LIST:STRING="TIMER;ZLIB;HDF5;SILO" \
-D TIMER_URL="${TPL_ROOT}/TimerUtility" \
-D TIMER_URL="${TPL_ROOT}/timerutility" \
-D ZLIB_URL="http://zlib.net/zlib-1.2.11.tar.gz" \
-D HDF5_URL="${TPL_ROOT}/hdf5-1.8.12.tar.gz" \
-D BUILD_TYPE=x86_64 \

View File

@ -1,30 +1,40 @@
module load cmake
module load llvm-amdgpu
module load hip
module load PrgEnv-gnu
module load rocm/4.2.0
module load cray-mpich
module load cray-hdf5-parallel
#module load craype-accel-amd-gfx908
export TPL_DIR=/gpfs/alpine/stf006/proj-shared/mbt/spock/install
## These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx908="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx908="-lmpi_gtl_hsa"
# Need a new version of cmake
export CMAKE_DIR=/gpfs/alpine/csc380/proj-shared/LBPM/cmake-3.21.0/bin
# configure
rm -rf CMake*
cmake \
${CMAKE_DIR}/cmake \
-D CMAKE_BUILD_TYPE:STRING=Release \
-D CMAKE_C_COMPILER:PATH=cc \
-D CMAKE_C_COMPILER:PATH=cc \
-D CMAKE_CXX_COMPILER:PATH=CC \
-D CMAKE_CXX_STANDARD=14 \
-D DISABLE_GOLD:BOOL=TRUE \
-D DISABLE_LTO:BOOL=TRUE \
-D LINK_LIBRARIES="${ROCM_PATH}/lib/libamdhip64.so;${CRAY_MPICH_ROOTDIR}/gtl/lib/libmpi_gtl_hsa.so" \
-D USE_HIP=1 \
-D LINK_LIBRARIES=${HIP_PATH}/lib/libamdhip64.so \
-D USE_CUDA=0 \
-D CMAKE_CUDA_FLAGS="-arch sm_70 -Xptxas=-v -Xptxas -dlcm=cg -lineinfo" \
-D CMAKE_CUDA_HOST_COMPILER="gcc" \
-D USE_MPI=0 \
-D CMAKE_HIP_COMPILER_TOOLKIT_ROOT=$ROCM_PATH/hip \
-D USE_MPI=1 \
-D MPI_SKIP_SEARCH=1 \
-D MPIEXEC="srun" \
-D USE_HDF5=1 \
-D HDF5_DIRECTORY="${TPL_DIR}/hdf5" \
-D USE_SILO=0 \
-D SILO_DIRECTORY="${TPL_DIR}/silo" \
-D USE_DOXYGEN:BOOL=false \
-D HDF5_DIRECTORY="${HDF5_DIR}" \
-D USE_SILO=0 \
-D USE_TIMER=0 \
~/repos/LBPM-WIA
-D USE_DOXYGEN:BOOL=false \
~/LBPM-WIA

View File

@ -0,0 +1,39 @@
# Configure script for an LBPM build with HIP enabled (USE_HIP=1) and MPI
# enabled (USE_MPI=1), linking against libamdhip64 and the Cray MPICH GTL
# library (libmpi_gtl_hsa). Run it from an empty build directory: it deletes
# any CMake* entries in the current directory and then invokes cmake on the
# source tree at ~/repos/LBPM-WIA.
#
# Variables read but not set here: CRAY_MPICH_ROOTDIR, ROCM_PATH, HDF5_DIR.
# Presumably they are exported by the modules loaded below -- TODO confirm.
## Load the desired modules
# NOTE(review): Cray programming environments are usually named PrgEnv-gnu;
# confirm that a module called PrgEnv-gcc exists on the target system.
module load PrgEnv-gcc
module load rocm/4.3.0
module load cray-mpich
module load cray-hdf5-parallel
## These must be set before compiling so the executable picks up GTL
export PE_MPICH_GTL_DIR_amd_gfx908="-L${CRAY_MPICH_ROOTDIR}/gtl/lib"
export PE_MPICH_GTL_LIBS_amd_gfx908="-lmpi_gtl_hsa"
## These must be set before running
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_SMP_SINGLE_COPY_MODE=CMA
# Directory containing the cmake binary used below (a user-specific install;
# the commented-out line is an alternative project-shared location).
#export CMAKE_DIR=/gpfs/alpine/csc380/proj-shared/LBPM/cmake-3.21.3/bin
export CMAKE_DIR=/ccs/home/mbt/spock/cmake-3.21.3/bin
# configure
# Remove CMake* files/directories left by a previous configure in this directory.
rm -rf CMake*
# NOTE: the cmake command below is a single backslash-continued command; do not
# insert comment lines between its arguments.
${CMAKE_DIR}/cmake \
-D CMAKE_BUILD_TYPE:STRING=Release \
-D CMAKE_CXX_COMPILER:PATH=CC \
-D CMAKE_CXX_STANDARD=14 \
-D DISABLE_GOLD:BOOL=TRUE \
-D DISABLE_LTO:BOOL=TRUE \
-D USE_HIP=1 \
-D LINK_LIBRARIES="${ROCM_PATH}/lib/libamdhip64.so;${CRAY_MPICH_ROOTDIR}/gtl/lib/libmpi_gtl_hsa.so" \
-D USE_MPI=1 \
-D MPI_SKIP_SEARCH=1 \
-D MPIEXEC="srun" \
-D USE_HDF5=1 \
-D HDF5_DIRECTORY="${HDF5_DIR}" \
-D USE_SILO=0 \
-D USE_TIMER=0 \
-D USE_DOXYGEN:BOOL=false \
~/repos/LBPM-WIA

View File

@ -1,19 +1,28 @@
# load the module for cmake
#module load cmake
module load cmake
# gcc/7.5.0
module load gcc/7.5.0
module load cuda/10.2.89
module load hdf5/1.10.7
#source /gpfs/gpfs_stage1/b6p315aa/setup/setup-mpi.sh
module load cmake gcc/7.5.0
module load cuda
module load hdf5
#/ccs/proj/csc380/mcclurej
#export HDF5_DIR=/ccs/proj/csc380/mcclurej/install/hdf5/1.8.12/
#export SILO_DIR=/ccs/proj/csc380/mcclurej/install/silo/4.10.2/
#export NETCDF_DIR=/ccs/proj/geo136/install/netcdf/4.6.1
export HDF5_DIR="$OLCF_HDF5_ROOT"
# configure
rm -rf CMake*
cmake \
-D CMAKE_BUILD_TYPE:STRING=Release \
-D CMAKE_C_COMPILER:PATH=mpicc \
@ -22,7 +31,7 @@ cmake \
-D CMAKE_CXX_STANDARD=14 \
-D USE_CUDA=1 \
-D CMAKE_CUDA_FLAGS="-arch sm_70 -Xptxas=-v -Xptxas -dlcm=cg -lineinfo" \
-D CMAKE_CUDA_HOST_COMPILER="/sw/summit/gcc/6.4.0/bin/gcc" \
-D CMAKE_CUDA_HOST_COMPILER="/sw/summit/gcc/7.5.0-2/bin/gcc" \
-D USE_MPI=1 \
-D MPIEXEC=mpirun \
-D USE_EXT_MPI_FOR_SERIAL_TESTS:BOOL=TRUE \
@ -38,4 +47,6 @@ cmake \
-D USE_TIMER=0 \
~/LBPM-WIA
make VERBOSE=1 -j1 && make install
make VERBOSE=1 -j8 && make install

View File

@ -6,9 +6,12 @@ ADD_LBPM_EXECUTABLE( lbpm_permeability_simulator )
ADD_LBPM_EXECUTABLE( lbpm_greyscale_simulator )
ADD_LBPM_EXECUTABLE( lbpm_greyscaleColor_simulator )
ADD_LBPM_EXECUTABLE( lbpm_electrokinetic_SingleFluid_simulator )
ADD_LBPM_EXECUTABLE( lbpm_nernst_planck_simulator )
ADD_LBPM_EXECUTABLE( lbpm_nernst_planck_cell_simulator )
ADD_LBPM_EXECUTABLE( lbpm_cell_simulator )
ADD_LBPM_EXECUTABLE( lbpm_freelee_simulator )
ADD_LBPM_EXECUTABLE( lbpm_freelee_SingleFluidBGK_simulator )
#ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator )
ADD_LBPM_EXECUTABLE( lbpm_BGK_simulator )
#ADD_LBPM_EXECUTABLE( lbpm_color_macro_simulator )
ADD_LBPM_EXECUTABLE( lbpm_dfh_simulator )
#ADD_LBPM_EXECUTABLE( lbpm_sphere_pp )
@ -47,7 +50,6 @@ ADD_LBPM_EXECUTABLE( TestPNP_Stokes )
ADD_LBPM_EXECUTABLE( TestMixedGrad )
CONFIGURE_FILE( ${CMAKE_CURRENT_SOURCE_DIR}/cylindertest ${CMAKE_CURRENT_BINARY_DIR}/cylindertest COPYONLY )
# Add the tests
@ -59,6 +61,7 @@ ADD_LBPM_TEST( TestTopo3D )
ADD_LBPM_TEST( TestFluxBC )
ADD_LBPM_TEST( TestFlowAdaptor )
ADD_LBPM_TEST( TestMap )
ADD_LBPM_TEST( TestMembrane )
#ADD_LBPM_TEST( TestMRT )
#ADD_LBPM_TEST( TestColorGrad )
ADD_LBPM_TEST( TestWideHalo )

View File

@ -183,11 +183,12 @@ int main(int argc, char **argv)
int i,j,k;
// Load inputs
auto db = loadInputs( nprocs );
/* auto filename = argv[1];
auto input_db = std::make_shared<Database>( filename );
auto db = input_db->getDatabase( "Domain" );
*/
auto filename = argv[1];
auto input_db = std::make_shared<Database>( filename );
auto db = input_db->getDatabase( "Domain" );
//else {
// auto db = loadInputs( nprocs );
//}
int Nx = db->getVector<int>( "n" )[0];
int Ny = db->getVector<int>( "n" )[1];
int Nz = db->getVector<int>( "n" )[2];
@ -269,17 +270,19 @@ int main(int argc, char **argv)
//.......................................................................
//...........................................................................
comm.barrier();
//comm.barrier();
if (rank == 0) cout << "Domain set." << endl;
//...........................................................................
cout << flush;
//...........................................................................
if (rank==0) printf ("Create ScaLBL_Communicator \n");
cout << flush;
// Create a communicator for the device (will use optimized layout)
ScaLBL_Communicator ScaLBL_Comm(Dm);
int Npad=(Np/16 + 2)*16;
if (rank==0) printf ("Set up memory efficient layout, %i | %i | %i \n", Np, Npad, N);
cout << flush;
auto neighborList= new int[18*Npad];
IntArray Map(Nx,Ny,Nz);
Map.fill(-2);
@ -290,7 +293,8 @@ int main(int argc, char **argv)
//......................device distributions.................................
dist_mem_size = Np*sizeof(double);
if (rank==0) printf ("Allocating distributions \n");
cout << flush;
int *NeighborList;
int *dvcMap;
double *fq;
@ -320,6 +324,9 @@ int main(int argc, char **argv)
ScaLBL_DeviceBarrier();
delete [] TmpMap;
if (rank==0) printf("Map is copied to GPU \n");
cout << flush;
//...........................................................................
/* // Write the communication structure into a file for debugging
@ -351,11 +358,13 @@ int main(int argc, char **argv)
fclose(CommFile);
*/
if (rank==0) printf("Setting the distributions, size = : %i\n", Np);
cout << flush;
//...........................................................................
GlobalFlipScaLBL_D3Q19_Init(fq_host, Map, Np, Nx-2, Ny-2, Nz-2, iproc,jproc,kproc,nprocx,nprocy,nprocz);
ScaLBL_CopyToDevice(fq, fq_host, 19*dist_mem_size);
ScaLBL_DeviceBarrier();
comm.barrier();
//comm.barrier();
//*************************************************************************
// First timestep
ScaLBL_Comm.SendD3Q19AA(fq); //READ FROM NORMAL
@ -375,6 +384,7 @@ int main(int argc, char **argv)
int timestep = 0;
if (rank==0) printf("********************************************************\n");
if (rank==0) printf("No. of timesteps for timing: %i \n", 100);
cout << flush;
//.......create and start timer............
double starttime,stoptime,cputime;
@ -420,13 +430,16 @@ int main(int argc, char **argv)
// 18 reads and 18 writes for each lattice site
double MemoryRefs = double(Np)*36;
// number of memory references for the swap algorithm - GigaBytes / second
if (rank==0) printf("DRAM bandwidth (per process)= %f GB/sec \n",MemoryRefs*8*double(timestep)*1e-9);
if (rank==0) printf("DRAM bandwidth (per process)= %f GB/sec \n",MemoryRefs*8*double(timestep)/cputime*1e-9);
// Report bandwidth in Gigabits per second
// communication bandwidth includes both send and receive
if (rank==0) printf("Communication bandwidth (per process)= %f Gbit/sec \n",ScaLBL_Comm.CommunicationCount*64*timestep/1e9);
if (rank==0) printf("Aggregated communication bandwidth = %f Gbit/sec \n",nprocs*ScaLBL_Comm.CommunicationCount*64*timestep/1e9);
if (rank==0) printf("Communication bandwidth (per process)= %f Gbit/sec \n",ScaLBL_Comm.CommunicationCount*64*timestep/cputime*1e-9);
if (rank==0) printf("Aggregated communication bandwidth = %f Gbit/sec \n",nprocs*ScaLBL_Comm.CommunicationCount*64*timestep/cputime*1e-9);
cout << flush;
}
// ****************************************************
//cout << fflush;
comm.barrier();
Utilities::shutdown();
// ****************************************************

348
tests/TestMembrane.cpp Normal file
View File

@ -0,0 +1,348 @@
//*************************************************************************
// Lattice Boltzmann Simulator for Single Phase Flow in Porous Media
// James E. McClure
//*************************************************************************
#include <stdio.h>
#include <iostream>
#include <fstream>
#include "common/MPI.h"
#include "common/Membrane.h"
#include "common/ScaLBL.h"
using namespace std;
std::shared_ptr<Database> loadInputs( int nprocs )
{
//auto db = std::make_shared<Database>( "Domain.in" );
auto db = std::make_shared<Database>();
db->putScalar<int>( "BC", 0 );
db->putVector<int>( "nproc", { 1, 1, 1 } );
db->putVector<int>( "n", { 32, 32, 32 } );
db->putScalar<int>( "nspheres", 1 );
db->putVector<double>( "L", { 1, 1, 1 } );
return db;
}
//***************************************************************************************
/**
 * Unit test driver for the Membrane data structure.
 *
 * Steps performed (in order):
 *   1. Build a domain from loadInputs() and a distance field relative to a
 *      sphere of radius Nx/4 centred in the (halo-padded) box.
 *   2. Create the memory-optimised layout / neighbor list and copy it to the device.
 *   3. Create the Membrane from the distance field and tag the membrane links.
 *   4. Run the membrane ion transport routine and check the summed result
 *      against a tolerance of 1e-12 (failure sets the return code to 2).
 *   5. Compare distributions streamed with the regular and the membrane
 *      neighbor lists (any mismatch sets the return code to 3).
 *   6. Exercise the D3Q7 membrane communication routines and write the
 *      results to "D3Q7.raw" and "Map.raw".
 *
 * Passing any command-line argument additionally writes "membrane.raw" and
 * "membrane2.raw"; "membrane3.raw" is always written.
 *
 * @return 0 on success, otherwise the code of the last failed check (2 or 3).
 */
int main(int argc, char **argv)
{
    // Initialize MPI
    Utilities::startup( argc, argv );
    Utilities::MPI comm( MPI_COMM_WORLD );
    int check=0;
    {
        int i,j,k,n;

        int rank = comm.getRank();
        if (rank == 0){
            printf("********************************************************\n");
            printf("Running unit test: TestMembrane	\n");
            printf("********************************************************\n");
        }

        // Load inputs
        auto db = loadInputs( comm.getSize() );
        int Nx = db->getVector<int>( "n" )[0];
        int Ny = db->getVector<int>( "n" )[1];
        int Nz = db->getVector<int>( "n" )[2];
        auto Dm = std::make_shared<Domain>(db,comm);

        // Add one halo layer on each side
        Nx += 2;
        Ny += 2;
        Nz += 2;
        int N = Nx*Ny*Nz;
        //.......................................................................
        int Np = 0;
        double distance,radius;
        DoubleArray Distance(Nx,Ny,Nz);
        for (k=0;k<Nz;k++){
            for (j=0;j<Ny;j++){
                for (i=0;i<Nx;i++){
                    n = k*Nx*Ny+j*Nx+i;
                    Dm->id[n] = 1;
                    radius = double(Nx)/4;
                    // signed distance to the sphere surface (negative inside)
                    distance = sqrt(double((i-0.5*Nx)*(i-0.5*Nx)+ (j-0.5*Ny)*(j-0.5*Ny)+ (k-0.5*Nz)*(k-0.5*Nz)))-radius;
                    // NOTE(review): the label is 1 on both sides of the sphere, so
                    // this branch currently has no effect on the labels.
                    if (distance < 0.0 ){
                        Dm->id[n] = 1;
                    }
                    Distance(i,j,k) = distance;
                    Np++;
                }
            }
        }
        Dm->CommInit();

        // Create a communicator for the device (will use optimized layout)
        std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm(new ScaLBL_Communicator(Dm));
        //Create a second communicator based on the regular data layout
        std::shared_ptr<ScaLBL_Communicator> ScaLBL_Comm_Regular(new ScaLBL_Communicator(Dm));

        if (rank==0){
            printf("Total domain size = %i \n",N);
            printf("Reduced domain size = %i \n",Np);
        }

        // LBM variables
        if (rank==0)	printf ("Set up the neighborlist \n");
        int Npad=Np+32;
        int neighborSize=18*Npad*sizeof(int);
        int *neighborList;
        IntArray Map(Nx,Ny,Nz);
        neighborList= new int[18*Npad];

        //......................device distributions.................................
        int *NeighborList;
        int *dvcMap;
        //...........................................................................
        ScaLBL_AllocateDeviceMemory((void **) &NeighborList, neighborSize);
        ScaLBL_AllocateDeviceMemory((void **) &dvcMap, sizeof(int)*Npad);

        // Np is replaced by the value returned from the layout routine.
        // NOTE(review): the device buffers above were sized with the previous
        // Np (via Npad); presumably the returned Np never exceeds Npad -- confirm.
        Np = ScaLBL_Comm->MemoryOptimizedLayoutAA(Map,neighborList,Dm->id.data(),Np,1);
        comm.barrier();
        ScaLBL_CopyToDevice(NeighborList, neighborList, 18*Np*sizeof(int));

        // Value-initialised so that every slot (including q=18 and indices
        // between the exterior and interior ranges) is zero before it is summed.
        double *dist;
        dist = new double [19*Np]();

        // Check the neighborlist
        printf("Check neighborlist: exterior %i, first interior %i last interior %i \n",ScaLBL_Comm->LastExterior(),ScaLBL_Comm->FirstInterior(),ScaLBL_Comm->LastInterior());
        // NOTE(review): nn is reduced modulo Np, so the (nn>Np) test below can
        // never be true; the intended range check should be confirmed.
        for (int idx=0; idx<ScaLBL_Comm->LastExterior(); idx++){
            for (int q=0; q<18; q++){
                int nn = neighborList[q*Np+idx]%Np;
                if (nn>Np) printf("neighborlist error (exterior) at q=%i, idx=%i \n",q,idx);
                dist[q*Np + idx] = 0.0;
            }
        }
        for (int idx=ScaLBL_Comm->FirstInterior(); idx<ScaLBL_Comm->LastInterior(); idx++){
            for (int q=0; q<18; q++){
                int nn = neighborList[q*Np+idx]%Np;
                if (nn>Np) printf("neighborlist error (exterior) at q=%i, idx=%i \n",q,idx);
                dist[q*Np + idx] = 0.0;
            }
        }

        /* create a membrane data structure */
        Membrane M(ScaLBL_Comm, NeighborList, Np);

        int MembraneCount = M.Create(Distance, Map);
        if (rank==0)	printf (" Number of membrane links: %i \n", MembraneCount);

        /* create a tagged array to show where the membrane is */
        double *MembraneLinks;
        MembraneLinks = new double [Nx*Ny*Nz]();   // zero-initialised

        // Mark both ends of every membrane link with opposite signs
        for (int mlink=0; mlink<MembraneCount; mlink++){
            int iq = M.membraneLinks[2*mlink];
            int jq = M.membraneLinks[2*mlink+1];
            dist[iq] = -1.0; // set these distributions to non-zero
            dist[jq] = 1.0;
        }
        for (k=1;k<Nz-1;k++){
            for (j=1;j<Ny-1;j++){
                for (i=1;i<Nx-1;i++){
                    int idx = Map(i,j,k);
                    // skip sites without a compact index, as the later loops do
                    if (idx < 0) continue;
                    double sum = 0.0;
                    for (int q=0; q<19; q++){
                        sum += dist[q*Np + idx];
                    }
                    int n = k*Nx*Ny + j*Nx + i;
                    MembraneLinks[n] = sum;
                    if (sum > 0.f){
                        Dm->id[n] = 127;
                    }
                    if (sum < 0.f){
                        Dm->id[n] = 64;
                    }
                }
            }
        }
        if (argc > 1)
            Dm->AggregateLabels("membrane.raw");

        /* create a pair of distributions to test membrane mass transport routine */
        double *fq, *gq, *Ci, *Cj, *Psi, *Ci_host;
        Ci_host = new double [Np]();   // zero-initialised so unmapped entries are defined
        ScaLBL_AllocateDeviceMemory((void **)&fq, 19 * sizeof(double) * Np);
        ScaLBL_AllocateDeviceMemory((void **)&gq, 19 * sizeof(double) * Np);
        ScaLBL_AllocateDeviceMemory((void **)&Ci, sizeof(double) * Np);
        ScaLBL_AllocateDeviceMemory((void **)&Cj, sizeof(double) * Np);
        ScaLBL_AllocateDeviceMemory((void **)&Psi, sizeof(double) * Np);

        /* initialize concentration: 1 where Distance > 0, 0 elsewhere */
        for (k=1;k<Nz-1;k++){
            for (j=1;j<Ny-1;j++){
                for (i=1;i<Nx-1;i++){
                    int idx = Map(i,j,k);
                    if (idx < 0) continue;
                    if (Distance(i,j,k) > 0.0)
                        Ci_host[idx] = 1.0;
                    else
                        Ci_host[idx] = 0.0;
                }
            }
        }
        ScaLBL_CopyToDevice(Ci, Ci_host, sizeof(double) * Np);

        /* initialize the distributions */
        ScaLBL_D3Q7_Ion_Init_FromFile(fq, Ci, Np);
        ScaLBL_D3Q7_Ion_Init_FromFile(gq, Ci, Np);

        /* Streaming with the usual neighborlist */
        ScaLBL_D3Q19_AAodd_Compact(NeighborList, fq, Np);

        /* Streaming with the membrane neighborlist*/
        ScaLBL_D3Q19_AAodd_Compact(M.NeighborList, gq, Np);

        /* explicit mass transfer step with the membrane*/
        // NOTE(review): dvcMap is only filled further below and nothing in this
        // function writes Psi before this call -- confirm that
        // AssignCoefficients does not depend on their contents here.
        M.AssignCoefficients(dvcMap, Psi, 0.0, 1.0, 1.0, 1.0, 1.0);
        M.IonTransport(gq, Cj);
        ScaLBL_CopyToHost(Ci_host, Cj, sizeof(double) * Np);

        double ionError = 0.0;
        for (int n=0; n<Np; n++){
            ionError += Ci_host[n];
        }
        if (fabs(ionError) > 1e-12) {
            printf(" Failed error tolerance in membrane ion transport routine! \n");
            check = 2;
        }

        DoubleArray Ions(Nx,Ny,Nz);
        ScaLBL_Comm->RegularLayout(Map, Cj, Ions);
        if (argc > 1)
            Dm->AggregateLabels("membrane2.raw",Ions);

        /* now compare streaming */
        ScaLBL_D3Q7_Ion_Init_FromFile(gq, Ci, Np);
        M.IonTransport(gq, Cj);
        ScaLBL_D3Q19_AAodd_Compact(M.NeighborList, gq, Np);
        M.IonTransport(gq, Cj);

        /* now check that the two results agree*/
        double *fq_h, *gq_h;
        fq_h = new double [7*Np];
        gq_h = new double [7*Np];
        ScaLBL_CopyToHost(fq_h, fq, 7*sizeof(double) * Np);
        ScaLBL_CopyToHost(gq_h, gq, 7*sizeof(double) * Np);
        for (int n = 0; n<Np; n++){
            for (int q=0; q<7; q++){
                double gval = gq_h[q*Np + n];
                double fval = fq_h[q*Np + n];
                if (gval != fval ){
                    printf(" Membrane streaming mismatch at q=%i, n=%i \n",q,n);
                    printf("   .... gq = %f, fq = %f \n",gval, fval);
                    printf("   (unit test will fail) \n");
                    check = 3;
                }
            }
        }

        // Per-site sum over q of (gq - fq), written out for inspection
        DoubleArray MembraneErrors(Nx,Ny,Nz);
        for (k=1;k<Nz-1;k++){
            for (j=1;j<Ny-1;j++){
                for (i=1;i<Nx-1;i++){
                    int idx = Map(i,j,k);
                    MembraneErrors(i,j,k) = 0.0;
                    if (idx < 0) continue;
                    for (int q=0; q<7; q++){
                        double gval = gq_h[q*Np + idx];
                        double fval = fq_h[q*Np + idx];
                        MembraneErrors(i,j,k) += gval - fval;
                    }
                }
            }
        }
        // NOTE(review): unlike the two outputs above, this file is written
        // regardless of argc.
        Dm->AggregateLabels("membrane3.raw",MembraneErrors);

        //...........................................................................
        // Update GPU data structures
        if (rank==0)	printf ("Setting up device map and neighbor list \n");
        int *TmpMap;
        TmpMap=new int[Np];   // one entry per compact index
        for (k=1; k<Nz-1; k++){
            for (j=1; j<Ny-1; j++){
                for (i=1; i<Nx-1; i++){
                    int idx=Map(i,j,k);
                    if (!(idx < 0))
                        TmpMap[idx] = k*Nx*Ny+j*Nx+i;
                }
            }
        }
        ScaLBL_CopyToDevice(dvcMap, TmpMap, sizeof(int)*Np);
        ScaLBL_DeviceBarrier();

        // Create a dummy distribution data structure
        double *fq_host;
        fq_host = new double[19*Np];
        if (rank==0)	printf ("Setting up Np=%i distributions \n",Np);
        for (k=1; k<Nz-1; k++){
            for (j=1; j<Ny-1; j++){
                for (i=1; i<Nx-1; i++){
                    int idx=Map(i,j,k);
                    if (!(idx<0)){
                        for (int q=0; q<19; q++){
                            fq_host[q*Np+idx]=(k*Nx*Ny+j*Nx+i)+0.01*q;
                        }
                    }
                }
            }
        }

        /* Run dummy communications */
        /*initialize fq from host data */
        ScaLBL_CopyToDevice(fq, fq_host, sizeof(double)*7*Np);

        M.SendD3Q7AA(&fq[0]);
        M.RecvD3Q7AA(&gq[0]);
        // this has only the communicated values
        //ScaLBL_CopyToHost(fq_host, gq, sizeof(double)*7*Np);
        if (rank==0)	printf ("Sum result \n");

        ScaLBL_D3Q7_AAeven_IonConcentration(&gq[0 * Np * 7], &Ci[0 * Np],
                                            0, ScaLBL_Comm->LastExterior(),
                                            Np);
        DoubleArray Result(Nx,Ny,Nz);

        ScaLBL_Comm->RegularLayout(Map, Ci, Result);

        /*		for (k=1; k<Nz-1; k++){
        for (j=1; j<Ny-1; j++){
        for (i=1; i<Nx-1; i++){
        int idx=Map(i,j,k);
        double sum = 0.0;
        if (!(idx<0)){
        for (int q=1; q<3; q++){
        sum += fq_host[q*Np+idx];
        }
        Result[k*Nx*Ny+j*Nx+i] = sum;
        }
        }
        }
        }
        */

        // Write raw output files; skip the write if the file cannot be opened
        FILE *OUTFILE;
        OUTFILE = fopen("D3Q7.raw","wb");
        if (OUTFILE != NULL){
            fwrite(Result.data(),8,Nx*Ny*Nz,OUTFILE);
            fclose(OUTFILE);
        }
        else {
            printf("WARNING: unable to open D3Q7.raw for writing \n");
        }

        FILE *MAPFILE;
        MAPFILE = fopen("Map.raw","wb");
        if (MAPFILE != NULL){
            fwrite(Map.data(),4,Nx*Ny*Nz,MAPFILE);
            fclose(MAPFILE);
        }
        else {
            printf("WARNING: unable to open Map.raw for writing \n");
        }

        // Release host-side work arrays
        delete [] TmpMap;
        delete [] fq_host;
        delete [] fq_h;
        delete [] gq_h;
        delete [] Ci_host;
        delete [] MembraneLinks;
        delete [] dist;
        delete [] neighborList;
    }
    Utilities::shutdown();

    return check;
}

View File

@ -168,6 +168,7 @@ int main(int argc, char **argv)
// Averages->Reduce();
Object->MeasureObject();
//Object->MeasureConnectedPathway();
double Vi = Object->V();
double Ai = Object->A();
double Hi = Object->H();

Some files were not shown because too many files have changed in this diff Show More